Skip to content
Prev 13674 / 398502 Next

G-test : log-likelihood ratio test

I've written a g.test() aka log-likelihood ratio test function for my
opwn use.  It's something I've seen requested (and looked to find
myself) on this list a few times.

It has the same basic syntax as chisq.test().
It does both goodness of fit tests and tests of independence.
Yates' and Williams' corrections are implemented.

I've put some examples from Sokal & Rohlf up at
http://www.psych.ualberta.ca/~phurd/cruft

Feedback welcomed,
-P.

# Kludgy log-likelihood test of independence & goodness of fit
# Does Williams' and Yates' correction
#
# G (TOI) calculation from Zar (2000) Biostatistical Analysis 4th ed.
# G (GOF) calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# q calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# TOI Yates' correction taken from Mike Camann's 2x2 G-test fn.
# GOF Yates' correction as described in Zar (2000)
# more stuff taken from ctest's chisq.test()
#
# ToDo:
# 1) Beautify
# 2) Add warnings for violations
# 3) Make appropriate corrections happen by default
#
# V2.1 Pete Hurd Sept 20 2001.

g.test <- function(x, y = NULL, correct="none", p = rep(1/length(x), length(x)))
{
  DNAME <- deparse(substitute(x))
  if (is.data.frame(x)) x <- as.matrix(x)
  if (is.matrix(x)) {
    if (min(dim(x)) == 1) 
      x <- as.vector(x)
  }
  if (!is.matrix(x) && !is.null(y)) {
    if (length(x) != length(y)) 
      stop("x and y must have the same length")
    DNAME <- paste(DNAME, "and", deparse(substitute(y)))
    OK <- complete.cases(x, y)
    x <- as.factor(x[OK])
    y <- as.factor(y[OK])
    if ((nlevels(x) < 2) || (nlevels(y) < 2)) 
      stop("x and y must have at least 2 levels")
    x <- table(x, y)
  }
  if (any(x < 0) || any(is.na(x))) 
    stop("all entries of x must be nonnegative and finite")
  if ((n <- sum(x)) == 0) 
    stop("at least one entry of x must be positive")
  #If x is matrix, do test of independence
  if (is.matrix(x)) {
    #this block was the separate g.stat function
    cell.tot <- row.tot <- col.tot <- grand.tot <- 0
    nrows<-nrow(x)
    ncols<-ncol(x)
    if (correct=="yates"){ # Do Yates' correction
      if(dim(x)[1]!=2 || dim(x)[2]!=2) # check for 2x2 matrix
        stop("Yates' correction requires a 2 x 2 matrix")
      if((x[1,1]*x[2,2])-(x[1,2]*x[2,1]) > 0)
        {
          x[1,1] <- x[1,1] - 0.5
          x[2,2] <- x[2,2] - 0.5
          x[1,2] <- x[1,2] + 0.5
          x[2,1] <- x[2,1] + 0.5
        }
      else
        {
          x[1,1] <- x[1,1] + 0.5
          x[2,2] <- x[2,2] + 0.5
          x[1,2] <- x[1,2] - 0.5
          x[2,1] <- x[2,1] - 0.5
        }
    }
    # calculate G (Zar, 2000)
    for (i in 1:nrows){
      for (j in 1:ncols){
        if (x[i,j] != 0) cell.tot <- cell.tot + x[i,j] * log10(x[i,j])
      }
    }
    for (i in 1:nrows){ row.tot <- row.tot + (sum(x[i,])) * log10(sum(x[i,])) }
    for (j in 1:ncols){ col.tot <- col.tot + (sum(x[,j])) * log10(sum(x[,j])) }
    grand.tot <- sum(x)*log10(sum(x))
    total <- cell.tot - row.tot - col.tot + grand.tot
    G <- 4.60517 * total
    q <- 1
    if (correct=="williams"){ # Do Williams' correction
      row.tot <- col.tot <- 0    
      for (i in 1:nrows){ row.tot <- row.tot + 1/(sum(x[i,])) }
      for (j in 1:ncols){ col.tot <- col.tot + 1/(sum(x[,j])) }
      q <- 1+ ((n*row.tot-1)*(n*col.tot-1))/(6*n*(ncols-1)*(nrows-1))
    }
    G <- (G/q)
    # end of old g.stat function
    
    STATISTIC <- G
    PARAMETER <- (nrow(x)-1)*(ncol(x)-1)
    PVAL <- 1-pchisq(STATISTIC,df=PARAMETER)
    if(correct=="none")
      METHOD <- "Log likelihood ratio (G-test) test of independence without correction"
    if(correct=="williams")
      METHOD <- "Log likelihood ratio (G-test) test of independence with Williams' correction"
    if(correct=="yates")
      METHOD <- "Log likelihood ratio (G-test) test of independence with Yates' correction"
  }
  else {
    # x is not a matrix, so we do Goodness of Fit
    METHOD <- "Log likelihood ratio (G-test) goodness of fit test"
    if (length(x) == 1) 
      stop("x must at least have 2 elements")
    if (length(x) != length(p)) 
      stop("x and p must have the same number of elements")
    E <- n * p
    
    if (correct=="yates"){ # Do Yates' correction
      if(length(x)!=2)
        stop("Yates' correction requires 2 data values")
      if ( (x[1]-E[1]) > 0.25) {
        x[1] <- x[1]-0.5
        x[2] <- x[2]+0.5
      }
      else if ( (E[1]-x[1]) > 0.25){
        x[1] <- x[1]+0.5
        x[2] <- x[2]-0.5
      }
    }
    names(E) <- names(x)
    tot <- 0
    for (i in 1:length(x)){
      if (x[i] != 0) tot <- tot + x[i] * log(x[i]/E[i])
    }
    G <- (2*tot)
    if (correct=="williams"){ # Do Williams' correction
      q <- 1+(length(x)+1)/(6*n)
      G <- (G/q)
    }
    STATISTIC <- (G)
    PARAMETER <- length(x) - 1
    PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE)
  }
  names(STATISTIC) <- "Log likelihood ratio statistic (G)"
  names(PARAMETER) <- "X-squared df"
  names(PVAL) <- "p.vlaue"
  structure(list(statistic=STATISTIC,parameter=PARAMETER,p.value=PVAL,
                 method=METHOD,data.name=DNAME),class="htest")
}