Skip to content

Contingency tables from data.frames

4 messages · Gabor Grothendieck, Jose Claudio Faria

#
Dear list,

I'm trying to do a set of generic functions do make contingency tables from 
data.frames. It is just running "nice" (I'm learning R), but I think it can be 
better.

I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.

Below one of the my functions ('er' is a mention to EasieR, because I'm trying 
to do a package for myself and the my students):

#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)------------
er.table.df.br <- function(df,
                            breaks = c('Sturges', 'Scott', 'FD'),
                            right = FALSE) {

   if (is.data.frame(df) != 'TRUE')
     stop('need "data.frame" data')

   dim_df <- dim(df)

   tmpList <- list()

   for (i in 1:dim_df[2]) {

     x <- as.matrix(df[ ,i])
     x <- na.omit(x)

     k <- switch(breaks[1],
                 'Sturges' = nclass.Sturges(x),
                 'Scott'   = nclass.scott(x),
                 'FD'      = nclass.FD(x),
                 stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))

     tmp      <- range(x)
     classIni <- tmp[1] - tmp[2]/100
     classEnd <- tmp[2] + tmp[2]/100
     R        <- classEnd-classIni
     h        <- R/k

     # Absolut frequency
     f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))

     # Relative frequency
     fr <- f/length(x)

     # Relative frequency, %
     frP <- 100*(f/length(x))

     # Cumulative frequency
     fac <- cumsum(f)

     # Cumulative frequency, %
     facP <- 100*(cumsum(f/length(x)))

     fi   <- round(f, 2)
     fr   <- round(as.numeric(fr), 2)
     frP  <- round(as.numeric(frP), 2)
     fac  <- round(as.numeric(fac), 2)
     facP <- round(as.numeric(facP),2)

     # Table
     res <- data.frame(fi, fr, frP, fac, facP)
     names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
     tmpList <- c(tmpList, list(res))
   }
   names(tmpList) <- names(df)
   return(tmpList)
}

To try the function:

#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)


#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)

Could anyone give me a hint how to work around this?

PS: Excuse my bad English ;-)
#
On 5/24/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
Try this:

sapply(my.data.frame, is.numeric)

Also you might want to look up:

?match.arg
?stopifnot
?ncol
?sapply
?lapply
#
Gabor Grothendieck wrote:
Thanks Gabor, you suggestion solve my basic problem.
I'm working is same basic (but I think useful) functions
for begginiers.

Below you can see the set of functions:

#######################
#   EasyeR - Package  #
#######################

# Common function---------------------------------------------------------------
er.make.table <- function(x,
                           classIni,
                           classEnd,
                           h,
                           right) {
   # Absolut frequency
   f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))

   # Relative frequency
   fr <- f/length(x)

   # Relative frequency, %
   frP <- 100*(f/length(x))

   # Cumulative frequency
   fac <- cumsum(f)

   # Cumulative frequency, %
   facP <<- 100*(cumsum(f/length(x)))

   fi   <- round(f, 2)
   fr   <- round(as.numeric(fr), 2)
   frP  <- round(as.numeric(frP), 2)
   fac  <- round(as.numeric(fac), 2)
   facP <- round(as.numeric(facP),2)

   # Table
   res <- data.frame(fi, fr, frP, fac, facP)
   names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
   return(res)
}


#1. Tables from vectors
#1.1---er.table.br (User define breaks and right)-------------------------------
er.table.br <- function(x,
                         breaks = c('Sturges', 'Scott', 'FD'),
                         right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')

   x <- na.omit(x)

   k <- switch(breaks[1],
               'Sturges' = nclass.Sturges(x),
               'Scott'   = nclass.scott(x),
               'FD'      = nclass.FD(x),
               stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))

   tmp      <- range(x)
   classIni <- tmp[1] - abs(tmp[2])/100
   classEnd <- tmp[2] + abs(tmp[2])/100
   R        <- classEnd-classIni
   h        <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)
   return(tbl)
}


#1.2---er.table.kr (User define the class number (k) and right)-----------------
er.table.kr <- function(x,
                         k,
                         right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
   if ((k == '') || (k == ' ')) stop('k not defined')

   x <- na.omit(x)

   tmp      <- range(x)
   classIni <- tmp[1] - abs(tmp[2])/100
   classEnd <- tmp[2] + abs(tmp[2])/100
   R        <- classEnd-classIni
   h        <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)
   return(tbl)
}

#1.3---er.table.ier (User define the classIni, classEnd and right)--------------
er.table.ier <- function(x,
                          classIni,
                          classEnd,
                          right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
   if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')
   if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')

   x <- na.omit(x)

   tmp <- range(x)
   R   <- classEnd-classIni
   k   <- sqrt(abs(R))
   if (k < 5)  k <- 5
   h   <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)
   return(tbl)
}


#1.4---er.table.all (User define classIni, ClassEnd, h and right)---------------
er.table.iehr <- function(x,
                          classIni,
                          classEnd,
                          h,
                          right=FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
   if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')
   if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')
   if ((h == '')        || (h == ' '))        stop('h not defined')

   x <- na.omit(x)

   tbl <- er.make.table(x, classIni, classEnd, h, right)
   return(tbl)
}

#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)----------------------------
er.table.df.br <- function(df,
                            breaks = c('Sturges', 'Scott', 'FD'),
                            right = FALSE) {

   tmpList <- list()

   if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')

   logCol  <- sapply(df, is.numeric)
   dim_df  <- dim(df)

   for (i in 1:dim_df[2]) {
     if (logCol[i]!=FALSE) {
       x <- as.matrix(df[ ,i])
       x <- na.omit(x)

       k <- switch(breaks[1],
                   'Sturges' = nclass.Sturges(x),
                   'Scott'   = nclass.scott(x),
                   'FD'      = nclass.FD(x),
                   stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))

       tmp      <- range(x)
       classIni <- tmp[1] - abs(tmp[2])/100
       classEnd <- tmp[2] + abs(tmp[2])/100
       R        <- classEnd-classIni
       h        <- R/k

       tbl <- er.make.table(x, classIni, classEnd, h, right)
       tmpList <- c(tmpList, list(tbl))
     }
   }
   valCol <- logCol[logCol!=FALSE]
   names(tmpList) <- names(valCol)
   return(tmpList)
}


#2.2---er.table.df.kr (User define the class number (k) and right)--------------
er.table.df.kr <- function(df,
                            k,
                            right = FALSE) {

   if ((k == '') || (k == ' ')) stop('k not defined')
   if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')

   tmpList <- list()

   logCol  <- sapply(df, is.numeric)
   dim_df  <- dim(df)

   for (i in 1:dim_df[2]) {
     if (logCol[i]!=FALSE) {
       x <- as.matrix(df[ ,i])
       x <- na.omit(x)

       tmp      <- range(x)
       classIni <- tmp[1] - abs(tmp[2])/100
       classEnd <- tmp[2] + abs(tmp[2])/100
       R        <- classEnd-classIni
       h        <- R/k

       tbl <- er.make.table(x, classIni, classEnd, h, right)
       tmpList <- c(tmpList, list(tbl))
     }
   }
   valCol <- logCol[logCol!=FALSE]
   names(tmpList) <- names(valCol)
   return(tmpList)
}

Best regards,
1 day later
#
The final version with the help of Gabor Grotendieck (thanks Gabor, very much!)

#######################
#   EasieR - Package  #
#######################

# Common function
er.make.table <- function(x,
                           start,
                           end,
                           h,
                           right) {
   # Absolut frequency
   f <- table(cut(x, br=seq(start, end, h), right=right))

   # Relative frequency
   fr <- f/length(x)

   # Relative frequency, %
   frP <- 100*(f/length(x))

   # Cumulative frequency
   fac <- cumsum(f)

   # Cumulative frequency, %
   facP <<- 100*(cumsum(f/length(x)))

   fi   <- round(f, 2)
   fr   <- round(as.numeric(fr), 2)
   frP  <- round(as.numeric(frP), 2)
   fac  <- round(as.numeric(fac), 2)
   facP <- round(as.numeric(facP),2)

   # Make final table
   res <- data.frame(fi, fr, frP, fac, facP)
   names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
   return(res)

}

#With Gabor Grotendieck suggestions (thanks Gabor, very much!)
er.table <- function(x, ...) UseMethod("er.table")

er.table.default <- function(x,
                              k,
                              start,
                              end,
                              h,
                              breaks=c('Sturges', 'Scott', 'FD'),
                              right=FALSE) {

   #User define nothing or not 'x' isn't numeric -> stop
   stopifnot(is.numeric(x))

   #User define only 'x'
   #(x, {k, start, end, h}, [breaks, right])
   if (missing(k) && missing(start) && missing(end) && missing(h) ){

     x <- na.omit(x)

     brk <- match.arg(breaks)
     switch(brk,
            Sturges = k <- nclass.Sturges(x),
            Scott   = k <- nclass.scott(x),
            FD      = k <- nclass.FD(x))

     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/k

   }

   #User define 'x' and 'k'
   #(x, k, {start, end, h}, [breaks, right])
   else if (missing(start) && missing(end) && missing(h)) {

     stopifnot(length(k) >= 1)

     x <- na.omit(x)

     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/abs(k)

   }

   #User define 'x', 'start' and 'end'
   #(x, {k,} start, end, {h,} [breaks, right])
   else if (missing(k) && missing(h)) {

     stopifnot(length(start) >= 1, length(end) >=1)

     x <- na.omit(x)

     tmp <- range(x)
     R   <- end-start
     k   <- sqrt(abs(R))
     if (k < 5)  k <- 5 #min value of k
     h   <- R/k

   }

   #User define 'x', 'start', 'end' and 'h'
   #(x, {k,} start, end, h, [breaks, right])
   else if (missing(k)) {

     stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)
     x <- na.omit(x)

   }

   else stop('Error, please, see the function sintax!')

   tbl <- er.make.table(x, start, end, h, right)
   return(tbl)

}

er.table.data.frame <- function(df,
                                 k,
                                 breaks=c('Sturges', 'Scott', 'FD'),
                                 right=FALSE) {

   stopifnot(is.data.frame(df))

   tmpList <- list()
   logCol  <- sapply(df, is.numeric)

   for (i in 1:ncol(df)) {

     if (logCol[i]) {

       x <- as.matrix(df[ ,i])
       x <- na.omit(x)

       #User define only x and/or 'breaks'
       #(x, {k,}[breaks, right])
       if (missing(k)) {

         brk <- match.arg(breaks)
         switch(brk,
                Sturges = k <- nclass.Sturges(x),
                Scott   = k <- nclass.scott(x),
                FD      = k <- nclass.FD(x))

         tmp   <- range(x)
         start <- tmp[1] - abs(tmp[2])/100
         end   <- tmp[2] + abs(tmp[2])/100
         R     <- end-start
         h     <- R/k

       }

       #User define 'x' and 'k'
       #(x, k,[breaks, right])
       else {

         tmp   <- range(x)
         start <- tmp[1] - abs(tmp[2])/100
         end   <- tmp[2] + abs(tmp[2])/100
         R     <- end-start
         h     <- R/abs(k)

       }

       tbl     <- er.make.table(x, start, end, h, right)
       tmpList <- c(tmpList, list(tbl))

     }

   }

   valCol <- logCol[logCol]
   names(tmpList) <- names(valCol)
   return(tmpList)

}

Best,