Dear list,
I'm trying to do a set of generic functions do make contingency tables from
data.frames. It is just running "nice" (I'm learning R), but I think it can be
better.
I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.
Below one of the my functions ('er' is a mention to EasieR, because I'm trying
to do a package for myself and the my students):
#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)------------
er.table.df.br <- function(df,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
if (is.data.frame(df) != 'TRUE')
stop('need "data.frame" data')
dim_df <- dim(df)
tmpList <- list()
for (i in 1:dim_df[2]) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
k <- switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott' = nclass.scott(x),
'FD' = nclass.FD(x),
stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
tmp <- range(x)
classIni <- tmp[1] - tmp[2]/100
classEnd <- tmp[2] + tmp[2]/100
R <- classEnd-classIni
h <- R/k
# Absolut frequency
f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))
# Relative frequency
fr <- f/length(x)
# Relative frequency, %
frP <- 100*(f/length(x))
# Cumulative frequency
fac <- cumsum(f)
# Cumulative frequency, %
facP <- 100*(cumsum(f/length(x)))
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
# Table
res <- data.frame(fi, fr, frP, fac, facP)
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
tmpList <- c(tmpList, list(res))
}
names(tmpList) <- names(df)
return(tmpList)
}
To try the function:
#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)
#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)
Could anyone give me a hint how to work around this?
PS: Excuse my bad English ;-)
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
joseclaudio.faria at terra.com.br
jc_faria at uesc.br
jc_faria at uol.com.br
On 5/24/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
Dear list,
I'm trying to do a set of generic functions do make contingency tables from
data.frames. It is just running "nice" (I'm learning R), but I think it can be
better.
I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.
Below one of the my functions ('er' is a mention to EasieR, because I'm trying
to do a package for myself and the my students):
#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)------------
er.table.df.br <- function(df,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
if (is.data.frame(df) != 'TRUE')
stop('need "data.frame" data')
dim_df <- dim(df)
tmpList <- list()
for (i in 1:dim_df[2]) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
k <- switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott' = nclass.scott(x),
'FD' = nclass.FD(x),
stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
tmp <- range(x)
classIni <- tmp[1] - tmp[2]/100
classEnd <- tmp[2] + tmp[2]/100
R <- classEnd-classIni
h <- R/k
# Absolut frequency
f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))
# Relative frequency
fr <- f/length(x)
# Relative frequency, %
frP <- 100*(f/length(x))
# Cumulative frequency
fac <- cumsum(f)
# Cumulative frequency, %
facP <- 100*(cumsum(f/length(x)))
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
# Table
res <- data.frame(fi, fr, frP, fac, facP)
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
tmpList <- c(tmpList, list(res))
}
names(tmpList) <- names(df)
return(tmpList)
}
To try the function:
#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)
#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)
Try this:
sapply(my.data.frame, is.numeric)
Also you might want to look up:
?match.arg
?stopifnot
?ncol
?sapply
?lapply
On 5/24/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
Dear list,
I'm trying to do a set of generic functions do make contingency tables from
data.frames. It is just running "nice" (I'm learning R), but I think it can be
better.
I would like to filter the data.frame, i.e, eliminate all not numeric variables.
And I don't know how to make it: please, help me.
Below one of the my functions ('er' is a mention to EasieR, because I'm trying
to do a package for myself and the my students):
#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)------------
er.table.df.br <- function(df,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
if (is.data.frame(df) != 'TRUE')
stop('need "data.frame" data')
dim_df <- dim(df)
tmpList <- list()
for (i in 1:dim_df[2]) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
k <- switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott' = nclass.scott(x),
'FD' = nclass.FD(x),
stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
tmp <- range(x)
classIni <- tmp[1] - tmp[2]/100
classEnd <- tmp[2] + tmp[2]/100
R <- classEnd-classIni
h <- R/k
# Absolut frequency
f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))
# Relative frequency
fr <- f/length(x)
# Relative frequency, %
frP <- 100*(f/length(x))
# Cumulative frequency
fac <- cumsum(f)
# Cumulative frequency, %
facP <- 100*(cumsum(f/length(x)))
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
# Table
res <- data.frame(fi, fr, frP, fac, facP)
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
tmpList <- c(tmpList, list(res))
}
names(tmpList) <- names(df)
return(tmpList)
}
To try the function:
#a) runing nice
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
mydf=data.frame(y1, y2, y3)
#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
print(tbdf)
#b) One of the problems
y1=rnorm(100, 10, 1)
y2=rnorm(100, 58, 4)
y3=rnorm(100, 500, 10)
y4=rep(letters[1:10], 10)
mydf=data.frame(y1, y2, y3, y4)
tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
print(tbdf)
Try this:
sapply(my.data.frame, is.numeric)
Also you might want to look up:
?match.arg
?stopifnot
?ncol
?sapply
?lapply
Thanks Gabor, you suggestion solve my basic problem.
I'm working is same basic (but I think useful) functions
for begginiers.
Below you can see the set of functions:
#######################
# EasyeR - Package #
#######################
# Common function---------------------------------------------------------------
er.make.table <- function(x,
classIni,
classEnd,
h,
right) {
# Absolut frequency
f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))
# Relative frequency
fr <- f/length(x)
# Relative frequency, %
frP <- 100*(f/length(x))
# Cumulative frequency
fac <- cumsum(f)
# Cumulative frequency, %
facP <<- 100*(cumsum(f/length(x)))
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
# Table
res <- data.frame(fi, fr, frP, fac, facP)
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
return(res)
}
#1. Tables from vectors
#1.1---er.table.br (User define breaks and right)-------------------------------
er.table.br <- function(x,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
x <- na.omit(x)
k <- switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott' = nclass.scott(x),
'FD' = nclass.FD(x),
stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
tmp <- range(x)
classIni <- tmp[1] - abs(tmp[2])/100
classEnd <- tmp[2] + abs(tmp[2])/100
R <- classEnd-classIni
h <- R/k
tbl <- er.make.table(x, classIni, classEnd, h, right)
return(tbl)
}
#1.2---er.table.kr (User define the class number (k) and right)-----------------
er.table.kr <- function(x,
k,
right = FALSE) {
if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
if ((k == '') || (k == ' ')) stop('k not defined')
x <- na.omit(x)
tmp <- range(x)
classIni <- tmp[1] - abs(tmp[2])/100
classEnd <- tmp[2] + abs(tmp[2])/100
R <- classEnd-classIni
h <- R/k
tbl <- er.make.table(x, classIni, classEnd, h, right)
return(tbl)
}
#1.3---er.table.ier (User define the classIni, classEnd and right)--------------
er.table.ier <- function(x,
classIni,
classEnd,
right = FALSE) {
if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')
if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')
x <- na.omit(x)
tmp <- range(x)
R <- classEnd-classIni
k <- sqrt(abs(R))
if (k < 5) k <- 5
h <- R/k
tbl <- er.make.table(x, classIni, classEnd, h, right)
return(tbl)
}
#1.4---er.table.all (User define classIni, ClassEnd, h and right)---------------
er.table.iehr <- function(x,
classIni,
classEnd,
h,
right=FALSE) {
if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')
if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')
if ((h == '') || (h == ' ')) stop('h not defined')
x <- na.omit(x)
tbl <- er.make.table(x, classIni, classEnd, h, right)
return(tbl)
}
#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)----------------------------
er.table.df.br <- function(df,
breaks = c('Sturges', 'Scott', 'FD'),
right = FALSE) {
tmpList <- list()
if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')
logCol <- sapply(df, is.numeric)
dim_df <- dim(df)
for (i in 1:dim_df[2]) {
if (logCol[i]!=FALSE) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
k <- switch(breaks[1],
'Sturges' = nclass.Sturges(x),
'Scott' = nclass.scott(x),
'FD' = nclass.FD(x),
stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
tmp <- range(x)
classIni <- tmp[1] - abs(tmp[2])/100
classEnd <- tmp[2] + abs(tmp[2])/100
R <- classEnd-classIni
h <- R/k
tbl <- er.make.table(x, classIni, classEnd, h, right)
tmpList <- c(tmpList, list(tbl))
}
}
valCol <- logCol[logCol!=FALSE]
names(tmpList) <- names(valCol)
return(tmpList)
}
#2.2---er.table.df.kr (User define the class number (k) and right)--------------
er.table.df.kr <- function(df,
k,
right = FALSE) {
if ((k == '') || (k == ' ')) stop('k not defined')
if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')
tmpList <- list()
logCol <- sapply(df, is.numeric)
dim_df <- dim(df)
for (i in 1:dim_df[2]) {
if (logCol[i]!=FALSE) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
tmp <- range(x)
classIni <- tmp[1] - abs(tmp[2])/100
classEnd <- tmp[2] + abs(tmp[2])/100
R <- classEnd-classIni
h <- R/k
tbl <- er.make.table(x, classIni, classEnd, h, right)
tmpList <- c(tmpList, list(tbl))
}
}
valCol <- logCol[logCol!=FALSE]
names(tmpList) <- names(valCol)
return(tmpList)
}
Best regards,
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
joseclaudio.faria at terra.com.br
jc_faria at uesc.br
jc_faria at uol.com.br
tel: 73-3634.2779
The final version with the help of Gabor Grotendieck (thanks Gabor, very much!)
#######################
# EasieR - Package #
#######################
# Common function
er.make.table <- function(x,
start,
end,
h,
right) {
# Absolut frequency
f <- table(cut(x, br=seq(start, end, h), right=right))
# Relative frequency
fr <- f/length(x)
# Relative frequency, %
frP <- 100*(f/length(x))
# Cumulative frequency
fac <- cumsum(f)
# Cumulative frequency, %
facP <<- 100*(cumsum(f/length(x)))
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
# Make final table
res <- data.frame(fi, fr, frP, fac, facP)
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
return(res)
}
#With Gabor Grotendieck suggestions (thanks Gabor, very much!)
er.table <- function(x, ...) UseMethod("er.table")
er.table.default <- function(x,
k,
start,
end,
h,
breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE) {
#User define nothing or not 'x' isn't numeric -> stop
stopifnot(is.numeric(x))
#User define only 'x'
#(x, {k, start, end, h}, [breaks, right])
if (missing(k) && missing(start) && missing(end) && missing(h) ){
x <- na.omit(x)
brk <- match.arg(breaks)
switch(brk,
Sturges = k <- nclass.Sturges(x),
Scott = k <- nclass.scott(x),
FD = k <- nclass.FD(x))
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/k
}
#User define 'x' and 'k'
#(x, k, {start, end, h}, [breaks, right])
else if (missing(start) && missing(end) && missing(h)) {
stopifnot(length(k) >= 1)
x <- na.omit(x)
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/abs(k)
}
#User define 'x', 'start' and 'end'
#(x, {k,} start, end, {h,} [breaks, right])
else if (missing(k) && missing(h)) {
stopifnot(length(start) >= 1, length(end) >=1)
x <- na.omit(x)
tmp <- range(x)
R <- end-start
k <- sqrt(abs(R))
if (k < 5) k <- 5 #min value of k
h <- R/k
}
#User define 'x', 'start', 'end' and 'h'
#(x, {k,} start, end, h, [breaks, right])
else if (missing(k)) {
stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)
x <- na.omit(x)
}
else stop('Error, please, see the function sintax!')
tbl <- er.make.table(x, start, end, h, right)
return(tbl)
}
er.table.data.frame <- function(df,
k,
breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE) {
stopifnot(is.data.frame(df))
tmpList <- list()
logCol <- sapply(df, is.numeric)
for (i in 1:ncol(df)) {
if (logCol[i]) {
x <- as.matrix(df[ ,i])
x <- na.omit(x)
#User define only x and/or 'breaks'
#(x, {k,}[breaks, right])
if (missing(k)) {
brk <- match.arg(breaks)
switch(brk,
Sturges = k <- nclass.Sturges(x),
Scott = k <- nclass.scott(x),
FD = k <- nclass.FD(x))
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/k
}
#User define 'x' and 'k'
#(x, k,[breaks, right])
else {
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/abs(k)
}
tbl <- er.make.table(x, start, end, h, right)
tmpList <- c(tmpList, list(tbl))
}
}
valCol <- logCol[logCol]
names(tmpList) <- names(valCol)
return(tmpList)
}
Best,
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
joseclaudio.faria at terra.com.br
jc_faria at uesc.br
jc_faria at uol.com.br
tel: 73-3634.2779