Skip to content

Adding Functionality to stat.table in Epi

2 messages · Greg Snow, Rick Bilonick

#
After you copy stat.table to stat.table2 and modify stat.table2
try:
(you should only need to do that 1 time after creating/editing
stat.table2).

hope this helps,

Greg Snow, Ph.D.
Statistical Data Center, LDS Hospital
Intermountain Health Care
greg.snow at ihc.com
(801) 408-8111
The stat.table function in the Epi package won't do standard
deviations.
It didn't seem that it would be difficult to add an "sd" function to
the
stat.table function. Following the example for the mean, I set up a
similar function for the sd (and included it as an options) but it
just
won't work. (I tried sending messages to the Epi mailing list after
subscribing but my mail is always returned. I don't have the exact
error
messages at the moment or I would post them.)

Even if I just copy stat.table to stat.table2 and try to run
stat.table2,
I get:
stat.table2(index=list(race,gender),list(count(),percent(race)),margins=TRUE)
Error: couldn't find function "array.subset"

I can't find any "array.subset" function, yet the original stat.table
works just fine.

I've copied other functions and made changes to them and they would
work
just fine. I must be missing something here.

Any insights would be appreciated.

Rick B.

______________________________________________
R-help at stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help 
PLEASE do read the posting guide!
http://www.R-project.org/posting-guide.html
#
Thanks Greg. That helps but I still get the following error message:
Error in if (digits < 0) digits <- 6 : missing value where TRUE/FALSE needed

Rick

Below is the code (sorry it's kind of long). The mean function works but
the sd function produces the error message:

stat.table2 <- function (index, contents = count(), data, margins = FALSE)
{
    index.sub <- substitute(index)
    index <- if (missing(data))
        eval(index)
    else eval(index.sub, data)
    deparse.name <- function(x) if (is.symbol(x))
        as.character(x)
    else ""
    if (is.list(index)) {
        if (is.call(index.sub)) {
            index.names <- names(index.sub)
            fixup <- if (is.null(index.names))
                seq(along = index.sub)
            else index.names == ""
            dep <- sapply(index.sub[fixup], deparse.name)
            if (is.null(index.names))
                index.labels <- dep
            else {
                index.labels <- index.names
                index.labels[fixup] <- dep
            }
            index.labels <- index.labels[-1]
        }
        else {
            index.labels <- if (!is.null(names(index))) {
                names(index)
            }
            else {
                rep("", length(index))
            }
        }
    }
    else {
        index.labels <- deparse.name(index.sub)
    }
    if (!is.list(index))
        index <- list(index)
    index <- lapply(index, as.factor)
    contents <- substitute(contents)
    if (!identical(deparse(contents[[1]]), "list")) {
        contents <- call("list", contents)
    }
    valid.functions <- c("count", "mean", "sd","weighted.mean", "sum",
        "quantile", "median", "IQR", "max", "min", "ratio", "percent")
    table.fun <- character(length(contents) - 1)
    for (i in 2:length(contents)) {
        if (!is.call(contents[[i]]))
            stop("contents must be a list of function calls")
        FUN <- deparse(contents[[i]][[1]])
        if (!FUN %in% valid.functions)
            stop(paste("Function", FUN, "not permitted in stat.table"))
        else table.fun[i - 1] <- FUN
    }
    stat.labels <- sapply(contents, deparse)[-1]
    content.names <- names(contents)
    if (!is.null(content.names)) {
        for (i in 2:length(content.names)) {
            if (nchar(content.names[i]) > 0)
                stat.labels[i - 1] <- content.names[i]
        }
    }
    count <- function(id) {
        if (missing(id)) {
            id <- seq(along = index[[1]])
        }
        y <- tapply(id, INDEX = subindex, FUN = function(x)
length(unique(x)))
        y[is.na(y)] <- 0
        return(y)
    }
    mean <- function(x, trim = 0, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = base::mean, trim = trim,
            na.rm = na.rm)
    }
    sd <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::sd,
            na.rm = na.rm)
    }

    weighted.mean <- function(x, w, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::weighted.mean,
            w = w, na.rm = na.rm)
    }
    sum <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::sum, na.rm = na.rm)
    }
    quantile <- function(x, probs, na.rm = TRUE, names = TRUE,
        type = 7, ...) {
        if (length(probs > 1))
            stop("The quantile function only accepts scalar prob values
within stat.table")
        tapply(x, INDEX = subindex, FUN = stats::quantile, probs = prob,
            na.rm = na.rm, names = names, type = type, ...)
    }
    median <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::median, na.rm = na.rm)
    }
    IQR <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::IQR, na.rm = na.rm)
    }
    max <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::max, na.rm = na.rm)
    }
    min <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::min, na.rm = na.rm)
    }
    ratio <- function(d, y, scale = 1, na.rm = TRUE) {
        if (length(scale) != 1)
            stop("Scale parameter must be a scalar")
        if (na.rm) {
            w <- (!is.na(d) & !is.na(y))
            tab1 <- tapply(d * w, INDEX = subindex, FUN = base::sum,
                na.rm = TRUE)
            tab2 <- tapply(y * w, INDEX = subindex, FUN = base::sum,
                na.rm = TRUE)
        }
        else {
            tab1 <- tapply(d, INDEX = subindex, FUN = base::sum,
                na.rm = FALSE)
            tab2 <- tapply(y, INDEX = subindex, FUN = base::sum,
                na.rm = FALSE)
        }
        return(scale * tab1/tab2)
    }
    percent <- function(...) {
        x <- list(...)
        if (length(x) == 0)
            stop("No variables to calculate percent")
        n <- count()
        sweep.index <- logical(length(subindex))
        for (i in seq(along = subindex)) {
            sweep.index[i] <- !any(sapply(x, identical, subindex[[i]]))
        }
        if (!any(sweep.index)) {
            return(100 * n/base::sum(n, na.rm = TRUE))
        }
        else {
            margin <- apply(n, which(sweep.index), base::sum,
                na.rm = TRUE)
            margin[margin == 0] <- NA
            return(100 * sweep(n, which(sweep.index), margin,
                "/"))
        }
    }
    n.dim <- length(index)
    tab.dim <- sapply(index, nlevels)
    if (length(margins) == 1)
        margins <- rep(margins, n.dim)
    else if (length(margins) != n.dim)
        stop("Incorrect length for margins argument")
    fac.list <- vector("list", n.dim)
    for (i in 1:n.dim) {
        fac.list[[i]] <- if (margins[i])
            c(0, 1)
        else 1
    }
    subtable.grid <- as.matrix(expand.grid(fac.list))
    ans.dim <- c(length(contents) - 1, tab.dim + margins)
    ans <- numeric(prod(ans.dim))
    for (i in 1:nrow(subtable.grid)) {
        in.subtable <- as.logical(subtable.grid[i, ])
        llim <- rep(1, n.dim) + ifelse(in.subtable, rep(0, n.dim),
            tab.dim)
        ulim <- tab.dim + ifelse(in.subtable, rep(0, n.dim),
            rep(1, n.dim))
        subindex <- index[in.subtable]
        subtable.list <- if (missing(data))
            eval(contents)
        else eval(as.expression(contents), data)
        for (j in 1:length(subtable.list)) {
            ans[array.subset(ans.dim, c(j, llim), c(j, ulim))] <-
subtable.list[[j]]
        }
    }
    ans <- array(ans, dim = ans.dim)
    ans.dimnames <- lapply(index, levels)
    names(ans.dimnames) <- index.labels
    for (i in 1:length(index)) {
        if (margins[i])
            ans.dimnames[[i]] <- c(ans.dimnames[[i]], "Total")
    }
    dimnames(ans) <- c(list(contents = stat.labels), ans.dimnames)
    attr(ans, "table.fun") <- table.fun
    class(ans) <- c("stat.table", class(ans))
    return(ans)
}
environment(stat.table2) <- environment(stat.table)

stat.table2(index=list(race),list(count(),mean(age.at.scanning)),margins=TRUE)

stat.table2(index=list(race),list(count(),sd(age.at.scanning)),margins=TRUE)