Andrew Robinson
Department of Mathematics and Statistics Tel: +61-3-8344-9763
University of Melbourne, VIC 3010 Australia Fax: +61-3-8344-4599
http://www.ms.unimelb.edu.au/~andrewpr
http://blogs.mbs.edu/fishing-in-the-bay/
-------------- next part --------------
## The new function
my.tapply <- function (X, INDEX, FUN=NULL, ..., simplify=TRUE)
{
FUN <- if (!is.null(FUN)) match.fun(FUN)
if (!is.list(INDEX)) INDEX <- list(INDEX)
nI <- length(INDEX)
namelist <- vector("list", nI)
names(namelist) <- names(INDEX)
extent <- integer(nI)
nx <- length(X)
one <- as.integer(1)
group <- rep.int(one, nx)#- to contain the splitting vector
ngroup <- one
for (i in seq.int(INDEX)) {
index <- as.factor(INDEX[[i]])
if (length(index) != nx)
stop("arguments must have same length")
namelist[[i]] <- levels(index)#- all of them, yes !
extent[i] <- nlevels(index)
group <- group + ngroup * (as.integer(index) - one)
ngroup <- ngroup * nlevels(index)
}
if (is.null(FUN)) return(group)
ans <- lapply(split(X, group), FUN, ...)
index <- as.numeric(names(ans))
if (simplify && all(unlist(lapply(ans, length)) == 1)) {
ansmat <- array(dim=extent, dimnames=namelist)
ans <- unlist(ans, recursive = FALSE)
}
else {
ansmat <- array(vector("list", prod(extent)),
dim=extent, dimnames=namelist)
}
## old : ansmat[as.numeric(names(ans))] <- ans
names(ans) <- NULL
ansmat[index] <- ans
if (sum(table(INDEX) < 1) > 0)
ansmat[table(INDEX) < 1] <- do.call(FUN, list(c(NULL), ...))
ansmat
}
## Check its utility
group <- factor(c(1,1,3,3), levels=c("1","2","3"))
x <- c(1,2,3,4)
## Ok with mean?
tapply(x, group, mean)
my.tapply(x, group, mean)
## Ok with sum?
tapply(x, group, sum)
my.tapply(x, group, sum)
## Check that other arguments are carried through
x <- c(NA,2,3,10)
tapply(x, group, sum, na.rm=TRUE)
tapply(x, group, mean, na.rm=TRUE)
my.tapply(x, group, sum, na.rm=TRUE)
my.tapply(x, group, mean, na.rm=TRUE)
## Check that listed groups work ok also
group.2 <- factor(c(1,2,3,3), levels=c("1","2","3"))
tapply(x, list(group, group.2), sum, na.rm=TRUE)
tapply(x, list(group, group.2), mean, na.rm=TRUE)
my.tapply(x, list(group, group.2), sum, na.rm=TRUE)
my.tapply(x, list(group, group.2), mean, na.rm=TRUE)