SLOW split() function
Very nice! I am quite impressed at how flexible data.table is.
On Thu, Oct 13, 2011 at 1:05 AM, Matthew Dowle <mdowle at mdowle.plus.com> wrote:
Using Josh's nice example, with data.table's built-in 'by' (optimised grouping) yields a 6 times speedup (100 seconds down to 15 on my netbook).
system.time(all.2b <- lapply(si, function(.indx) { coef(lm(y ~
+ x, data=d[.indx,])) }))
   user  system elapsed
144.501   0.300 145.525
system.time(all.2c <- lapply(si, function(.indx) { minimal.lm(y
+ = d[.indx, y], x = d[.indx, list(int, x)]) }))
   user  system elapsed
100.819   0.084 101.552
system.time(all.2d <- d[,minimal.lm2(y=y, x=cbind(int, x)),by=key])
   user  system elapsed
 15.269   0.012  15.323   # 6 times faster
head(all.2c)
$`1`
        coef        se
x1 0.5152438 0.6277254
x2 0.5621320 0.5754560

$`2`
        coef       se
x1 0.2228235 0.312918
x2 0.3312261 0.261529

$`3`
         coef        se
x1 -0.1972439 0.4674000
x2 -0.1674313 0.4479957

$`4`
          coef        se
x1 -0.13915746 0.2729158
x2 -0.03409833 0.2212416

$`5`
           coef        se
x1  0.007969786 0.2389103
x2 -0.083776526 0.2046823

$`6`
          coef        se
x1 -0.58576454 0.5677619
x2 -0.07249539 0.5009013
head(all.2d)
     key       coef        V2
[1,]   1  0.5152438 0.6277254
[2,]   1  0.5621320 0.5754560
[3,]   2  0.2228235 0.3129180
[4,]   2  0.3312261 0.2615290
[5,]   3 -0.1972439 0.4674000
[6,]   3 -0.1674313 0.4479957
# Slightly modified version of Josh's minimal.lm().
#
# Fits an ordinary least-squares regression with lm.fit() and returns the
# coefficient estimates together with their standard errors, skipping the
# overhead of lm() / summary.lm().
#
# Arguments:
#   y  numeric response vector.
#   x  numeric design matrix (include a column of 1s for an intercept).
#
# Returns a two-element list: `coef`, the coefficients reported by lm.fit()
# (NA for aliased columns), and an unnamed second element holding the
# standard errors in the same order as the coefficients (NA where the
# coefficient is aliased). The second element is left unnamed so that the
# shape and names of the result are unchanged for existing callers.
minimal.lm2 <- function(y, x) {
  obj <- lm.fit(x = x, y = y)
  resvar <- sum(obj$residuals^2) / obj$df.residual
  p <- obj$rank

  # Start from NA so aliased (rank-deficient) columns stay aligned with coef.
  se <- rep(NA_real_, length(obj$coefficients))

  if (p > 0L) {
    keep <- seq_len(p)
    # chol2inv() is the public interface to the LAPACK-based inverse; the
    # private "La_chol2inv" entry point is not callable via .Call() with
    # PACKAGE = "base" in current versions of R.
    R <- chol2inv(obj$qr$qr[keep, keep, drop = FALSE], size = p)
    # The first `p` pivoted columns are the estimable ones; map their
    # standard errors back to the original column positions.
    se[obj$qr$pivot[keep]] <- sqrt(diag(R) * resvar)
  }

  list(coef = obj$coefficients, se)
}
-- View this message in context: http://r.789695.n4.nabble.com/SLOW-split-function-tp3892349p3900851.html Sent from the R help mailing list archive at Nabble.com.
______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Joshua Wiley Ph.D. Student, Health Psychology Programmer Analyst II, ATS Statistical Consulting Group University of California, Los Angeles https://joshuawiley.com/