Hi,
I have R code like so:
num.columns.back.since.last.occurence <- function(m, outcome) {
nrows <- dim(m)[1];
ncols <- dim(m)[2];
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res;
}
but on the very large matrices I apply this the execution times are a
problem. I would appreciate any help to rewrite this with more
"standard"/native R functions to speed things up.
--
View this message in context: http://r.789695.n4.nabble.com/Speeding-up-accumulation-code-in-large-matrix-calc-tp4417911p4417911.html
Sent from the R help mailing list archive at Nabble.com.
Speeding up "accumulation" code in large matrix calc?
7 messages · robertfeldt, Petr Savicky, Berend Hasselman
On Fri, Feb 24, 2012 at 08:59:44AM -0800, robertfeldt wrote:
Hi,
I have R code like so:
num.columns.back.since.last.occurence <- function(m, outcome) {
nrows <- dim(m)[1];
ncols <- dim(m)[2];
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res;
}
but on the very large matrices I apply this the execution times are a
problem. I would appreciate any help to rewrite this with more
"standard"/native R functions to speed things up.
Hi.
If the number of columns is large, so the rows are long, then
the following can be more efficient.
oneRow <- function(x, outcome)
{
n <- length(x)
y <- c(0, cumsum(x[-n] == outcome))
ave(x, y, FUN = function(z) seq.int(along=z) - 1)
}
# random matrix
A <- matrix((runif(49) < 0.2) + 0, nrow=7)
# the required transformation
B <- t(apply(A, 1, oneRow, outcome=1))
# verify
all(num.columns.back.since.last.occurence(A, 1) == B)
[1] TRUE
This solution performs a loop over rows (in apply), so if the
number of rows is large and the number of columns is not,
then a solution, which uses a loop over columns, may be
better.
Hope this helps.
Petr Savicky.
On Fri, Feb 24, 2012 at 08:59:44AM -0800, robertfeldt wrote:
Hi,
I have R code like so:
num.columns.back.since.last.occurence <- function(m, outcome) {
nrows <- dim(m)[1];
ncols <- dim(m)[2];
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res;
}
but on the very large matrices I apply this the execution times are a
problem. I would appreciate any help to rewrite this with more
"standard"/native R functions to speed things up.
Hi.
If the number of rows is large and the number of columns is not,
then try the following.
# random matrix
A <- matrix((runif(49) < 0.2) + 0, nrow=7)
outcome <- 1
# transformation
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
# verify
all(num.columns.back.since.last.occurence(A, 1) == B)
[1] TRUE
Hope this helps.
Petr Savicky.
On 24-02-2012, at 20:02, Petr Savicky wrote:
On Fri, Feb 24, 2012 at 08:59:44AM -0800, robertfeldt wrote:
Hi,
I have R code like so:
num.columns.back.since.last.occurence <- function(m, outcome) {
nrows <- dim(m)[1];
ncols <- dim(m)[2];
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res;
}
but on the very large matrices I apply this the execution times are a
problem. I would appreciate any help to rewrite this with more
"standard"/native R functions to speed things up.
Hi.
If the number of rows is large and the number of columns is not,
then try the following.
# random matrix
A <- matrix((runif(49) < 0.2) + 0, nrow=7)
outcome <- 1
# transformation
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
I've done some speed tests.
t1 <- function(m, outcome) {
nrows <- dim(m)[1]
ncols <- dim(m)[2]
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows)
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res
}
# by row
t2 <- function(A, outcome) {
oneRow <- function(x, outcome)
{
n <- length(x)
y <- c(0, cumsum(x[-n] == outcome))
ave(x, y, FUN = function(z) seq.int(along=z) - 1)
}
t(apply(A, 1, oneRow, outcome=1))
}
# transformation
t3 <- function(A, outcome) {
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
B
}
# Compile functions to byte code
library(compiler)
t1.c <- cmpfun(t1)
t2.c <- cmpfun(t2)
t3.c <- cmpfun(t3)
Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
Nrow <- 1000
Ncol <- 100
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
Nrow <- 1000
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
------------------------------------------
Summary of results
Nrow <- 100 Ncol <- 1000 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child z1 0.543 0.005 0.551 0 0 z2 0.299 0.004 0.304 0 0 z3 0.070 0.012 0.082 0 0 z4 0.112 0.002 0.114 0 0 z5 0.263 0.007 0.271 0 0 z6 0.062 0.009 0.070 0 0
Nrow <- 1000 Ncol <- 100 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) z1 <- system.time(res1 <- t1(A,outcome=1)) z2 <- system.time(res2 <- t2(A,outcome=1)) z3 <- system.time(res3 <- t3(A, outcome=1)) z4 <- system.time(res4 <- t1.c(A,outcome=1)) z5 <- system.time(res5 <- t2.c(A,outcome=1)) z6 <- system.time(res6 <- t3.c(A, outcome=1))
user.self sys.self elapsed user.child sys.child z1 0.543 0.005 0.551 0 0 z2 0.299 0.004 0.304 0 0 z3 0.070 0.012 0.082 0 0 z4 0.112 0.002 0.114 0 0 z5 0.263 0.007 0.271 0 0 z6 0.062 0.009 0.070 0 0
Nrow <- 1000 Ncol <- 1000 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child z1 5.381 0.019 5.417 0 0 z2 2.812 0.044 2.858 0 0 z3 0.307 0.049 0.357 0 0 z4 1.176 0.015 1.191 0 0 z5 2.686 0.038 2.728 0 0 z6 0.305 0.049 0.355 0 0 <End of timing results> The original function (t1) benefits a lot from compiling to byte code. The function that operates on columns (t3) seems to be always the quickest in this experiment. Berend
On Fri, Feb 24, 2012 at 09:06:28PM +0100, Berend Hasselman wrote:
[...]
I've done some speed tests.
t1 <- function(m, outcome) {
nrows <- dim(m)[1]
ncols <- dim(m)[2]
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows)
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res
}
# by row
t2 <- function(A, outcome) {
oneRow <- function(x, outcome)
{
n <- length(x)
y <- c(0, cumsum(x[-n] == outcome))
ave(x, y, FUN = function(z) seq.int(along=z) - 1)
}
t(apply(A, 1, oneRow, outcome=1))
}
# transformation
t3 <- function(A, outcome) {
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
B
}
# Compile functions to byte code
library(compiler)
t1.c <- cmpfun(t1)
t2.c <- cmpfun(t2)
t3.c <- cmpfun(t3)
Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
[...]
------------------------------------------ Summary of results
Nrow <- 100 Ncol <- 1000 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child z1 0.543 0.005 0.551 0 0 z2 0.299 0.004 0.304 0 0 z3 0.070 0.012 0.082 0 0 z4 0.112 0.002 0.114 0 0 z5 0.263 0.007 0.271 0 0 z6 0.062 0.009 0.070 0 0
Nrow <- 1000 Ncol <- 100 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) z1 <- system.time(res1 <- t1(A,outcome=1)) z2 <- system.time(res2 <- t2(A,outcome=1)) z3 <- system.time(res3 <- t3(A, outcome=1)) z4 <- system.time(res4 <- t1.c(A,outcome=1)) z5 <- system.time(res5 <- t2.c(A,outcome=1)) z6 <- system.time(res6 <- t3.c(A, outcome=1))
user.self sys.self elapsed user.child sys.child z1 0.543 0.005 0.551 0 0 z2 0.299 0.004 0.304 0 0 z3 0.070 0.012 0.082 0 0 z4 0.112 0.002 0.114 0 0 z5 0.263 0.007 0.271 0 0 z6 0.062 0.009 0.070 0 0
[...]
The original function (t1) benefits a lot from compiling to byte code. The function that operates on columns (t3) seems to be always the quickest in this experiment.
Thank you for this. Intuition may be wrong.
Function t2() calls ave(), which contains a loop over the groups, so
its efficiency depends also on the number of groups. If the matrix is
generated with fewer 1's, then i obtained
Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.002)+0, nrow=Nrow)
library(rbenchmark)
benchmark(t1(A,outcome=1),
t2(A,outcome=1),
t3(A,outcome=1),
t1.c(A,outcome=1),
t2.c(A,outcome=1),
t3.c(A,outcome=1),
columns=c("test", "user.self", "relative"),
replications=1)
1 t1(A, outcome = 1) 0.776 13.362069
4 t1.c(A, outcome = 1) 0.308 5.275862
2 t2(A, outcome = 1) 0.172 2.965517
5 t2.c(A, outcome = 1) 0.176 3.034483
3 t3(A, outcome = 1) 0.060 1.051724
6 t3.c(A, outcome = 1) 0.056 1.000000
So, the result is slightly better for t2() with probability 0.002
of 1 than with probability 0.2. At least, it is faster than t1.c().
But still, t3() or t3.c() is faster.
Petr.
On Fri, Feb 24, 2012 at 09:06:28PM +0100, Berend Hasselman wrote:
[...]
Summary of results
Nrow <- 100 Ncol <- 1000 A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow) print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child z1 0.543 0.005 0.551 0 0 z2 0.299 0.004 0.304 0 0 z3 0.070 0.012 0.082 0 0 z4 0.112 0.002 0.114 0 0 z5 0.263 0.007 0.271 0 0 z6 0.062 0.009 0.070 0 0
[...]
The original function (t1) benefits a lot from compiling to byte code. The function that operates on columns (t3) seems to be always the quickest in this experiment.
Hi.
Let me include one more solution operating on rows to the test.
t4 <- function(A, outcome) {
oneRow <- function(x, outcome)
{
n <- length(x)
y <- 1:n
z <- y
z[c(FALSE, x[-n] != outcome)] <- 0
y - cummax(z)
}
t(apply(A, 1, oneRow, outcome=outcome))
}
library(compiler)
t1.c <- cmpfun(t1)
t3.c <- cmpfun(t3)
t4.c <- cmpfun(t4)
Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
library(rbenchmark)
benchmark(t1(A,outcome=1),
t3(A,outcome=1),
t4(A,outcome=1),
t1.c(A,outcome=1),
t3.c(A,outcome=1),
t4.c(A,outcome=1),
columns=c("test", "user.self", "sys.self", "relative"),
replications=1)
test user.self sys.self relative
1 t1(A, outcome = 1) 0.744 0 46.5000
4 t1.c(A, outcome = 1) 0.284 0 17.6875
2 t3(A, outcome = 1) 0.076 0 4.7500
5 t3.c(A, outcome = 1) 0.072 0 4.6250
3 t4(A, outcome = 1) 0.016 0 1.0000
6 t4.c(A, outcome = 1) 0.020 0 1.1250
Here, t4(), t4.c() is faster than t3(), t3.c().
With
Nrow <- 20000
Ncol <- 50
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
i get
test user.self sys.self relative
1 t1(A, outcome = 1) 7.444 0 20.233696
4 t1.c(A, outcome = 1) 2.740 0 7.442935
2 t3(A, outcome = 1) 0.368 0 1.000000
5 t3.c(A, outcome = 1) 0.368 0 1.002717
3 t4(A, outcome = 1) 0.592 0 1.605978
6 t4.c(A, outcome = 1) 0.536 0 1.456522
Here, t3() and t3.c() are faster than t4(), t4.c().
Petr.
Wow! Thanks to both Petr and Berend for this extensive help.
I learned a lot not only about this specific case but about R in general
from studying your answers.
The compiled version t4 seems to give the most consistently quickest results
and for my case (about 6000 rows and 500 columns with a probability of the
sought for outcome 0.04) I see speedups from my original of 30-40 times. See
below for details.
Excellent help thank you!
# t1-t4 as above in thread and then compiled to t1.c-t4.c ...
random_matrix <- function(nrows, ncols, probabilityOfOne) {
matrix((runif(nrows*ncols)<probabilityOfOne)+0, nrow=nrows);
}
library(benchmark)
compare.exec.times <- function(A) {
benchmark(t1(A,outcome=1),
t2(A,outcome=1),
t3(A,outcome=1),
t4(A,outcome=1),
t1.c(A,outcome=1),
t2.c(A,outcome=1),
t3.c(A,outcome=1),
t4.c(A,outcome=1),
columns=c("test", "user.self", "relative"),
replications=3)
}
compare.exec.times(random_matrix(100, 1000, 0.10)) # t4.c quickest, 47 times
speedup
compare.exec.times(random_matrix(1000, 100, 0.10)) # t4.c quickest, 25 times
speedup
compare.exec.times(random_matrix(1000, 1000, 0.10)) # t4.c quickest, 37
times speedup
# Most realistic for my data:
compare.exec.times(random_matrix(6000, 400, 0.04)) # t4.c quickest, 30
times speedup
test user.self relative
1 t1(A, outcome = 1) 35.372 30.145038
5 t1.c(A, outcome = 1) 8.591 7.329092
2 t2(A, outcome = 1) 14.598 12.761662
6 t2.c(A, outcome = 1) 14.413 12.587786
3 t3(A, outcome = 1) 1.579 1.743851
7 t3.c(A, outcome = 1) 1.608 1.818490
4 t4(A, outcome = 1) 1.092 1.120441
8 t4.c(A, outcome = 1) 0.894 1.000000
--
View this message in context: http://r.789695.n4.nabble.com/Speeding-up-accumulation-code-in-large-matrix-calc-tp4417911p4419422.html
Sent from the R help mailing list archive at Nabble.com.