An embedded and charset-unspecified text was scrubbed... Name: not available URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20130202/63aa4678/attachment.pl>
vectorisation
4 messages · Brett Robinson, arun, Rui Barradas +1 more
Hi,
Not sure this helps:
ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
mm <- apply(ml, 2, cumsum)
starts<- data.frame(matrix(0,600,4))
starts1<- data.frame(matrix(0,600,4))
for (i in 1:4){
starts1[,i][mm[,i]] <-1
}
starts2<-as.data.frame(do.call(cbind,lapply(1:4,function(i) {starts[,i][mm[,i]]<-1;starts[,i]})))
colnames(starts2)<- colnames(starts1)
identical(starts1,starts2)
#[1] TRUE
#loop should be fast compared to lapply
ml <- data.frame(matrix(sample(1:1e4,1e3, replace=TRUE),100,4))
mm <- apply(ml, 2, cumsum)
starts<- data.frame(matrix(0,1e6,4))
starts1<- data.frame(matrix(0,1e6,4))
system.time({for (i in 1:4){
starts1[,i][mm[,i]] <-1
}})
?#user ?system elapsed?
? # 0.63 ? ?0.30 ? ?0.92?
system.time(starts2<-as.data.frame(do.call(cbind,lapply(1:4,function(i) {starts[,i][mm[,i]]<-1;starts[,i]}))))
# user ?system elapsed?
?# ?0.74 ? ?0.26 ? ?1.03?
colnames(starts2)<- colnames(starts1)
identical(starts1,starts2)
#[1] TRUE
A.K.
----- Original Message -----
From: Brett Robinson <brett.robinson at 7dials.com>
To: "r-help at r-project.org" <r-help at r-project.org>
Cc:
Sent: Saturday, February 2, 2013 11:38 AM
Subject: [R] vectorisation
Hi
I'm trying to set up a simulation problem without resorting to (m)any loops. I want to set entries in a data frame of zeros ('starts' in the code below) to 1 at certain points and the points have been randomly generated and stored in a separate data.frame ('sl'), which has the same number of columns.
An example of the procedure is as follows:
ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
mm <- apply(ml, 2, cumsum)
starts<- data.frame(matrix(0,600,4))
I can achieve the result I want with a loop:
for (i in 1:4){
lstarts[,i][mm[,i]] <-1
}
But as I want to use a large number of columns I would like to do away with the loop
Can anyone suggest how this might be done?
Thanks in advance
Brett Robinson
______________________________________________________________________
This email has been scanned by the Symantec Email Security.cloud service.
For more information please visit http://www.symanteccloud.com
______________________________________________________________________
??? [[alternative HTML version deleted]]
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Hello,
Try the following.
set.seed(4315)
ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
mm <- apply(ml, 2, cumsum)
s2 <- starts <- data.frame(matrix(0,600,4))
for (i in 1:4){
starts[,i][mm[,i]] <- 1
}
s2[] <- lapply(seq_len(ncol(mm)), function(i) {s2[,i][mm[,i]] <- 1; s2[,i]})
identical(s2, starts) # TRUE
Note that lapply is a loop in disguise.
Hope this helps,
Rui Barradas
Em 02-02-2013 16:38, Brett Robinson escreveu:
Hi
I'm trying to set up a simulation problem without resorting to (m)any loops. I want to set entries in a data frame of zeros ('starts' in the code below) to 1 at certain points and the points have been randomly generated and stored in a separate data.frame ('sl'), which has the same number of columns.
An example of the procedure is as follows:
ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
mm <- apply(ml, 2, cumsum)
starts<- data.frame(matrix(0,600,4))
I can achieve the result I want with a loop:
for (i in 1:4){
lstarts[,i][mm[,i]] <-1
}
But as I want to use a large number of columns I would like to do away with the loop
Can anyone suggest how this might be done?
Thanks in advance
Brett Robinson
______________________________________________________________________ This email has been scanned by the Symantec Email Security.cloud service. For more information please visit http://www.symanteccloud.com ______________________________________________________________________ [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
On 02-02-2013, at 17:38, Brett Robinson <brett.robinson at 7dials.com> wrote:
Hi
I'm trying to set up a simulation problem without resorting to (m)any loops. I want to set entries in a data frame of zeros ('starts' in the code below) to 1 at certain points and the points have been randomly generated and stored in a separate data.frame ('sl'), which has the same number of columns.
An example of the procedure is as follows:
ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
mm <- apply(ml, 2, cumsum)
starts<- data.frame(matrix(0,600,4))
I can achieve the result I want with a loop:
for (i in 1:4){
lstarts[,i][mm[,i]] <-1
}
But as I want to use a large number of columns I would like to do away with the loop
Can anyone suggest how this might be done?
Another way is this
f2 <- function(starts, mm) {
mn <- cbind(as.vector(mm),rep(1:ncol(mm),each=nrow(mm)))
x <- as.matrix(starts)
x[mn] <- 1
as.data.frame(x)
}
starts2 <- f2(starts,mm)
#> identical(starts2,starts1)
# [1] TRUE
Collect all the options presented so far in functions, use the compiler package to see if that helps
and do some speed tests with Arun's parameters.
# Brett
f1 <- function(starts, mm) {
for (i in 1:ncol(mm)){
starts[,i][mm[,i]] <-1
}
starts
}
# Berend
f2 <- function(starts, mm) {
mn <- cbind(as.vector(mm),rep(1:ncol(mm),each=nrow(mm)))
x <- as.matrix(starts)
x[mn] <- 1
as.data.frame(x)
}
# Rui
f3 <- function(s2,mm) {
s2[] <- lapply(seq_len(ncol(mm)), function(i) {s2[,i][mm[,i]] <- 1; s2[,i]})
s2
}
# Arun
f4 <- function(starts,mm) {
starts2 <- as.data.frame(do.call(cbind,lapply(1:ncol(mm),function(i) {starts[,i][mm[,i]]<-1;starts[,i]})))
colnames(starts2)<- colnames(starts)
starts2
}
library(compiler)
f1c <- cmpfun(f1)
f2c <- cmpfun(f2)
f3c <- cmpfun(f3)
f4c <- cmpfun(f4)
library(rbenchmark)
# Arun's test
set.seed(11)
starts <- data.frame(matrix(0,1e6,4))
ml <- data.frame(matrix(sample(1:1e4,1e3, replace=TRUE),100,4))
mm <- apply(ml, 2, cumsum)
z1 <- f1(starts,mm)
z2 <- f2(starts,mm)
z3 <- f3(starts,mm)
z4 <- f4(starts,mm)
z1c <- f1c(starts,mm)
z2c <- f2c(starts,mm)
z3c <- f3c(starts,mm)
z4c <- f4c(starts,mm)
identical(z2,z1)
identical(z3,z1)
identical(z4,z1)
identical(z1c,z1)
identical(z2c,z1)
identical(z3c,z1)
identical(z4c,z1)
benchmark( f1(starts,mm) , f2(starts,mm),
f1c(starts,mm), f2c(starts,mm),
f3(starts,mm) , f4(starts,mm),
f3c(starts,mm), f4c(starts,mm),
replications=1,order="relative", columns=c("test","relative","elapsed","replications"))
Result:
# > identical(z2,z1)
# [1] TRUE
# > identical(z3,z1)
# [1] TRUE
# > identical(z4,z1)
# [1] TRUE
# > identical(z1c,z1)
# [1] TRUE
# > identical(z2c,z1)
# [1] TRUE
# > identical(z3c,z1)
# [1] TRUE
# > identical(z4c,z1)
# [1] TRUE
# >
# > benchmark( f1(starts,mm) , f2(starts,mm),
# + f1c(starts,mm), f2c(starts,mm),
# + f3(starts,mm) , f4(starts,mm),
# + f3c(starts,mm), f4c(starts,mm),
# + replications=1,order="relative", columns=c("test","relative","elapsed","replications"))
# test relative elapsed replications
# 2 f2(starts, mm) 1.000 0.195 1
# 4 f2c(starts, mm) 1.005 0.196 1
# 1 f1(starts, mm) 2.990 0.583 1
# 3 f1c(starts, mm) 3.082 0.601 1
# 7 f3c(starts, mm) 3.903 0.761 1
# 5 f3(starts, mm) 3.949 0.770 1
# 8 f4c(starts, mm) 4.436 0.865 1
# 6 f4(starts, mm) 4.462 0.870 1
Compiling doesn't deliver significant speed gains in this case.
Function f2 is the quickest.
Berend