Skip to content

index instead of loop?

11 messages · Rui Barradas, Ben quant

#
Hello,
Maybe, let's see.
# Yes.
# First, start by using dates, not characters

fdate <- function(x, format="%Y%m%d"){
	DF <- data.frame(x)
	for(i in colnames(DF)){
		DF[, i] <- as.Date(DF[, i], format=format)
		class(DF[, i]) <- "Date"
	}
	DF
}

rd1 <- fdate(rd1)
# This is yours, use it.
dt1 <- seq(from =as.Date(z.dates[1]), to = as.Date("2009-03-25"), by =
"day")
# Set up the result, no time expensive 'cbind' inside a loop
fin1 <- data.frame(matrix(NA, nrow=length(dt1), ncol=ncol(ua) + 1))
fin1[, 1] <- dt1
nr <- nrow(rd1)

# And vectorize
for(tkr in 1:ncol(ua)){
	x  <- c(rd1[, tkr], as.Date("9999-12-31"))
	inxlist <- lapply(1:nr, function(i) which(x[i] <= dt1 & dt1 < x[i + 1]))
	sapply(1:length(inxlist), function(i) if(length(ix[[i]])) fin1[ix[[i]], tkr
+ 1] <<- ua[i, tkr])
}
colnames(fin1) <- c("daily_dates", colnames(ua))

# Check results
str(fin)
str(fin1)
head(fin)
head(fin1)
tail(fin)
tail(fin1)


Note that 'fin' has facotrs, 'fin1' numerics.
I haven't timed it but I believe it should be faster.

Hope this helps,

Rui Barradas





--
View this message in context: http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4448567.html
Sent from the R help mailing list archive at Nabble.com.
#
Hello,
Right, sorry.
I've changed the name from 'ix' to 'inxlist' to make it more readable just
before posting.
And since the object 'ix' still existed in the R global environment it
didn't throw an error...

Your correction in the post that followed is what I meant.

Correction (full loop, tested):

for(tkr in 1:ncol(ua)){
	x  <- c(rd1[, tkr], as.Date("9999-12-31"))
	ix <- lapply(1:nr, function(i)
			which(x[i] <= dt1 & dt1 < x[i + 1]))
	sapply(1:length(ix), function(i)
		if(length(ix[[i]])) fin1[ix[[i]], tkr + 1] <<- ua[i, tkr])
}

Rui Barradas


--
View this message in context: http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4450186.html
Sent from the R help mailing list archive at Nabble.com.
1 day later
#
Hello again.


Ben quant wrote
Maybe I'm not understanding the problem very well, but let me describe what
I'm thinking it is.

You have two tables, 'rd1' and 'ua' and a vector of dates, 'z.dates'.  The
result is a table such that:
1. From 'z.dates' make a vector of daily dates.
2. Each column is filled with numbers from 'ua' based on dates in 'rd1',
starting at the day given in step 1.
My doubt is that your last posted code seems to give a special role to
column 'A'.
A       B              C      D
2007-11-10 636.16 2645     NaN NaN
2007-11-11 636.16 2645     NaN NaN
2007-11-12 636.16 2645     NaN NaN
2007-11-13 636.16 2645     NaN NaN
2007-11-14 636.16 2645     NaN NaN
2007-11-15 655.91 2645 -98.426 NaN
2007-11-16 655.91 2645 -98.426 NaN
2007-11-17 655.91 2645 -98.426 NaN

The values in column 'C' change following the date in column 'A'. That is
the third date in 'rd1',
more exactly, rd1[3, 1] == "20071115".

Shouldn't the values in mat[, "C"] start at 2009-03-20? The corresponding
value in 'ua' would then be 144.138.

(I still believe this can be made much faster.)

Rui Barradas

--
View this message in context: http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4455223.html
Sent from the R help mailing list archive at Nabble.com.
#
Hello,
I think I have the fastest so far solution, and it checks with your
corrected,last one.

I've made just a change: to transform it into a function I renamed the
parameters
(only for use inside the function) 'zdates', without the period, 'rddata'
and 'uadata'.

'fun1' is yours, 'fun2', mine. Here it goes.


fun1 <- function(zdates, rddata, uadata){
    fix = function(x)
    {
      year = substring(x, 1, 4)
      mo = substring(x, 5, 6)
      day = substring(x, 7, 8)
      ifelse(year=="--", "--", paste(year, mo, day, sep = "-"))

    }
    rd = apply(rddata, 2, fix)
    dimnames(rd) = dimnames(rd)

    wd1 <- seq(from =as.Date(min(zdates)), to = Sys.Date(), by = "day")
    #wd1 = wd1[weekdays(wd1) == "Friday"] # uncomment to go weekly
    wd = sapply(wd1, as.character)
    mat = matrix(NA,nrow=length(wd),ncol=ncol(uadata))
    rownames(mat) = wd
    nms = as.Date(rownames(uadata))

    for(i in 1:length(wd)){
      d = as.Date(wd[i])
      diff = abs(nms - d)
      rd_row_idx = max(which(diff == min(diff)))
      rd_col_idx = which(as.Date(rd[rd_row_idx,], format="%Y-%m-%d")  < d)
      rd_col_idx_lag = which(as.Date(rd[rd_row_idx - 1,], format="%Y-%m-%d") 
< d)
      rd_col_idx_lag2 = which(as.Date(rd[rd_row_idx - 2,],
format="%Y-%m-%d")  < d)

      if(length(rd_col_idx_lag2) && (rd_row_idx - 2) > 0){
        mat[i,rd_col_idx_lag2] = uadata[rd_row_idx - 2,rd_col_idx_lag2]
      }
      if(length(rd_col_idx_lag)){
        mat[i,rd_col_idx_lag] = uadata[rd_row_idx - 1,rd_col_idx_lag]
      }
      if( length(rd_col_idx)){
        mat[i,rd_col_idx] = uadata[rd_row_idx,rd_col_idx]
      }
    }
    colnames(mat)=colnames(uadata)
    mat
}

fun2 <- function(zdates, rddata, uadata){

	fdate <- function(x, format="%Y%m%d"){
		DF <- data.frame(x)
		for(i in colnames(DF)){
			DF[, i] <- as.Date(DF[, i], format=format)
			class(DF[, i]) <- "Date"
		}
		DF
	}

	rddata <- fdate(rddata)
	wd1 <- seq(from = as.Date(zdates[1]), to = Sys.Date(), by = "day")
	nwd1 <- length(wd1)

	fin1 <- matrix(NA, nrow=length(wd1), ncol=ncol(uadata))
	nr <- nrow(rddata)
	xstart <- c(integer(nr), nwd1)
	for(j in 1:ncol(uadata)){
		x <- xstart
		for(i in 1:nr)
			x[i] <- if(!is.na(rddata[i, j]) & !is.nan(rddata[i, j]))
					which(wd1 == rddata[i, j])
					else NA
		ix <- which(!is.na(x))
		for(i in seq_len(length(ix) - 1)){
			from <- x[ ix[i] ] + 1
			to   <- x[ ix[i + 1] ]
			fin1[ from:to, j ] <- uadata[ ix[i], j ]
		}
	}
	colnames(fin1) <- colnames(uadata)
	rownames(fin1) <- as.character(wd1)
	fin1
}

t1 <- system.time(m1 <- fun1(z.dates, rd1, ua))
t2 <- system.time(m2 <- fun2(z.dates, rd1, ua))

all.equal(m1, m2)
[1] TRUE

rbind(t1, t2)
   user.self sys.self elapsed user.child sys.child
t1      1.50        0    1.50         NA        NA
t2      0.02        0    0.01         NA        NA

And the better news is that I believe it scales up without degrading
performance,
like my first did.

See if it works.

Rui Barradas



--
View this message in context: http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4457290.html
Sent from the R help mailing list archive at Nabble.com.
1 day later