Skip to content
Prev 2408 / 15274 Next

question on zoo data manipulation

I thought about this one a bit more and have two
additional solutions.  One uses zoo more intensively
by forming the "time" index out of the merge keys.
This relies on the fact that zoo can use any class
with certain methods, not just the usual time/date
classes.  However, I think that the best wayof thinking
about this problem is from an SQL viewpoint since its
basically just a three way self merge followed by
an aggregation and the entire thing can be done
in a single SQL statement (although it spans several
lines).  Solution 1 is our prior minimally zoo solution,
solution 2 is the much more zoo-ish solution and
solution 3 uses sqldf to implement it in SQL.

DF <- structure(list(Ticker = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "XXX", class = "factor"), Date = c(20080320L,
20080320L, 20080320L, 20080320L, 20080320L, 20080218L, 20080218L,
20080218L, 20080218L), BrokerName = structure(c(1L, 1L, 2L, 2L,
3L, 1L, 1L, 2L, 2L), .Label = c("BRK1", "BRK2", "BRK3"), class = "factor"),
    Acc_Yr = c(200806L, 200906L, 200806L, 200906L, 200806L, 200806L,
    200906L, 200806L, 200906L), Measure = c(2.2, 2.5, 2.3, 2.8,
    3.3, 2.2, 2.5, 2.4, 2.8), lag = c(0L, 0L, 0L, 0L, 0L, 1L,
    1L, 1L, 1L)), .Names = c("Ticker", "Date", "BrokerName",
"Acc_Yr", "Measure", "lag"), class = "data.frame", row.names = c(NA,
-9L))


# zoo solution 1

library(zoo)

f <- function(x) {
    br <- intersect(x[x$lag == 0, "BrokerName"], x[x$lag == 1, "BrokerName"])
    sb <- subset(x, BrokerName %in% br)
    ag <- aggregate(sb["Measure"], sb[c(1, 2, 4, 6)], mean)
    transform(tail(ag, -1), Measure =
        coredata(diff(zoo(ag$Measure), arithmetic = FALSE) - 1))
}
do.call("rbind", by(DF, DF[c(1, 4)], f))

# zoo solution 2

library(zoo)

z <- zoo(DF$Measure, apply(DF[c(1, 3, 4, 6)], 1, paste, collapse = ":"))

zl <- zoo(DF$Measure,
    apply(transform(DF[c(1, 3, 4, 6)], lag = lag+1), 1, paste, collapse = ":"))

zm <- merge(z, zl, all = FALSE)

z01 <- zm[sub(":[0-9]*$", ":1", time(zm)) %in% time(zm)]

transform(aggregate(z01, sub(":[^:]*", "", time(z01)), mean), Change = z/zl-1)

# solution 3 - sqldf

library(sqldf)
sqldf("select Ticker, Date__1, Acc_Yr, lag, avg(Measure)/avg(Mprev)-1 Change
    from (select y.*, x.Measure Mprev from DF x, DF y, DF z
    where x.Ticker = y.Ticker and x.Acc_Yr = y.Acc_Yr
        and x.BrokerName = y.BrokerName and x.lag = y.lag - 1
        and x.Ticker = z.Ticker and x.Acc_Yr = z.Acc_Yr
        and x.BrokerName = z.BrokerName and z.lag = 1)
    group by Ticker, Acc_Yr, lag")



On Tue, Apr 15, 2008 at 8:56 AM, Gabor Grothendieck
<ggrothendieck at gmail.com> wrote: