Skip to content
Prev 305675 / 398506 Next

[newbie] aggregating table() results and simplifying code with loop

Hello,

Now I believe I've got it. (So-so believe.)
ddply is a good idea, in my first post I was using xtabs to the same 
effect, but ddply makes it a lot easier.
I had made a big mistake, though. I thought that only sequences of FALSE 
were valid, hence rle(). In a follow-up post, Davide listed all 
combinations of T/F that matter. The code below uses them to create a df 
of cases to count with ddply. The creation is done by merge, on the T/F 
columns (function f1) and the rest is just to apply to 6 periods of 5 
years each, after applying to covers, after applying to years of crop type.
Sounds confusing, and it probably is but I've separated all of it in 
small functions.
Each of the functions is in fact very simple, and all one needs is to 
remember that the inner most function is reached after a series of 
*apply statements, doing one thing at a time.


#------------------------------------------------------------------------------
# data.frame of target sequences of T/F per crop type and years
# (from an earlier post)

wanted <-
structure(list(V2 = c(TRUE, FALSE, TRUE, FALSE, FALSE, TRUE,
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE,
TRUE, FALSE, TRUE), V3 = c(FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE,
FALSE, TRUE, TRUE, TRUE), V4 = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE), V5 = c(FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE,
FALSE, TRUE, FALSE, TRUE, TRUE), V6 = c(FALSE, TRUE, TRUE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE), V7 = c(" return", " return",
" return", " return", " return", " return", " return", " return",
" return", " return", " mono-succession", " mono-succession",
" mono-succession", " mono-succession", " mono-succession", " mono-succession",
" mono-succession", " mono-succession", " crops"), V8 = c(5L,
5L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L, 5L, 4L, 4L, 3L, 3L, 3L, 3L,
3L, 2L)), .Names = c("V2", "V3", "V4", "V5", "V6", "V7", "V8"
), row.names = c(NA, -19L), class = "data.frame")

want <- split(wanted, wanted$V7)

#--------------------------------------------------------------------
# The functions go from f5 down to f1 (f5 calls f4, f4 calls f3, etc.)
#
f1 <- function(DF, Mat, currcols, WS){
	mcols <- names(T80[, ycols])[currcols]  # names for 'merge'
	names(DF)[1:5] <- mcols
	m <- merge(DF, data.frame(Mat[, mcols], WS = WS))
	ddply(m, .(WS), summarize, length(V8))
}
f2 <- function(w, tf, WS){
	l1 <- lapply(yrs, function(yy) t(f1(w, tf, yy, WS)))
	l2 <- lapply(l1, function(x){
		nms <- as.character(x["WS",])
		resline[ nms ] <- x[2, ]
		resline})
	res <- do.call(rbind, l2)
	data.frame(res)
}
f3 <- function(Cover, w){
	tf <- T80[, ycols] == Cover
	inx <- as.vector(apply(tf, 1, function(.x) any(.x)))
	tf <- tf[inx, ]
	WS <- T80$WS[inx]
	# Special care with empty sets
	if(sum(tf) > 0){
		res <- f2(w, tf, WS)
		na <- as.vector(apply(res, 1, function(.x) all(is.na(.x))))
		res <- res[!na, ]
		if(!is.null(res) && nrow(res) > 0){
			res$Cover <- Cover
			res$Period <- (1:6)[!na]
		}else res <- NULL
	}else res <- NULL
	res
}
f4 <- function(w){
	res <- do.call(rbind, lapply(covers, f3, w))
	# Special care with empty sets
	if(!is.null(nrow(res)) && nrow(res) > 0){
		res <- data.frame(res)
		res$Years <- w$V8[1]
	}else res <- NULL
	res
}
f5 <- function(w){
	sp <- split(w, w$V8)
	res <- do.call( rbind, lapply(sp, f4) )
	res <- data.frame(res)
	res
}

T80 <- read.table("sample.txt", header = TRUE, sep = ";")

ycols <- grep("y", names(T80))
ws <- unique(T80$WS)
covers <- as.character(unique(unlist(T80[ycols])))
yrs <- lapply(0:5, `+`, 1:5)
resline <- rep(NA, length(ws))
names(resline) <- ws

result <- lapply(want, f5)


(Ok, maybe two or three things at a time.)

I hope this is it!
Have fun.

Rui Barradas

Em 17-09-2012 14:19, John Kane escreveu: