Trevor Wiens
twiens at interbaun.com
==========================================================
myglm <- function(formula, family, data){
ret <- glm(formula, family=binomial, data=data)
return(ret)
}
myfacpred <- function(object, newdata) {
ret <- as.factor(ifelse(predict.glm(object, newdata, type='response') < 0.5, 0, 1))
return(ret)
}
# logerrorest takes four arguments
# mdata is a data frame holding the data to be modeled
# form is the output of the is.formula function
# rvar is the response variable as a string, for example 'birdx'
# fvar is the fold variable, for example 'recordyear'
logerrorest <- function(mdata, form, rvar, fvar) {
require(Hmisc)
require(ipred)
# determine index of variables
rpos <- match(rvar, names(mdata))
fpos <- match(fvar, names(mdata))
# get fold values and count for each group
vardesc <- describe(mdata[[fpos]])$values
fvarlist <- as.integer(dimnames(vardesc)[[2]])
k <- length(fvarlist)
countlist <- vardesc[1,1]
for (i in 2:k)
{
countlist[i] <- vardesc[1,i]
}
n <- length(mdata[[fpos]])
# get index list for each fold
cc <- list()
for (i in 1:k)
{
cc[[i]] <- as.integer(rownames(mdata[mdata[[fpos]] == fvarlist[i], ]))
}
# determine root mean squared error
ee <- errorest(form, mdata, estimator='cv', model=myglm, est.para=control.errorest(list.tindx = cc))
# determine misclassification error
# first convert to factor
width = length(mdata)
response <- as.factor(as.integer(mdata[[rpos]]))
newmatrix <- data.frame(cbind(mdata[1:(rpos-1)], response, mdata[(rpos+1):width]))
newform <- as.formula(paste('response ~ ', as.character(form)[[3]]))
me <- errorest(newform, newmatrix, estimator='cv', model=myglm, predict=myfacpred, est.para=control.errorest(list.tindx = cc))
ret <- data.frame(cbind(ee, me))
return(ret)
}