Skip to content
Prev 201638 / 398506 Next

predict: remove columns with new levels automatically

Thank you all for the good advice.

I have now written a quick hack which does what I was looking for; maybe
someone else will find it useful:


# Reproducible toy data: x is standard-normal noise, y is x plus more noise.
set.seed(0)
x <- rnorm(9)
y <- x + rnorm(9)

# stringsAsFactors = TRUE is spelled out because data.frame() keeps strings as
# character vectors by default since R 4.0.0; the example relies on z1 and z2
# being factors.
training <- data.frame(x = x, y = y,
                       z1 = rep(c("A", "B", "C"), each = 3),
                       z2 = rep(c("F", "G"), times = c(4, 5)),
                       stringsAsFactors = TRUE)

# A single new observation whose z1 value ("D") never occurs in `training`.
# The draw is stored in its own variable (rather than an inline `t <- ...`)
# so that no global named `t` shadows base::t.
x_test <- rnorm(1)
test <- data.frame(x = x_test, y = x_test + rnorm(1), z1 = "D", z2 = "F",
                   stringsAsFactors = TRUE)


# Fit a linear model on `dat` after leaving out every categorical column for
# which `newdat` contains a value that never occurs in `dat`, then predict
# for `newdat` with that model.
#
# Arguments:
#   f      - model formula (anything accepted by formula()), e.g. x ~ .
#   dat    - training data frame
#   newdat - data frame to predict for; it must have the same number of
#            columns as `dat`, and columns are compared by position
#
# Returns a list with two elements:
#   drop - logical vector named after the columns of `dat`; TRUE marks a
#          column that was left out of the fit
#   p    - the result of predict() for `newdat`
predict.drop <- function(f, dat, newdat)
{
  if (ncol(dat) != ncol(newdat))
    stop("dat and newdat must have the same column length!")

  # Distinct non-missing categories present in one column, or NULL when the
  # column is neither a factor nor a character vector. Observed values are
  # used instead of levels() because lm() and predict() discard unused levels
  # before comparing them. Character columns are included because lm() turns
  # them into factors.
  observed_levels <- function(column)
  {
    if (is.factor(column) || is.character(column))
      unique(as.character(column[!is.na(column)]))
    else
      NULL
  }

  # lapply() keeps exactly one list slot per column. Assigning NULL with
  # `[[<-` would delete the slot and shift every later index.
  datlev <- lapply(dat, observed_levels)
  newdatlev <- lapply(newdat, observed_levels)

  dropped <- logical(ncol(dat))
  names(dropped) <- colnames(dat)

  for (j in seq_along(dropped))
  {
    # all() collapses the comparison to one value, so several new rows with
    # several distinct categories are handled as well.
    if (!is.null(datlev[[j]]))
      dropped[j] <- !all(newdatlev[[j]] %in% datlev[[j]])
  }

  # drop = FALSE keeps a data frame even if a single column remains.
  m <- lm(formula(f), data = dat[, !dropped, drop = FALSE])
  p <- predict(m, newdat)

  list(drop = dropped, p = p)
}


# Example: model x on all remaining columns of `training` and predict `test`.
predict.drop(f = x ~ ., dat = training, newdat = test)


best regards

Andreas
David Winsemius wrote: