Skip to content

Plotting survival curves after multiple imputation

3 messages · Robert Long, Frank E Harrell Jr

#
I am working with some survival data with missing values.

I am using the mice package to do multiple imputation.

I have found code in this thread which handles pooling of the MI results:
https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html

Now I would like to plot a survival curve using the pooled results.

Here is a reproducible example:

require(survival)
require(mice)

set.seed(2)

dt <- colon

fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)

dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
plot(survfit(fit, newdata=dummy) )

# now create some missing values in the data
dt <- colon
dt$rx[sample(1:nrow(dt),50)] <- NA
dt$sex [sample(1:nrow(dt),50)] <- NA
dt$age[sample(1:nrow(dt),50)] <- NA

imp <-mice(dt)

fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
# Note, this function is defined below...

imputed=summary.impute(pool.impute(fit.imp))
print(imputed)

# now, how to plot a survival curve with the pooled results ?




########## begin code from linked thread above

coxph.mids <- function (formula, data, ...) {

     call <- match.call()
     if (!is.mids(data)) stop("The data must have class mids")

     analyses <- as.list(1:data$m)

     for (i in 1:data$m) {
       data.i        <- complete(data, i)
       analyses[[i]] <- coxph(formula, data = data.i, ...)
     }

     object <- list(call = call, call1 = data$call,
                    nmis = data$nmis, analyses = analyses)

     return(object)
}

pool.impute <- function (object, method = "smallsample") {

     if ((m <- length(object$analyses)) < 2)
       stop("At least two imputations are needed for pooling.\n")

     analyses <- object$analyses

     k     <- length(coef(analyses[[1]]))
     names <- names(coef(analyses[[1]]))
     qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
     u     <- array(NA, dim = c(m, k, k),
                    dimnames = list(1:m, names, names))

     for (i in 1:m) {
       fit       <- analyses[[i]]
       qhat[i, ] <- coef(fit)
       u[i, , ]  <- vcov(fit)
     }

     qbar <- apply(qhat, 2, mean)
     ubar <- apply(u, c(2, 3), mean)
     e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
     b <- (t(e) %*% e)/(m - 1)
     t <- ubar + (1 + 1/m) * b
     r <- (1 + 1/m) * diag(b/ubar)
     f <- (1 + 1/m) * diag(b/t)
     df <- (m - 1) * (1 + 1/r)^2

     if (method == "smallsample") {

       if( any( class(fit) == "coxph" ) ){

         ### this loop is the hack for survival analysis ###

         status   <- fit$y[ , 2]
         n.events <- sum(status == max(status))
         p        <- length( coefficients( fit )  )
         dfc      <- n.events - p

       } else {

         dfc <- fit$df.residual
       }

       df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
     }

     names(r) <- names(df) <- names(f) <- names
     fit <- list(call = call, call1 = object$call, call2 = object$call1,
                 nmis = object$nmis, m = m, qhat = qhat, u = u,
                 qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
                 f = f)

     return(fit)
}

summary.impute <- function(object){

      if (!is.null(object$call1)){
        cat("Call: ")
        dput(object$call1)
      }

      est  <- object$qbar
      se   <- sqrt(diag(object$t))
      tval <- est/se
      df   <- object$df
      pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)

      coefmat <- cbind(est, se, tval, pval)
      colnames(coefmat) <- c("Estimate", "Std. Error",
                                           "t value", "Pr(>|t|)")

      cat("\nCoefficients:\n")
      printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )

      cat("\nFraction of information about the coefficients
                                      missing due to nonresponse:","\n")
      print(object$f)

      ans <- list( coefficients=coefmat, df=df,
                   call=object$call1, fracinfo.miss=object$f )
      invisible( ans )
  }

### end code from linked thread above
9 days later
#
Can anyone help with this ?
On 14/02/2013 14:07, Robert Long wrote:
#
I haven't seen anyone solve this.  I think it would be reasonable to do a
time point by time point averaging (over multiple imputations) of the
underlying survival curve, although there is some question about whether to
"freeze" the centering constant (sum of beta times covariate mean).  What
will be harder to get is confidence intervals for predicted probabilities
after multiple imputation.  I hope someone will take up the challenge.
Frank

Robert Long wrote

            
-----
Frank Harrell
Department of Biostatistics, Vanderbilt University
--
View this message in context: http://r.789695.n4.nabble.com/Plotting-survival-curves-after-multiple-imputation-tp4658552p4659505.html
Sent from the R help mailing list archive at Nabble.com.