I am working with some survival data with missing values. I am using the mice package to do multiple imputation. I have found code in this thread which handles pooling of the MI results: https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html Now I would like to plot a survival curve using the pooled results. Here is a reproducible example: require(survival) require(mice) set.seed(2) dt <- colon fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon) dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40)) plot(survfit(fit, newdata=dummy) ) # now create some missing values in the data dt <- colon dt$rx[sample(1:nrow(dt),50)] <- NA dt$sex [sample(1:nrow(dt),50)] <- NA dt$age[sample(1:nrow(dt),50)] <- NA imp <-mice(dt) fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp) # Note, this function is defined below... imputed=summary.impute(pool.impute(fit.imp)) print(imputed) # now, how to plot a survival curve with the pooled results ? ########## begin code from linked thread above coxph.mids <- function (formula, data, ...) { call <- match.call() if (!is.mids(data)) stop("The data must have class mids") analyses <- as.list(1:data$m) for (i in 1:data$m) { data.i <- complete(data, i) analyses[[i]] <- coxph(formula, data = data.i, ...) } object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) return(object) } pool.impute <- function (object, method = "smallsample") { if ((m <- length(object$analyses)) < 2) stop("At least two imputations are needed for pooling.\n") analyses <- object$analyses k <- length(coef(analyses[[1]])) names <- names(coef(analyses[[1]])) qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names)) u <- array(NA, dim = c(m, k, k), dimnames = list(1:m, names, names)) for (i in 1:m) { fit <- analyses[[i]] qhat[i, ] <- coef(fit) u[i, , ] <- vcov(fit) } qbar <- apply(qhat, 2, mean) ubar <- apply(u, c(2, 3), mean) e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE) b <- (t(e) %*% e)/(m - 1) t <- ubar + (1 + 1/m) * b r <- (1 + 1/m) * diag(b/ubar) f <- (1 + 1/m) * diag(b/t) df <- (m - 1) * (1 + 1/r)^2 if (method == "smallsample") { if( any( class(fit) == "coxph" ) ){ ### this loop is the hack for survival analysis ### status <- fit$y[ , 2] n.events <- sum(status == max(status)) p <- length( coefficients( fit ) ) dfc <- n.events - p } else { dfc <- fit$df.residual } df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df) } names(r) <- names(df) <- names(f) <- names fit <- list(call = call, call1 = object$call, call2 = object$call1, nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df, f = f) return(fit) } summary.impute <- function(object){ if (!is.null(object$call1)){ cat("Call: ") dput(object$call1) } est <- object$qbar se <- sqrt(diag(object$t)) tval <- est/se df <- object$df pval <- 2 * pt(abs(tval), df, lower.tail = FALSE) coefmat <- cbind(est, se, tval, pval) colnames(coefmat) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") cat("\nCoefficients:\n") printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T ) cat("\nFraction of information about the coefficients missing due to nonresponse:","\n") print(object$f) ans <- list( coefficients=coefmat, df=df, call=object$call1, fracinfo.miss=object$f ) invisible( ans ) } ### end code from linked thread above
Plotting survival curves after multiple imputation
3 messages · Robert Long, Frank E Harrell Jr
9 days later
Can anyone help with this ?
On 14/02/2013 14:07, Robert Long wrote:
I am working with some survival data with missing values. I am using the mice package to do multiple imputation. I have found code in this thread which handles pooling of the MI results: https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html Now I would like to plot a survival curve using the pooled results. Here is a reproducible example: require(survival) require(mice) set.seed(2) dt <- colon fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon) dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40)) plot(survfit(fit, newdata=dummy) ) # now create some missing values in the data dt <- colon dt$rx[sample(1:nrow(dt),50)] <- NA dt$sex [sample(1:nrow(dt),50)] <- NA dt$age[sample(1:nrow(dt),50)] <- NA imp <-mice(dt) fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp) # Note, this function is defined below... imputed=summary.impute(pool.impute(fit.imp)) print(imputed) # now, how to plot a survival curve with the pooled results ? ########## begin code from linked thread above coxph.mids <- function (formula, data, ...) { call <- match.call() if (!is.mids(data)) stop("The data must have class mids") analyses <- as.list(1:data$m) for (i in 1:data$m) { data.i <- complete(data, i) analyses[[i]] <- coxph(formula, data = data.i, ...) } object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) return(object) } pool.impute <- function (object, method = "smallsample") { if ((m <- length(object$analyses)) < 2) stop("At least two imputations are needed for pooling.\n") analyses <- object$analyses k <- length(coef(analyses[[1]])) names <- names(coef(analyses[[1]])) qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names)) u <- array(NA, dim = c(m, k, k), dimnames = list(1:m, names, names)) for (i in 1:m) { fit <- analyses[[i]] qhat[i, ] <- coef(fit) u[i, , ] <- vcov(fit) } qbar <- apply(qhat, 2, mean) ubar <- apply(u, c(2, 3), mean) e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE) b <- (t(e) %*% e)/(m - 1) t <- ubar + (1 + 1/m) * b r <- (1 + 1/m) * diag(b/ubar) f <- (1 + 1/m) * diag(b/t) df <- (m - 1) * (1 + 1/r)^2 if (method == "smallsample") { if( any( class(fit) == "coxph" ) ){ ### this loop is the hack for survival analysis ### status <- fit$y[ , 2] n.events <- sum(status == max(status)) p <- length( coefficients( fit ) ) dfc <- n.events - p } else { dfc <- fit$df.residual } df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df) } names(r) <- names(df) <- names(f) <- names fit <- list(call = call, call1 = object$call, call2 = object$call1, nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df, f = f) return(fit) } summary.impute <- function(object){ if (!is.null(object$call1)){ cat("Call: ") dput(object$call1) } est <- object$qbar se <- sqrt(diag(object$t)) tval <- est/se df <- object$df pval <- 2 * pt(abs(tval), df, lower.tail = FALSE) coefmat <- cbind(est, se, tval, pval) colnames(coefmat) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") cat("\nCoefficients:\n") printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T ) cat("\nFraction of information about the coefficients missing due to nonresponse:","\n") print(object$f) ans <- list( coefficients=coefmat, df=df, call=object$call1, fracinfo.miss=object$f ) invisible( ans ) } ### end code from linked thread above
______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
I haven't seen anyone solve this. I think it would be reasonable to do a time point by time point averaging (over multiple imputations) of the underlying survival curve, although there is some question about whether to "freeze" the centering constant (sum of beta times covariate mean). What will be harder to get is confidence intervals for predicted probabilities after multiple imputation. I hope someone will take up the challenge. Frank Robert Long wrote
I am working with some survival data with missing values. I am using the mice package to do multiple imputation. I have found code in this thread which handles pooling of the MI results: https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html Now I would like to plot a survival curve using the pooled results. Here is a reproducible example: require(survival) require(mice) set.seed(2) dt <- colon fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon) dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40)) plot(survfit(fit, newdata=dummy) ) # now create some missing values in the data dt <- colon dt$rx[sample(1:nrow(dt),50)] <- NA dt$sex [sample(1:nrow(dt),50)] <- NA dt$age[sample(1:nrow(dt),50)] <- NA imp <-mice(dt) fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp) # Note, this function is defined below... imputed=summary.impute(pool.impute(fit.imp)) print(imputed) # now, how to plot a survival curve with the pooled results ? ########## begin code from linked thread above coxph.mids <- function (formula, data, ...) { call <- match.call() if (!is.mids(data)) stop("The data must have class mids") analyses <- as.list(1:data$m) for (i in 1:data$m) { data.i <- complete(data, i) analyses[[i]] <- coxph(formula, data = data.i, ...) } object <- list(call = call, call1 = data$call, nmis = data$nmis, analyses = analyses) return(object) } pool.impute <- function (object, method = "smallsample") { if ((m <- length(object$analyses)) < 2) stop("At least two imputations are needed for pooling.\n") analyses <- object$analyses k <- length(coef(analyses[[1]])) names <- names(coef(analyses[[1]])) qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names)) u <- array(NA, dim = c(m, k, k), dimnames = list(1:m, names, names)) for (i in 1:m) { fit <- analyses[[i]] qhat[i, ] <- coef(fit) u[i, , ] <- vcov(fit) } qbar <- apply(qhat, 2, mean) ubar <- apply(u, c(2, 3), mean) e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE) b <- (t(e) %*% e)/(m - 1) t <- ubar + (1 + 1/m) * b r <- (1 + 1/m) * diag(b/ubar) f <- (1 + 1/m) * diag(b/t) df <- (m - 1) * (1 + 1/r)^2 if (method == "smallsample") { if( any( class(fit) == "coxph" ) ){ ### this loop is the hack for survival analysis ### status <- fit$y[ , 2] n.events <- sum(status == max(status)) p <- length( coefficients( fit ) ) dfc <- n.events - p } else { dfc <- fit$df.residual } df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df) } names(r) <- names(df) <- names(f) <- names fit <- list(call = call, call1 = object$call, call2 = object$call1, nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df, f = f) return(fit) } summary.impute <- function(object){ if (!is.null(object$call1)){ cat("Call: ") dput(object$call1) } est <- object$qbar se <- sqrt(diag(object$t)) tval <- est/se df <- object$df pval <- 2 * pt(abs(tval), df, lower.tail = FALSE) coefmat <- cbind(est, se, tval, pval) colnames(coefmat) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") cat("\nCoefficients:\n") printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T ) cat("\nFraction of information about the coefficients missing due to nonresponse:","\n") print(object$f) ans <- list( coefficients=coefmat, df=df, call=object$call1, fracinfo.miss=object$f ) invisible( ans ) } ### end code from linked thread above
______________________________________________
R-help@
mailing list
https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
----- Frank Harrell Department of Biostatistics, Vanderbilt University -- View this message in context: http://r.789695.n4.nabble.com/Plotting-survival-curves-after-multiple-imputation-tp4658552p4659505.html Sent from the R help mailing list archive at Nabble.com.