Hello,
For various reasons I need the MCMCglmm package for my model specification, but it seems to be useful to incorporate splines in my model. I want to employ the same procedure as in the gamm4 package. I try to incorporate the splines via Jarrod's function, which is based on the mgcv package. I attached my data and code.
For the thin-plate splines the fixed parts are similar between MCMCglmm and gamm4, but the random parts differ. It is worth noting that the predictions are the same.
When using P-splines instead of thin-plate splines, both the fixed parts and the random parts differ. I guess I made a mistake somewhere.
I also add two versions where more than one covariate has spline functions.
Is my code OK, or is there a mistake? Why do the results for the P-spline differ by a substantial amount?
R-Code:
## Load modelling packages: gamm4 (GAMMs via lme4) and MCMCglmm (Bayesian
## mixed models); lme4/nlme supply the underlying mixed-model machinery.
library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)
### read data: y = response variable; x_1...x_8 covariates (x_8=dummy); country = groups
data <- read.csv2("....data_test.csv")
# NOTE(review): attach() puts the columns on the search path so that x_1 ...
# x_8 can be used as bare names below. This is fragile (name masking, stale
# copies after data is modified) — prefer data$x_1 / with(data, ...) access.
attach(data)
### grand mean centering of covariates (x_8 not centered [dummy])
# Centre a numeric vector on its grand mean.
#
# Args:
#   gm:    numeric vector to centre.
#   na.rm: drop NAs when computing the mean (default FALSE, which matches
#          the original behaviour: any NA makes the whole result NA).
# Returns: gm - mean(gm), a numeric vector the same length as gm.
grand_mean <- function(gm, na.rm = FALSE) {
  gm - mean(gm, na.rm = na.rm)
}
# Grand-mean centre covariates x_1 ... x_7, storing them as x_1_c ... x_7_c
# (x_8 is a dummy and is deliberately left uncentred).
for (idx in seq_len(7)) {
  raw_name <- paste0("x_", idx)
  data[[paste0(raw_name, "_c")]] <- grand_mean(data[[raw_name]])
}
### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)
# Baseline model (no splines): varying intercept for country.
# Weak inverse-Wishart prior on the residual (R) and country (G1) variances.
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002)))
# Use TRUE/FALSE, not T/F: T and F are ordinary variables that can be
# reassigned, silently changing the meaning of these flags.
mc <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + x_5_c + x_6_c + x_7_c + x_8 + x_8:x_7_c,
               random = ~country, data = data, family = "gaussian",
               verbose = FALSE, prior = prior_mc1,
               burnin = 10000, nitt = 110000, thin = 10,
               saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc)
# same model in gamm4
gamm <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + x_5_c + x_6_c + x_7_c + x_8 + x_8:x_7_c,
              data = data, random = ~(1|country))
summary(gamm$mer)
### identical output!!! ###
### use standard splines from the mgcv package (thin-plate splines)
### use Jarrod's function
library(mgcv)
# Set up design matrices for a reparametrised mgcv smooth so it can be
# fitted in MCMCglmm:
#   p = FALSE -> the unpenalised (null-space / fixed-effect) columns,
#   p = TRUE  -> the penalised columns, rescaled so their coefficients are
#                i.i.d. (suitable for idv() random terms in MCMCglmm).
# `data` is used to set up the basis; the returned matrix is evaluated at
# `dataX` (defaults to `data`).
spl2 <- function(formula, data, p = TRUE, dataX = data) {
  # smoothCon() needs dataX with as many rows as data, so pad dataX with
  # zero rows and strip the padding again before returning.
  aug <- nrow(data) - nrow(dataX)
  if (aug != 0) {
    if (aug < 0) {
      stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
    } else {
      augX <- matrix(0, aug, ncol(dataX))
      colnames(augX) <- colnames(dataX)
      dataX <- rbind(dataX, augX)
    }
  }
  smooth.spec.object <- interpret.gam(formula)$smooth.spec[[1]]
  sm <- smoothCon(smooth.spec.object, data = data, knots = NULL,
                  absorb.cons = TRUE, dataX = dataX)[[1]]
  # Eigen-decompose the penalty S = U D U' and split the basis into the
  # penalised range space (eigenvalue > 0) and the unpenalised null space.
  Sed <- eigen(sm$S[[1]])
  Su <- Sed$vectors
  Sd <- Sed$values
  nonzeros <- which(Sd > sqrt(.Machine$double.eps))
  if (p) {
    # BUG FIX: diag(x) on a length-1 numeric builds a floor(x)-sized
    # identity matrix instead of a 1x1 scaling matrix; pin the dimension
    # with nrow= so a single non-zero eigenvalue is handled correctly.
    Zn <- sm$X %*% Su[, nonzeros, drop = FALSE] %*%
      diag(1 / sqrt(Sd[nonzeros]), nrow = length(nonzeros))
  } else {
    Zn <- sm$X[, -nonzeros, drop = FALSE]
  }
  # Drop the zero-padding rows added above.
  Zn[1:(nrow(data) - aug), , drop = FALSE]
}
# A function for setting up fixed (p=FALSE) and random (p=TRUE) effect design matrices
# for a reparametrised smooth.
# data is used to set up the basis, but the design matrices are based on dataX
### Thin-plate spline for ONE covariate, x_5_c:
### Xo = unpenalised (fixed) part, Zn = penalised (random) part.
data$Xo <- spl2(~s(x_5_c, k = 10), data = data, p = FALSE)
data$Zn <- spl2(~s(x_5_c, k = 10), data = data)
# MCMCglmm: country intercept plus i.i.d. spline coefficients via idv().
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002),
                           G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + Xo + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                   random = ~country + idv(Zn), data = data, family = "gaussian",
                   verbose = FALSE, prior = prior_mc1,
                   burnin = 10000, nitt = 110000, thin = 10,
                   saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl)
# gamm4 equivalent: the smooth is specified directly in the formula.
gamm_spl <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + s(x_5_c, k = 10) + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                  data = data, random = ~(1|country))
summary(gamm_spl$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl$mer), predict(mc_spl, marginal = NULL))
### results for the fixed part are similar, but not for the random parts
### Prediction are the same
### ???
### P-splines (bs = "ps") for covariate x_5_c, same structure as above.
data$Xo_p <- spl2(~s(x_5_c, bs = "ps", k = 10), data = data, p = FALSE)
data$Zn_p <- spl2(~s(x_5_c, bs = "ps", k = 10), data = data)
# MCMCglmm with country intercept + i.i.d. P-spline coefficients.
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002),
                           G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + Xo_p + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_p), data = data, family = "gaussian",
                     verbose = FALSE, prior = prior_mc1,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_p)
# gamm4 equivalent with the same P-spline basis.
gamm_spl_p <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + s(x_5_c, bs = "ps", k = 10) + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                    data = data, random = ~(1|country))
summary(gamm_spl_p$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal = NULL))
### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???
#-----------------------------------------------------------------------
### Build thin-plate spline design matrices for covariates x_1_c ... x_7_c:
### Xo_j holds the unpenalised (fixed) part, Zn_j the penalised (random)
### part of the smooth of x_j_c (k = 10 basis functions each).
for (j in seq_len(7)) {
  cov_name <- paste0("x_", j, "_c")
  spline_formula <- as.formula(paste0("~s(", cov_name, ",k=10)"))
  data[[paste0("Xo_", j)]] <- spl2(spline_formula, data = data, p = FALSE)
  data[[paste0("Zn_", j)]] <- spl2(spline_formula, data = data)
}
# Version a: one variance component per smooth (country intercept + seven
# idv() spline penalties -> G1 ... G8), so each smooth is penalised
# independently. This chain will take some time to run.
# The eight identical weak priors are built programmatically; TRUE/FALSE
# spelled out (T/F are reassignable variables, not keywords).
g_term <- list(V = diag(1), nu = 0.002)
prior_mc_spl_a <- list(R = list(V = 1, nu = 0.002),
                       G = setNames(rep(list(g_term), 8), paste0("G", 1:8)))
mc_spl_a <- MCMCglmm(y ~ Xo_1 + Xo_2 + Xo_3 + Xo_4 + Xo_5 + Xo_6 + Xo_7 + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_1) + idv(Zn_2) + idv(Zn_3) + idv(Zn_4) + idv(Zn_5) + idv(Zn_6) + idv(Zn_7),
                     data = data, family = "gaussian", verbose = FALSE, prior = prior_mc_spl_a,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_a)
# gamm4 equivalent: all seven smooths in the formula.
gamm_spl_a <- gamm4(y ~ s(x_1_c, k = 10) + s(x_2_c, k = 10) + s(x_3_c, k = 10) + s(x_4_c, k = 10) + s(x_5_c, k = 10) + s(x_6_c, k = 10) + s(x_7_c, k = 10) + x_8 + x_8:x_7_c,
                    data = data, random = ~(1|country))
summary(gamm_spl_a$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal = NULL))
### fixed parts are similar, but random parts differ
### Predictions are nearly the same
### ???
# Version b: cbind all Zn matrices into ONE design matrix, i.e. a single
# shared variance component that penalises all seven smooths to the same
# degree (unlike version a, which penalises each independently).
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
Zn_b <- cbind(data$Zn_1, data$Zn_2, data$Zn_3, data$Zn_4, data$Zn_5, data$Zn_6, data$Zn_7)
prior_mc_spl_b <- list(R = list(V = 1, nu = 0.002),
                       G = list(G1 = list(V = diag(1), nu = 0.002),
                                G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y ~ Xo_1 + Xo_2 + Xo_3 + Xo_4 + Xo_5 + Xo_6 + Xo_7 + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_b), data = data, family = "gaussian",
                     verbose = FALSE, prior = prior_mc_spl_b,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_b)
# Compare fitted values from version a and version b.
plot(predict(mc_spl_b, marginal = NULL), predict(mc_spl_a, marginal = NULL))
### Prediction are the same between Version a and Version b
### So, are both methods ok???
best regards,
Linus Holtermann
Hamburgisches WeltWirtschaftsInstitut gemeinnützige GmbH (HWWI)
Heimhuder Straße 71
20148 Hamburg
Tel +49-(0)40-340576-336
Fax+49-(0)40-340576-776
Internet: www.hwwi.org
Email: holtermann at hwwi.org
AmtsgerichtHamburg HRB 94303
Geschäftsführer: Prof. Dr. Thomas Straubhaar, Gunnar Geyer
Umsatzsteuer-ID: DE 241849425
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: test_splines_MCMCglmm.R
URL: <https://stat.ethz.ch/pipermail/r-sig-mixed-models/attachments/20140306/8f97e5b3/attachment.pl>
Splines in MCMCglmm
2 messages · Linus Holtermann, Jarrod Hadfield
Hi Linus, I think the splines estimated in MCMCglmm are agreeing with gamm4. After all, the predictions are almost identical. There are differences in the estimates due to priors and/or Monte Carlo error but they're pretty subtle in the context of the uncertainty around the estimates. The splines are being heavily penalised in both, perhaps a bit more in MCMCglmm where the IW prior puts higher mass on variances of 0. Parameter-expanded priors may give more similar results. The agreement between the predictions from models spl_a and spl_b is not a general phenomenon: in the first model you are penalising all splines independently, in the latter you are penalising them all to the same degree. The reason that they both agree is that there is little evidence that the relationships differ from linearity for any of the covariates. For people wishing to use spl2 to fit splines in MCMCglmm, please be aware I have not tested this function very carefully. Cheers, Jarrod Quoting Linus Holtermann <holtermann at hwwi.org> on Thu, 6 Mar 2014 11:53:50 +0100:
Hello,
For various reasons i need the MCMCglmm-package for my model
specification, but it seems to be useful to incorporate splines in
my model. I want to employ the same procedure as in the
gamm4-package. I try to incorporate the splines via Jarrod?s
function, which based on the mgvc-package. I attached my data and
codes.
For the thin-plate-splines the fixed parts are similiar between
MCMCglmm and gamm4, but the random parts differ. It is worth noting,
that the predictions are the same.
When using P-Splines instead of thin-plate-splines both fixed parts
and random parts differ. I guess, i made a mistake somewhere.
I also add two verions where more than one covariate has spline-functions.
Are my codes ok or is there a mistake? Why does the results for
P-Spline differ by a substantial amount?
R-Code:
library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)
### read data: y = respones variable; x_1...x_8 covariates
(x_8=dummy); country = groups
data <- read.csv2("....data_test.csv")
attach(data)
### grand mean centering of covariates (x_8 not centered [dummy])
grand_mean <- function(gm){
gm - mean(gm)
}
data$x_1_c <- grand_mean(x_1)
data$x_2_c <- grand_mean(x_2)
data$x_3_c <- grand_mean(x_3)
data$x_4_c <- grand_mean(x_4)
data$x_5_c <- grand_mean(x_5)
data$x_6_c <- grand_mean(x_6)
data$x_7_c <- grand_mean(x_7)
### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002)))
mc <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country,data =data,family="gaussian" , verbose = F,prior =
prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc)
# same model in gamm4
gamm <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm$mer)
### identical output!!! ###
### use standard splines in the mgvc-package(thin plate splines)
### use Jarrod?s function
library(mgcv)
spl2<-function(formula, data, p=TRUE, dataX=data){
aug<-nrow(data)-nrow(dataX)
if(aug!=0){
if(aug<0){
stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
}else{
augX<-matrix(0, aug, ncol(dataX))
colnames(augX)<-colnames(dataX)
dataX<-rbind(dataX, augX)
}
}
smooth.spec.object<-interpret.gam(formula)$smooth.spec[[1]]
sm<-smoothCon(smooth.spec.object, data=data,
knots=NULL,absorb.cons=TRUE, dataX=dataX)[[1]]
Sed<-eigen(sm$S[[1]])
Su<-Sed$vectors
Sd<-Sed$values
nonzeros <- which(Sd > sqrt(.Machine$double.eps))
if(p){
Zn<-sm$X%*%Su[,nonzeros, drop=FALSE]%*%diag(1/sqrt(Sd[nonzeros]))
}else{
Zn<-sm$X[,-nonzeros, drop=FALSE]
}
return(Zn[1:(nrow(data)-aug),,drop=FALSE])
}
# A function for setting up fixed (p=FALSE) and random (p=TRUE)
effect design matrices
# for a reparmetrised smooth.
# data is used to set up the basis, but the design matrices are
based on dataX
### set thin plate spline for ONE COVARIATE x_5_c ###
data$Xo<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn<-spl2(~s(x_5_c,k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn),data =data,family="gaussian" , verbose =
F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl)
# gamm4
gamm_spl <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl$mer)
plot(predict(gamm_spl$mer), predict(mc_spl, marginal=NULL))
### results for fixed part a similiar, but not for random parts
### Prediction are the same
### ???
### use P-Splines for covariate x_5_c
data$Xo_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data, p=F)
data$Zn_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <-
MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo_p+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn_p),data =data,family="gaussian" , verbose =
F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_p)
# gamm4
gamm_spl_p <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,bs="ps",k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_p$mer)
plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal=NULL))
### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???
#-----------------------------------------------------------------------
### set thin plate splines for covariates x_1_c ... x_7_c
data$Xo_1<-spl2(~s(x_1_c,k=10), data=data, p=F)
data$Zn_1<-spl2(~s(x_1_c,k=10), data=data)
data$Xo_2<-spl2(~s(x_2_c,k=10), data=data, p=F)
data$Zn_2<-spl2(~s(x_2_c,k=10), data=data)
data$Xo_3<-spl2(~s(x_3_c,k=10), data=data, p=F)
data$Zn_3<-spl2(~s(x_3_c,k=10), data=data)
data$Xo_4<-spl2(~s(x_4_c,k=10), data=data, p=F)
data$Zn_4<-spl2(~s(x_4_c,k=10), data=data)
data$Xo_5<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn_5<-spl2(~s(x_5_c,k=10), data=data)
data$Xo_6<-spl2(~s(x_6_c,k=10), data=data, p=F)
data$Zn_6<-spl2(~s(x_6_c,k=10), data=data)
data$Xo_7<-spl2(~s(x_7_c,k=10), data=data, p=F)
data$Zn_7<-spl2(~s(x_7_c,k=10), data=data)
# Version a (that will take some time)
prior_mc_spl_a <- list(R = list(V = 1, nu=0.002), G = list(G1 =
list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002),
G3 = list(V = diag(1), nu = 0.002),G4 = list(V = diag(1), nu =
0.002),G5 = list(V = diag(1), nu = 0.002),
G6 = list(V = diag(1), nu = 0.002),G7 = list(V = diag(1), nu =
0.002),G8 = list(V = diag(1), nu = 0.002)))
mc_spl_a <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random =
~country+idv(Zn_1)+idv(Zn_2)+idv(Zn_3)+idv(Zn_4)+idv(Zn_5)+idv(Zn_6)+idv(Zn_7),
data =data,family="gaussian" , verbose = F,prior = prior_mc_spl_a,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_a)
gamm_spl_a <-
gamm4(y~s(x_1_c,k=10)+s(x_2_c,k=10)+s(x_3_c,k=10)+s(x_4_c,k=10)+s(x_5_c,k=10)+s(x_6_c,k=10)+s(x_7_c,k=10)+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_a$mer)
plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal=NULL))
### fixed parts are similiar, but random parts differ
### Predictions are nearly the same
### ???
# Version b: cbind Zns to one matrix
Zn_b <-
cbind(data$Zn_1,data$Zn_2,data$Zn_3,data$Zn_4,data$Zn_5,data$Zn_6,data$Zn_7)
prior_mc_spl_b <- list(R = list(V = 1, nu=0.002), G = list(G1 =
list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random = ~country+idv(Zn_b),data =data,family="gaussian" , verbose =
F,prior = prior_mc_spl_b,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_b)
plot(predict(mc_spl_b, marginal=NULL), predict(mc_spl_a, marginal=NULL))
### Prediction are the same between Version a and Version b
### So, are both methods ok???
best regards,
Linus Holtermann
Hamburgisches WeltWirtschaftsInstitut gemeinnützige GmbH (HWWI)
Heimhuder Straße 71
20148 Hamburg
Tel +49-(0)40-340576-336
Fax+49-(0)40-340576-776
Internet: www.hwwi.org
Email: holtermann at hwwi.org
AmtsgerichtHamburg HRB 94303
Geschäftsführer: Prof. Dr. Thomas Straubhaar, Gunnar Geyer
Umsatzsteuer-ID: DE 241849425
The University of Edinburgh is a charitable body, registered in Scotland, with registration number SC005336.