Hello,
For various reasons I need the MCMCglmm package for my model specification, but it seems to be useful to incorporate splines in my model. I want to employ the same procedure as in the gamm4 package. I try to incorporate the splines via Jarrod's function, which is based on the mgcv package. I attached my data and code.
For the thin-plate splines the fixed parts are similar between MCMCglmm and gamm4, but the random parts differ. It is worth noting that the predictions are the same.
When using P-splines instead of thin-plate splines, both the fixed parts and the random parts differ. I guess I made a mistake somewhere.
I also add two versions where more than one covariate has spline functions.
Is my code OK, or is there a mistake? Why do the results for the P-spline differ by a substantial amount?
R-Code:
## Load modelling packages: gamm4 (GAMMs via lme4) and MCMCglmm (Bayesian
## mixed models); lme4/nlme supply the underlying mixed-model machinery.
library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)
### read data: y = response variable; x_1...x_8 covariates (x_8=dummy); country = groups
data <- read.csv2("....data_test.csv")
# NOTE(review): attach() puts the columns on the search path so that x_1 ...
# x_8 can be used as bare names below. This is fragile (name masking, stale
# copies after data is modified) — prefer data$x_1 / with(data, ...) access.
attach(data)
### grand mean centering of covariates (x_8 not centered [dummy])
# Centre a numeric vector on its grand mean.
#
# Args:
#   gm:    numeric vector to centre.
#   na.rm: drop NAs when computing the mean (default FALSE, which matches
#          the original behaviour: any NA makes the whole result NA).
# Returns: gm - mean(gm), a numeric vector the same length as gm.
grand_mean <- function(gm, na.rm = FALSE) {
  gm - mean(gm, na.rm = na.rm)
}
# Grand-mean centre covariates x_1 ... x_7, storing them as x_1_c ... x_7_c
# (x_8 is a dummy and is deliberately left uncentred).
for (idx in seq_len(7)) {
  raw_name <- paste0("x_", idx)
  data[[paste0(raw_name, "_c")]] <- grand_mean(data[[raw_name]])
}
### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)
# Baseline model (no splines): varying intercept for country.
# Weak inverse-Wishart prior on the residual (R) and country (G1) variances.
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002)))
# Use TRUE/FALSE, not T/F: T and F are ordinary variables that can be
# reassigned, silently changing the meaning of these flags.
mc <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + x_5_c + x_6_c + x_7_c + x_8 + x_8:x_7_c,
               random = ~country, data = data, family = "gaussian",
               verbose = FALSE, prior = prior_mc1,
               burnin = 10000, nitt = 110000, thin = 10,
               saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc)
# same model in gamm4
gamm <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + x_5_c + x_6_c + x_7_c + x_8 + x_8:x_7_c,
              data = data, random = ~(1|country))
summary(gamm$mer)
### identical output!!! ###
### use standard splines from the mgcv package (thin-plate splines)
### use Jarrod's function
library(mgcv)
# Set up design matrices for a reparametrised mgcv smooth so it can be
# fitted in MCMCglmm:
#   p = FALSE -> the unpenalised (null-space / fixed-effect) columns,
#   p = TRUE  -> the penalised columns, rescaled so their coefficients are
#                i.i.d. (suitable for idv() random terms in MCMCglmm).
# `data` is used to set up the basis; the returned matrix is evaluated at
# `dataX` (defaults to `data`).
spl2 <- function(formula, data, p = TRUE, dataX = data) {
  # smoothCon() needs dataX with as many rows as data, so pad dataX with
  # zero rows and strip the padding again before returning.
  aug <- nrow(data) - nrow(dataX)
  if (aug != 0) {
    if (aug < 0) {
      stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
    } else {
      augX <- matrix(0, aug, ncol(dataX))
      colnames(augX) <- colnames(dataX)
      dataX <- rbind(dataX, augX)
    }
  }
  smooth.spec.object <- interpret.gam(formula)$smooth.spec[[1]]
  sm <- smoothCon(smooth.spec.object, data = data, knots = NULL,
                  absorb.cons = TRUE, dataX = dataX)[[1]]
  # Eigen-decompose the penalty S = U D U' and split the basis into the
  # penalised range space (eigenvalue > 0) and the unpenalised null space.
  Sed <- eigen(sm$S[[1]])
  Su <- Sed$vectors
  Sd <- Sed$values
  nonzeros <- which(Sd > sqrt(.Machine$double.eps))
  if (p) {
    # BUG FIX: diag(x) on a length-1 numeric builds a floor(x)-sized
    # identity matrix instead of a 1x1 scaling matrix; pin the dimension
    # with nrow= so a single non-zero eigenvalue is handled correctly.
    Zn <- sm$X %*% Su[, nonzeros, drop = FALSE] %*%
      diag(1 / sqrt(Sd[nonzeros]), nrow = length(nonzeros))
  } else {
    Zn <- sm$X[, -nonzeros, drop = FALSE]
  }
  # Drop the zero-padding rows added above.
  Zn[1:(nrow(data) - aug), , drop = FALSE]
}
# A function for setting up fixed (p=FALSE) and random (p=TRUE) effect design matrices
# for a reparametrised smooth.
# data is used to set up the basis, but the design matrices are based on dataX
### Thin-plate spline for ONE covariate, x_5_c:
### Xo = unpenalised (fixed) part, Zn = penalised (random) part.
data$Xo <- spl2(~s(x_5_c, k = 10), data = data, p = FALSE)
data$Zn <- spl2(~s(x_5_c, k = 10), data = data)
# MCMCglmm: country intercept plus i.i.d. spline coefficients via idv().
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002),
                           G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + Xo + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                   random = ~country + idv(Zn), data = data, family = "gaussian",
                   verbose = FALSE, prior = prior_mc1,
                   burnin = 10000, nitt = 110000, thin = 10,
                   saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl)
# gamm4 equivalent: the smooth is specified directly in the formula.
gamm_spl <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + s(x_5_c, k = 10) + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                  data = data, random = ~(1|country))
summary(gamm_spl$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl$mer), predict(mc_spl, marginal = NULL))
### results for the fixed part are similar, but not for the random parts
### Prediction are the same
### ???
### P-splines (bs = "ps") for covariate x_5_c, same structure as above.
data$Xo_p <- spl2(~s(x_5_c, bs = "ps", k = 10), data = data, p = FALSE)
data$Zn_p <- spl2(~s(x_5_c, bs = "ps", k = 10), data = data)
# MCMCglmm with country intercept + i.i.d. P-spline coefficients.
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
prior_mc1 <- list(R = list(V = 1, nu = 0.002),
                  G = list(G1 = list(V = diag(1), nu = 0.002),
                           G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <- MCMCglmm(y ~ x_1_c + x_2_c + x_3_c + x_4_c + Xo_p + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_p), data = data, family = "gaussian",
                     verbose = FALSE, prior = prior_mc1,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_p)
# gamm4 equivalent with the same P-spline basis.
gamm_spl_p <- gamm4(y ~ x_1_c + x_2_c + x_3_c + x_4_c + s(x_5_c, bs = "ps", k = 10) + x_6_c + x_7_c + x_8 + x_8:x_7_c,
                    data = data, random = ~(1|country))
summary(gamm_spl_p$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal = NULL))
### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???
#-----------------------------------------------------------------------
### Build thin-plate spline design matrices for covariates x_1_c ... x_7_c:
### Xo_j holds the unpenalised (fixed) part, Zn_j the penalised (random)
### part of the smooth of x_j_c (k = 10 basis functions each).
for (j in seq_len(7)) {
  cov_name <- paste0("x_", j, "_c")
  spline_formula <- as.formula(paste0("~s(", cov_name, ",k=10)"))
  data[[paste0("Xo_", j)]] <- spl2(spline_formula, data = data, p = FALSE)
  data[[paste0("Zn_", j)]] <- spl2(spline_formula, data = data)
}
# Version a: one variance component per smooth (country intercept + seven
# idv() spline penalties -> G1 ... G8), so each smooth is penalised
# independently. This chain will take some time to run.
# The eight identical weak priors are built programmatically; TRUE/FALSE
# spelled out (T/F are reassignable variables, not keywords).
g_term <- list(V = diag(1), nu = 0.002)
prior_mc_spl_a <- list(R = list(V = 1, nu = 0.002),
                       G = setNames(rep(list(g_term), 8), paste0("G", 1:8)))
mc_spl_a <- MCMCglmm(y ~ Xo_1 + Xo_2 + Xo_3 + Xo_4 + Xo_5 + Xo_6 + Xo_7 + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_1) + idv(Zn_2) + idv(Zn_3) + idv(Zn_4) + idv(Zn_5) + idv(Zn_6) + idv(Zn_7),
                     data = data, family = "gaussian", verbose = FALSE, prior = prior_mc_spl_a,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_a)
# gamm4 equivalent: all seven smooths in the formula.
gamm_spl_a <- gamm4(y ~ s(x_1_c, k = 10) + s(x_2_c, k = 10) + s(x_3_c, k = 10) + s(x_4_c, k = 10) + s(x_5_c, k = 10) + s(x_6_c, k = 10) + s(x_7_c, k = 10) + x_8 + x_8:x_7_c,
                    data = data, random = ~(1|country))
summary(gamm_spl_a$mer)
# Compare fitted values from the two fits.
plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal = NULL))
### fixed parts are similar, but random parts differ
### Predictions are nearly the same
### ???
# Version b: cbind all Zn matrices into ONE design matrix, i.e. a single
# shared variance component that penalises all seven smooths to the same
# degree (unlike version a, which penalises each independently).
# TRUE/FALSE spelled out (T/F are reassignable variables, not keywords).
Zn_b <- cbind(data$Zn_1, data$Zn_2, data$Zn_3, data$Zn_4, data$Zn_5, data$Zn_6, data$Zn_7)
prior_mc_spl_b <- list(R = list(V = 1, nu = 0.002),
                       G = list(G1 = list(V = diag(1), nu = 0.002),
                                G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y ~ Xo_1 + Xo_2 + Xo_3 + Xo_4 + Xo_5 + Xo_6 + Xo_7 + x_8 + x_8:x_7_c,
                     random = ~country + idv(Zn_b), data = data, family = "gaussian",
                     verbose = FALSE, prior = prior_mc_spl_b,
                     burnin = 10000, nitt = 110000, thin = 10,
                     saveX = TRUE, saveZ = TRUE, saveXL = TRUE, pr = TRUE, pl = TRUE)
summary(mc_spl_b)
# Compare fitted values from version a and version b.
plot(predict(mc_spl_b, marginal = NULL), predict(mc_spl_a, marginal = NULL))
### Prediction are the same between Version a and Version b
### So, are both methods ok???
best regards,
Linus Holtermann
Hamburgisches WeltWirtschaftsInstitut gemeinnützige GmbH (HWWI)
Heimhuder Straße 71
20148 Hamburg
Tel +49-(0)40-340576-336
Fax+49-(0)40-340576-776
Internet: www.hwwi.org
Email: holtermann at hwwi.org
AmtsgerichtHamburg HRB 94303
Geschäftsführer: Prof. Dr. Thomas Straubhaar, Gunnar Geyer
Umsatzsteuer-ID: DE 241849425
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: test_splines_MCMCglmm.R
URL: <https://stat.ethz.ch/pipermail/r-sig-mixed-models/attachments/20140306/8f97e5b3/attachment.pl>
Splines in MCMCglmm
2 messages · Linus Holtermann, Jarrod Hadfield
Hi Linus, I think the splines estimated in MCMCglmm are agreeing with gamm4. After all, the predictions are almost identical. There are differences in the estimates due to priors and/or Monte Carlo error but they're pretty subtle in the context of the uncertainty around the estimates. The splines are being heavily penalised in both, perhaps a bit more in MCMCglmm where the IW prior puts higher mass on variances of 0. Parameter-expanded priors may give more similar results. The agreement between the predictions from models spl_a and spl_b is not a general phenomenon: in the first model you are penalising all splines independently, in the latter you are penalising them all to the same degree. The reason that they both agree is that there is little evidence that the relationships differ from linearity for any of the covariates. For people wishing to use spl2 to fit splines in MCMCglmm, please be aware I have not tested this function very carefully. Cheers, Jarrod Quoting Linus Holtermann <holtermann at hwwi.org> on Thu, 6 Mar 2014 11:53:50 +0100:
Hello,
For various reasons i need the MCMCglmm-package for my model
specification, but it seems to be useful to incorporate splines in
my model. I want to employ the same procedure as in the
gamm4-package. I try to incorporate the splines via Jarrod?s
function, which based on the mgvc-package. I attached my data and
codes.
For the thin-plate-splines the fixed parts are similiar between
MCMCglmm and gamm4, but the random parts differ. It is worth noting,
that the predictions are the same.
When using P-Splines instead of thin-plate-splines both fixed parts
and random parts differ. I guess, i made a mistake somewhere.
I also add two verions where more than one covariate has spline-functions.
Are my codes ok or is there a mistake? Why does the results for
P-Spline differ by a substantial amount?
R-Code:
library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)
### read data: y = respones variable; x_1...x_8 covariates
(x_8=dummy); country = groups
data <- read.csv2("....data_test.csv")
attach(data)
### grand mean centering of covariates (x_8 not centered [dummy])
grand_mean <- function(gm){
gm - mean(gm)
}
data$x_1_c <- grand_mean(x_1)
data$x_2_c <- grand_mean(x_2)
data$x_3_c <- grand_mean(x_3)
data$x_4_c <- grand_mean(x_4)
data$x_5_c <- grand_mean(x_5)
data$x_6_c <- grand_mean(x_6)
data$x_7_c <- grand_mean(x_7)
### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002)))
mc <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country,data =data,family="gaussian" , verbose = F,prior =
prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc)
# same model in gamm4
gamm <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm$mer)
### identical output!!! ###
### use standard splines in the mgvc-package(thin plate splines)
### use Jarrod?s function
library(mgcv)
spl2<-function(formula, data, p=TRUE, dataX=data){
aug<-nrow(data)-nrow(dataX)
if(aug!=0){
if(aug<0){
stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
}else{
augX<-matrix(0, aug, ncol(dataX))
colnames(augX)<-colnames(dataX)
dataX<-rbind(dataX, augX)
}
}
smooth.spec.object<-interpret.gam(formula)$smooth.spec[[1]]
sm<-smoothCon(smooth.spec.object, data=data,
knots=NULL,absorb.cons=TRUE, dataX=dataX)[[1]]
Sed<-eigen(sm$S[[1]])
Su<-Sed$vectors
Sd<-Sed$values
nonzeros <- which(Sd > sqrt(.Machine$double.eps))
if(p){
Zn<-sm$X%*%Su[,nonzeros, drop=FALSE]%*%diag(1/sqrt(Sd[nonzeros]))
}else{
Zn<-sm$X[,-nonzeros, drop=FALSE]
}
return(Zn[1:(nrow(data)-aug),,drop=FALSE])
}
# A function for setting up fixed (p=FALSE) and random (p=TRUE)
effect design matrices
# for a reparmetrised smooth.
# data is used to set up the basis, but the design matrices are
based on dataX
### set thin plate spline for ONE COVARIATE x_5_c ###
data$Xo<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn<-spl2(~s(x_5_c,k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn),data =data,family="gaussian" , verbose =
F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl)
# gamm4
gamm_spl <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl$mer)
plot(predict(gamm_spl$mer), predict(mc_spl, marginal=NULL))
### results for fixed part a similiar, but not for random parts
### Prediction are the same
### ???
### use P-Splines for covariate x_5_c
data$Xo_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data, p=F)
data$Zn_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <-
MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo_p+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn_p),data =data,family="gaussian" , verbose =
F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_p)
# gamm4
gamm_spl_p <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,bs="ps",k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_p$mer)
plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal=NULL))
### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???
#-----------------------------------------------------------------------
### set thin plate splines for covariates x_1_c ... x_7_c
data$Xo_1<-spl2(~s(x_1_c,k=10), data=data, p=F)
data$Zn_1<-spl2(~s(x_1_c,k=10), data=data)
data$Xo_2<-spl2(~s(x_2_c,k=10), data=data, p=F)
data$Zn_2<-spl2(~s(x_2_c,k=10), data=data)
data$Xo_3<-spl2(~s(x_3_c,k=10), data=data, p=F)
data$Zn_3<-spl2(~s(x_3_c,k=10), data=data)
data$Xo_4<-spl2(~s(x_4_c,k=10), data=data, p=F)
data$Zn_4<-spl2(~s(x_4_c,k=10), data=data)
data$Xo_5<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn_5<-spl2(~s(x_5_c,k=10), data=data)
data$Xo_6<-spl2(~s(x_6_c,k=10), data=data, p=F)
data$Zn_6<-spl2(~s(x_6_c,k=10), data=data)
data$Xo_7<-spl2(~s(x_7_c,k=10), data=data, p=F)
data$Zn_7<-spl2(~s(x_7_c,k=10), data=data)
# Version a (that will take some time)
prior_mc_spl_a <- list(R = list(V = 1, nu=0.002), G = list(G1 =
list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002),
G3 = list(V = diag(1), nu = 0.002),G4 = list(V = diag(1), nu =
0.002),G5 = list(V = diag(1), nu = 0.002),
G6 = list(V = diag(1), nu = 0.002),G7 = list(V = diag(1), nu =
0.002),G8 = list(V = diag(1), nu = 0.002)))
mc_spl_a <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random =
~country+idv(Zn_1)+idv(Zn_2)+idv(Zn_3)+idv(Zn_4)+idv(Zn_5)+idv(Zn_6)+idv(Zn_7),
data =data,family="gaussian" , verbose = F,prior = prior_mc_spl_a,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_a)
gamm_spl_a <-
gamm4(y~s(x_1_c,k=10)+s(x_2_c,k=10)+s(x_3_c,k=10)+s(x_4_c,k=10)+s(x_5_c,k=10)+s(x_6_c,k=10)+s(x_7_c,k=10)+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_a$mer)
plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal=NULL))
### fixed parts are similiar, but random parts differ
### Predictions are nearly the same
### ???
# Version b: cbind Zns to one matrix
Zn_b <-
cbind(data$Zn_1,data$Zn_2,data$Zn_3,data$Zn_4,data$Zn_5,data$Zn_6,data$Zn_7)
prior_mc_spl_b <- list(R = list(V = 1, nu=0.002), G = list(G1 =
list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random = ~country+idv(Zn_b),data =data,family="gaussian" , verbose =
F,prior = prior_mc_spl_b,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_b)
plot(predict(mc_spl_b, marginal=NULL), predict(mc_spl_a, marginal=NULL))
### Prediction are the same between Version a and Version b
### So, are both methods ok???
best regards,
Linus Holtermann
Hamburgisches WeltWirtschaftsInstitut gemeinnützige GmbH (HWWI)
Heimhuder Straße 71
20148 Hamburg
Tel +49-(0)40-340576-336
Fax+49-(0)40-340576-776
Internet: www.hwwi.org
Email: holtermann at hwwi.org
AmtsgerichtHamburg HRB 94303
Geschäftsführer: Prof. Dr. Thomas Straubhaar, Gunnar Geyer
Umsatzsteuer-ID: DE 241849425
The University of Edinburgh is a charitable body, registered in Scotland, with registration number SC005336.