random effects model
Hi,
I get the error message.
BP.gee8<- gee(hibp14~time*Categ,data=BPsub7,id=CODEA,family=binomial,corstr="exchangeable",na.action=na.omit)
#Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
#Error in gee(hibp14 ~ time * Categ, data = BPsub7, id = CODEA, family = binomial,? :
?# rank-deficient model matrix
#Reason I mentioned before.
If you want to see what is happening, you could check this:
dat1<-structure(list(CODEA = c(3L, 7L, 8L), IntScore = c(0L, 0L, 0L
), Obese14 = c(1L, 0L, 1L), Overweight14 = c(1L, 1L, 1L), Obese21 = c(1L,
1L, 0L), Overweight21 = c(1L, 1L, 0L), hibp14 = c(1L, 0L, 1L),
??? hibp21 = c(0L, 1L, 0L)), .Names = c("CODEA", "IntScore",
"Obese14", "Overweight14", "Obese21", "Overweight21", "hibp14",
"hibp21"), class = "data.frame", row.names = c(NA, -3L))
reshape(dat1,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long") #your code
#??? CODEA IntScore hibp14 hibp21 time Obese Overweight?? #here hibp14/hibp21 (response variable) gets replicated i.e. hibp14 values are the same for time==1 and time==2 whereas Obese, Overweigh #values change
#3.1???? 3??????? 0????? 1????? 0??? 1???? 1????????? 1
#7.1???? 7??????? 0????? 0????? 1??? 1???? 0????????? 1
#8.1???? 8??????? 0????? 1????? 0??? 1???? 1????????? 1
#3.2???? 3??????? 0????? 1????? 0??? 2???? 1????????? 1
#7.2???? 7??????? 0????? 0????? 1??? 2???? 1????????? 1
#8.2???? 8??????? 0????? 1????? 0??? 2???? 0????????? 0
A.K.
----- Original Message -----
From: rex2013 <usha.nathan at gmail.com>
To: r-help at r-project.org
Cc:
Sent: Monday, January 14, 2013 3:04 AM
Subject: Re: [R] random effects model
Sorry
I have corrected the mistakes:
BP.stack3 <-
reshape(Copy.of.BP_2,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long")
BP.stack3
head(BP.stack3)
tail(BP.stack3)
names(BP.stack3)[c(2,3,4,5,6,7)] <-
c("Sex","MaternalAge","Education","Birthplace","AggScore","IntScore")
names(BP.stack3)[1:7]
BP.stack3 <-
transform(BP.stack3,CODEA=factor(CODEA),Sex=factor(Sex,labels=c("Male","Female")),MaternalAge=factor(MaternalAge,labels=c("39years
or less","40-49 years","50 years or
older")),Education=factor(Education,labels=c("Primary/special","Started
secondary","Completed grade10", "Completed grade12",
"College","University")),Birthplace=factor(Birthplace,labels=c("Australia","Other
English-speaking","Other")))
str(BP.stack3)
table(BP.stack3$Sex)
BP.stack3$Sex <- factor(BP.stack3$Sex,levels=levels(BP.stack3$Sex)[c(2,1)])
levels(BP.stack3$Sex)
BPsub6 <-? subset(BP.stack3,subset=!(is.na(Sex)| is.na(Education)|is.na
(Birthplace)|is.na(Education)|is.na(hibp14)| is.na(hibp21)))
summary(BPsub6)
BPsub6$Categ[BPsub6$Overweight==1&BPsub6$time==1&BPsub6$Obese==0] <-
"Overweight14"
BPsub6$Categ[BPsub6$Overweight==1&BPsub6$time==2&BPsub6$Obese==0] <-
"Overweight21"
BPsub6$Categ[BPsub6$Obese==1&BPsub6$time==1&BPsub6$Overweight==0|BPsub6$Obese==1&BPsub6$time==1&BPsub6$Overweight==1
] <- "Obese14"
BPsub6$Categ[BPsub6$Obese==0&BPsub6$time==1&BPsub6$Overweight==0] <-
"Normal14"
BPsub6$Categ[BPsub6$Obese==0&BPsub6$time==2&BPsub6$Overweight==0] <-
"Normal21"
BPsub6$Categ[BPsub6$Obese==1&BPsub6$time==2&BPsub6$Overweight==0|BPsub6$Obese==1&BPsub6$time==2&BPsub6$Overweight==1]
<- "Obese21"
BPsub6$Categ <- factor(BPsub6$Categ)
BPsub6$time <- factor(BPsub6$time)
summary(BPsub6$Categ)
BPsub7 <- subset(BPsub6,subset=!(is.na(Categ)))
BPsub7 <- BPsub7[order(BPsub7$CODEA),]
BPsub7$hibp14 <- factor(BPsub7$hibp14)
levels(BPsub7$hibp14)
levels(BPsub7$Categ)
names(BPsub7)
head(BPsub7)
tail(BPsub7)
str(BPsub7)
library(gee)
BP.gee8 <- gee(hibp14~ time*Categ, data=BPsub7,id=CODEA,family=binomial,
corstr="exchangeable",na.action=na.omit)
summary(BP.gee8)
## Can you try this out please?? I am not clear where the? defect is with
model? One other previous model had no correlation between obese 14 and
time. With this one, i cannot find anything wrong as such, but still wont
work.
Thanks
On Mon, Jan 14, 2013 at 10:30 AM, arun kirshna [via R] <
ml-node+s789695n4655440h67 at n4.nabble.com> wrote:
HI,
I think I mentioned to you before that when you reshape the
columns excluding the response variable, response variable gets repeated
(in this case hibp14 or hibp21) and creates the error"
I run your code, there are obvious problems in the code so I didn't reach
up to BP.gee
BP_2b<-read.csv("BP_2b.csv",sep="\t")
BP.stack3 <-
reshape(BP_2b,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long")
BP.stack3 <-
transform(BP.stack3,CODEA=factor(CODEA),Sex=factor(Sex,labels=c("Male","Female")),MaternalAge=factor(MaternalAge,labels=c("39years
or less","40-49 years","50 years or
older")),Education=factor(Education,labels=c("Primary/special","Started
secondary","Completed grade10", "Completed grade12",
"College","University")),Birthplace=factor(Birthplace,labels=c("Australia","Other
English-speaking","Other")))
? BP.stack3$Sex <-
factor(BP.stack3$Sex,levels=levels(BP.stack3$Sex)[c(2,1)])
? BP.sub3a <-? subset(BP.stack3,subset=!(is.na(Sex)| is.na(Education)|is.na
(Birthplace)|is.na(Education)|is.na(hibp14)| is.na(hibp21)))
? nrow(BP.sub3a)
#[1] 3364
? BP.sub5a <- BP.sub3a[order(BP.sub3a$CODEA),] # your code was BP.sub5a <-
BP.sub3a[order(BP.sub5a$CODEA),]
^^^^^ was not defined before
#Next line
BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0]<-
"Overweight14"? #It should be BP.sub3 and what is BPsub6, it was not
defined previously.
#Error in BPsub3$Categ[BPsub6$Overweight == 1 & BPsub3$time == 1 &
BPsub3$Obese ==? :
? #object 'BPsub3' not found
A.K.
________________________________ From: Usha Gurunathan <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=0>> To: arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=1>> Sent: Sunday, January 13, 2013 1:51 AM Subject: Re: [R] random effects model HI AK Thanks a lot? for explaining that. 1. With the chi sq. ( in order to find out if the diffce is significant between groups) do I have create a separate excel file and make a dataframe.How do I go about it? I have resent a mail to Jun Yan at a difft email ad( first add.didn't work, mail not delivered). 2. With my previous query ( reg. Obese/Overweight/ Normal at age 14 Vs change of blood pressure status at 21), even though I had compromised without the age-specific regression, but I am still keen to explore why the age-specific regression didn't work despite it looking okay. I have given below the syntax. If you get time, could you kindly look at it and see if it could work by any chance? Apologies for persisting with this query. BP.stack3 <- reshape(Copy.of.BP_2,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long BP.stack3 head(BP.stack3) tail(BP.stack3) ? names(BP.stack3)[c(2,3,4,5,6,7)] <- c("Sex","MaternalAge","Education","Birthplace","AggScore","IntScore") BP.stack3 <- transform(BP.stack3,CODEA=factor(CODEA),Sex=factor(Sex,labels=c("Male","Female")),MaternalAge=factor(MaternalAge,labels=c("39years or less","40-49 years","50 years or older")),Education=factor(Education,labels=c("Primary/special","Started secondary","Completed grade10", "Completed grade12", "College","University")),Birthplace=factor(Birthplace,labels=c("Australia","Other English-speaking","Other"))) table(BP.stack3$Sex) BP.stack3$Sex <- factor(BP.stack3$Sex,levels=levels(BP.stack3$Sex)[c(2,1)]) levels(BP.stack3$Sex) BP.sub3a <-? subset(BP.stack3,subset=!(is.na(Sex)| is.na(Education)|is.na (Birthplace)|is.na(Education)|is.na(hibp14)| is.na(hibp21))) summary(BP.sub3a) BP.sub5a <- BP.sub3a[order(BP.sub5a$CODEA),] ? BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0] <- "Overweight14" BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==2&BPsub3$Obese==0] <- "Overweight21" BPsub3$Categ[BPsub3$Obese==1&BPsub3$time==1&BPsub3$Overweight==0|BPsub3$Obese==1&BPsub3$time==1&BPsub3$Overweight==1 ] <- "Obese14" BPsub3$Categ[BPsub3$Obese==0&BPsub3$time==1&BPsub3 BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0] <- "Overweight14"$Overweight==0] <- "Normal14" BPsub3$Categ[BPsub3$Obese==0&BPsub3$time==2&BPsub3$Overweight==0] <- "Normal21" BPsub3$Categ[BPsub3$Obese==1&BPsub3$time==2&BPsub3$Overweight==0|BPsub3$Obese==1&BPsub3$time==2&BPsub3$Overweight==1] <- "Obese21" BPsub3$Categ <- factor(BPsub3$Categ) BPsub3$time <- factor(BPsub3$time) summary(BPsub3$Categ) BPsub7 <- subset(BPsub6,subset=!(is.na(Categ))) BPsub7$time <- recode(BPsub7$time,"1=14;2=21") BPsub7$hibp14 <- factor(BPsub7$hibp14) levels(BPsub7$hibp14) levels(BPsub7$Categ) names(BPsub7) head(BPsub7)? ? ### this was looking quite okay. tail(BPsub7) str(BPsub7) library(gee) BP.gee <- geese(hibp14~ time*Categ, data=BPsub7,id=CODEA,family=binomial, corstr="exchangeable",na.action=na.omit) Thanks again. On Sun, Jan 13, 2013 at 1:22 PM, arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=2>> wrote: HI, table(BP_2b$Sex) #original dataset #? 1? ? 2 #3232 3028 nrow(BP_2b) #[1] 6898 nrow(BP_2bSexNoMV) #[1] 6260 6898-6260 #[1] 638 #these rows were removed from the BP_2b to create BP_2bSexNoMV BP_2bSexMale<-BP_2bSexNoMV[BP_2bSexNoMV$Sex=="Male",] nrow(BP_2bSexMale) #[1] 3232 nrow(BP_2bSexMale[!complete.cases(BP_2bSexMale),]) #Missing rows with Male #[1] 2407 nrow(BP_2bSexMale[complete.cases(BP_2bSexMale),]) #Non missing rows with Male #[1] 825 You did the chisquare test on the new dataset with 6260 rows, right. I removed those 638 rows because these doesn't belong to either male or female, but you want the % of missing value per male or female.? So, I thought this will bias the results.? If you want to include the missing values, you could do it, but I don't know where you would put that missing values as it cannot be classified as belonging specifically to males or females.? I hope you understand it. Sometimes, the maintainer's respond a bit slow.? You have to sent an email reminding him again. Regarding the vmv package, you could email Waqas Ahmed Malik ([hidden email] <http://user/SendEmail.jtp?type=node&node=4655440&i=3>) regarding options for changing the title and the the font etc. You could also use this link ( http://www.r-bloggers.com/visualizing-missing-data-2/ ) to plot missing value (?plot.missing()).? I never used that package, but you could try. Looks like it gives more information. A.K. ________________________________ From: Usha Gurunathan <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=4>> To: arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=5>> Sent: Saturday, January 12, 2013 9:05 PM Subject: Re: [R] random effects model Hi A.K So it is number of females missing/total female participants enrolled: 72.65% Number of females missing/total (of males+ females)? participants enrolled : 35.14% The total no. with the master data: Males: 3232, females: 3028 ( I got this before removing any missing values) with table(Copy.of.BP_2$ Sex)? ## BP If I were to write a table (? and do a chi sq. later), as Gender? ? ? ? ? ? Study? ? ? ? ? ? ? ? ? ? Non study/missing? ? Total ? ? ? Male? ? ? ? ? ? ? 825 (25.53%)? ? ? ? ? ? 2407 (74.47%)? ? ? 3232 (100%) ? ? Female? ? ? ? ? 828 (27.35%)? ? ? ? ? ? 2200 (72.65%)? ? ? 3028 ( 100%) ? ? Total? ? ? ? ? ? ? 1653? ? ? ? ? ? ? ? ? ? ? ? ? 4607 ? ? ? ? 6260 The problem is when I did colSums(is.na(Copy.of.BP_2), the sex category showed N=638. I cannot understand the discrepancy.Also, when you have mentioned to remove NA, is that not a missing value that needs to be included in the total number missing. I am a bit confused. Can you help? ## I tried sending email to gee pack maintainer at the ID with R site, mail didn't go through?? Many thanks On Sun, Jan 13, 2013 at 9:17 AM, arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=6>> wrote: Hi, Yes, you are right.? 72.655222% was those missing among females. ? 35.14377% of values in females are missing from among the whole dataset (combined total of Males+Females data after removing the NAs from the variable "Sex"). A.K. ________________________________ From: Usha Gurunathan <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=7>> To: arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=8>> Cc: R help <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=9>> Sent: Saturday, January 12, 2013 5:59 PM Subject: Re: [R] random effects model Hi AK That works. I was trying to get? similar results from any other package. Being a beginner, I was not sure how to modify the syntax to get my output. lapply(split(BP_2bSexNoMV,BP_ 2bSexNoMV$Sex),function(x) (nrow(x[!complete.cases(x[,-2]),])/nrow(x))*100) #gives the percentage of rows of missing #values from the overall rows for Males and Females #$Female #[1] 72.65522 # #$Male #[1] 74.47401 #iF you want the percentage from the total number rows in Males and Females (without NA's in the the Sex column) lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x) (nrow(x[!complete.cases(x[,-2]),])/nrow(BP_2bSexNoMV))*100) #$Female #[1] 35.14377 # #$Male #[1] 38.45048 How do I interpret the above 2 difft results? 72.66% of values were missing among female participants?? Can you pl. clarify. Many thanks. On Sun, Jan 13, 2013 at 3:28 AM, arun <[hidden email]<http://user/SendEmail.jtp?type=node&node=4655440&i=10>> wrote: lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x) (nrow(x[!complete.cases(x[,-2]),])/nrow(x))*100) #gives the percentage of rows of missing #values from the overall rows for Males and Females #$Female #[1] 72.65522 # #$Male #[1] 74.47401 #iF you want the percentage from the total number rows in Males and Females (without NA's in the the Sex column) lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x) (nrow(x[!complete.cases(x[,-2]),])/nrow(BP_2bSexNoMV))*100) #$Female #[1] 35.14377 # #$Male #[1] 38.45048 ______________________________________________ [hidden email] <http://user/SendEmail.jtp?type=node&node=4655440&i=11>mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ------------------------------ ? If you reply to this email, your message will be added to the discussion below: http://r.789695.n4.nabble.com/random-effects-model-tp4654346p4655440.html ? To unsubscribe from random effects model, click here<http://r.789695.n4.nabble.com/template/NamlServlet.jtp?macro=unsubscribe_by_code&node=4654346&code=dXNoYS5uYXRoYW5AZ21haWwuY29tfDQ2NTQzNDZ8MjAyMjE1NDI0> . NAML<http://r.789695.n4.nabble.com/template/NamlServlet.jtp?macro=macro_viewer&id=instant_html%21nabble%3Aemail.naml&base=nabble.naml.namespaces.BasicNamespace-nabble.view.web.template.NabbleNamespace-nabble.view.web.template.NodeNamespace&breadcrumbs=notify_subscribers%21nabble%3Aemail.naml-instant_emails%21nabble%3Aemail.naml-send_instant_email%21nabble%3Aemail.naml>
-- View this message in context: http://r.789695.n4.nabble.com/random-effects-model-tp4654346p4655456.html Sent from the R help mailing list archive at Nabble.com. ??? [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.