random effects model
Hi AK Got an error message with library(ggplot2)> ggplot(BP.stack1,aes(x=factor(HiBP),fill=Obese))+geom_bar(position="fill")Error in rename(x, .base_to_ggplot, warn_missing = FALSE) : could not find function "revalue"> ggplot(BP.stack1,aes(x=factor(HiBP),fill=Overweight))+geom_bar(position="fill")Error in rename(x, .base_to_ggplot, warn_missing = FALSE) : could not find function "revalue" I got the dot plot, thanks for that. I have attached some plots, not sure how to interpret, they had unusual patterns.Is it because of missing data? I tried removing the missing data too. They still appeared the same. Do I need to transform the data? Thanks in advance.
On Tue, Jan 15, 2013 at 8:54 AM, arun <smartpink111 at yahoo.com> wrote:
HI,
BP_2b<-read.csv("BP_2b.csv",sep="\t")
BP_2bNM<-na.omit(BP_2b)
BP.stack3 <-
reshape(BP_2bNM,idvar="CODEA",timevar="time",sep="",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21"),c("hibp14","hibp21")),v.names=c("Obese","Overweight","HiBP"),times=factor(c(1,2)),direction="long")
library(car)
BP.stack3$Obese<- recode(BP.stack3$Obese,"1='Obese';0='Not Obese'")
BP.stack3$Overweight<- recode(BP.stack3$Overweight,"1='Overweight';0='Not
Overweight'")
library(ggplot2)
ggplot(BP.stack3,aes(x=factor(HiBP),fill=Obese))+geom_bar(position="fill")
ggplot(BP.stack3,aes(x=factor(HiBP),fill=Overweight))+geom_bar(position="fill")
You could try lmer() from lme4.
library(lme4)
fm1<-lmer(HiBP~time+(1|CODEA), family=binomial,data=BP.stack3) #check
codes, not sure
print(dotplot(ranef(fm1,post=TRUE),
scales = list(x = list(relation = "free")))[[1]])
qmt1<- qqmath(ranef(fm1, postVar=TRUE))
print(qmt1[[1]])
A.K.
________________________________
From: Usha Gurunathan <usha.nathan at gmail.com>
To: arun <smartpink111 at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Monday, January 14, 2013 6:32 AM
Subject: Re: [R] random effects model
Hi AK
I have been trying to create some plots. All being categorical variables,
I am not getting any luck with plots. The few ones that have worked are
below:
barchart(~table(HiBP)|Obese,data=BP.sub3) ## BP.sub3 is the stacked data
without missing values
barchart(~table(HiBP)|Overweight,data=BP.sub3)
plot(jitter(hibp14,factor=2)~jitter(Obese14,factor=2),col="gray",cex=0.7,
data=Copy.of.BP_2) ## Copy.of.BP_2 is the original wide format
## not producing any good plots with mixed models as well.
summary(lme.3 <- lme(HiBP~time, data=BP.sub3,random=~1|CODEA,
na.action=na.omit))
anova(lme.3)
head(ranef(lme.3))
print(plot(ranef(lme.3))) ##
Thanks for any help.
On Mon, Jan 14, 2013 at 4:33 AM, arun <smartpink111 at yahoo.com> wrote:
HI,
I think I mentioned to you before that when you reshape the
columns excluding the response variable, response variable gets repeated
(in this case hibp14 or hibp21) and creates the error"
I run your code, there are obvious problems in the code so I didn't reach
up to BP.gee
BP_2b<-read.csv("BP_2b.csv",sep="\t")
BP.stack3 <-
reshape(BP_2b,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long")
BP.stack3 <-
transform(BP.stack3,CODEA=factor(CODEA),Sex=factor(Sex,labels=c("Male","Female")),MaternalAge=factor(MaternalAge,labels=c("39years
or less","40-49 years","50 years or
older")),Education=factor(Education,labels=c("Primary/special","Started
secondary","Completed grade10", "Completed grade12",
"College","University")),Birthplace=factor(Birthplace,labels=c("Australia","Other
English-speaking","Other")))
BP.stack3$Sex <-
factor(BP.stack3$Sex,levels=levels(BP.stack3$Sex)[c(2,1)])
BP.sub3a <- subset(BP.stack3,subset=!(is.na(Sex)| is.na(Education)|
is.na(Birthplace)|is.na(Education)|is.na(hibp14)| is.na(hibp21)))
nrow(BP.sub3a)
#[1] 3364
BP.sub5a <- BP.sub3a[order(BP.sub3a$CODEA),] # your code was BP.sub5a <-
BP.sub3a[order(BP.sub5a$CODEA),]
^^^^^ was not defined before
#Next line
BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0]<-
"Overweight14" #It should be BP.sub3 and what is BPsub6, it was not
defined previously.
#Error in BPsub3$Categ[BPsub6$Overweight == 1 & BPsub3$time == 1 &
BPsub3$Obese == :
#object 'BPsub3' not found
A.K.
________________________________
From: Usha Gurunathan <usha.nathan at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Sunday, January 13, 2013 1:51 AM
Subject: Re: [R] random effects model
HI AK
Thanks a lot for explaining that.
1. With the chi sq. ( in order to find out if the diffce is significant
between groups) do I have create a separate excel file and make a
dataframe.How do I go about it?
I have resent a mail to Jun Yan at a difft email ad( first add.didn't
work, mail not delivered).
2. With my previous query ( reg. Obese/Overweight/ Normal at age 14 Vs
change of blood pressure status at 21), even though I had compromised
without the age-specific regression, but I am still keen to explore why the
age-specific regression didn't work despite it looking okay. I have given
below the syntax. If you get time, could you kindly look at it and see if
it could work by any chance? Apologies for persisting with this query.
BP.stack3 <-
reshape(Copy.of.BP_2,idvar="CODEA",timevar="time",sep="_",varying=list(c("Obese14","Obese21"),c("Overweight14","Overweight21")),v.names=c("Obese","Overweight"),times=factor(c(1,2)),direction="long
BP.stack3
head(BP.stack3)
tail(BP.stack3)
names(BP.stack3)[c(2,3,4,5,6,7)] <-
c("Sex","MaternalAge","Education","Birthplace","AggScore","IntScore")
BP.stack3 <-
transform(BP.stack3,CODEA=factor(CODEA),Sex=factor(Sex,labels=c("Male","Female")),MaternalAge=factor(MaternalAge,labels=c("39years
or less","40-49 years","50 years or
older")),Education=factor(Education,labels=c("Primary/special","Started
secondary","Completed grade10", "Completed grade12",
"College","University")),Birthplace=factor(Birthplace,labels=c("Australia","Other
English-speaking","Other")))
table(BP.stack3$Sex)
BP.stack3$Sex <-
factor(BP.stack3$Sex,levels=levels(BP.stack3$Sex)[c(2,1)])
levels(BP.stack3$Sex)
BP.sub3a <- subset(BP.stack3,subset=!(is.na(Sex)| is.na(Education)|is.na
(Birthplace)|is.na(Education)|is.na(hibp14)| is.na(hibp21)))
summary(BP.sub3a)
BP.sub5a <- BP.sub3a[order(BP.sub5a$CODEA),]
BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0]
<- "Overweight14"
BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==2&BPsub3$Obese==0]
<- "Overweight21"
BPsub3$Categ[BPsub3$Obese==1&BPsub3$time==1&BPsub3$Overweight==0|BPsub3$Obese==1&BPsub3$time==1&BPsub3$Overweight==1
] <- "Obese14"
BPsub3$Categ[BPsub3$Obese==0&BPsub3$time==1&BPsub3
BPsub3$Categ[BPsub6$Overweight==1&BPsub3$time==1&BPsub3$Obese==0]
<- "Overweight14"$Overweight==0]
<- "Normal14"
BPsub3$Categ[BPsub3$Obese==0&BPsub3$time==2&BPsub3$Overweight==0]
<- "Normal21"
BPsub3$Categ[BPsub3$Obese==1&BPsub3$time==2&BPsub3$Overweight==0|BPsub3$Obese==1&BPsub3$time==2&BPsub3$Overweight==1]
<- "Obese21"
BPsub3$Categ <- factor(BPsub3$Categ)
BPsub3$time <- factor(BPsub3$time)
summary(BPsub3$Categ)
BPsub7 <- subset(BPsub6,subset=!(is.na(Categ)))
BPsub7$time <-
recode(BPsub7$time,"1=14;2=21")
BPsub7$hibp14 <- factor(BPsub7$hibp14)
levels(BPsub7$hibp14)
levels(BPsub7$Categ)
names(BPsub7)
head(BPsub7) ### this was looking quite okay.
tail(BPsub7)
str(BPsub7)
library(gee)
BP.gee <- geese(hibp14~ time*Categ,
data=BPsub7,id=CODEA,family=binomial,
corstr="exchangeable",na.action=na.omit)
Thanks again.
On Sun, Jan 13, 2013 at 1:22 PM, arun <smartpink111 at yahoo.com> wrote:
HI,
table(BP_2b$Sex) #original dataset
# 1 2
#3232 3028
nrow(BP_2b)
#[1] 6898
nrow(BP_2bSexNoMV)
#[1] 6260
6898-6260
#[1] 638 #these rows were removed from the BP_2b to create BP_2bSexNoMV
BP_2bSexMale<-BP_2bSexNoMV[BP_2bSexNoMV$Sex=="Male",]
nrow(BP_2bSexMale)
#[1] 3232
nrow(BP_2bSexMale[!complete.cases(BP_2bSexMale),]) #Missing rows with
Male
#[1] 2407
nrow(BP_2bSexMale[complete.cases(BP_2bSexMale),]) #Non missing rows
with Male
#[1] 825
You did the chisquare test on the new dataset with 6260 rows, right.
I removed those 638 rows because these doesn't belong to either male or
female, but you want the % of missing value per male or female. So, I
thought this will bias the results. If you want to include the missing
values, you could do it, but I don't know where you would put that missing
values as it cannot be classified as belonging specifically to males or
females. I hope you understand it.
Sometimes, the maintainer's respond a bit slow. You have to sent an
email reminding him again.
Regarding the vmv package, you could email Waqas Ahmed Malik (
malik at math.uni-augsburg.de) regarding options for changing the title and
the the font etc.
You could also use this link (
http://www.r-bloggers.com/visualizing-missing-data-2/ ) to plot missing
value (?plot.missing()). I never used that package, but you could try.
Looks like it gives more information.
A.K.
________________________________
From: Usha Gurunathan <usha.nathan at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Saturday, January 12, 2013 9:05 PM
Subject: Re: [R] random effects model
Hi A.K
So it is number of females missing/total female participants enrolled:
72.65%
Number of females missing/total (of males+ females) participants
enrolled : 35.14%
The total no. with the master data: Males: 3232, females: 3028 ( I got
this before removing any missing values)
with table(Copy.of.BP_2$ Sex) ## BP
If I were to write a table ( and do a chi sq. later),
as Gender Study Non study/missing Total
Male 825 (25.53%) 2407 (74.47%)
3232 (100%)
Female 828 (27.35%) 2200 (72.65%) 3028 (
100%)
Total 1653 4607
6260
The problem is when I did
colSums(is.na(Copy.of.BP_2), the sex category showed N=638.
I cannot understand the discrepancy.Also, when you have mentioned to
remove NA, is that not a missing value that needs to be included in the
total number missing. I am a bit confused. Can you help?
## I tried sending email to gee pack maintainer at the ID with R site,
mail didn't go through??
Many thanks
On Sun, Jan 13, 2013 at 9:17 AM, arun <smartpink111 at yahoo.com> wrote:
Hi,
Yes, you are right. 72.655222% was those missing among females.
35.14377% of values in females are missing from among the whole dataset
(combined total of Males+Females data after removing the NAs from the
variable "Sex").
A.K.
________________________________
From: Usha Gurunathan <usha.nathan at gmail.com>
To: arun <smartpink111 at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Saturday, January 12, 2013 5:59 PM
Subject: Re: [R] random effects model
Hi AK
That works. I was trying to get similar results from any other
package. Being a beginner, I was not sure how to modify the syntax to get
my output.
lapply(split(BP_2bSexNoMV,BP_
2bSexNoMV$Sex),function(x)
(nrow(x[!complete.cases(x[,-2]),])/nrow(x))*100) #gives the percentage of
rows of missing #values from the overall rows for Males and Females
#$Female
#[1] 72.65522
#
#$Male
#[1] 74.47401
#iF you want the percentage from the total number rows in Males and
Females (without NA's in the the Sex column)
lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x)
(nrow(x[!complete.cases(x[,-2]),])/nrow(BP_2bSexNoMV))*100)
#$Female
#[1] 35.14377
#
#$Male
#[1] 38.45048
How do I interpret the above 2 difft results? 72.66% of values were
missing among female participants?? Can you pl. clarify.
Many thanks.
On Sun, Jan 13, 2013 at 3:28 AM, arun <smartpink111 at yahoo.com> wrote:
lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x)
(nrow(x[!complete.cases(x[,-2]),])/nrow(x))*100) #gives the percentage of
rows of missing #values from the overall rows for Males and Females
#$Female
#[1] 72.65522
#
#$Male
#[1] 74.47401
#iF you want the percentage from the total number rows in Males and
Females (without NA's in the the Sex column)
lapply(split(BP_2bSexNoMV,BP_2bSexNoMV$Sex),function(x)
(nrow(x[!complete.cases(x[,-2]),])/nrow(BP_2bSexNoMV))*100)
#$Female
#[1] 35.14377
#
#$Male
#[1] 38.45048
-------------- next part -------------- A non-text attachment was scrubbed... Name: QQ plot HIBP ~Overweight.png Type: image/png Size: 3452 bytes Desc: not available URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20130115/7bda764a/attachment.png> -------------- next part -------------- A non-text attachment was scrubbed... Name: Fitted vs residuals HiBP ~time+Sex+Maternal Age, random intercept.png Type: image/png Size: 3470 bytes Desc: not available URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20130115/7bda764a/attachment-0001.png> -------------- next part -------------- A non-text attachment was scrubbed... Name: QQ plot hibp21 BP.sub4.png Type: image/png Size: 1883 bytes Desc: not available URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20130115/7bda764a/attachment-0002.png> -------------- next part -------------- A non-text attachment was scrubbed... Name: HiBP ~Overweight qq plots residuals,random effects.png Type: image/png Size: 3425 bytes Desc: not available URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20130115/7bda764a/attachment-0003.png>