Skip to content

ave function

1 message · arun

#
Hi Robert,


# Load the shared data file; presumably this is what defines `Clean`
# (see the note below) -- confirm against the contents of "shareB101".
source("shareB101")
## Clean is the dataset

# Shapiro-Wilk normality test of GRADE within every TERM x INST_NUM group.
# The test is run once per group and its p-value and W statistic are bound
# into a 1 x 2 matrix, so aggregate() stores them in the matrix column `x`
# (x.1 = p-value, x.2 = W statistic).
res1 <- with(
  Clean,
  aggregate(
    GRADE,
    list(TERM, INST_NUM),
    FUN = function(x) {
      sw <- shapiro.test(x)
      cbind(sw$p.value, sw$statistic)
    }
  )
)
head(res1)
#  Group.1 Group.2          x.1          x.2
#1  201001  689809 1.720329e-07 9.307362e-01
#2  201201  689809 2.029761e-11 9.139405e-01
#3  201301  689809 4.709662e-14 8.791063e-01
#4  200701  994474 3.695317e-14 7.939902e-01
#5  200710  994474 4.560275e-13 8.849943e-01
#6  201203 1105752 4.434649e-15 9.220643e-01


# Regarding the lapply() error, it was the same problem as I thought:
# split() on two grouping variables returns an element for every
# TERM x INST_NUM combination, including combinations with no rows, and
# shapiro.test() refuses those empty groups. The call below is kept as-is
# on purpose to reproduce the failure.
lapply(
  X = split(x = Clean, f = list(Clean$TERM, Clean$INST_NUM)),
  FUN = function(x) shapiro.test(x$GRADE)
)
#Error in shapiro.test(x$GRADE) : sample size must be between 3 and 5000


# One data frame per TERM x INST_NUM combination. split() also returns the
# combinations that never occur in the data, as zero-row data frames.
lst1 <- split(Clean, list(Clean$TERM, Clean$INST_NUM))

# shapiro.test() only accepts between 3 and 5000 non-missing values (see the
# error message above), so test just the groups whose GRADE column qualifies.
# This drops the empty groups and also any group that is too small or too
# large to be tested, instead of stopping on the first one.
n_grades <- vapply(lst1, function(x) sum(!is.na(x$GRADE)), integer(1))
lst2 <- lapply(
  lst1[n_grades >= 3L & n_grades <= 5000L],
  function(x) shapiro.test(x$GRADE)
)
lst2[[1]]

#    Shapiro-Wilk normality test
#
#data:  x$GRADE
#W = 0.9307, p-value = 1.72e-07


library(plyr)

# Same per-group analysis with plyr: dlply() runs the Shapiro-Wilk test on
# GRADE once for each TERM x INST_NUM group, and ldply() pulls `p.value` and
# `statistic` out of every test result into the columns `pval` and `stat1`,
# alongside the grouping columns.
res2 <- ldply(
  dlply(Clean, .(TERM, INST_NUM), function(x) shapiro.test(x$GRADE)),
  summarize,
  pval = p.value,
  stat1 = statistic
)
head(res2)
#    TERM INST_NUM         pval     stat1
#1 200610  1106842 1.420787e-11 0.9192428
#2 200610  1324438 2.345177e-12 0.9048394
#3 200610  1557630 4.618117e-10 0.8968445
#4 200701   994474 3.695317e-14 0.7939902
#5 200701  1106842 2.745429e-08 0.9292158
#6 200701  1107019 6.887642e-10 0.9213602


A.K.