Skip to content

new question

3 messages · arun

#
Hi,
Try this:


# Count the identified spectra per peptide (Seq, Mod, z) in every folder.
#
# Arguments:
#   lista  Named list of data frames. The names are the folder labels and
#          become the count columns of the result. Every data frame needs the
#          columns Seq, Mod, z, spec and FDR.
#   FDR_k  FDR cutoff; only rows with FDR < FDR_k are counted.
#
# Value:
#   A data frame with one row per (Seq, Mod, z) combination and one count
#   column per folder (0 where the combination was not seen in that folder).
#   The table is printed and returned.
Spec <- function(lista, FDR_k) {
  stopifnot(is.list(lista), !is.null(names(lista)))
  key_cols <- c("Seq", "Mod", "z")

  # Spectrum counts of one file: columns Seq, Mod, z and <folder>.
  count_file <- function(df, folder) {
    df[["spec"]] <- as.character(df[["spec"]])
    # which() drops rows whose FDR is NA instead of turning them into
    # all-NA rows; a file with no row under the cutoff yields zero rows.
    kept <- df[which(df[["FDR"]] < FDR_k), c(key_cols, "spec")]
    rownames(kept) <- NULL

    # Rows with the same Seq, Mod and z share one id.
    id <- do.call(paste, c(unname(as.list(kept[key_cols])), sep = "\r"))
    spec_by_id <- split(kept[["spec"]], match(id, unique(id)))
    # The spectra of a group are pasted with "," and the comma-separated
    # pieces are counted.
    counts <- vapply(spec_by_id, function(spec) {
      length(strsplit(paste(spec, collapse = ","), ",", fixed = TRUE)[[1L]])
    }, integer(1L), USE.NAMES = FALSE)

    # One row per group, in order of first appearance (same order as counts).
    out <- kept[!duplicated(id), key_cols, drop = FALSE]
    out[[folder]] <- counts
    out
  }

  merge_counts <- function(x, y) merge(x, y, by = key_cols, all = TRUE)

  # Files are grouped by folder name; each file is tagged with its own name.
  by_folder <- split(lista, names(lista))
  per_folder <- lapply(by_folder, function(files) {
    Reduce(merge_counts, Map(count_file, files, names(files)))
  })

  # Data frame with the spectrum counts of all folders.
  res <- Reduce(merge_counts, per_folder)
  res[is.na(res)] <- 0
  print(res)
}

# Run Spec() on ListFacGroup (created outside this snippet) with FDR_k = 0.05.
Spec(ListFacGroup, 0.05)
#                                Seq                 Mod z a2 c2 c3 t2
#1            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
#2             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
#3                  aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1
#4                       AAAAAAALQAK                     2  1  0  1  1
#5                    aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2
#6         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1
#7         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3  1  0  0  1
#8         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2  0  1  0  0
#9         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  1  2  2  1
#10                      AAAAAPGTAEK                     2  0  1  0  0
#11            aAAAASAPQQLSDEELFSQLR          1-n_acPro/ 2  1  0  0  1
#12                  aAAAAVGNAVPCGAR          1-n_acPro/ 2  1  1  1  1
#13                AAAAAWEEPSSGNGTAR                     2  1  1  1  1
#14                      aAAAELSLLEK          1-n_acPro/ 1  1  0  0  1
#15                      aAAAELSLLEK          1-n_acPro/ 2  1  1  1  1
#16                     AAAAEVLGLILR                     2  1  1  1  1
#17      aAAAGAAAAAAAEGEAPAEMGALLLEK          1-n_acPro/ 3  1  1  1  1
#18  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3  0  0  1  0
#19  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR          1-n_acPro/ 3  1  0  0  1
#20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK          1-n_acPro/ 3  1  0  0  1
#21                      AAAAAAAkAAK             8-K_ac/ 2  0  1  0  0
#22         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 2  0  1  1  0
#23         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 3  0  0  1  0
#24             aAADGDDSLYPIAVLIDELR          1-n_acPro/ 2  0  0  1  0


Regarding the 2nd question, I am a bit busy now. Will try it later.
A.K.
#
Hi,
The function outputs the unique rows and also a chisq test on frequency (by row).


# Count the identified spectra per peptide (Seq, Mod, z) in every folder and
# compare the folders pairwise with a chi-square test on each row.
#
# Arguments:
#   lista  Named list of data frames. The names are the folder labels and
#          become the count columns of the result. Every data frame needs the
#          columns Seq, Mod, z, spec and FDR.
#   FDR_k  FDR cutoff; only rows with FDR < FDR_k are counted.
#
# Value:
#   A data frame with one row per (Seq, Mod, z) combination, one count column
#   per folder (0 where the combination was not seen in that folder) and one
#   column Count_<f1><f2> per pair of folders holding the chisq.test() p-value
#   of the two counts (NA when both counts are 0). With fewer than two
#   folders only the counts are returned.
Spec <- function(lista, FDR_k) {
  stopifnot(is.list(lista), !is.null(names(lista)))
  key_cols <- c("Seq", "Mod", "z")

  # Spectrum counts of one file: columns Seq, Mod, z and <folder>.
  count_file <- function(df, folder) {
    df[["spec"]] <- as.character(df[["spec"]])
    # which() drops rows whose FDR is NA instead of turning them into
    # all-NA rows; a file with no row under the cutoff yields zero rows.
    kept <- df[which(df[["FDR"]] < FDR_k), c(key_cols, "spec")]
    rownames(kept) <- NULL

    # Rows with the same Seq, Mod and z share one id.
    id <- do.call(paste, c(unname(as.list(kept[key_cols])), sep = "\r"))
    spec_by_id <- split(kept[["spec"]], match(id, unique(id)))
    # The spectra of a group are pasted with "," and the comma-separated
    # pieces are counted.
    counts <- vapply(spec_by_id, function(spec) {
      length(strsplit(paste(spec, collapse = ","), ",", fixed = TRUE)[[1L]])
    }, integer(1L), USE.NAMES = FALSE)

    # One row per group, in order of first appearance (same order as counts).
    out <- kept[!duplicated(id), key_cols, drop = FALSE]
    out[[folder]] <- counts
    out
  }

  merge_counts <- function(x, y) merge(x, y, by = key_cols, all = TRUE)

  # Files are grouped by folder name; each file is tagged with its own name.
  by_folder <- split(lista, names(lista))
  per_folder <- lapply(by_folder, function(files) {
    Reduce(merge_counts, Map(count_file, files, names(files)))
  })

  # Data frame with the spectrum counts of all folders.
  res <- Reduce(merge_counts, per_folder)
  res[is.na(res)] <- 0

  count_cols <- names(res)[-seq_along(key_cols)]
  if (length(count_cols) < 2L) {
    return(res)  # a single folder leaves no pair to compare
  }

  # One column of p-values per pair of folders (each column of count_pairs
  # holds the two folder names of a pair).
  count_pairs <- as.data.frame(combn(count_cols, 2), stringsAsFactors = FALSE)
  Chisq1test_Count <- do.call(cbind, lapply(count_pairs, function(pair) {
    first <- res[[pair[1]]]
    second <- res[[pair[2]]]
    p_value <- vapply(seq_len(nrow(res)), function(i) {
      observed <- c(first[i], second[i])
      # chisq.test() cannot be run on two zero counts.
      if (sum(observed) == 0) NA_real_ else chisq.test(observed)$p.value
    }, numeric(1L))
    out <- data.frame(p_value)
    colnames(out) <- paste0("Count_", pair[1], pair[2])
    out
  }))
  res1 <- cbind(res, Chisq1test_Count)
  res1
}

# ReadDir() and FacGroup are defined outside this snippet.
ListFacGroup <- ReadDir(FacGroup)
Spec(ListFacGroup, 0.05)
head(Spec(ListFacGroup, 0.05))
#                        Seq                 Mod z a2 c2 c3 t2 Count_a2c2
#1    aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1 0.02534732
#2     aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1 0.01430588
#3          aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1 1.00000000
#4               AAAAAAALQAK                     2  1  0  1  1 0.31731051
#5            aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2 1.00000000
#6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1 0.31731051
#  Count_a2c3 Count_a2t2 Count_c2c3 Count_c2t2 Count_c3t2
#1 0.02534732 0.10247043         NA  0.3173105  0.3173105
#2 0.01430588 0.05878172         NA  0.3173105  0.3173105
#3 0.31731051 1.00000000  0.3173105  1.0000000  0.3173105
#4 1.00000000 1.00000000  0.3173105  0.3173105  1.0000000
#5 0.56370286 1.00000000  0.5637029  1.0000000  0.5637029
#6 0.31731051 1.00000000         NA  0.3173105  0.3173105

A.K.
#
Hi,
You also mentioned separating the significant results from the non-significant ones.

If you replace:
# Statements to look for in Spec(): one chi-square p-value column per pair of
# count columns (columns 4 onwards of `res`), NA when both counts are zero,
# appended to `res`.
Chisq1test_Count<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x1}))
res1<- cbind(res,Chisq1test_Count)

with
# For every pair of count columns of `res` (columns 4 onwards): the row-wise
# chi-square p-value (NA when both counts are zero) plus a <name>_Flag column
# that is "S" for p < 0.05 and "NS" otherwise.
count_pairs <- as.data.frame(
  combn(names(res)[4:ncol(res)], 2),
  stringsAsFactors = FALSE
)
Chisqtest_CountNew <- do.call(cbind, lapply(count_pairs, function(pair) {
  p_value <- apply(cbind(res[pair[1]], res[pair[2]]), 1, function(y) {
    ifelse(sum(y) == 0, NA, chisq.test(y)$p.value)
  })
  out <- data.frame(p_value)
  colnames(out) <- paste0("Count_", pair[1], pair[2])
  out[[paste0(colnames(out)[1], "_Flag")]] <- ifelse(out[, 1] < 0.05, "S", "NS")
  out
}))
res1 <- cbind(res, Chisqtest_CountNew)

inside Spec(), the output becomes:
# First two rows of the result for FDR_k = 0.05.
head(Spec(ListFacGroup, 0.05), n = 2)
#                     Seq        Mod z a2 c2 c3 t2 V1.Count_a2c2
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2  5  0  0  1    0.02534732
#2  aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2  6  0  0  1    0.01430588
#  V1.Count_a2c2_Flag V2.Count_a2c3 V2.Count_a2c3_Flag V3.Count_a2t2
#1                  S    0.02534732                  S    0.10247043
#2                  S    0.01430588                  S    0.05878172
#  V3.Count_a2t2_Flag V4.Count_c2c3 V4.Count_c2c3_Flag V5.Count_c2t2
#1                 NS            NA               <NA>     0.3173105
#2                 NS            NA               <NA>     0.3173105
#  V5.Count_c2t2_Flag V6.Count_c3t2 V6.Count_c3t2_Flag
#1                 NS     0.3173105                 NS
#2                 NS     0.3173105                 NS


A.K.





----- Original Message -----
From: arun <smartpink111 at yahoo.com>
To: Vera Costa <veracosta.rt at gmail.com>
Cc: R help <r-help at r-project.org>
Sent: Thursday, March 28, 2013 2:28 PM
Subject: Re: [R] new question

Hi,
The function outputs the unique rows and also a chisq test on frequency (by row).


# Count the identified spectra per peptide (Seq, Mod, z) in every folder and
# compare the folders pairwise with a chi-square test on each row.
#
# Arguments:
#   lista  Named list of data frames. The names are the folder labels and
#          become the count columns of the result. Every data frame needs the
#          columns Seq, Mod, z, spec and FDR.
#   FDR_k  FDR cutoff; only rows with FDR < FDR_k are counted.
#
# Value:
#   A data frame with one row per (Seq, Mod, z) combination, one count column
#   per folder (0 where the combination was not seen in that folder) and one
#   column Count_<f1><f2> per pair of folders holding the chisq.test() p-value
#   of the two counts (NA when both counts are 0). With fewer than two
#   folders only the counts are returned.
Spec <- function(lista, FDR_k) {
  stopifnot(is.list(lista), !is.null(names(lista)))
  key_cols <- c("Seq", "Mod", "z")

  # Spectrum counts of one file: columns Seq, Mod, z and <folder>.
  count_file <- function(df, folder) {
    df[["spec"]] <- as.character(df[["spec"]])
    # which() drops rows whose FDR is NA instead of turning them into
    # all-NA rows; a file with no row under the cutoff yields zero rows.
    kept <- df[which(df[["FDR"]] < FDR_k), c(key_cols, "spec")]
    rownames(kept) <- NULL

    # Rows with the same Seq, Mod and z share one id.
    id <- do.call(paste, c(unname(as.list(kept[key_cols])), sep = "\r"))
    spec_by_id <- split(kept[["spec"]], match(id, unique(id)))
    # The spectra of a group are pasted with "," and the comma-separated
    # pieces are counted.
    counts <- vapply(spec_by_id, function(spec) {
      length(strsplit(paste(spec, collapse = ","), ",", fixed = TRUE)[[1L]])
    }, integer(1L), USE.NAMES = FALSE)

    # One row per group, in order of first appearance (same order as counts).
    out <- kept[!duplicated(id), key_cols, drop = FALSE]
    out[[folder]] <- counts
    out
  }

  merge_counts <- function(x, y) merge(x, y, by = key_cols, all = TRUE)

  # Files are grouped by folder name; each file is tagged with its own name.
  by_folder <- split(lista, names(lista))
  per_folder <- lapply(by_folder, function(files) {
    Reduce(merge_counts, Map(count_file, files, names(files)))
  })

  # Data frame with the spectrum counts of all folders.
  res <- Reduce(merge_counts, per_folder)
  res[is.na(res)] <- 0

  count_cols <- names(res)[-seq_along(key_cols)]
  if (length(count_cols) < 2L) {
    return(res)  # a single folder leaves no pair to compare
  }

  # One column of p-values per pair of folders (each column of count_pairs
  # holds the two folder names of a pair).
  count_pairs <- as.data.frame(combn(count_cols, 2), stringsAsFactors = FALSE)
  Chisq1test_Count <- do.call(cbind, lapply(count_pairs, function(pair) {
    first <- res[[pair[1]]]
    second <- res[[pair[2]]]
    p_value <- vapply(seq_len(nrow(res)), function(i) {
      observed <- c(first[i], second[i])
      # chisq.test() cannot be run on two zero counts.
      if (sum(observed) == 0) NA_real_ else chisq.test(observed)$p.value
    }, numeric(1L))
    out <- data.frame(p_value)
    colnames(out) <- paste0("Count_", pair[1], pair[2])
    out
  }))
  res1 <- cbind(res, Chisq1test_Count)
  res1
}

# ReadDir() and FacGroup are defined outside this snippet.
ListFacGroup <- ReadDir(FacGroup)
Spec(ListFacGroup, 0.05)
head(Spec(ListFacGroup, 0.05))
#                        Seq                 Mod z a2 c2 c3 t2 Count_a2c2
#1    aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1 0.02534732
#2     aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1 0.01430588
#3          aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1 1.00000000
#4               AAAAAAALQAK                     2  1  0  1  1 0.31731051
#5            aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2 1.00000000
#6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1 0.31731051
#  Count_a2c3 Count_a2t2 Count_c2c3 Count_c2t2 Count_c3t2
#1 0.02534732 0.10247043         NA  0.3173105  0.3173105
#2 0.01430588 0.05878172         NA  0.3173105  0.3173105
#3 0.31731051 1.00000000  0.3173105  1.0000000  0.3173105
#4 1.00000000 1.00000000  0.3173105  0.3173105  1.0000000
#5 0.56370286 1.00000000  0.5637029  1.0000000  0.5637029
#6 0.31731051 1.00000000         NA  0.3173105  0.3173105

A.K.