Hi, You also mentioned separating the significant results from the non-significant ones.
If you replace:

Chisq1test_Count<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x1}))
res1<- cbind(res,Chisq1test_Count)

with

Chisqtest_CountNew<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x2<-within(x1,{Flag<-ifelse(x1[,1]<0.05,"S","NS")}); colnames(x2)[2]<-paste0(colnames(x2)[1],"_Flag");x2}))
res1<- cbind(res,Chisqtest_CountNew)

inside Spec(), each p-value column is followed by a flag column ("S" when p < 0.05, otherwise "NS"):

head(Spec(ListFacGroup,0.05),2)
# Seq Mod z a2 c2 c3 t2 V1.Count_a2c2
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 0.02534732
#2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 0.01430588
# V1.Count_a2c2_Flag V2.Count_a2c3 V2.Count_a2c3_Flag V3.Count_a2t2
#1 S 0.02534732 S 0.10247043
#2 S 0.01430588 S 0.05878172
# V3.Count_a2t2_Flag V4.Count_c2c3 V4.Count_c2c3_Flag V5.Count_c2t2
#1 NS NA <NA> 0.3173105
#2 NS NA <NA> 0.3173105
# V5.Count_c2t2_Flag V6.Count_c3t2 V6.Count_c3t2_Flag
#1 NS 0.3173105 NS
#2 NS 0.3173105 NS

A.K.

----- Original Message -----
From: arun <smartpink...@yahoo.com>
To: Vera Costa <veracosta...@gmail.com>
Cc: R help <r-help@r-project.org>
Sent: Thursday, March 28, 2013 2:28 PM
Subject: Re: [R] new question

Hi, The function outputs the unique rows and also runs a chi-square test on the frequencies (by row).
Spec <- function(lista,FDR_k) { list.new<-lapply(lista,function(x) within(x,{spec<- as.character(spec)})) split.list<-split(list.new,names(lista)) #Data needed with FDR<FDR_k seq.mod.z<-lapply(seq_along(split.list),function(i) lapply(split.list[[i]],function(x) x[x[["FDR"]]<FDR_k,c("Seq","Mod","z","spec")])) names(seq.mod.z)<- names(split.list) #insert colunm with the name of the folder folder.name<-lapply(seq.mod.z,function(x) lapply(names(x),function(i) do.call(rbind,lapply(x[i],function(x) cbind(folder_name=i,x))))) #merge data with the same Seq, Mod and z library(plyr) library(data.table) merge.data<- lapply(folder.name,function(x) lapply(x,function(x1) {x1<-data.table(x1); x1[,spec:=paste(spec,collapse=","),by=c("Seq","Mod","z")]})) #colunm with number of spec count.spec<-lapply(merge.data,function(x) lapply(x,function(x1) {x1$counts<-sapply(x1$spec, function(x2) length(gsub("\\s", "", unlist(strsplit(x2, ",")))));x3<-as.data.frame(x1);names(x3)[6]<- as.character(unique(x3$folder_name));x3[,-c(1,5)]})) count.specUnique<-lapply(count.spec,function(x) lapply(x,unique)) #count spec by group (2-columns) spec.group<-lapply(count.specUnique,function(x) Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),x)) #spec.group1<-spec.group[lapply(spec.group,length)!=0] #data frame with count of spec res<- Reduce(function(...) 
merge(...,by=c("Seq","Mod","z"),all=TRUE),spec.group) res[is.na(res)] <- 0 res<- as.data.frame(res,stringsAsFactors=FALSE) #print(res) Chisq1test_Count<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x1})) #print(Chisq1test_Count) res1<- cbind(res,Chisq1test_Count) res1 } ListFacGroup<-ReadDir(FacGroup) Spec(ListFacGroup,0.05) head(Spec(ListFacGroup,0.05)) # Seq Mod z a2 c2 c3 t2 Count_a2c2 #1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 0.02534732 #2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 0.01430588 #3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1 1.00000000 #4 AAAAAAALQAK 2 1 0 1 1 0.31731051 #5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2 1.00000000 #6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1 0.31731051 # Count_a2c3 Count_a2t2 Count_c2c3 Count_c2t2 Count_c3t2 #1 0.02534732 0.10247043 NA 0.3173105 0.3173105 #2 0.01430588 0.05878172 NA 0.3173105 0.3173105 #3 0.31731051 1.00000000 0.3173105 1.0000000 0.3173105 #4 1.00000000 1.00000000 0.3173105 0.3173105 1.0000000 #5 0.56370286 1.00000000 0.5637029 1.0000000 0.5637029 #6 0.31731051 1.00000000 NA 0.3173105 0.3173105 A.K. 
________________________________ From: arun <smartpink...@yahoo.com> To: Vera Costa <veracosta...@gmail.com> Cc: R help <r-help@r-project.org> Sent: Thursday, March 28, 2013 10:18 AM Subject: Re: [R] new question Hi, Try this: Spec <- function(lista,FDR_k) { list.new<-lapply(lista,function(x) within(x,{spec<- as.character(spec)})) split.list<-split(list.new,names(lista)) #Data needed with FDR<FDR_k seq.mod.z<-lapply(seq_along(split.list),function(i) lapply(split.list[[i]],function(x) x[x[["FDR"]]<FDR_k,c("Seq","Mod","z","spec")])) names(seq.mod.z)<- names(split.list) #insert colunm with the name of the folder folder.name<-lapply(seq.mod.z,function(x) lapply(names(x),function(i) do.call(rbind,lapply(x[i],function(x) cbind(folder_name=i,x))))) #merge data with the same Seq, Mod and z library(plyr) library(data.table) merge.data<- lapply(folder.name,function(x) lapply(x,function(x1) {x1<-data.table(x1); x1[,spec:=paste(spec,collapse=","),by=c("Seq","Mod","z")]})) #colunm with number of spec count.spec<-lapply(merge.data,function(x) lapply(x,function(x1) {x1$counts<-sapply(x1$spec, function(x2) length(gsub("\\s", "", unlist(strsplit(x2, ",")))));x3<-as.data.frame(x1);names(x3)[6]<- as.character(unique(x3$folder_name));x3[,-c(1,5)]})) count.specUnique<-lapply(count.spec,function(x) lapply(x,unique)) #count spec by group (2-columns) spec.group<-lapply(count.specUnique,function(x) Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),x)) #spec.group1<-spec.group[lapply(spec.group,length)!=0] #data frame with count of spec res<- Reduce(function(...) 
merge(...,by=c("Seq","Mod","z"),all=TRUE),spec.group) res[is.na(res)] <- 0 res<- as.data.frame(res,stringsAsFactors=FALSE) print(res) } Spec(ListFacGroup,0.05) # Seq Mod z a2 c2 c3 t2 #1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 #2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 #3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1 #4 AAAAAAALQAK 2 1 0 1 1 #5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2 #6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1 #7 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1 #8 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0 #9 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1 #10 AAAAAPGTAEK 2 0 1 0 0 #11 aAAAASAPQQLSDEELFSQLR 1-n_acPro/ 2 1 0 0 1 #12 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1 #13 AAAAAWEEPSSGNGTAR 2 1 1 1 1 #14 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1 #15 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1 #16 AAAAEVLGLILR 2 1 1 1 1 #17 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1 #18 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0 #19 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1 #20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1 #21 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0 #22 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0 #23 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0 #24 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0 Regarding the 2nd question, I am a bit busy now. Will try it later. A.K. ________________________________ From: Vera Costa <veracosta...@gmail.com> To: arun <smartpink...@yahoo.com> Sent: Thursday, March 28, 2013 9:43 AM Subject: Re: new question I don't remove duplicated, but write only one time. If I haven't "unique" I have the same row a lot of times, but with "unique" we remove all. I need this row write only one time. 
without "unique" the output is 1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 2 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 3 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 4 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 5 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 6 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 7 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 8 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 9 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 10 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 11 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 12 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1 13 AAAAAAALQAK 2 1 0 1 1 14 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2 15 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1 16 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1 17 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0 18 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1 19 AAAAAPGTAEK 2 0 1 0 0 20 aAAAASAPQQLSDEELFSQLR 1-n_acPro/ 2 1 0 0 1 21 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1 22 AAAAAWEEPSSGNGTAR 2 1 1 1 1 23 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1 24 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1 25 AAAAEVLGLILR 2 1 1 1 1 26 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1 27 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0 28 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1 29 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1 30 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0 31 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0 32 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0 33 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0 with "unique" is Seq Mod z a2 c2 c3 t2 1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 1 0 0 1 2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 1 0 0 1 3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1 4 AAAAAAALQAK 2 1 0 1 1 5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2 6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1 7 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1 8 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0 9 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1 10 AAAAAPGTAEK 2 0 1 0 0 11 aAAAASAPQQLSDEELFSQLR 
1-n_acPro/ 2 1 0 0 1 12 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1 13 AAAAAWEEPSSGNGTAR 2 1 1 1 1 14 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1 15 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1 16 AAAAEVLGLILR 2 1 1 1 1 17 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1 18 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0 19 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1 20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1 21 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0 22 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0 23 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0 24 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0 But I need the row 1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 write only one time ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.