# HI,
directory <- "/home/arunksa111/NewData"

# List the sub-folders of `directory` and the PepInfo files found below it.
#
# Args:
#   directory: path of the folder to scan.
#   number:    value pasted into the file name "MSMS_<number>PepInfo.txt".
#
# Returns:
#   An unnamed list of length two (callers index it with [[1]] and [[2]]):
#     [[1]] names of the immediate sub-directories of `directory`;
#     [[2]] paths, prefixed with `directory`, of every file found recursively
#           whose name matches the pattern, or NULL when nothing matches.
GetFileList <- function(directory, number) {
  # Resolve every entry against `directory` rather than calling setwd():
  # the caller's working directory is left untouched, and a relative
  # `directory` is not re-resolved against a changed working directory.
  entries <- dir(directory)
  is.folder <- file.info(file.path(directory, entries))$isdir
  # isdir is NA for entries that cannot be stat'ed; such entries are dropped.
  filelist1 <- entries[!is.na(is.folder) & is.folder]

  # The dot is escaped so that it only matches a literal "." in the file name.
  file.pattern <- paste0("MSMS_", number, "PepInfo\\.txt")
  direct <- dir(directory, pattern = file.pattern,
                full.names = FALSE, recursive = TRUE)

  # Keep NULL (not character(0)) for "no match" so that logical subsetting of
  # the result by callers yields nothing instead of NA entries.
  lista <- if (length(direct) > 0) file.path(directory, direct) else NULL

  list(filelist1, lista)
}
# Scan the data folder once and keep both parts of the listing.
file.listing <- GetFileList(directory, 23)
file.list.names <- file.listing[[1]]
lista <- file.listing[[2]]

# One entry per element of the listing; entries equal to 0 are skipped.
FacGroup <- c(0, 1, 1, 1, 0, 2, 2, 2)

# Read the tab-separated files selected by `FacGroup`.
#
# Uses the global objects `lista` (file paths) and `file.list.names`
# (folder names); both are subset with `FacGroup != 0`.
#
# Args:
#   FacGroup: vector compared against 0; positions where it differs from 0
#             select the files to read.
#
# Returns:
#   A list of data frames (one per selected file, read with header = TRUE and
#   strings kept as character), named after the selected folder names.
ReadDir <- function(FacGroup) {
  keep <- FacGroup != 0
  read.list <- lapply(lista[keep], function(x) {
    read.table(x, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  })
  names(read.list) <- file.list.names[keep]
  read.list
}

ListFacGroup <- ReadDir(FacGroup)
# First rows of every table only, for a quick trial run.
ListFacGroupSub <- lapply(ListFacGroup, head)

# Count, per (Seq, Mod, z) combination and per input table, the number of
# comma-separated entries in the "Pro" column.
#
# Args:
#   lista: named list of data frames, each with the columns "FDR", "Seq",
#          "Mod", "z" and "Pro".
#   FDR_k: threshold; only rows with FDR < FDR_k are used.
#
# Returns:
#   A data.frame with the columns Seq, Mod, z followed by one count column per
#   input table (named after the table's name in `lista`). Combinations that
#   are absent from a table get 0.
Pro <- function(lista, FDR_k) {
  # NOTE(review): nothing from plyr is referenced in this function; the call
  # is kept only so that the attach side effect stays as it was.
  library(plyr)
  library(data.table)

  key.cols <- c("Seq", "Mod", "z")
  merge.by.key <- function(...) merge(..., by = key.cols, all = TRUE)

  # Group the tables by name, then keep the rows below the FDR threshold and
  # only the columns needed further on.
  split.list <- split(lista, names(lista))
  seq.mod.z <- lapply(split.list, function(tables) {
    lapply(tables, function(x) {
      x[x[["FDR"]] < FDR_k, c("Seq", "Mod", "z", "Pro")]
    })
  })

  # Prepend the table's name as a "folder_name" column.
  folder.name <- lapply(seq.mod.z, function(x) {
    lapply(names(x), function(i) {
      do.call(rbind, lapply(x[i], function(x) cbind(folder_name = i, x)))
    })
  })

  # Within each (Seq, Mod, z) group, replace Pro by the comma-joined Pro
  # values of the whole group.
  merge.data <- lapply(folder.name, function(x) {
    lapply(x, function(x1) {
      x1 <- data.table(x1)
      x1[, Pro := paste(Pro, collapse = ","), by = c("Seq", "Mod", "z")]
    })
  })

  count.Pro <- lapply(merge.data, function(x) {
    lapply(x, function(x1) {
      # Number of comma-separated entries per row. vapply() pins the result to
      # integer even for an empty table; stripping whitespace from the pieces
      # never changed how many there are, so that step is not needed.
      x1$counts <- vapply(strsplit(x1$Pro, ","), length, integer(1))
      x3 <- as.data.frame(x1)
      # Columns are folder_name, Seq, Mod, z, Pro, counts: name the count
      # column after the folder, then drop folder_name and Pro.
      names(x3)[6] <- as.character(unique(x3$folder_name))
      x3[, -c(1, 5)]
    })
  })
  count.ProUnique <- lapply(count.Pro, function(x) lapply(x, unique))

  #count Pro by group (2-columns)
  Pro.group <- lapply(count.ProUnique, function(x) Reduce(merge.by.key, x))
  #Pro.group1<-Pro.group[lapply(Pro.group,length)!=0]

  # Full outer merge of all groups; combinations missing from a table become 0.
  res <- Reduce(merge.by.key, Pro.group)
  res[is.na(res)] <- 0
  res <- as.data.frame(res, stringsAsFactors = FALSE)
  res
}

Pro(ListFacGroupSub, 0.05)
#   Seq                       Mod                  z c2 c3 c4 t2 t3 t4
#1  aAAAAAAAAAAAAAATATAGPR    1-n_acPro/           2 0  0  1  1  0  1
#2  aAAAAAAAAAAASSPVGVGQR     1-n_acPro/           2 0  0  1  1  0  1
#3  aAAAAAAAAAGAAGGR          1-n_acPro/           2 2  0  2  2  2  2
#4  aAAAAAAAGAAGGRGSGPGRR     1-n_acPro/           2 0  0  2  0  0  2
#5  AAAAAAAkAAK               8-K_ac/              2 1  0  0  0  0  0
#6  AAAAAAALQAK                                    2 0  2  0  2  0  0
#7  aAAAAAGAGPEMVR            1-n_acPro/           2 2  2  2  2  2  2
#8  aAAAAATAAAAASIR           1-n_acPro/           2 0  0  0  0  1  0
#9  aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/  2 0  0  0  18 0  0
#10 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/           2 18 0  0  0  0  0
#11 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/           3 0  18 0  0  0  0
#12 aAAAAVGNAVPCGAR           1-n_acPro/           2 0  1  0  0  0  0

ProCt <- Pro(ListFacGroup, 0.05)
dim(ProCt)
#[1] 29429 9

# A.K.
#
# ________________________________
# From: Vera Costa <veracosta...@gmail.com>
# To: arun <smartpink...@yahoo.com>
# Sent: Thursday, May 16, 2013 1:37 PM
# Subject: Re: question
#
# Hi. Other thing that I need (and I sent a new format data) is to count data like function spec, but for the variable "pro". The idea is exactly the same, but I'm with some dificulties because the format of the data... The Pro is like ">sp|Q86U42|PABP2_HUMAN,>sp|Q86U42-2|PABP2_HUMAN". The comma split 2 pro's.
#
# ______________________________________________
# R-help@r-project.org mailing list
# https://stat.ethz.ch/mailman/listinfo/r-help
# PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
# and provide commented, minimal, self-contained, reproducible code.