Hi,
# Root folder that GetFileList() scans for sub-folders and PepInfo files.
directory <- "/home/arunksa111/NewData"
# List the sub-folders of `directory` and the PepInfo files stored below it.
#
# Args:
#   directory: Path of the folder to scan.
#   number:    Value inserted into the file name "MSMS_<number>PepInfo.txt".
#
# Returns:
#   An unnamed list of two character vectors:
#     [[1]] the names of the immediate sub-directories of `directory`;
#     [[2]] the paths, prefixed with `directory`, of every file at any depth
#           below `directory` whose name ends in "MSMS_<number>PepInfo.txt".
#
# The working directory is not changed: everything is resolved relative to
# `directory` explicitly instead of calling setwd().
GetFileList <- function(directory, number) {
  entries <- dir(directory)
  is.folder <- file.info(file.path(directory, entries))$isdir
  # which() also drops entries whose type could not be determined (isdir NA).
  filelist1 <- entries[which(is.folder)]

  # The pattern is a regular expression: escape the dot and anchor the end so
  # that only the literal file name matches.
  file.pattern <- paste0("MSMS_", number, "PepInfo\\.txt$")
  lista <- dir(directory, pattern = file.pattern,
               full.names = TRUE, recursive = TRUE)

  list(filelist1, lista)
}

 file.list.names<-GetFileList(directory,23) [[1]]
 lista<-GetFileList(directory,23) [[2]]
FacGroup<-c(0,1,1,1,0,2,2,2)

# Read the tab-separated tables of every entry whose group code is not 0.
#
# Args:
#   FacGroup:  Vector with one code per file; entries equal to 0 are skipped.
#   files:     Paths of the files to read. Defaults to the global `lista`.
#   dir.names: Name to give each table, parallel to `files`. Defaults to the
#              global `file.list.names`.
#
# Returns:
#   A list of data frames (header row expected, strings kept as character),
#   one per selected file, named with the matching entry of `dir.names`.
#
# Stops when the three vectors differ in length, because the selection and
# the naming would otherwise be recycled or misaligned without any warning.
ReadDir <- function(FacGroup, files = lista, dir.names = file.list.names) {
  if (length(files) != length(FacGroup) ||
      length(dir.names) != length(FacGroup)) {
    stop("FacGroup (", length(FacGroup), "), files (", length(files),
         ") and dir.names (", length(dir.names),
         ") must have the same length", call. = FALSE)
  }

  selected <- FacGroup != 0
  read.list <- lapply(files[selected], read.table,
                      header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  names(read.list) <- dir.names[selected]
  read.list
}

# Tables of all entries whose code in FacGroup is not 0.
ListFacGroup <- ReadDir(FacGroup)
# Short preview of the same tables: head() keeps the first six rows of each.
ListFacGroupSub <- lapply(ListFacGroup, function(tbl) head(tbl))


# Count, per folder, the "Pro" entries reported for every peptide
# identification that passes an FDR cut-off.
#
# Args:
#   lista: Named list of data frames, each holding the columns "FDR", "Seq",
#          "Mod", "z" and "Pro". A "Pro" value is a comma-separated string.
#          The list names become the count columns of the result.
#   FDR_k: Rows are kept only when their FDR is strictly below this value.
#
# Returns:
#   A data frame with the columns Seq, Mod, z followed by one count column per
#   distinct list name (in sorted name order). Each row is one (Seq, Mod, z)
#   combination seen in any element. A count is the number of comma-separated
#   "Pro" entries over all kept rows of that combination in that folder;
#   combinations absent from a folder get 0.
#
# Notes:
#   * Rows whose FDR is NA are dropped.
#   * An element with no row left after filtering contributes a column of
#     zeros.
#   * Elements that share a name are pooled before counting.
#   * Only base R is used; plyr and data.table are not attached.
Pro <- function(lista, FDR_k) {
  if (is.null(names(lista))) {
    stop("'lista' must be a named list of data frames", call. = FALSE)
  }
  key.cols <- c("Seq", "Mod", "z")
  needed.cols <- c("FDR", key.cols, "Pro")

  # Build the count table (Seq, Mod, z, <label>) for the frames of one folder.
  CountOneFolder <- function(frames, label) {
    kept <- lapply(frames, function(x) {
      missing.cols <- setdiff(needed.cols, names(x))
      if (length(missing.cols) > 0) {
        stop("element '", label, "' lacks column(s): ",
             paste(missing.cols, collapse = ", "), call. = FALSE)
      }
      # which() discards rows with a missing FDR as well as those above the
      # threshold.
      x[which(x[["FDR"]] < FDR_k), c(key.cols, "Pro"), drop = FALSE]
    })
    pooled <- do.call(rbind, unname(kept))

    # Number the (Seq, Mod, z) combinations in order of first appearance.
    key <- paste(pooled[["Seq"]], pooled[["Mod"]], pooled[["z"]], sep = "\r")
    grp <- match(key, unique(key))

    # Join all Pro strings of a combination, then count the comma-separated
    # pieces of the joined string.
    joined <- vapply(split(as.character(pooled[["Pro"]]), grp),
                     paste, character(1), collapse = ",")
    out <- pooled[!duplicated(grp), key.cols, drop = FALSE]
    out[[label]] <- lengths(strsplit(joined, ",", fixed = TRUE),
                            use.names = FALSE)
    rownames(out) <- NULL
    out
  }

  # split() groups the frames by name and orders the groups by sorted name.
  by.folder <- split(lista, names(lista))
  counted <- Map(CountOneFolder, by.folder, names(by.folder))

  # Full outer join of all folders on the peptide key.
  res <- Reduce(function(a, b) merge(a, b, by = key.cols, all = TRUE), counted)
  # Combinations missing from a folder come out of the join as NA; this
  # replaces every NA in the table with 0.
  res[is.na(res)] <- 0
  as.data.frame(res, stringsAsFactors = FALSE)
}

 Pro(ListFacGroupSub,0.05)
#                        Seq                 Mod z c2 c3 c4 t2 t3 t4
#1     aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  0  0  1  1  0  1
#2      aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  0  0  1  1  0  1
#3           aAAAAAAAAAGAAGGR          1-n_acPro/ 2  2  0  2  2  2  2
#4      aAAAAAAAGAAGGRGSGPGRR          1-n_acPro/ 2  0  0  2  0  0  2
#5                AAAAAAAkAAK             8-K_ac/ 2  1  0  0  0  0  0
#6                AAAAAAALQAK                     2  0  2  0  2  0  0
#7             aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  2  2  2  2
#8            aAAAAATAAAAASIR          1-n_acPro/ 2  0  0  0  0  1  0
#9  aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  0  0  0 18  0  0
#10 aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2 18  0  0  0  0  0
#11 aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  0 18  0  0  0  0
#12           aAAAAVGNAVPCGAR          1-n_acPro/ 2  0  1  0  0  0  0


ProCt<-Pro(ListFacGroup,0.05)
 dim(ProCt)
#[1] 29429     9

A.K.

________________________________
From: Vera Costa <veracosta...@gmail.com>
To: arun <smartpink...@yahoo.com> 
Sent: Thursday, May 16, 2013 1:37 PM
Subject: Re: question



Hi. 

Another thing that I need (I have sent the data in a new format) is to count the 
data the way the function spec does, but for the variable "pro". The idea is 
exactly the same, but I'm having some difficulties because of the format of the 
data. A Pro value looks like 
">sp|Q86U42|PABP2_HUMAN,>sp|Q86U42-2|PABP2_HUMAN"; the comma separates two Pro entries.

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to