Hi Atem, No problem.
which(res==-9999.99) # [1] 18246 397379 420059 426569 427109 603659 604199 662518 664678 #[10] 698982 699522 700062 701142 754745 1289823 1500490 1589487 1716011 #[19] 1837083 which(res==-9999.99,arr.ind=TRUE) # row col #1506 1506 2 #12359 12359 24 #1559 1559 26 #8069 8069 26 #---------------------- res[ which(res==-9999.99,arr.ind=TRUE)]<-NA #or res[res==-9999.99]<-NA which(res==-9999.99) #integer(0) A.K. ________________________________ From: Zilefac Elvis <zilefacel...@yahoo.com> To: arun <smartpink...@yahoo.com> Sent: Wednesday, June 5, 2013 10:56 AM Subject: Re: dates and time series management Hi A.K, It works as expected. You are too smart. Can you find all -9999.99 and replace with NA, if only it exists? lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) Thanks so much A.K. ________________________________ From: arun <smartpink...@yahoo.com> To: Zilefac Elvis <zilefacel...@yahoo.com> Cc: R help <r-help@r-project.org> Sent: Wednesday, June 5, 2013 7:44 AM Subject: Re: dates and time series management Hi, Try this: lstf1<- list.files(pattern=".txt") length(lstf1) #[1] 119 fun2<- function(lstf){ lst1<-lapply(lstf,function(x) readLines(x)) lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE)) lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,]) lst6<- lapply(lst5,function(x) x[!is.na(x$V1),]) lst7<- lapply(lst6,function(x) { if((min(x$V1)>1961)|(max(x$V1)<2005)){ n1<- (min(x$V1)-1961)*12 x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2<- (2005-max(x$V1))*12 x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) x3<- rbind(x1,x,x2) } else { x } }) lst8<- lapply(lst7,function(x) data.frame(col1=unlist(x[,-c(1:2)]))) lst9<- lapply(seq_along(lst8),function(i){ x<- lst8[[i]] 
colnames(x)<- lstf1[i] row.names(x)<- 1:nrow(x) x }) do.call(cbind,lst9)} res<-fun2(lstf1) dim(res) #[1] 16740 119 res[1:5,1:3] # dt3011120.txt dt3011240.txt dt3011887.txt #1 1.67 NA 0.17 #2 0.00 NA 0.28 #3 0.00 NA 0.00 #4 0.00 NA 0.30 #5 0.00 NA 0.00 ######################################## There are some formatting issues in your files: For example, if I run the function line by line: lst1<-lapply(lstf1,function(x) readLines(x)) sapply(lst1,function(x) any(grepl("\\d+-9999.99",x))) # TRUE for some files, which means some rows in a few files have: #-9999.99 0 0 0 0.00-9999.99 0 0.00-9999.99 0 0 0 0.00-9999.99 (no space before -9999.99) lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) sapply(lst2,function(x) any(grepl("\\d+-9999.99",x))) #still a few files had the problem lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) any(sapply(lst3,function(x) any(grepl("\\d+-9999.99",x)))) #[1] FALSE lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE)) any(sapply(lst4,function(x) any(sapply(x,is.character)))) #[1] FALSE lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,]) lst6<- lapply(lst5,function(x) x[!is.na(x$V1),]) sapply(lst6,nrow) # [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [73] 540 540 540 540 528 492 528 540 348 540 540 480 540 540 540 540 540 540 # [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 528 540 540 540 #[109] 540 540 540 540 540 540 540 540 540 468 540 lst7<- lapply(lst6,function(x) { if((min(x$V1)>1961)|(max(x$V1)<2005)){ n1<- (min(x$V1)-1961)*12 x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2<- (2005-max(x$V1))*12 x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) x3<- rbind(x1,x,x2) } else { x } }) 
sapply(lst7,nrow) # [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [73] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 #[109] 540 540 540 540 540 540 540 540 540 540 540 Hope this helps. A.K. ________________________________ From: Zilefac Elvis <zilefacel...@yahoo.com> To: arun <smartpink...@yahoo.com> Sent: Wednesday, June 5, 2013 2:05 AM Subject: Re: dates and time series management Hi A.K, Sorry my internet connection was so bad last evening. I have attached all the files as .zip. Below is the output you requested. As I explained, the start date in 'res' should be 1961 and end date should be 2005 in all 119 files. Thanks A.K > lapply(lst1,head,3) [[1]] V1.V2.V3.V4.V5.V6.V7.V8.V9.V10.V11.V12.V13.V14.V15.V16.V17.V18.V19.V20.V21.V22.V23.V24.V25.V26.V27.V28.V29.V30.V31.V32.V33 1 1915 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 2 1915 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 3 1915 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA [ ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.