Hi, Try this: lstf1<- list.files(pattern=".txt") length(lstf1) #[1] 119 #I changed the function a little bit to unlist by rows to match the dates column I created.
fun2<- function(lstf){ lst1<-lapply(lstf,function(x) readLines(x)) lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE)) lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,]) lst6<- lapply(lst5,function(x) x[!is.na(x$V1),]) lst7<- lapply(lst6,function(x) { if((min(x$V1)>1961)|(max(x$V1)<2005)){ n1<- (min(x$V1)-1961)*12 x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2<- (2005-max(x$V1))*12 x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) x3<- rbind(x1,x,x2) } else { x } }) lst8<-lapply(lst7,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) ####changed lst9<- lapply(seq_along(lst8),function(i){ x<- lst8[[i]] colnames(x)<- lstf1[i] row.names(x)<- 1:nrow(x) x }) do.call(cbind,lst9)} res<-fun2(lstf1) dim(res) #[1] 16740 119 res[res==-9999.99]<-NA which(res==-9999.99) #integer(0) dates1<-seq.Date(as.Date('1Jan1961',format="%d%b%Y"),as.Date('31Dec2005',format="%d%b%Y"),by="day") dates2<- as.character(dates1) sldat<- split(dates2,list(gsub("-.*","",dates2))) lst11<-lapply(sldat,function(x) lapply(split(x,gsub(".*-(.*)-.*","\\1",x)), function(y){x1<-as.numeric(gsub(".*-.*-(.*)","\\1",y));if((31-max(x1))>0) {x2<-seq(max(x1)+1,31,1);x3<-paste0(unique(gsub("(.*-.*-).*","\\1",y)),x2);c(y,x3)} else y} )) any(sapply(lst1,function(x) any(lapply(x,length)!=31))) #[1] FALSE lst22<-lapply(lst11,function(x) unlist(x,use.names=FALSE)) sapply(lst22,length) #1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 # 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 #1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 # 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 #1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 # 372 372 372 372 372 372 372 372 372 372 372 372 372 dates3<-unlist(lst22,use.names=FALSE) length(dates3) #[1] 16740 res1<- data.frame(dates=dates3,res,stringsAsFactors=FALSE) str(res1) 'data.frame': 16740 obs. of 120 variables: $ dates : chr "1961-01-01" "1961-01-02" "1961-01-03" "1961-01-04" ... $ dt3011120.txt: num 1.67 0 0 0 0 0 4.17 0 0 0 ... $ dt3011240.txt: num NA NA NA NA NA NA NA NA NA NA ... $ dt3011887.txt: num 0.17 0.28 0 0.3 0 0 1.78 0 0.3 0 ... $ dt3012205.txt: num 0.34 0.21 0 0.51 0 0 2.82 0 0.3 0 ... ----------------------------------------------------------- res1$dates<-as.Date(res1$dates) res2<-res1[!is.na(res1$dates),] res2[1:3,1:3] # dates dt3011120.txt dt3011240.txt #1 1961-01-01 1.67 NA #2 1961-01-02 0.00 NA #3 1961-01-03 0.00 NA dim(res2) #[1] 16436 120 Now, you can try the reshape() and the zoo(). Hope it helps. A.K. ________________________________ From: Zilefac Elvis <zilefacel...@yahoo.com> To: arun <smartpink...@yahoo.com> Sent: Wednesday, June 5, 2013 5:17 PM Subject: Re: dates and time series management Hi A.K, I am gradually improving my R skills thanks to your support. I have this code for the attached data. ******************************************************************************************************** library(hydroTSM) # Reading the data with 21 daily simulations, from 1961-01-01 up to 2005-12-31 x <- read.csv("data2.csv") # Creating a single variable with all the dates dates <- as.Date(paste0(x$year, "-", x$month, "-", x$day), format="%Y-%m-%d") # Creating a data.frame with 3 columns: simulation number, Date, Rainfall values x.new <- data.frame(s.num=x[,1], Date=dates, Rainfall=x[,5]) # Creating a data.frame with 22 columns: Dates + Rainfall values for21 simulations x.wide <- reshape(x.new, idvar = "Date", timevar = "s.num", direction = "wide") # Creating a zoo variable z <- zoo(x.wide[,-1], order.by=x.wide[,1]) # 5-day total rainfall for each one of the simulations z.5tot <- rollapply(data=z, width=5, FUN=sum, fill=NA, partial= TRUE, align="center")# to get the total of 5-day precipitation # Maximum value per year of 5-day total rainfall for each one of the simulations z.5max.annual <- daily2annual(z.5max, dates=1, FUN=max) ********************************************************************************************************* Problem: I am trying to do a similar thing with 'res' from our previous problem (see below). However, instead of width=5, I need something like Max.Daily<-rollapply(data=z, width=372, FUN=max, by.column = TRUE, partial= TRUE, align="center") # width=1961 to 2005=45years, 16740/45=372 To do this, I need a date column vector just as I did above. Can you show me how to generate daily dates with format="%Y-%m-%d"? Days range from 1 to 31 for all months since we filled for example February having 28/29 days with NA. Months from 1 to 12 and years from 1961 to 2005. If column 1 of 'res' contains dates, then we can use parts of the code above to extract the Maximum value for each year and for each column. So, my final output will be 45 * 119. Thanks so much A.K. I keep learning hard though slowly. ________________________________ From: arun : R help <r-help@r-project.org> Sent: Wednesday, June 5, 2013 9:44 AM Subject: Re: dates and time series management Hi Atem, No problem. which(res==-9999.99) # [1] 18246 397379 420059 426569 427109 603659 604199 662518 664678 #[10] 698982 699522 700062 701142 754745 1289823 1500490 1589487 1716011 #[19] 1837083 which(res==-9999.99,arr.ind=TRUE) # row col #1506 1506 2 #12359 12359 24 #1559 1559 26 #8069 8069 26 #---------------------- res[ which(res==-9999.99,arr.ind=TRUE)]<-NA #or res[res==-9999.99]<-NA which(res==-9999.99) #integer(0) A.K. ________________________________ From: Zilefac Elvis <zilefacelv 5, 2013 10:56 AM Subject: Re: dates and time series management Hi A.K, It works as expected. You are too smart. Can you find all -9999.99 and replace with NA, if only it exists? lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) Thanks so much A.K. ________________________________ From: arun <smartpin <r-help@r-project.org> Sent: Wednesday, June 5, 2013 7:44 AM Subject: Re: dates and time series management Hi, Try this: lstf1<- list.files(pattern=".txt") length(lstf1) #[1] 119 fun2<- function(lstf){ lst1<-lapply(lstf,function(x) readLines(x)) lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE)) lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,]) lst6<- lapply(lst5,function(x) x[!is.na(x$V1),]) lst7<- lapply(lst6,function(x) { if((min(x$V1)>1961)|(max(x$V1)<2005)){ n1<- (min(x$V1)-1961)*12 x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2<- (2005-max(x$V1))*12 x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) x3<- rbind(x1,x,x2) } else { x } }) lst8<- lapply(lst7,function(x) data.frame(col1=unlist(x[,-c(1:2)]))) lst9<- lapply(seq_along(lst8),function(i){ x<- lst8[[i]] colnames(x)<- lstf1[i] row.names(x)<- 1:nrow(x) x }) do.call(cbind,lst9)} res<-fun2(lstf1) dim(res) #[1] 16740 119 res[1:5,1:3] # dt3011120.txt dt3011240.txt dt3011887.txt #1 1.67 NA 0.17 #2 0.00 NA 0.28 #3 0.00 NA 0.00 #4 0.00 NA 0.30 #5 0.00 NA 0.00 ######################################## There are some formatting issues in your files: For eg. If I run the function line by line: lst1<-lapply(lstf1,function(x) readLines(x)) sapply(lst1,function(x) any(grepl("\\d+-9999.99",x))) [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [37] TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [73] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE [97] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE [109] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE ###means some rows in the a few files have: #-9999.99 0 0 0 0.00-9999.99 0 0.00-9999.99 0 0 0 0.00-9999.99 (no space before -9999.99) lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) sapply(lst2,function(x) any(grepl("\\d+-9999.99",x))) #still a few files had the problem [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) any(sapply(lst3,function(x) any(grepl("\\d+-9999.99",x)))) #[1] FALSE lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE)) any(sapply(lst4,function(x) any(sapply(x,is.character)))) #[1] FALSE lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,]) lst6<- lapply(lst5,function(x) x[!is.na(x$V1),]) sapply(lst6,nrow) # [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [73] 540 540 540 540 528 492 528 540 348 540 540 480 540 540 540 540 540 540 # [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 528 540 540 540 #[109] 540 540 540 540 540 540 540 540 540 468 540 lst7<- lapply(lst6,function(x) { if((min(x$V1)>1961)|(max(x$V1)<2005)){ n1<- (min(x$V1)-1961)*12 x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1)) n2<- (2005-max(x$V1))*12 x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2)) x3<- rbind(x1,x,x2) } else { x } }) sapply(lst7,nrow) # [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [73] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 # [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 #[109] 540 540 540 540 540 540 540 540 540 540 540 Hope this helps. A.K. ________________________________ Fr om> Sent: Wednesday, June 5, 2013 2:05 AM Subject: Re: dates and time series management Hi A.K, Sorry my internet connection was so bad last evening. I have attached all the files as .zip. Below is the output you requested. As I explained, the start date in 'res' should be 1961 and end date should be 2005 in all 119 files. Thanks A.K > lapply(lst1,head,3) [[1]] V1.V2.V3.V4.V5.V6.V7.V8.V9.V10.V11.V12.V13.V14.V15.V16.V17.V18.V19.V20.V21.V22.V23.V24.V25.V26.V27.V28.V29.V30.V31.V32.V33 1 1915 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 2 1915 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 3 1915 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA [ ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.