Hi Atem,
I guess this is what you wanted.
###Q1:
###
###working directory: Observed
#Only one file per Site. Assuming this is the case for the full dataset, then
I guess there is no need to average
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1,function(x1) lapply(x1, function(x2) {lines1 <-
readLines(x2); header1 <- lines1[1:2]; dat1 <-
read.table(text=lines1,header=FALSE,sep=",",stringsAsFactors=FALSE, skip=2);
colnames(dat1) <-
Reduce(paste,strsplit(header1,","));dat1[-c(nrow(dat1),nrow(dat1)-1),]}))
#different number of rows
sapply(seq_along(lst2),function(i){lstN <- lapply(lst2[[i]],function(x)
x[,-1]);sapply(lstN,function(x) nrow(x))})
#[1] 9 9 9 8 2 9
#difference in number of columns
sapply(seq_along(lst2),function(i) {sapply(lst2[[i]],function(x) ncol(x))})
#[1] 157 258 258 98 157 258
library(plyr)
library(stringr)
lst3 <- setNames(lapply(seq_along(lst2),function(i)
{lapply(lst2[[i]],function(x) {names(x)[-1] <- paste(names(x)[-1],
names(lst1)[i],sep="_"); names(x) <- str_trim(names(x)); x})[[1]]}),
names(lst1))
df1 <- join_all(lst3,by="Year")
dim(df1)
#[1] 9 1181
sapply(split(names(df1)[-1] ,gsub(".*\\_","",names(df1)[-1])),function(x) {df2
<- df1[,x];df3 <- data.frame(Percentiles=paste0(seq(0,100, by=1) ,"%"),
numcolwise(function(y)
quantile(y,seq(0,1,by=0.01),na.rm=TRUE))(df2),stringsAsFactors=FALSE);ncol(df3)
})
#G100 G101 G102 G103 G104 G105
# 157 258 258 98 157 258
lst4 <- split(names(df1)[-1] ,gsub(".*\\_","",names(df1)[-1]))
lapply(seq_along(lst4),function(i) {df2 <- df1[,lst4[[i]]]; df3 <-
data.frame(Percentiles=paste0(seq(0,100, by=1) ,"%"), numcolwise(function(y)
quantile(y,seq(0,1,by=0.01),na.rm=TRUE))(df2),stringsAsFactors=FALSE);df3[1:3,1:3];
write.csv(df3,paste0(paste(getwd(),
"final",paste(names(lst1)[[i]],"Quantile",sep="_"),sep="/"),".csv"),row.names=FALSE,quote=FALSE)})
ReadOut1 <-
lapply(list.files(recursive=TRUE)[grep("Quantile",list.files(recursive=TRUE))],function(x)
read.csv(x,header=TRUE,stringsAsFactors=FALSE))
sapply(ReadOut1,dim)
# [,1] [,2] [,3] [,4] [,5] [,6]
#[1,] 101 101 101 101 101 101
#[2,] 157 258 258 98 157 258
lapply(ReadOut1,function(x) x[1:2,1:3])[1:3]
#[[1]]
# Percentiles pav.DJF_G100 pav.MAM_G100
#1 0% 0 0.640500
#2 1% 0 0.664604
#
#[[2]]
# Percentiles txav.DJF_G101 txav.MAM_G101
#1 0% -13.8756 4.742400
#2 1% -13.8140 4.817184
#
#[[3]]
# Percentiles txav.DJF_G102 txav.MAM_G102
#1 0% -15.05000 4.520700
#2 1% -14.96833 4.543828
#####
###Q2:
###Observed data
dir.create("Indices")
names1 <- unlist(lapply(ReadOut1,function(x)
names(x)[-1]))
names2 <- gsub("\\_.*","",names1)
names3 <- unique(gsub("[.]", " ", names2))
res <- do.call(rbind,lapply(seq_along(lst4),function(i) {df2 <-
df1[,lst4[[i]]];vec1 <- colMeans(df2,na.rm=TRUE); vec2 <-
rep(NA,length(names3));names(vec2) <- paste(names3,names(lst4)[[i]],sep="_");
vec2[names(vec2) %in% names(vec1)] <- vec1; names(vec2) <-
gsub("\\_.*","",names(vec2)); vec2 }))
lapply(seq_len(ncol(res)),function(i) {mat1 <-
t(res[,i,drop=FALSE]);colnames(mat1) <- names(lst4);
write.csv(mat1,paste0(paste(getwd(),"Indices", gsub("
","_",rownames(mat1)),sep="/"),".csv"),row.names=FALSE,quote=FALSE)})
##Output2:
ReadOut2 <-
lapply(list.files(recursive=TRUE)[grep("Indices",list.files(recursive=TRUE))],function(x)
read.csv(x,header=TRUE,stringsAsFactors=FALSE))
length(ReadOut2)
#[1] 257
list.files(recursive=TRUE)[grep("Indices",list.files(recursive=TRUE))][1]
#[1] "Indices/pav_ANN.csv"
res[,"pav ANN",drop=FALSE]
# pav ANN
#[1,] 1.298811
#[2,] 7.642922
#[3,] 6.740011
#[4,] NA
#[5,] 1.296650
#[6,] 6.887622
ReadOut2[[1]]
# G100 G101 G102 G103 G104 G105
#1 1.298811 7.642922 6.740011 NA 1.29665 6.887622
###Sample data
###Working directory changed to "sample"
dir.create("Indices_colMeans")
lst1 <-
split(list.files(pattern=".csv"),gsub("\\_.*","",list.files(pattern=".csv")))
lst2 <- lapply(lst1,function(x1) lapply(x1, function(x2) {lines1 <-
readLines(x2); header1 <- lines1[1:2]; dat1 <-
read.table(text=lines1,header=FALSE,sep=",",stringsAsFactors=FALSE, skip=2);
colnames(dat1) <-
Reduce(paste,strsplit(header1,","));dat1[-c(nrow(dat1),nrow(dat1)-1),]}))
res1 <- do.call(rbind,lapply(seq_along(lst2),function(i)
{rowMeans(do.call(cbind,lapply(lst2[[i]],function(x)
colMeans(x[,-1],na.rm=TRUE))),na.rm=TRUE) }))
lapply(seq_len(ncol(res1)),function(i){mat1 <- t(res1[,i,drop=FALSE]);
colnames(mat1) <-
names(lst2);write.csv(mat1,paste0(paste(getwd(),"Indices_colMeans",gsub("
","_",rownames(mat1)),sep="/"),".csv"),row.names=FALSE,quote=FALSE)})
##Output2 Sample
ReadOut2S <-
lapply(list.files(recursive=TRUE)[grep("Indices",list.files(recursive=TRUE))],function(x)
read.csv(x,header=TRUE,stringsAsFactors=FALSE))
length(ReadOut2S)
#[1] 257
list.files(recursive=TRUE)[grep("Indices",list.files(recursive=TRUE))][1]
#[1] "Indices_colMeans/pav_ANN.csv"
res1[,"pav ANN",drop=FALSE]
# pav ANN
#[1,] 1.545620
#[2,] 1.518553
ReadOut2S[[1]]
# G100 G101
#1 1.54562 1.518553
A.K.
On Monday, April 14, 2014 1:05 AM, Zilefac Elvis <zilefacel...@yahoo.com> wrote:
Hi AK,
Q1) Please apply the Quantilecode.R to Observed.zip (attached). I tried but
received an error which was self-explanatory but I could not change the
dimensions in the code.
Q2) Please apply Quantilecode.R to both sample.zip and observed.zip. Here,
instead of doing quantile(y, seq(0, 1, by = 0.01), take colMeans of the
indices.
I have tried to solve both Q1 and Q2 but still unable to control the dimensions.
Thanks,
Atem.
On Sunday, April 13, 2014 9:05 AM, arun <smartpink...@yahoo.com> wrote:
Hi Atem,
On my end, the codes are not formatted in the email as seen in the screen of
formatR GUI.
I am attaching the .R file in case there is some difficulty for you.
Arun
On Sunday, April 13, 2014 10:54 AM, arun <smartpink...@yahoo.com> wrote:
Hi,
I am formatting the codes using library(formatR). Hopefully, it will not be
mangled in the email.
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) { lines1 <-
readLines(x2) header1 <- lines1[1:2] dat1 <- read.table(text = lines1, header =
FALSE, sep = ",", stringsAsFactors = FALSE, skip = 2) colnames(dat1) <-
Reduce(paste, strsplit(header1, ",")) dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
library(plyr)
lapply(seq_along(lst2), function(i) { lstN <- lapply(lst2[[i]], function(x) x[,
-1]) lstQ1 <- lapply(lstN, function(x) numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(x)) arr1 <- array(unlist(lstQ1), dim =
c(dim(lstQ1[[1]]), length(lstQ1)), dimnames = list(NULL, lapply(lstQ1,
names)[[1]])) res <- rowMeans(arr1, dims = 2, na.rm = TRUE) colnames(res) <-
gsub(" ", "_", colnames(res)) res1 <- data.frame(Percentiles = paste0(seq(0,
100, by = 1), "%"), res, stringsAsFactors = FALSE) write.csv(res1,
paste0(paste(getwd(), "final", paste(names(lst1)[[i]], "Quantile", sep = "_"),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
ReadOut1 <- lapply(list.files(recursive = TRUE)[grep("Quantile",
list.files(recursive = TRUE))], function(x) read.csv(x, header = TRUE,
stringsAsFactors = FALSE))
sapply(ReadOut1,
dim)
# [,1] [,2]
#[1,] 101 101
#[2,] 258 258
lapply(ReadOut1,function(x) x[1:2,1:3])
#[[1]]
# Percentiles txav_DJF txav_MAM
#1 0% -12.68566 7.09702
#2 1% -12.59062 7.15338
#
#[[2]]
# Percentiles txav_DJF txav_MAM
#1 0% -12.75516 6.841840
#2 1% -12.68244 6.910664
###Q2:
dir.create("Indices")
names1 <- lapply(ReadOut1, function(x) names(x))[[1]]
lstNew <- simplify2array(ReadOut1)
lapply(2:nrow(lstNew), function(i) { dat1 <- data.frame(lstNew[1],
do.call(cbind, lstNew[i, ]), stringsAsFactors = FALSE) colnames(dat1) <-
c(rownames(lstNew)[1], paste(names(lst1), rep(rownames(lstNew)[i],
length(lst1)), sep = "_")) write.csv(dat1, paste0(paste(getwd(), "Indices",
rownames(lstNew)[i], sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
}) ## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))], function(x) read.csv(x, header = TRUE,
stringsAsFactors = FALSE))
length(ReadOut2)
# [1] 257
head(ReadOut2[[1]], 2)
# Percentiles G100_pav_ANN G101_pav_ANN
#1 0% 1.054380 1.032740
#2 1%
1.069457 1.045689
A.K.
On Sunday, April 13, 2014 2:46 AM, Zilefac Elvis <zilefacel...@yahoo.com> wrote:
Hi AK,
Q1) I need your help again. Using the previous data (attached) and the previous
code below,instead of taking rowMeans, let's do quantile(x,seq(0,1,by=0.01)).
Delete the last 2 rows (Trend and p<) in each file before doing
quantile(x,seq(0,1,by=0.01)).
For example, assume that I want to
calculate quantile(x,seq(0,1,by=0.01)) for each column of Site G100. I will do
so for the 5 sims of site G100 and then take their average. This will be
approximately close to the true value than just calculating
quantile(x,seq(0,1,by=0.01)) from one sim. Please do this same thing for all
the files.
So, when you do rowMeans, it should be the mean of quantile(x,seq(0,1,by=0.01))
calculated from all sims in that Site.
Output
The number of files in "final" remains the same (2 files). The "Year"
column(will be replaced) will contain the names of
quantile(x,seq(0,1,by=0.01)) such as 0% 1% 2% 3%
4% 5% 6%, ..., 98%
99% 100% . You can give this column any name such as "Percentiles".
Q2) From the folder "final", please go to each file identified by site name,
take a column, say col1 of txav from each file, create a dataframe whose
colnames are site codes (names of files in "final"). Create a folder called
"Indices" and place this dataframe in it. The filename for the dataframe is
txav, say. So, in "Indices", you will have one file having 3 columns [,
c(Percentiles, G100,G101)]. The idea is that I want to be able to pick any
column from files in "final" and form a dataframe from which I will generate my
qqplot or boxplot.
Thanks very much AK.
Atem
This should be the final step of this my drama, at least for now.
#==============================================================================================================
dir.create("final")
lst1 <-
split(list.files(pattern=".csv"),gsub("\\_.*","",list.files(pattern=".csv")))
lst2 <- lapply(lst1,function(x1) lapply(x1, function(x2) {lines1 <-
readLines(x2); header1 <- lines1[1:2]; dat1 <-
read.table(text=lines1,header=FALSE,sep=",",stringsAsFactors=FALSE, skip=2);
colnames(dat1) <- Reduce(paste,strsplit(header1,","));dat1}))
lstYear <- lapply(lst2,function(x) lapply(x, function(y)
y[,1,drop=FALSE])[[1]])
lapply(seq_along(lst2),function(i) {lstN <-lapply(lst2[[i]],function(x)
x[,-1]); arr1 <-
array(unlist(lstN),dim=c(dim(lstN[[1]]),length(lstN)),dimnames=list(NULL,lapply(lstN,names)[[1]]));res
<-
cbind(lstYear[[i]],rowMeans(arr1,dims=2,na.rm=TRUE)); names(res) <-
gsub("\\_$","",gsub(" ", "_",names(res))); res[,1] <- gsub(" <", "",res[,1]);
write.csv(res,paste0(paste(getwd(),"final",names(lst1)
[[i]],sep="/"),".csv"),row.names=FALSE,quote=FALSE) })
#====================================================================================================
### Q1: working directory: Observed #Only one file per Site. Assuming this is
the
### case for the full dataset, then I guess there is no need to average
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
# different number of rows
sapply(seq_along(lst2), function(i) {
lstN <- lapply(lst2[[i]], function(x) x[, -1])
sapply(lstN, function(x) nrow(x))
})
# [1] 9 9 9 8 2 9 difference in column number
sapply(seq_along(lst2), function(i) {
sapply(lst2[[i]], function(x) ncol(x))
})
# [1] 157 258 258 98 157 258
library(plyr)
library(stringr)
lst3 <- setNames(lapply(seq_along(lst2), function(i) {
lapply(lst2[[i]], function(x) {
names(x)[-1] <- paste(names(x)[-1], names(lst1)[i], sep = "_")
names(x) <- str_trim(names(x))
x
})[[1]]
}), names(lst1))
df1 <- join_all(lst3, by = "Year")
dim(df1)
# [1] 9 1181
sapply(split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1])), function(x) {
df2 <- df1[, x]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
ncol(df3)
})
# G100 G101 G102 G103 G104 G105 157 258 258 98 157 258
lst4 <- split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1]))
lapply(seq_along(lst4), function(i) {
df2 <- df1[, lst4[[i]]]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
df3[1:3, 1:3]
write.csv(df3, paste0(paste(getwd(), "final", paste(names(lst1)[[i]],
"Quantile",
sep = "_"), sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
ReadOut1 <- lapply(list.files(recursive = TRUE)[grep("Quantile",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
sapply(ReadOut1, dim)
# [,1] [,2] [,3] [,4] [,5] [,6] [1,] 101 101 101 101 101 101 [2,] 157 258 258 98
# 157 258
lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
# [[1]] Percentiles pav.DJF_G100 pav.MAM_G100 1 0% 0 0.640500 2 1% 0 0.664604
# [[2]] Percentiles txav.DJF_G101 txav.MAM_G101 1 0% -13.8756 4.742400 2 1%
# -13.8140 4.817184 [[3]] Percentiles txav.DJF_G102 txav.MAM_G102 1 0% -15.05000
# 4.520700 2 1% -14.96833 4.543828
### Q2: Observed data
dir.create("Indices")
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- gsub("\\_.*", "", names1)
names3 <- unique(gsub("[.]", " ", names2))
res <- do.call(rbind, lapply(seq_along(lst4), function(i) {
df2 <- df1[, lst4[[i]]]
vec1 <- colMeans(df2, na.rm = TRUE)
vec2 <- rep(NA, length(names3))
names(vec2) <- paste(names3, names(lst4)[[i]], sep = "_")
vec2[names(vec2) %in% names(vec1)] <- vec1
names(vec2) <- gsub("\\_.*", "", names(vec2))
vec2
}))
lapply(seq_len(ncol(res)), function(i) {
mat1 <- t(res[, i, drop = FALSE])
colnames(mat1) <- names(lst4)
write.csv(mat1, paste0(paste(getwd(), "Indices", gsub(" ", "_",
rownames(mat1)),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2)
# [1] 257
list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))][1]
# [1] 'Indices/pav_ANN.csv'
res[, "pav ANN", drop = FALSE]
# pav ANN [1,] 1.298811 [2,] 7.642922 [3,] 6.740011 [4,] NA [5,] 1.296650 [6,]
# 6.887622
ReadOut2[[1]]
# G100 G101 G102 G103 G104 G105 1 1.298811 7.642922 6.740011 NA 1.29665 6.887622
### Sample data Working directory changed to 'sample'
dir.create("Indices_colMeans")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
res1 <- do.call(rbind, lapply(seq_along(lst2), function(i) {
rowMeans(do.call(cbind, lapply(lst2[[i]], function(x) colMeans(x[, -1],
na.rm = TRUE))),
na.rm = TRUE)
}))
lapply(seq_len(ncol(res1)), function(i) {
mat1 <- t(res1[, i, drop = FALSE])
colnames(mat1) <- names(lst2)
write.csv(mat1, paste0(paste(getwd(), "Indices_colMeans", gsub(" ", "_",
rownames(mat1)),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2 Sample
ReadOut2S <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2S)
# [1] 257
list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))][1]
# [1] 'Indices_colMeans/pav_ANN.csv'
res1[, "pav ANN", drop = FALSE]
# pav ANN [1,] 1.545620 [2,] 1.518553
ReadOut2S[[1]]
# G100 G101 1 1.54562 1.518553
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.