Dear R help; I'll preface this by saying that the example I've provided below is pretty long, turgid, and otherwise a deep dive into a series of functions I wrote for a simulation study. It is, however, reproducible and self-contained.
I'm trying to do my first simulation study that's quite big, and so I'll say that the output of this simulation as I'd like it to be is 9.375 million rows (9375 combinations of the inputs * 1000). So, first thing, maybe I should try to cut down on the number of dimensions and do it a piece at a time. If that's a conclusion people come to here, I'll look into paring down/doing it a chunk at a time. I'm hoping, though, that this is an opportunity for me to learn some more efficient, elegant ways to a. vectorize and b. use the apply functions, which still seem to elude me when it comes to their use with more complex, custom functions that have multiple inputs. If having looked the below over you can give me suggestions to help make this and future code I write run more quickly/efficiently, I will be most grateful, as as this is currently written it would take what looks like days to run a thousand simulations of each possible combination of variables of interest. Best Ben Caldwell ----------------------------------------------- #unpaired verification.plots<-rnorm(30, 100, 10) # writeClipboard(as.character(verification.plots)) unpaired.test<- function(verification.plots, project.n, project.mean, project.sd, allowed.deviation, project.acres, alpha=.05){ verification.plots <-as.numeric(as.character(verification.plots)) a <- qnorm(alpha/2) d <- alpha*project.mean # verifier plot number n<-length(verification.plots) verifier.plot.number <- c(1:n) #all plots (verifier and project) all.plots.n <- rep(0,n) for(i in 1:n){ all.plots.n[i] <- project.n + verifier.plot.number[i] } #running mean X_bar <- rep(1,n) for(i in 1:n){ X_bar[i]<- mean(verification.plots[1:i]) } #running sd sd2 <- NULL for(i in 2:n){ sd2[i]<-sd(verification.plots[1:i]) } #Tn Tn<-NULL for(i in 2:n){ Tn[i] <- project.mean-X_bar[i] } #n_Threshold n_thresh<-NULL for(i in 2:n){ n_thresh[i] <- (((a^2)/(d^2))*((project.sd^2)+(sd2[i]^2))) } #Upper upper <- NULL for (i in 2:n){ upper[i] <- Tn[i]-d } #Lower lower<- NULL for(i in 2:n){ lower[i] <- Tn[i]+d } #Success.fail success.fail <- NULL success.fail.num <- rep(0,n) if(abs(verification.plots[1]-project.mean)<(allowed.deviation*project.mean)){ success.fail[1] <- "Pass" success.fail.num[1] <- 1 } else { success.fail[1] <- "Fail" success.fail.num[1] <-0 } for(i in 2:n){ if(all.plots.n[i]>=n_thresh[i]){ if(upper[i] <= 0){ if(lower[i] >= 0){ success.fail[i]<-"Pass" success.fail.num[i]<-1 } else { success.fail[i] <- "Fail" success.fail.num[i] <-0} } else { success.fail[i] <- "Fail" success.fail.num[i] <- 0 } } else { success.fail[i] <- "Inconclusive" success.fail.num[i] <- 0 } } out.unpaired.test <- data.frame(success.fail, success.fail.num) } out.unpaired.test<-unpaired.test(verification.plots=verification.plots, project.n=30, project.mean=100, project.sd=10, allowed.deviation=.05, project.acres=99) out.unpaired.test # sequential.unpaired<- function(number.strata, project.n, project.mean, project.sd, verification.plots,allowed.deviation, project.acres, min.plots="default", alpha=.05){ if(min.plots=="default"){ min.plots<-NULL # minimum passing if(number.strata == 1 & project.acres <= 100){ min.plots = 8 } else if(number.strata == 1 & project.acres > 100 & project.acres < 500){ min.plots = 12 } else if(number.strata == 1 & project.acres > 500 & project.acres < 5000){ min.plots = 16 } else if(number.strata == 1 & project.acres > 5000 & project.acres < 10000){ min.plots = 20 } else if(number.strata == 1 & project.acres > 10000){ min.plots = 24 } else if(number.strata == 2 & project.acres < 100){ min.plots = 4 } else if(number.strata == 2 & project.acres > 100 & project.acres < 500){ min.plots = 6 } else if(number.strata == 2 & project.acres > 501 & project.acres < 5000){ min.plots = 8 } else if(number.strata == 2 & project.acres > 5001 & project.acres < 10000){ min.plots = 10 } else if(number.strata == 2 & project.acres > 10000){ min.plots = 12 } else if(number.strata == 3 & project.acres < 100){ min.plots = 2 } else if(number.strata == 3 & project.acres > 100 & project.acres < 500){ min.plots = 3 } else if(number.strata == 3 & project.acres > 501 & project.acres < 5000){ min.plots = 4 } else if(number.strata == 3 & project.acres > 5001 & project.acres < 10000){ min.plots = 5 } else if(number.strata == 3 & project.acres > 10000){ min.plots = 6 } else {min.plots=NULL} } else (min.plots = min.plots) if(number.strata > 3){print("max three strata")} if(number.strata > 3){break} p <- length(verification.plots) mp <- min.plots run <- unpaired.test(verification.plots, project.n, project.mean, project.sd, allowed.deviation, project.acres, alpha=.05) number.passing.plots <- function(verification.plots, success.fail.num){ n<-length(verification.plots) number.passing<-rep(0,n) number.passing[1]<-ifelse(sum(success.fail.num[1]> mp),1,0) for(i in 2:n){ if(success.fail.num[i] == 1){ v <- i-1 number.passing[i]<-number.passing[v]+1 } else{number.passing[i] <- 0} } number.passing } number.passing<-number.passing.plots(verification.plots=verification.plots, success.fail.num=run$success.fail.num) sucessful.unsucessful<-rep("blank",p) one.zero <- rep(0,p) result <- "blank" resultL <- 0 n <-length(verification.plots) for(i in 1:n){ if(number.passing[i] >= mp) { sucessful.unsucessful[i] <- "successful" one.zero[i] <- 1 } else { sucessful.unsucessful[i]<- "unsuccessful" one.zero[i] <- 0 } } if(sum(one.zero) > 0 ) { result <- "successful" resultL <- 1 } plots.success <- function(one.zero){ plots.to.success<-NULL for(i in 1:n){ if(sum(one.zero)==0){plots.to.success= NA } else if(one.zero[i]==1){plots.to.success<-append(plots.to.success, i)} } plots.to.success<-min(plots.to.success) plots.to.success } plots.to.success <- plots.success(one.zero=one.zero) complete <-data.frame (min.plots, number.passing, sucessful.unsucessful, one.zero) collapsed <- data.frame(result, resultL, plots.to.success) out <- c(as.list(complete),as.list(collapsed)) } unpaired.out<-sequential.unpaired(number.strata=2, verification.plots=verification.plots, project.n=15, project.mean=100, project.sd=10, allowed.deviation=.05, project.acres=99, min.plots="default") str(unpaired.out) #unpaired.out[[7]][1] ## simulation.unpaired <- function(reps, project.acreage.range = c(99, 110,510,5100,11000), project.mean, project.n.min, project.n.max, project.sd.min, project.sd.max, verification.mean.min, verification.mean.max, number.verification.plots.min, number.verification.plots.max, allowed.deviation=.1, alpha=.05, length.out = 5, min.plots="default") { number.strata.range<-c(1:3) # set by CAR project.n.range <- seq(project.n.min, project.n.max, length.out=length.out) project.sd.range <- seq(project.sd.min, project.sd.max, length.out=length.out) # assume verification sd is the same as the project sd to simplify - seems a reasonable simpification number.verification.plots<- seq(number.verification.plots.min, number.verification.plots.max, length.out=length.out) verification.range <- seq(verification.mean.min, verification.mean.max, length.out=length.out) permut.grid<-expand.grid(number.strata.range, project.n.range, project.acreage.range, project.mean, project.sd.range, number.verification.plots, verification.range, allowed.deviation) # create a matrix with all combinations of the supplied vectors #assign names to the colums of the grid of combinations names.permut<-c("number.strata", "project.n.plots", "project.acreage", "project.mean", "project.sd", "number.verification.plots", "verification.mean", "allowed.deviation") names(permut.grid)<-names.permut # done combinations<-length(permut.grid[,1]) size <-reps*combinations #need to know the size of the master matrix, which is the the number of replications * each combination of the supplied factors # we want a df from which to read all the data into the simulation, and record the results permut.col<-ncol(permut.grid) col.base<-ncol(permut.grid)+2 base <- matrix(nrow=size, ncol=col.base) base <-data.frame(base) # supply the names names.base<-c("number.strata", "project.n.plots", "project.acreage", "project.mean", "project.sd", "number.verification.plots", "verification.mean", "allowed.deviation","success.fail", "plots.to.success") names(base)<-names.base # need to create index vectors for the base, master df ends <- seq(reps+1, size+1, by=reps) begins <- ends-reps index <- cbind(begins, ends-1) #done # next, need to assign the first 6 columns and number of rows = to the number of reps in the simulation to be the given row in the permut.grid matrix pb <- winProgressBar(title="Create base, big loop 1 of 2", label="0% done", min=0, max=100, initial=0) for (i in 1:combinations) { base[index[i,1]:index[i,2],1:permut.col] <- permut.grid[i,] #progress bar info <- sprintf("%d%% done", round((i/combinations)*100)) setWinProgressBar(pb, (i/combinations)*100, label=info) } close(pb) # now, simply feed the values replicated the number of times we want to run the simulation into the sequential.unpaired function, and assign the values to the appropriate columns out.index1<-ncol(permut.grid)+1 out.index2<-ncol(permut.grid)+2 #progress bar pb <- winProgressBar(title="fill values, big loop 2 of 2", label="0% done", min=0, max=100, initial=0) for (i in 1:size){ scalar.base <- base[i,] verification.plots <- rnorm(scalar.base$number.verification.plots, scalar.base$verification.mean, scalar.base$project.sd) result<- sequential.unpaired(scalar.base$number.strata, scalar.base$project.n.plots, scalar.base$project.mean, scalar.base$ project.sd, verification.plots, scalar.base$allowed.deviation, scalar.base$project.acreage, min.plots='default', alpha) base[i,out.index1] <- result[[6]][1] base[i,out.index2] <- result[[7]][1] info <- sprintf("%d%% done", round((i/size)*100)) setWinProgressBar(pb, (i/size)*100, label=info) } close(pb) #return the results return(base) } # I would like reps to = 1000, but that takes a really long time right now test5 <- simulation.unpaired(reps=5, project.acreage.range = c(99, 110,510,5100,11000), project.mean=100, project.n.min=10, project.n.max=100, project.sd.min=.01, project.sd.max=.2, verification.mean.min=100, verification.mean.max=130, number.verification.plots.min=10, number.verification.plots.max=50, length.out = 5) [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.