Hi,

I've just read your intro, not the full code. But one way first divide your
input data into parts and make a list where each element of the list is one
part of your input data. Rest of code put in a function, which will work on
a part of your data. Build the function such that it takes list as input.
At the beginning of the function unlist your function variable, and then
convert to required data frame. Then use mclapply of multicore/parallel
package and you'll get results much faster.

Well this is the way I'd have tried, but there are other experts who can
give a better way to do it.

Regards,
Anindya

On Tue, Feb 12, 2013 at 10:52 AM, Benjamin Caldwell <btcaldw...@berkeley.edu
> wrote:

> Dear R help;
>
> I'll preface this by saying that the example I've provided below is pretty
> long, turgid, and otherwise a deep dive into a series of functions I wrote
> for a simulation study. It is, however, reproducible and self-contained.
>
> I'm trying to do my first simulation study that's quite big, and so I'll
> say that the output of this simulation as I'd like it to be is 9.375
> million rows (9375 combinations of the inputs * 1000). So, first thing,
> maybe I should try to cut down on the number of dimensions and do it a
> piece at a time. If that's a conclusion people come to here, I'll look into
> paring down/doing it a chunk at a time.
>
> I'm hoping, though, that this is an opportunity for me to learn some more
> efficient, elegant ways to a. vectorize and b. use the apply functions,
> which still seem to elude me when it comes to their use with more complex,
> custom functions that have multiple inputs.
>
> If having looked the below over you can give me suggestions to help make
> this and future code I write run more quickly/efficiently, I will be
> most grateful, as as this is currently written it would take what looks
> like days to run a thousand simulations of each possible combination of
> variables of interest.
>
> Best
>
> Ben Caldwell
>
> -----------------------------------------------
> #unpaired
> verification.plots<-rnorm(30, 100, 10)
> # writeClipboard(as.character(verification.plots))
>
> unpaired.test<- function(verification.plots, project.n, project.mean,
> project.sd, allowed.deviation, project.acres, alpha=.05){
>
> verification.plots <-as.numeric(as.character(verification.plots))
> a <- qnorm(alpha/2)
> d <- alpha*project.mean
>
> # verifier plot number
>
> n<-length(verification.plots)
> verifier.plot.number <- c(1:n)
>
>
> #all plots (verifier and project)
>
> all.plots.n <- rep(0,n)
> for(i in 1:n){
> all.plots.n[i] <- project.n + verifier.plot.number[i]
> }
>
> #running mean
> X_bar <- rep(1,n)
>
> for(i in 1:n){
> X_bar[i]<- mean(verification.plots[1:i])
> }
>  #running sd
> sd2 <- NULL
>
> for(i in 2:n){
> sd2[i]<-sd(verification.plots[1:i])
>  }
> #Tn
> Tn<-NULL
>
> for(i in 2:n){
> Tn[i] <- project.mean-X_bar[i]
>  }
> #n_Threshold
> n_thresh<-NULL
>
> for(i in 2:n){
> n_thresh[i] <- (((a^2)/(d^2))*((project.sd^2)+(sd2[i]^2)))
>  }
> #Upper
> upper <- NULL
> for (i in 2:n){
> upper[i] <- Tn[i]-d
> }
> #Lower
> lower<- NULL
> for(i in 2:n){
> lower[i] <- Tn[i]+d
> }
>
> #Success.fail
> success.fail <- NULL
>
>
>
> success.fail.num <- rep(0,n)
>
> if(abs(verification.plots[1]-project.mean)<(allowed.deviation*project.mean)){
>  success.fail[1] <- "Pass"
> success.fail.num[1] <- 1
> } else {
>  success.fail[1] <- "Fail"
> success.fail.num[1] <-0
> }
>  for(i in 2:n){
> if(all.plots.n[i]>=n_thresh[i]){
> if(upper[i] <= 0){
>  if(lower[i] >= 0){
> success.fail[i]<-"Pass"
> success.fail.num[i]<-1
>  } else {
> success.fail[i] <- "Fail"
> success.fail.num[i] <-0}
>  } else {
> success.fail[i] <- "Fail"
> success.fail.num[i] <- 0
>  }
>
> } else {
> success.fail[i] <- "Inconclusive"
>  success.fail.num[i] <- 0
> }
> }
>
> out.unpaired.test <- data.frame(success.fail, success.fail.num)
> }
>
>
> out.unpaired.test<-unpaired.test(verification.plots=verification.plots,
> project.n=30, project.mean=100, project.sd=10, allowed.deviation=.05,
> project.acres=99)
> out.unpaired.test
> #
> sequential.unpaired<- function(number.strata, project.n, project.mean,
> project.sd, verification.plots,allowed.deviation, project.acres,
> min.plots="default", alpha=.05){
>
> if(min.plots=="default"){
> min.plots<-NULL # minimum passing
>  if(number.strata == 1 & project.acres <= 100){
>     min.plots = 8
> } else if(number.strata == 1 & project.acres > 100 & project.acres < 500){
> min.plots = 12
>  } else if(number.strata == 1 & project.acres > 500 & project.acres <
> 5000){
>     min.plots = 16
>     } else if(number.strata == 1 & project.acres > 5000 & project.acres <
> 10000){
>     min.plots = 20
> } else if(number.strata == 1 & project.acres > 10000){
>     min.plots = 24
>     } else if(number.strata == 2 & project.acres < 100){
>     min.plots = 4
> } else if(number.strata == 2 & project.acres > 100 & project.acres < 500){
>     min.plots = 6
> } else if(number.strata == 2 & project.acres > 501 & project.acres < 5000){
>     min.plots = 8
> } else if(number.strata == 2 & project.acres > 5001 & project.acres <
> 10000){
>     min.plots = 10
>     } else if(number.strata == 2 & project.acres > 10000){
>     min.plots = 12
> } else if(number.strata == 3 & project.acres < 100){
>     min.plots = 2
>     } else if(number.strata == 3 & project.acres > 100 & project.acres <
> 500){
>  min.plots = 3
> } else if(number.strata == 3 & project.acres > 501 & project.acres < 5000){
>  min.plots = 4
> } else if(number.strata == 3 & project.acres > 5001 & project.acres <
> 10000){
> min.plots = 5
>  } else if(number.strata == 3 & project.acres > 10000){
> min.plots = 6
> } else {min.plots=NULL}
> } else (min.plots = min.plots)
>
>
> if(number.strata > 3){print("max three strata")}
> if(number.strata > 3){break}
>
>
> p <- length(verification.plots)
>  mp <- min.plots
> run <- unpaired.test(verification.plots, project.n, project.mean,
> project.sd,
> allowed.deviation, project.acres, alpha=.05)
>  number.passing.plots <- function(verification.plots, success.fail.num){
>  n<-length(verification.plots)
>  number.passing<-rep(0,n)
> number.passing[1]<-ifelse(sum(success.fail.num[1]> mp),1,0)
> for(i in 2:n){
>  if(success.fail.num[i] == 1){
> v <- i-1
> number.passing[i]<-number.passing[v]+1
>  } else{number.passing[i] <- 0}
> }
> number.passing
>  }
> number.passing<-number.passing.plots(verification.plots=verification.plots,
> success.fail.num=run$success.fail.num)
>  sucessful.unsucessful<-rep("blank",p)
> one.zero <- rep(0,p)
> result <- "blank"
> resultL <- 0
> n <-length(verification.plots)
> for(i in 1:n){
> if(number.passing[i] >= mp) {
> sucessful.unsucessful[i] <- "successful"
>  one.zero[i] <- 1
> } else {
> sucessful.unsucessful[i]<- "unsuccessful"
>  one.zero[i] <- 0
> }
> }
>
> if(sum(one.zero) > 0 ) {
>  result <- "successful"
> resultL <- 1
> }
>
> plots.success <- function(one.zero){
>
> plots.to.success<-NULL
>
> for(i in 1:n){
> if(sum(one.zero)==0){plots.to.success= NA
>  } else if(one.zero[i]==1){plots.to.success<-append(plots.to.success, i)}
> }
> plots.to.success<-min(plots.to.success)
> plots.to.success
> }
>
> plots.to.success <- plots.success(one.zero=one.zero)
>
> complete <-data.frame (min.plots, number.passing, sucessful.unsucessful,
> one.zero)
> collapsed <- data.frame(result, resultL, plots.to.success)
> out <- c(as.list(complete),as.list(collapsed))
> }
>
>
> unpaired.out<-sequential.unpaired(number.strata=2,
> verification.plots=verification.plots, project.n=15, project.mean=100,
> project.sd=10, allowed.deviation=.05, project.acres=99,
> min.plots="default")
>
> str(unpaired.out)
> #unpaired.out[[7]][1]
>
> ##
>
> simulation.unpaired <- function(reps, project.acreage.range = c(99,
> 110,510,5100,11000), project.mean, project.n.min, project.n.max,
> project.sd.min, project.sd.max, verification.mean.min,
> verification.mean.max, number.verification.plots.min,
> number.verification.plots.max, allowed.deviation=.1, alpha=.05, length.out
> = 5, min.plots="default") {
>
> number.strata.range<-c(1:3) # set by CAR
>
> project.n.range <- seq(project.n.min, project.n.max, length.out=length.out)
>
> project.sd.range <- seq(project.sd.min, project.sd.max,
> length.out=length.out) # assume verification sd is the same as the project
> sd to simplify - seems a reasonable simpification
>
> number.verification.plots<- seq(number.verification.plots.min,
> number.verification.plots.max, length.out=length.out)
>
> verification.range <- seq(verification.mean.min, verification.mean.max,
> length.out=length.out)
>
> permut.grid<-expand.grid(number.strata.range, project.n.range,
> project.acreage.range, project.mean, project.sd.range,
> number.verification.plots, verification.range, allowed.deviation) # create
> a matrix with all combinations of the supplied vectors
>
> #assign names to the colums of the grid of combinations
> names.permut<-c("number.strata", "project.n.plots", "project.acreage",
> "project.mean", "project.sd", "number.verification.plots",
> "verification.mean", "allowed.deviation")
>
> names(permut.grid)<-names.permut # done
>
> combinations<-length(permut.grid[,1])
>
> size <-reps*combinations #need to know the size of the master matrix, which
> is the the number of replications * each combination of the supplied
> factors
>
> # we want a df from which to read all the data into the simulation, and
> record the results
> permut.col<-ncol(permut.grid)
> col.base<-ncol(permut.grid)+2
> base <- matrix(nrow=size, ncol=col.base)
> base <-data.frame(base)
>
> # supply the names
> names.base<-c("number.strata", "project.n.plots", "project.acreage",
> "project.mean", "project.sd", "number.verification.plots",
> "verification.mean", "allowed.deviation","success.fail",
> "plots.to.success")
>
> names(base)<-names.base
>
> # need to create index vectors for the base, master df
> ends <- seq(reps+1, size+1, by=reps)
> begins <- ends-reps
> index <- cbind(begins, ends-1)
> #done
>
> # next, need to assign the first 6 columns and number of rows = to the
> number of reps in the simulation to be the given row in the permut.grid
> matrix
>
> pb <- winProgressBar(title="Create base, big loop 1 of 2", label="0% done",
> min=0, max=100, initial=0)
>
> for (i in 1:combinations) {
>
> base[index[i,1]:index[i,2],1:permut.col] <- permut.grid[i,]
> #progress bar
>  info <- sprintf("%d%% done", round((i/combinations)*100))
> setWinProgressBar(pb, (i/combinations)*100, label=info)
> }
>
> close(pb)
>
> # now, simply feed the values replicated the number of times we want to run
> the simulation into the sequential.unpaired function, and assign the values
> to the appropriate columns
>
> out.index1<-ncol(permut.grid)+1
> out.index2<-ncol(permut.grid)+2
>
> #progress bar
> pb <- winProgressBar(title="fill values, big loop 2 of 2", label="0% done",
> min=0, max=100, initial=0)
>
> for (i in 1:size){
>
> scalar.base <- base[i,]
>  verification.plots <- rnorm(scalar.base$number.verification.plots,
> scalar.base$verification.mean, scalar.base$project.sd)
>  result<- sequential.unpaired(scalar.base$number.strata,
> scalar.base$project.n.plots, scalar.base$project.mean, scalar.base$
> project.sd, verification.plots, scalar.base$allowed.deviation,
> scalar.base$project.acreage, min.plots='default', alpha)
>
> base[i,out.index1] <- result[[6]][1]
> base[i,out.index2] <- result[[7]][1]
>  info <- sprintf("%d%% done", round((i/size)*100))
> setWinProgressBar(pb, (i/size)*100, label=info)
> }
>
> close(pb)
> #return the results
> return(base)
>
> }
>
> # I would like reps to = 1000, but that takes a really long time right now
> test5 <- simulation.unpaired(reps=5, project.acreage.range = c(99,
> 110,510,5100,11000), project.mean=100, project.n.min=10, project.n.max=100,
> project.sd.min=.01,  project.sd.max=.2, verification.mean.min=100,
> verification.mean.max=130, number.verification.plots.min=10,
> number.verification.plots.max=50, length.out = 5)
>
>         [[alternative HTML version deleted]]
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



-- 
Anindya Sankar Dey

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to