Dear R help;

I'll preface this by saying that the example I've provided below is pretty
long, turgid, and otherwise a deep dive into a series of functions I wrote
for a simulation study. It is, however, reproducible and self-contained.

I'm trying to do my first simulation study that's quite big, and so I'll
say that the output of this simulation as I'd like it to be is 9.375
million rows (9375 combinations of the inputs * 1000). So, first thing,
maybe I should try to cut down on the number of dimensions and do it a
piece at a time. If that's a conclusion people come to here, I'll look into
paring down/doing it a chunk at a time.

I'm hoping, though, that this is an opportunity for me to learn some more
efficient, elegant ways to a. vectorize and b. use the apply functions,
which still seem to elude me when it comes to their use with more complex,
custom functions that have multiple inputs.

If having looked the below over you can give me suggestions to help make
this and future code I write run more quickly/efficiently, I will be
most grateful, as as this is currently written it would take what looks
like days to run a thousand simulations of each possible combination of
variables of interest.

Best

Ben Caldwell

-----------------------------------------------
#unpaired
verification.plots<-rnorm(30, 100, 10)
# writeClipboard(as.character(verification.plots))

unpaired.test<- function(verification.plots, project.n, project.mean,
project.sd, allowed.deviation, project.acres, alpha=.05){

verification.plots <-as.numeric(as.character(verification.plots))
a <- qnorm(alpha/2)
d <- alpha*project.mean

# verifier plot number

n<-length(verification.plots)
verifier.plot.number <- c(1:n)


#all plots (verifier and project)

all.plots.n <- rep(0,n)
for(i in 1:n){
all.plots.n[i] <- project.n + verifier.plot.number[i]
}

#running mean
X_bar <- rep(1,n)

for(i in 1:n){
X_bar[i]<- mean(verification.plots[1:i])
}
 #running sd
sd2 <- NULL

for(i in 2:n){
sd2[i]<-sd(verification.plots[1:i])
 }
#Tn
Tn<-NULL

for(i in 2:n){
Tn[i] <- project.mean-X_bar[i]
 }
#n_Threshold
n_thresh<-NULL

for(i in 2:n){
n_thresh[i] <- (((a^2)/(d^2))*((project.sd^2)+(sd2[i]^2)))
 }
#Upper
upper <- NULL
for (i in 2:n){
upper[i] <- Tn[i]-d
}
#Lower
lower<- NULL
for(i in 2:n){
lower[i] <- Tn[i]+d
}

#Success.fail
success.fail <- NULL



success.fail.num <- rep(0,n)
if(abs(verification.plots[1]-project.mean)<(allowed.deviation*project.mean)){
 success.fail[1] <- "Pass"
success.fail.num[1] <- 1
} else {
 success.fail[1] <- "Fail"
success.fail.num[1] <-0
}
 for(i in 2:n){
if(all.plots.n[i]>=n_thresh[i]){
if(upper[i] <= 0){
 if(lower[i] >= 0){
success.fail[i]<-"Pass"
success.fail.num[i]<-1
 } else {
success.fail[i] <- "Fail"
success.fail.num[i] <-0}
 } else {
success.fail[i] <- "Fail"
success.fail.num[i] <- 0
 }

} else {
success.fail[i] <- "Inconclusive"
 success.fail.num[i] <- 0
}
}

out.unpaired.test <- data.frame(success.fail, success.fail.num)
}


out.unpaired.test<-unpaired.test(verification.plots=verification.plots,
project.n=30, project.mean=100, project.sd=10, allowed.deviation=.05,
project.acres=99)
out.unpaired.test
#
sequential.unpaired<- function(number.strata, project.n, project.mean,
project.sd, verification.plots,allowed.deviation, project.acres,
min.plots="default", alpha=.05){

if(min.plots=="default"){
min.plots<-NULL # minimum passing
 if(number.strata == 1 & project.acres <= 100){
    min.plots = 8
} else if(number.strata == 1 & project.acres > 100 & project.acres < 500){
min.plots = 12
 } else if(number.strata == 1 & project.acres > 500 & project.acres < 5000){
    min.plots = 16
    } else if(number.strata == 1 & project.acres > 5000 & project.acres <
10000){
    min.plots = 20
} else if(number.strata == 1 & project.acres > 10000){
    min.plots = 24
    } else if(number.strata == 2 & project.acres < 100){
    min.plots = 4
} else if(number.strata == 2 & project.acres > 100 & project.acres < 500){
    min.plots = 6
} else if(number.strata == 2 & project.acres > 501 & project.acres < 5000){
    min.plots = 8
} else if(number.strata == 2 & project.acres > 5001 & project.acres <
10000){
    min.plots = 10
    } else if(number.strata == 2 & project.acres > 10000){
    min.plots = 12
} else if(number.strata == 3 & project.acres < 100){
    min.plots = 2
    } else if(number.strata == 3 & project.acres > 100 & project.acres <
500){
 min.plots = 3
} else if(number.strata == 3 & project.acres > 501 & project.acres < 5000){
 min.plots = 4
} else if(number.strata == 3 & project.acres > 5001 & project.acres <
10000){
min.plots = 5
 } else if(number.strata == 3 & project.acres > 10000){
min.plots = 6
} else {min.plots=NULL}
} else (min.plots = min.plots)


if(number.strata > 3){print("max three strata")}
if(number.strata > 3){break}


p <- length(verification.plots)
 mp <- min.plots
run <- unpaired.test(verification.plots, project.n, project.mean, project.sd,
allowed.deviation, project.acres, alpha=.05)
 number.passing.plots <- function(verification.plots, success.fail.num){
 n<-length(verification.plots)
 number.passing<-rep(0,n)
number.passing[1]<-ifelse(sum(success.fail.num[1]> mp),1,0)
for(i in 2:n){
 if(success.fail.num[i] == 1){
v <- i-1
number.passing[i]<-number.passing[v]+1
 } else{number.passing[i] <- 0}
}
number.passing
 }
number.passing<-number.passing.plots(verification.plots=verification.plots,
success.fail.num=run$success.fail.num)
 sucessful.unsucessful<-rep("blank",p)
one.zero <- rep(0,p)
result <- "blank"
resultL <- 0
n <-length(verification.plots)
for(i in 1:n){
if(number.passing[i] >= mp) {
sucessful.unsucessful[i] <- "successful"
 one.zero[i] <- 1
} else {
sucessful.unsucessful[i]<- "unsuccessful"
 one.zero[i] <- 0
}
}

if(sum(one.zero) > 0 ) {
 result <- "successful"
resultL <- 1
}

plots.success <- function(one.zero){

plots.to.success<-NULL

for(i in 1:n){
if(sum(one.zero)==0){plots.to.success= NA
 } else if(one.zero[i]==1){plots.to.success<-append(plots.to.success, i)}
}
plots.to.success<-min(plots.to.success)
plots.to.success
}

plots.to.success <- plots.success(one.zero=one.zero)

complete <-data.frame (min.plots, number.passing, sucessful.unsucessful,
one.zero)
collapsed <- data.frame(result, resultL, plots.to.success)
out <- c(as.list(complete),as.list(collapsed))
}


unpaired.out<-sequential.unpaired(number.strata=2,
verification.plots=verification.plots, project.n=15, project.mean=100,
project.sd=10, allowed.deviation=.05, project.acres=99, min.plots="default")

str(unpaired.out)
#unpaired.out[[7]][1]

##

simulation.unpaired <- function(reps, project.acreage.range = c(99,
110,510,5100,11000), project.mean, project.n.min, project.n.max,
project.sd.min, project.sd.max, verification.mean.min,
verification.mean.max, number.verification.plots.min,
number.verification.plots.max, allowed.deviation=.1, alpha=.05, length.out
= 5, min.plots="default") {

number.strata.range<-c(1:3) # set by CAR

project.n.range <- seq(project.n.min, project.n.max, length.out=length.out)

project.sd.range <- seq(project.sd.min, project.sd.max,
length.out=length.out) # assume verification sd is the same as the project
sd to simplify - seems a reasonable simpification

number.verification.plots<- seq(number.verification.plots.min,
number.verification.plots.max, length.out=length.out)

verification.range <- seq(verification.mean.min, verification.mean.max,
length.out=length.out)

permut.grid<-expand.grid(number.strata.range, project.n.range,
project.acreage.range, project.mean, project.sd.range,
number.verification.plots, verification.range, allowed.deviation) # create
a matrix with all combinations of the supplied vectors

#assign names to the colums of the grid of combinations
names.permut<-c("number.strata", "project.n.plots", "project.acreage",
"project.mean", "project.sd", "number.verification.plots",
"verification.mean", "allowed.deviation")

names(permut.grid)<-names.permut # done

combinations<-length(permut.grid[,1])

size <-reps*combinations #need to know the size of the master matrix, which
is the the number of replications * each combination of the supplied factors

# we want a df from which to read all the data into the simulation, and
record the results
permut.col<-ncol(permut.grid)
col.base<-ncol(permut.grid)+2
base <- matrix(nrow=size, ncol=col.base)
base <-data.frame(base)

# supply the names
names.base<-c("number.strata", "project.n.plots", "project.acreage",
"project.mean", "project.sd", "number.verification.plots",
"verification.mean", "allowed.deviation","success.fail", "plots.to.success")

names(base)<-names.base

# need to create index vectors for the base, master df
ends <- seq(reps+1, size+1, by=reps)
begins <- ends-reps
index <- cbind(begins, ends-1)
#done

# next, need to assign the first 6 columns and number of rows = to the
number of reps in the simulation to be the given row in the permut.grid
matrix

pb <- winProgressBar(title="Create base, big loop 1 of 2", label="0% done",
min=0, max=100, initial=0)

for (i in 1:combinations) {

base[index[i,1]:index[i,2],1:permut.col] <- permut.grid[i,]
#progress bar
 info <- sprintf("%d%% done", round((i/combinations)*100))
setWinProgressBar(pb, (i/combinations)*100, label=info)
}

close(pb)

# now, simply feed the values replicated the number of times we want to run
the simulation into the sequential.unpaired function, and assign the values
to the appropriate columns

out.index1<-ncol(permut.grid)+1
out.index2<-ncol(permut.grid)+2

#progress bar
pb <- winProgressBar(title="fill values, big loop 2 of 2", label="0% done",
min=0, max=100, initial=0)

for (i in 1:size){

scalar.base <- base[i,]
 verification.plots <- rnorm(scalar.base$number.verification.plots,
scalar.base$verification.mean, scalar.base$project.sd)
 result<- sequential.unpaired(scalar.base$number.strata,
scalar.base$project.n.plots, scalar.base$project.mean, scalar.base$
project.sd, verification.plots, scalar.base$allowed.deviation,
scalar.base$project.acreage, min.plots='default', alpha)

base[i,out.index1] <- result[[6]][1]
base[i,out.index2] <- result[[7]][1]
 info <- sprintf("%d%% done", round((i/size)*100))
setWinProgressBar(pb, (i/size)*100, label=info)
}

close(pb)
#return the results
return(base)

}

# I would like reps to = 1000, but that takes a really long time right now
test5 <- simulation.unpaired(reps=5, project.acreage.range = c(99,
110,510,5100,11000), project.mean=100, project.n.min=10, project.n.max=100,
project.sd.min=.01,  project.sd.max=.2, verification.mean.min=100,
verification.mean.max=130, number.verification.plots.min=10,
number.verification.plots.max=50, length.out = 5)

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to