Hello!

Bortz, Lienert, & Boehnke (2008, pp. 140--142) suggest an exact polynomial test for low frequency tables. I used it recently, and thus, created the code attached. Maybe someone would use (and likely modify) it or incorporate it into their package.

Sören

References:

Bortz, J., Lienert, G. A., & Boehnke, K. (2008). Verteilungsfreie Methoden in der Biostatistik. Berlin: Springer

Code:

library(forensim)
myfun <- function(cur, exp, p.obs, force=F){
  if(length(cur) != length(exp)){
    stop("wrong length of dimensions, try transpose the matrix\n")
  }
  # keep users from hot laptops
  if(!force){
    if(Cmn(sum(obs), length(obs)) > (2 * (10^6))){
stop("Use option \"force=T\" to compute more than 2 million combinations.\n")
    }
  }
p.cur <- factorial(sum(cur)) / prod(sapply(cur, function(x) factorial(x))) * prod(exp ^ cur)
  if(p.cur <= p.obs){
    return(p.cur)
  }
}

# Trial 1, Book example
d.sta <- Sys.time()
exp <- c(.28, .08, .04, .42, .12, .06)
exp <- exp/sum(exp)
obs <- c(0, 3, 0, 0, 0, 0)
Cmn(sum(obs), length(obs)) # 56
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x) factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 0.1103680 secs

# Trial 2
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 0, 2, 0, 1, 1, 0, 5, 3) # now with a longer vector
Cmn(sum(obs), length(obs)) # 490314
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x) factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 1.050846 mins

# Trial 3
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 1, 1, 0, 5, 3) # changed 0 to 2 on position 2
Cmn(sum(obs), length(obs)) # 1081575
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x) factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 2.425888 mins

# Trial 4
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 1562275
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x) factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins

# Trial 5
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 3, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 2220075
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x) factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins

--
Sören Vogel, Dipl.-Psych. (Univ.), PhD-Student, Eawag, Dept. SIAM
http://www.eawag.ch, http://sozmod.eawag.ch

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to