Hi, I believe I have significantly improved [dpq]wilcox functions by implementing Harding's algorithm: Harding, E.F. (1984): An Efficient, Minimal-storage Procedure for Calculating the Mann-Whitney U, Generalized U and Similar Distributions, App. Statist., 33, 1-6
Results on my computer show (against R-2.9.1): > system.time( dwilcox( 800, 800, 80) ) user system elapsed 0.240 0.180 0.443 > system.time( dwilcox( (1:2800), 80, 80) ) user system elapsed 0.290 0.020 0.30 > system.time( dwilcox( (1:2800), 160, 160) ) user system elapsed 0.810 0.060 0.868 > system.time( dwilcox( (1:28000), 210, 210) ) user system elapsed 17.700 0.600 18.313 RAM: ~ 700MB > system.time( pwilcox( 21000, 211, 211) ) user system elapsed 18.110 0.640 18.762 > system.time( a <- qwilcox( 0.43, 200, 400) ) ^C Timing stopped at: 14.39 1.43 18.794 RAM: > 1.4GB at interrupt time > system.time( dwilcox(800, 800, 80) ) user system elapsed 0.140 0.000 0.144 > system.time( dwilcox( (1:2800), 80, 80) ) user system elapsed 0.010 0.000 0.007 > system.time( dwilcox( (1:2800), 160, 160) ) user system elapsed 0.020 0.000 0.016 > system.time( dwilcox( (1:28000), 210, 210) ) user system elapsed 0.050 0.000 0.05 RAM: < 1MB > system.time( pwilcox( 21000, 211, 211) ) user system elapsed 0.020 0.000 0.025 > system.time( a <- qwilcox( 0.43, 200, 400) ) user system elapsed 0.040 0.000 0.07 RAM: < 1MB There is no more need for wilcox_free at [dpq]wilcox in src/library/stats/distn.R (every other call after the first one with the same m,n will just read the results from the array so it will be really fast) and for #define WILCOX_MAX 50 in src/nmath/nmath.h p.s. modified files are in the attachment have fun, -- Ivo Ugrina << http://web.math.hr/~iugrina >> Teaching/Research Assistant at Department of Mathematics University of Zagreb, Croatia
# File src/library/stats/R/distn.R # Part of the R package, http://www.R-project.org # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ dexp <- function(x, rate=1, log = FALSE) .Internal(dexp(x, 1/rate, log)) pexp <- function(q, rate=1, lower.tail = TRUE, log.p = FALSE) .Internal(pexp(q, 1/rate, lower.tail, log.p)) qexp <- function(p, rate=1, lower.tail = TRUE, log.p = FALSE) .Internal(qexp(p, 1/rate, lower.tail, log.p)) rexp <- function(n, rate=1) .Internal(rexp(n, 1/rate)) dunif <- function(x, min=0, max=1, log = FALSE) .Internal(dunif(x, min, max, log)) punif <- function(q, min=0, max=1, lower.tail = TRUE, log.p = FALSE) .Internal(punif(q, min, max, lower.tail, log.p)) qunif <- function(p, min=0, max=1, lower.tail = TRUE, log.p = FALSE) .Internal(qunif(p, min, max, lower.tail, log.p)) runif <- function(n, min=0, max=1) .Internal(runif(n, min, max)) dnorm <- function(x, mean=0, sd=1, log=FALSE) .Internal(dnorm(x, mean, sd, log)) pnorm <- function(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) .Internal(pnorm(q, mean, sd, lower.tail, log.p)) qnorm <- function(p, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) .Internal(qnorm(p, mean, sd, lower.tail, log.p)) rnorm <- function(n, mean=0, sd=1) .Internal(rnorm(n, mean, sd)) dcauchy <- function(x, location=0, scale=1, log = FALSE) .Internal(dcauchy(x, location, scale, log)) pcauchy <- function(q, location=0, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(pcauchy(q, location, scale, lower.tail, log.p)) qcauchy <- function(p, location=0, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(qcauchy(p, location, scale, lower.tail, log.p)) rcauchy <- function(n, location=0, scale=1) .Internal(rcauchy(n, location, scale)) dgamma <- function(x, shape, rate = 1, scale = 1/rate, log = FALSE) .Internal(dgamma(x, shape, scale, log)) pgamma <- function(q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) .Internal(pgamma(q, shape, scale, lower.tail, log.p)) qgamma <- function(p, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) .Internal(qgamma(p, shape, scale, lower.tail, log.p)) rgamma <- function(n, shape, rate = 1, scale = 1/rate) .Internal(rgamma(n, shape, scale)) dlnorm <- function(x, meanlog=0, sdlog=1, log=FALSE) .Internal(dlnorm(x, meanlog, sdlog, log)) plnorm <- function(q, meanlog=0, sdlog=1, lower.tail = TRUE, log.p = FALSE) .Internal(plnorm(q, meanlog, sdlog, lower.tail, log.p)) qlnorm <- function(p, meanlog=0, sdlog=1, lower.tail = TRUE, log.p = FALSE) .Internal(qlnorm(p, meanlog, sdlog, lower.tail, log.p)) rlnorm <- function(n, meanlog=0, sdlog=1) .Internal(rlnorm(n, meanlog, sdlog)) dlogis <- function(x, location=0, scale=1, log = FALSE) .Internal(dlogis(x, location, scale, log)) plogis <- function(q, location=0, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(plogis(q, location, scale, lower.tail, log.p)) qlogis <- function(p, location=0, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(qlogis(p, location, scale, lower.tail, log.p)) rlogis <- function(n, location=0, scale=1) .Internal(rlogis(n, location, scale)) dweibull <- function(x, shape, scale=1, log = FALSE) .Internal(dweibull(x, shape, scale, log)) pweibull <- function(q, shape, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(pweibull(q, shape, scale, lower.tail, log.p)) qweibull <- function(p, shape, scale=1, lower.tail = TRUE, log.p = FALSE) .Internal(qweibull(p, shape, scale, lower.tail, log.p)) rweibull <- function(n, shape, scale=1) .Internal(rweibull(n, shape, scale)) dbeta <- function(x, shape1, shape2, ncp=0, log = FALSE) { if(missing(ncp)) .Internal(dbeta(x, shape1, shape2, log)) else .Internal(dnbeta(x, shape1, shape2, ncp, log)) } pbeta <- function(q, shape1, shape2, ncp=0, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(pbeta(q, shape1, shape2, lower.tail, log.p)) else .Internal(pnbeta(q, shape1, shape2, ncp, lower.tail, log.p)) } qbeta <- function(p, shape1, shape2, ncp=0, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(qbeta(p, shape1, shape2, lower.tail, log.p)) else .Internal(qnbeta(p, shape1, shape2, ncp, lower.tail, log.p)) } rbeta <- function(n, shape1, shape2, ncp = 0) { if(ncp == 0) .Internal(rbeta(n, shape1, shape2)) else { X <- rchisq(n, 2*shape1, ncp =ncp) X/(X + rchisq(n, 2*shape2)) } } dbinom <- function(x, size, prob, log = FALSE) .Internal(dbinom(x, size, prob, log)) pbinom <- function(q, size, prob, lower.tail = TRUE, log.p = FALSE) .Internal(pbinom(q, size, prob, lower.tail, log.p)) qbinom <- function(p, size, prob, lower.tail = TRUE, log.p = FALSE) .Internal(qbinom(p, size, prob, lower.tail, log.p)) rbinom <- function(n, size, prob) .Internal(rbinom(n, size, prob)) ## Multivariate: that's why there's no C interface (yet) for d...(): dmultinom <- function(x, size=NULL, prob, log = FALSE) { K <- length(prob) if(length(x) != K) stop("x[] and prob[] must be equal length vectors.") if(any(prob < 0) || (s <- sum(prob)) == 0) stop("probabilities cannot be negative nor all 0.") prob <- prob / s x <- as.integer(x + 0.5) if(any(x < 0)) stop("'x' must be non-negative") N <- sum(x) if(is.null(size)) size <- N else if (size != N) stop("size != sum(x), i.e. one is wrong") i0 <- prob == 0 if(any(i0)) { if(any(x[i0] != 0)) ## prob[j] ==0 and x[j] > 0 ==> "impossible" => P = 0 return(if(log)-Inf else 0) ## otherwise : 'all is fine': prob[j]= 0 = x[j] ==> drop j and continue if(all(i0)) return(if(log)0 else 1) ## else x <- x[!i0] prob <- prob[!i0] } r <- lgamma(size+1) + sum(x*log(prob) - lgamma(x+1)) if(log) r else exp(r) } rmultinom <- function(n, size, prob) .Internal(rmultinom(n, size, prob)) dchisq <- function(x, df, ncp=0, log = FALSE) { if(missing(ncp)) .Internal(dchisq(x, df, log)) else .Internal(dnchisq(x, df, ncp, log)) } pchisq <- function(q, df, ncp=0, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(pchisq(q, df, lower.tail, log.p)) else .Internal(pnchisq(q, df, ncp, lower.tail, log.p)) } qchisq <- function(p, df, ncp=0, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(qchisq(p, df, lower.tail, log.p)) else .Internal(qnchisq(p, df, ncp, lower.tail, log.p)) } rchisq <- function(n, df, ncp=0) { if(missing(ncp)) .Internal(rchisq(n, df)) else .Internal(rnchisq(n, df, ncp)) } df <- function(x, df1, df2, ncp, log = FALSE) { if(missing(ncp)) .Internal(df(x, df1, df2, log)) else .Internal(dnf(x, df1, df2, ncp, log)) } pf <- function(q, df1, df2, ncp, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(pf(q, df1, df2, lower.tail, log.p)) else .Internal(pnf(q, df1, df2, ncp, lower.tail, log.p)) } qf <- function(p, df1, df2, ncp, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(qf(p, df1, df2, lower.tail, log.p)) else .Internal(qnf(p, df1, df2, ncp, lower.tail, log.p)) } rf <- function(n, df1, df2, ncp) { if(missing(ncp)) .Internal(rf(n, df1, df2)) else (rchisq(n, df1, ncp=ncp)/df1)/(rchisq(n, df2)/df2) } dgeom <- function(x, prob, log = FALSE) .Internal(dgeom(x, prob, log)) pgeom <- function(q, prob, lower.tail = TRUE, log.p = FALSE) .Internal(pgeom(q, prob, lower.tail, log.p)) qgeom <- function(p, prob, lower.tail = TRUE, log.p = FALSE) .Internal(qgeom(p, prob, lower.tail, log.p)) rgeom <- function(n, prob) .Internal(rgeom(n, prob)) dhyper <- function(x, m, n, k, log = FALSE) .Internal(dhyper(x, m, n, k, log)) phyper <- function(q, m, n, k, lower.tail = TRUE, log.p = FALSE) .Internal(phyper(q, m, n, k, lower.tail, log.p)) qhyper <- function(p, m, n, k, lower.tail = TRUE, log.p = FALSE) .Internal(qhyper(p, m, n, k, lower.tail, log.p)) rhyper <- function(nn, m, n, k) .Internal(rhyper(nn, m, n, k)) dnbinom <- function(x, size, prob, mu, log = FALSE) { if (!missing(mu)) { if (!missing(prob)) stop("'prob' and 'mu' both specified") .Internal(dnbinom_mu(x, size, mu, log)) } else .Internal(dnbinom (x, size, prob, log)) } pnbinom <- function(q, size, prob, mu, lower.tail = TRUE, log.p = FALSE) { if (!missing(mu)) { if (!missing(prob)) stop("'prob' and 'mu' both specified") .Internal(pnbinom_mu(q, size, mu, lower.tail, log.p)) } else .Internal(pnbinom (q, size, prob, lower.tail, log.p)) } qnbinom <- function(p, size, prob, mu, lower.tail = TRUE, log.p = FALSE) { if (!missing(mu)) { if (!missing(prob)) stop("'prob' and 'mu' both specified") ### FIXME: implement qnbinom_mu(...) prob <- size/(size + mu) } .Internal(qnbinom(p, size, prob, lower.tail, log.p)) } rnbinom <- function(n, size, prob, mu) { if (!missing(mu)) { if (!missing(prob)) stop("'prob' and 'mu' both specified") .Internal(rnbinom_mu(n, size, mu)) } else .Internal(rnbinom(n, size, prob)) } dpois <- function(x, lambda, log = FALSE) .Internal(dpois(x, lambda, log)) ppois <- function(q, lambda, lower.tail = TRUE, log.p = FALSE) .Internal(ppois(q, lambda, lower.tail, log.p)) qpois <- function(p, lambda, lower.tail = TRUE, log.p = FALSE) .Internal(qpois(p, lambda, lower.tail, log.p)) rpois <- function(n, lambda) .Internal(rpois(n, lambda)) dt <- function(x, df, ncp, log = FALSE) { if(missing(ncp)) .Internal(dt(x, df, log)) else .Internal(dnt(x, df, ncp, log)) } pt <- function(q, df, ncp, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(pt(q, df, lower.tail, log.p)) else .Internal(pnt(q, df, ncp, lower.tail, log.p)) } qt <- function(p, df, ncp, lower.tail = TRUE, log.p = FALSE) { if(missing(ncp)) .Internal(qt(p, df, lower.tail, log.p)) else .Internal(qnt(p, df, ncp, lower.tail, log.p)) } rt <- function(n, df, ncp) { if(missing(ncp)) .Internal(rt(n, df)) else rnorm(n, ncp)/sqrt(rchisq(n, df)/df) } ptukey <- function(q, nmeans, df, nranges=1, lower.tail = TRUE, log.p = FALSE) .Internal(ptukey(q, nranges, nmeans, df, lower.tail, log.p)) qtukey <- function(p, nmeans, df, nranges=1, lower.tail = TRUE, log.p = FALSE) .Internal(qtukey(p, nranges, nmeans, df, lower.tail, log.p)) dwilcox <- function(x, m, n, log = FALSE) { .Internal(dwilcox(x, m, n, log)) } pwilcox <- function(q, m, n, lower.tail = TRUE, log.p = FALSE) { .Internal(pwilcox(q, m, n, lower.tail, log.p)) } qwilcox <- function(p, m, n, lower.tail = TRUE, log.p = FALSE) { .Internal(qwilcox(p, m, n, lower.tail, log.p)) } rwilcox <- function(nn, m, n) .Internal(rwilcox(nn, m, n)) dsignrank <- function(x, n, log = FALSE) { on.exit(.C("signrank_free", PACKAGE = "base")) .Internal(dsignrank(x, n, log)) } psignrank <- function(q, n, lower.tail = TRUE, log.p = FALSE) { on.exit(.C("signrank_free", PACKAGE = "base")) .Internal(psignrank(q, n, lower.tail, log.p)) } qsignrank <- function(p, n, lower.tail = TRUE, log.p = FALSE) { on.exit(.C("signrank_free", PACKAGE = "base")) .Internal(qsignrank(p, n, lower.tail, log.p)) } rsignrank <- function(nn, n) .Internal(rsignrank(nn, n))
/* * Mathlib : A C Library of Special Functions * Copyright (C) 1998-2004 The R Development Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * http://www.r-project.org/Licenses/ */ /* Private header file for use during compilation of Mathlib */ #ifndef MATHLIB_PRIVATE_H #define MATHLIB_PRIVATE_H #ifdef HAVE_CONFIG_H # include <config.h> #endif #ifdef HAVE_LONG_DOUBLE # define LDOUBLE long double #else # define LDOUBLE double #endif #include <math.h> #include <float.h> /* DBL_MIN etc */ #include <Rconfig.h> #include <Rmath.h> double Rf_d1mach(int); #define gamma_cody Rf_gamma_cody double gamma_cody(double); #include <R_ext/RS.h> /* possibly needed for debugging */ #include <R_ext/Print.h> #ifndef MATHLIB_STANDALONE #include <R_ext/Error.h> # define MATHLIB_ERROR(fmt,x) error(fmt,x); # define MATHLIB_WARNING(fmt,x) warning(fmt,x) # define MATHLIB_WARNING2(fmt,x,x2) warning(fmt,x,x2) # define MATHLIB_WARNING3(fmt,x,x2,x3) warning(fmt,x,x2,x3) # define MATHLIB_WARNING4(fmt,x,x2,x3,x4) warning(fmt,x,x2,x3,x4) #include <R_ext/Arith.h> #define ML_POSINF R_PosInf #define ML_NEGINF R_NegInf #define ML_NAN R_NaN void R_CheckUserInterrupt(void); /* Ei-ji Nakama reported that AIX 5.2 has calloc as a macro and objected to redefining it. Tests added for 2.2.1 */ #ifdef calloc # undef calloc #endif #define calloc R_chk_calloc #ifdef free # undef free #endif #define free R_chk_free #ifdef ENABLE_NLS #include <libintl.h> #define _(String) gettext (String) #else #define _(String) (String) #endif #else /* Mathlib standalone */ #include <stdio.h> #include <stdlib.h> /* for exit */ #define MATHLIB_ERROR(fmt,x) { printf(fmt,x); exit(1); } #define MATHLIB_WARNING(fmt,x) printf(fmt,x) #define MATHLIB_WARNING2(fmt,x,x2) printf(fmt,x,x2) #define MATHLIB_WARNING3(fmt,x,x2,x3) printf(fmt,x,x2,x3) #define MATHLIB_WARNING4(fmt,x,x2,x3,x4) printf(fmt,x,x2,x3,x4) #define ISNAN(x) (isnan(x)!=0) #define R_FINITE(x) R_finite(x) int R_finite(double); #define ML_POSINF (1.0 / 0.0) #define ML_NEGINF ((-1.0) / 0.0) #define ML_NAN (0.0 / 0.0) #define _(String) String #endif /* standalone */ #define ML_VALID(x) (!ISNAN(x)) #define ME_NONE 0 /* no error */ #define ME_DOMAIN 1 /* argument out of domain */ #define ME_RANGE 2 /* value out of range */ #define ME_NOCONV 4 /* process did not converge */ #define ME_PRECISION 8 /* does not have "full" precision */ #define ME_UNDERFLOW 16 /* and underflow occured (important for IEEE)*/ #define ML_ERR_return_NAN { ML_ERROR(ME_DOMAIN, ""); return ML_NAN; } /* For a long time prior to R 2.3.0 ML_ERROR did nothing. We don't report ME_DOMAIN errors as the callers collect ML_NANs into a single warning. */ #define ML_ERROR(x, s) { \ if(x > ME_DOMAIN) { \ char *msg = ""; \ switch(x) { \ case ME_DOMAIN: \ msg = "argument out of domain in '%s'\n"; \ break; \ case ME_RANGE: \ msg = "value out of range in '%s'\n"; \ break; \ case ME_NOCONV: \ msg = "convergence failed in '%s'\n"; \ break; \ case ME_PRECISION: \ msg = "full precision was not achieved in '%s'\n"; \ break; \ case ME_UNDERFLOW: \ msg = "underflow occurred in '%s'\n"; \ break; \ } \ MATHLIB_WARNING(msg, s); \ } \ } #ifdef HAVE_VISIBILITY_ATTRIBUTE # define attribute_hidden __attribute__ ((visibility ("hidden"))) #else # define attribute_hidden #endif /* Formerly private part of Mathlib.h */ /* always remap internal functions */ #define bd0 Rf_bd0 #define chebyshev_eval Rf_chebyshev_eval #define chebyshev_init Rf_chebyshev_init #define gammalims Rf_gammalims #define lfastchoose Rf_lfastchoose #define lgammacor Rf_lgammacor #define stirlerr Rf_stirlerr /* in Rmath.h #define gamma_cody Rf_gamma_cody */ /* Chebyshev Series */ int attribute_hidden chebyshev_init(double*, int, double); double attribute_hidden chebyshev_eval(double, const double *, const int); /* Gamma and Related Functions */ void attribute_hidden gammalims(double*, double*); double attribute_hidden lgammacor(double); /* log(gamma) correction */ double attribute_hidden stirlerr(double); /* Stirling expansion "error" */ double attribute_hidden lfastchoose(double, double); double attribute_hidden bd0(double, double); double attribute_hidden dbinom_raw(double, double, double, double, int); double attribute_hidden dpois_raw (double, double, int); double attribute_hidden pnchisq_raw(double, double, double, double, double, int, Rboolean); double attribute_hidden pgamma_raw(double, double, int, int); double attribute_hidden pbeta_raw(double, double, double, int, int); double attribute_hidden qchisq_appr(double, double, double, int, int, double tol); LDOUBLE attribute_hidden pnbeta_raw(double, double, double, double, double); double attribute_hidden pnbeta2(double, double, double, double, double, int, int); int Rf_i1mach(int); /* From toms708.c */ void attribute_hidden bratio(double a, double b, double x, double y, double *w, double *w1, int *ierr, int log_p); #endif /* MATHLIB_PRIVATE_H */
/* Mathlib : A C Library of Special Functions Copyright (C) 1999-2007 The R Development Core Team This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, a copy is available at http://www.r-project.org/Licenses/ SYNOPSIS #include <Rmath.h> double dwilcox(double x, double m, double n, int give_log) double pwilcox(double x, double m, double n, int lower_tail, int log_p) double qwilcox(double x, double m, double n, int lower_tail, int log_p); double rwilcox(double m, double n) DESCRIPTION dwilcox The density of the Wilcoxon distribution. pwilcox The distribution function of the Wilcoxon distribution. qwilcox The quantile function of the Wilcoxon distribution. rwilcox Random variates from the Wilcoxon distribution. */ /* Note: the checks here for R_CheckInterrupt also do stack checking. calloc/free are remapped for use in R, so allocation checks are done there. freeing is completed by an on.exit action in the R wrappers. */ #include "nmath.h" #include "dpq.h" #ifndef MATHLIB_STANDALONE #include <R_ext/Utils.h> #endif static double *w; static int allocated_m, allocated_n; static void w_free() { if(!w) return; free((void *) w); w = 0; allocated_m = allocated_n = 0; } void wilcox_free() { w_free(); } static void w_init_maybe(int m, int n) { int c= (m*n+1)/2; if( w){ if( m == allocated_m && n == allocated_n ) return; else w_free(); } if( !w){ w = (double *) calloc(c + 1, sizeof(double)); #ifdef MATHLIB_STANDALONE if (!w) MATHLIB_ERROR(_("wilcox allocation error %d"), 1); #endif allocated_m = m; allocated_n = n; } } /* This counts the number of choices with statistic = k More correctly, it returns the number of choices with statistic = k but it also evaluates number of choices with for every statistic k This is because the function implements Harding's algorithm. (Harding, E.F. (1984): An Efficient, Minimal-storage Procedure for Calculating the Mann-Whitney U, Generalized U and Similar Distributions, App. Statist., 33, 1-6) */ static double cwilcox(int k, int m, int n) { int c, u, i, j; int q, step; #ifndef MATHLIB_STANDALONE R_CheckUserInterrupt(); #endif u = m * n; if (k < 0 || k > u) return 0; c = (int)(u / 2); if (k > c) k = u - k; if( w[k] != 0 ) return w[k]; for( i=0; i < n+1; ++i) w[i]=1.; for( i=2; i < m+1; ++i){ step = n+i; q = c; while( q-step > -1){ w[q] = w[q] - w[q-step]; --q; } for( j=i; j<c+1; ++j) w[j] += w[j-i]; } return w[k]; } double dwilcox(double x, double m, double n, int give_log) { double d; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(m) || ISNAN(n)) return(x + m + n); #endif m = floor(m + 0.5); n = floor(n + 0.5); if (m <= 0 || n <= 0) ML_ERR_return_NAN; if (fabs(x - floor(x + 0.5)) > 1e-7) return(R_D__0); x = floor(x + 0.5); if ((x < 0) || (x > m * n)) return(R_D__0); w_init_maybe(m, n); d = give_log ? log(cwilcox(x, m, n)) - lchoose(m + n, n) : cwilcox(x, m, n) / choose(m + n, n); return(d); } /* args have the same meaning as R function pwilcox */ double pwilcox(double q, double m, double n, int lower_tail, int log_p) { int i; double c, p; #ifdef IEEE_754 if (ISNAN(q) || ISNAN(m) || ISNAN(n)) return(q + m + n); #endif if (!R_FINITE(m) || !R_FINITE(n)) ML_ERR_return_NAN; m = floor(m + 0.5); n = floor(n + 0.5); if (m <= 0 || n <= 0) ML_ERR_return_NAN; q = floor(q + 1e-7); if (q < 0.0) return(R_DT_0); if (q >= m * n) return(R_DT_1); w_init_maybe(m, n); c = choose(m + n, n); p = 0; /* Use summation of probs over the shorter range */ if (q <= (m * n / 2)) { for (i = q; i > -1; i--) p += cwilcox(i, m, n) / c; } else { q = m * n - q; for (i = q-1; i > -1; i--) p += cwilcox(i, m, n) / c; lower_tail = !lower_tail; /* p = 1 - p; */ } return(R_DT_val(p)); } /* pwilcox */ /* x is 'p' in R function qwilcox */ double qwilcox(double x, double m, double n, int lower_tail, int log_p) { double c, p, q; #ifdef IEEE_754 if (ISNAN(x) || ISNAN(m) || ISNAN(n)) return(x + m + n); #endif if(!R_FINITE(x) || !R_FINITE(m) || !R_FINITE(n)) ML_ERR_return_NAN; R_Q_P01_check(x); m = floor(m + 0.5); n = floor(n + 0.5); if (m <= 0 || n <= 0) ML_ERR_return_NAN; if (x == R_DT_0) return(0); if (x == R_DT_1) return(m * n); if(log_p || !lower_tail) x = R_DT_qIv(x); /* lower_tail,non-log "p" */ w_init_maybe(m, n); c = choose(m + n, n); p = 0; q = 0; if (x <= 0.5) { x = x - 10 * DBL_EPSILON; for (;;) { p += cwilcox(q, m, n) / c; if (p >= x) break; q++; } } else { x = 1 - x + 10 * DBL_EPSILON; for (;;) { p += cwilcox(q, m, n) / c; if (p > x) { q = m * n - q; break; } q++; } } return(q); } double rwilcox(double m, double n) { int i, j, k, *x; double r; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(m) || ISNAN(n)) return(m + n); #endif m = floor(m + 0.5); n = floor(n + 0.5); if ((m < 0) || (n < 0)) ML_ERR_return_NAN; if ((m == 0) || (n == 0)) return(0); r = 0.0; k = (int) (m + n); x = (int *) calloc(k, sizeof(int)); #ifdef MATHLIB_STANDALONE if (!x) MATHLIB_ERROR(_("wilcox allocation error %d"), 4); #endif for (i = 0; i < k; i++) x[i] = i; for (i = 0; i < n; i++) { j = floor(k * unif_rand()); r += x[j]; x[j] = x[--k]; } free(x); return(r - n * (n - 1) / 2); }
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel