[Rd] Improvement of [dpq]wilcox functions

Ivo Ugrina Thu, 09 Jul 2009 08:09:34 -0700

Hi,

I believe I have significantly improved [dpq]wilcox
functions by implementing Harding's algorithm:
Harding, E.F. (1984): An Efficient, Minimal-storage Procedure
for Calculating the Mann-Whitney U, Generalized U and Similar
Distributions, App. Statist., 33, 1-6


Results on my computer show (against R-2.9.1):

> system.time( dwilcox( 800, 800, 80) )
   user  system elapsed
  0.240   0.180   0.443
> system.time( dwilcox( (1:2800), 80, 80) )
   user  system elapsed
  0.290   0.020   0.30
> system.time( dwilcox( (1:2800), 160, 160) )
   user  system elapsed
  0.810   0.060   0.868
> system.time( dwilcox( (1:28000), 210, 210) )
   user  system elapsed
 17.700   0.600  18.313
RAM: ~ 700MB
> system.time( pwilcox( 21000, 211, 211) )
   user  system elapsed
 18.110   0.640  18.762
> system.time( a <- qwilcox( 0.43, 200, 400) )
^C
Timing stopped at: 14.39 1.43 18.794
RAM: > 1.4GB at interrupt time

> system.time( dwilcox(800, 800, 80) )
   user  system elapsed
  0.140   0.000   0.144
> system.time( dwilcox( (1:2800), 80, 80) )
   user  system elapsed
  0.010   0.000   0.007
> system.time( dwilcox( (1:2800), 160, 160) )
   user  system elapsed
  0.020   0.000   0.016
> system.time( dwilcox( (1:28000), 210, 210) )
   user  system elapsed
  0.050   0.000   0.05
RAM: < 1MB
> system.time( pwilcox( 21000, 211, 211) )
   user  system elapsed
  0.020   0.000   0.025
> system.time( a <- qwilcox( 0.43, 200, 400) )
   user  system elapsed
  0.040   0.000   0.07
RAM: < 1MB

There is no more need for
wilcox_free at [dpq]wilcox in src/library/stats/distn.R
(every other call after the first one with the same m,n
will just read the results from the array so it will be
really fast) and for
#define WILCOX_MAX 50 in src/nmath/nmath.h

p.s. modified files are in the attachment

have fun,
-- 
Ivo Ugrina << http://web.math.hr/~iugrina >>
Teaching/Research Assistant at Department of Mathematics
University of Zagreb, Croatia

#  File src/library/stats/R/distn.R
#  Part of the R package, http://www.R-project.org
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

dexp <- function(x, rate=1, log = FALSE) .Internal(dexp(x, 1/rate, log))
pexp <- function(q, rate=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(pexp(q, 1/rate, lower.tail, log.p))
qexp <- function(p, rate=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qexp(p, 1/rate, lower.tail, log.p))
rexp <- function(n, rate=1) .Internal(rexp(n, 1/rate))

dunif <- function(x, min=0, max=1, log = FALSE)
    .Internal(dunif(x, min, max, log))
punif <- function(q, min=0, max=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(punif(q, min, max, lower.tail, log.p))
qunif <- function(p, min=0, max=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qunif(p, min, max, lower.tail, log.p))
runif <- function(n, min=0, max=1) .Internal(runif(n, min, max))

dnorm <- function(x, mean=0, sd=1, log=FALSE) .Internal(dnorm(x, mean, sd, log))
pnorm <- function(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(pnorm(q, mean, sd, lower.tail, log.p))
qnorm <- function(p, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qnorm(p, mean, sd, lower.tail, log.p))
rnorm <- function(n, mean=0, sd=1) .Internal(rnorm(n, mean, sd))

dcauchy <- function(x, location=0, scale=1, log = FALSE)
    .Internal(dcauchy(x, location, scale, log))
pcauchy <-
    function(q, location=0, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(pcauchy(q, location, scale, lower.tail, log.p))
qcauchy <-
    function(p, location=0, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qcauchy(p, location, scale, lower.tail, log.p))
rcauchy <-
    function(n, location=0, scale=1) .Internal(rcauchy(n, location, scale))

dgamma <- function(x, shape, rate = 1, scale = 1/rate, log = FALSE)
    .Internal(dgamma(x, shape, scale, log))
pgamma <- function(q, shape, rate = 1, scale = 1/rate,
                   lower.tail = TRUE, log.p = FALSE)
    .Internal(pgamma(q, shape, scale, lower.tail, log.p))

qgamma <- function(p, shape, rate = 1, scale = 1/rate,
                   lower.tail = TRUE, log.p = FALSE)
    .Internal(qgamma(p, shape, scale, lower.tail, log.p))
rgamma <- function(n, shape, rate = 1, scale = 1/rate)
    .Internal(rgamma(n, shape, scale))

dlnorm <- function(x, meanlog=0, sdlog=1, log=FALSE)
    .Internal(dlnorm(x, meanlog, sdlog, log))
plnorm <- function(q, meanlog=0, sdlog=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(plnorm(q, meanlog, sdlog, lower.tail, log.p))
qlnorm <- function(p, meanlog=0, sdlog=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qlnorm(p, meanlog, sdlog, lower.tail, log.p))
rlnorm <- function(n, meanlog=0, sdlog=1) .Internal(rlnorm(n, meanlog, sdlog))

dlogis <- function(x, location=0, scale=1, log = FALSE)
    .Internal(dlogis(x, location, scale, log))
plogis <- function(q, location=0, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(plogis(q, location, scale, lower.tail, log.p))
qlogis <- function(p, location=0, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qlogis(p, location, scale, lower.tail, log.p))
rlogis <- function(n, location=0, scale=1) .Internal(rlogis(n, location, scale))

dweibull <- function(x, shape, scale=1, log = FALSE)
    .Internal(dweibull(x, shape, scale, log))
pweibull <- function(q, shape, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(pweibull(q, shape, scale, lower.tail, log.p))
qweibull <- function(p, shape, scale=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qweibull(p, shape, scale, lower.tail, log.p))
rweibull <- function(n, shape, scale=1) .Internal(rweibull(n, shape, scale))

dbeta <- function(x, shape1, shape2, ncp=0, log = FALSE) {
    if(missing(ncp)) .Internal(dbeta(x, shape1, shape2, log))
    else .Internal(dnbeta(x, shape1, shape2, ncp, log))
}
pbeta <- function(q, shape1, shape2, ncp=0, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(pbeta(q, shape1, shape2, lower.tail, log.p))
    else .Internal(pnbeta(q, shape1, shape2, ncp, lower.tail, log.p))
}
qbeta <- function(p, shape1, shape2, ncp=0, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(qbeta(p, shape1, shape2, lower.tail, log.p))
    else .Internal(qnbeta(p, shape1, shape2, ncp, lower.tail, log.p))
}
rbeta <- function(n, shape1, shape2, ncp = 0) {
    if(ncp == 0) .Internal(rbeta(n, shape1, shape2))
    else {
        X <- rchisq(n, 2*shape1, ncp =ncp)
        X/(X + rchisq(n, 2*shape2))
    }
}

dbinom <- function(x, size, prob, log = FALSE)
    .Internal(dbinom(x, size, prob, log))
pbinom <- function(q, size, prob, lower.tail = TRUE, log.p = FALSE)
    .Internal(pbinom(q, size, prob, lower.tail, log.p))
qbinom <- function(p, size, prob, lower.tail = TRUE, log.p = FALSE)
    .Internal(qbinom(p, size, prob, lower.tail, log.p))
rbinom <- function(n, size, prob) .Internal(rbinom(n, size, prob))

## Multivariate: that's why there's no C interface (yet) for d...():
dmultinom <- function(x, size=NULL, prob, log = FALSE)
{
    K <- length(prob)
    if(length(x) != K) stop("x[] and prob[] must be equal length vectors.")
    if(any(prob < 0) || (s <- sum(prob)) == 0)
        stop("probabilities cannot be negative nor all 0.")
    prob <- prob / s

    x <- as.integer(x + 0.5)
    if(any(x < 0)) stop("'x' must be non-negative")
    N <- sum(x)
    if(is.null(size)) size <- N
    else if (size != N) stop("size != sum(x), i.e. one is wrong")

    i0 <- prob == 0
    if(any(i0)) {
        if(any(x[i0] != 0))
            ##  prob[j] ==0 and x[j] > 0 ==>  "impossible" => P = 0
            return(if(log)-Inf else 0)
        ## otherwise : 'all is fine': prob[j]= 0 = x[j] ==> drop j and continue
        if(all(i0)) return(if(log)0 else 1)
        ## else
        x <- x[!i0]
        prob <- prob[!i0]
    }
    r <- lgamma(size+1) + sum(x*log(prob) - lgamma(x+1))
    if(log) r else exp(r)
}
rmultinom <- function(n, size, prob) .Internal(rmultinom(n, size, prob))

dchisq <- function(x, df, ncp=0, log = FALSE) {
    if(missing(ncp)) .Internal(dchisq(x, df, log))
    else .Internal(dnchisq(x, df, ncp, log))
}
pchisq <- function(q, df, ncp=0, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(pchisq(q, df, lower.tail, log.p))
    else .Internal(pnchisq(q, df, ncp, lower.tail, log.p))
}
qchisq <- function(p, df, ncp=0, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(qchisq(p, df, lower.tail, log.p))
    else .Internal(qnchisq(p, df, ncp, lower.tail, log.p))
}
rchisq <- function(n, df, ncp=0) {
    if(missing(ncp)) .Internal(rchisq(n, df))
    else .Internal(rnchisq(n, df, ncp))
}

df <- function(x, df1, df2, ncp, log = FALSE) {
    if(missing(ncp)) .Internal(df(x, df1, df2, log))
    else .Internal(dnf(x, df1, df2, ncp, log))
}
pf <- function(q, df1, df2, ncp, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(pf(q, df1, df2, lower.tail, log.p))
    else .Internal(pnf(q, df1, df2, ncp, lower.tail, log.p))
}
qf <- function(p, df1, df2, ncp, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(qf(p, df1, df2, lower.tail, log.p))
    else .Internal(qnf(p, df1, df2, ncp, lower.tail, log.p))
}
rf <- function(n, df1, df2, ncp)
{
    if(missing(ncp)) .Internal(rf(n, df1, df2))
    else (rchisq(n, df1, ncp=ncp)/df1)/(rchisq(n, df2)/df2)
}

dgeom <- function(x, prob, log = FALSE) .Internal(dgeom(x, prob, log))
pgeom <- function(q, prob, lower.tail = TRUE, log.p = FALSE)
    .Internal(pgeom(q, prob, lower.tail, log.p))
qgeom <- function(p, prob, lower.tail = TRUE, log.p = FALSE)
    .Internal(qgeom(p, prob, lower.tail, log.p))
rgeom <- function(n, prob) .Internal(rgeom(n, prob))

dhyper <- function(x, m, n, k, log = FALSE) .Internal(dhyper(x, m, n, k, log))
phyper <- function(q, m, n, k, lower.tail = TRUE, log.p = FALSE)
    .Internal(phyper(q, m, n, k, lower.tail, log.p))
qhyper <- function(p, m, n, k, lower.tail = TRUE, log.p = FALSE)
    .Internal(qhyper(p, m, n, k, lower.tail, log.p))
rhyper <- function(nn, m, n, k) .Internal(rhyper(nn, m, n, k))

dnbinom <- function(x, size, prob, mu, log = FALSE)
{
    if (!missing(mu)) {
        if (!missing(prob)) stop("'prob' and 'mu' both specified")
        .Internal(dnbinom_mu(x, size, mu, log))
    }
    else
        .Internal(dnbinom (x, size, prob, log))
}
pnbinom <- function(q, size, prob, mu, lower.tail = TRUE, log.p = FALSE)
{
    if (!missing(mu)) {
        if (!missing(prob)) stop("'prob' and 'mu' both specified")
        .Internal(pnbinom_mu(q, size, mu, lower.tail, log.p))
    }
    else
        .Internal(pnbinom (q, size, prob, lower.tail, log.p))
}
qnbinom <- function(p, size, prob, mu, lower.tail = TRUE, log.p = FALSE)
{
    if (!missing(mu)) {
        if (!missing(prob)) stop("'prob' and 'mu' both specified")
### FIXME: implement qnbinom_mu(...)
        prob <- size/(size + mu)
    }
    .Internal(qnbinom(p, size, prob, lower.tail, log.p))
}
rnbinom <- function(n, size, prob, mu)
{
    if (!missing(mu)) {
        if (!missing(prob)) stop("'prob' and 'mu' both specified")
        .Internal(rnbinom_mu(n, size, mu))
    }
    else
        .Internal(rnbinom(n, size, prob))
}

dpois <- function(x, lambda, log = FALSE) .Internal(dpois(x, lambda, log))
ppois <- function(q, lambda, lower.tail = TRUE, log.p = FALSE)
    .Internal(ppois(q, lambda, lower.tail, log.p))
qpois <- function(p, lambda, lower.tail = TRUE, log.p = FALSE)
    .Internal(qpois(p, lambda, lower.tail, log.p))
rpois <- function(n, lambda) .Internal(rpois(n, lambda))

dt <- function(x, df, ncp, log = FALSE) {
    if(missing(ncp)) .Internal(dt(x, df, log))
    else .Internal(dnt(x, df, ncp, log))
}
pt <- function(q, df, ncp, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(pt(q, df, lower.tail, log.p))
    else .Internal(pnt(q, df, ncp, lower.tail, log.p))
}
qt <- function(p, df, ncp, lower.tail = TRUE, log.p = FALSE) {
    if(missing(ncp)) .Internal(qt(p, df, lower.tail, log.p))
    else .Internal(qnt(p, df, ncp, lower.tail, log.p))
}
rt <- function(n, df, ncp) {
    if(missing(ncp)) .Internal(rt(n, df))
    else rnorm(n, ncp)/sqrt(rchisq(n, df)/df)
}

ptukey <- function(q, nmeans, df, nranges=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(ptukey(q, nranges, nmeans, df, lower.tail, log.p))
qtukey <- function(p, nmeans, df, nranges=1, lower.tail = TRUE, log.p = FALSE)
    .Internal(qtukey(p, nranges, nmeans, df, lower.tail, log.p))

dwilcox <- function(x, m, n, log = FALSE)
{
    .Internal(dwilcox(x, m, n, log))
}
pwilcox <- function(q, m, n, lower.tail = TRUE, log.p = FALSE)
{
    .Internal(pwilcox(q, m, n, lower.tail, log.p))
}
qwilcox <- function(p, m, n, lower.tail = TRUE, log.p = FALSE)
{
    .Internal(qwilcox(p, m, n, lower.tail, log.p))
}
rwilcox <- function(nn, m, n) .Internal(rwilcox(nn, m, n))

dsignrank <- function(x, n, log = FALSE)
{
    on.exit(.C("signrank_free", PACKAGE = "base"))
    .Internal(dsignrank(x, n, log))
}
psignrank <- function(q, n, lower.tail = TRUE, log.p = FALSE)
{
    on.exit(.C("signrank_free", PACKAGE = "base"))
    .Internal(psignrank(q, n, lower.tail, log.p))
}
qsignrank <- function(p, n, lower.tail = TRUE, log.p = FALSE)
{
    on.exit(.C("signrank_free", PACKAGE = "base"))
    .Internal(qsignrank(p, n, lower.tail, log.p))
}
rsignrank <- function(nn, n) .Internal(rsignrank(nn, n))

/*
 *  Mathlib : A C Library of Special Functions
 *  Copyright (C) 1998-2004  The R Development Core Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  http://www.r-project.org/Licenses/
 */

/* Private header file for use during compilation of Mathlib */
#ifndef MATHLIB_PRIVATE_H
#define MATHLIB_PRIVATE_H

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#ifdef HAVE_LONG_DOUBLE
#  define LDOUBLE long double
#else
#  define LDOUBLE double
#endif

#include <math.h>
#include <float.h> /* DBL_MIN etc */

#include <Rconfig.h>
#include <Rmath.h>

double  Rf_d1mach(int);
#define gamma_cody      Rf_gamma_cody
double  gamma_cody(double);

#include <R_ext/RS.h>

/* possibly needed for debugging */
#include <R_ext/Print.h>


#ifndef MATHLIB_STANDALONE

#include <R_ext/Error.h>
# define MATHLIB_ERROR(fmt,x)           error(fmt,x);
# define MATHLIB_WARNING(fmt,x)         warning(fmt,x)
# define MATHLIB_WARNING2(fmt,x,x2)     warning(fmt,x,x2)
# define MATHLIB_WARNING3(fmt,x,x2,x3)  warning(fmt,x,x2,x3)
# define MATHLIB_WARNING4(fmt,x,x2,x3,x4) warning(fmt,x,x2,x3,x4)

#include <R_ext/Arith.h>
#define ML_POSINF       R_PosInf
#define ML_NEGINF       R_NegInf
#define ML_NAN          R_NaN


void R_CheckUserInterrupt(void);
/* Ei-ji Nakama reported that AIX 5.2 has calloc as a macro and objected
   to redefining it.  Tests added for 2.2.1 */
#ifdef calloc
# undef calloc
#endif
#define calloc R_chk_calloc
#ifdef free
# undef free
#endif
#define free R_chk_free

#ifdef ENABLE_NLS
#include <libintl.h>
#define _(String) gettext (String)
#else
#define _(String) (String)
#endif

#else
/* Mathlib standalone */

#include <stdio.h>
#include <stdlib.h> /* for exit */
#define MATHLIB_ERROR(fmt,x)    { printf(fmt,x); exit(1); }
#define MATHLIB_WARNING(fmt,x)          printf(fmt,x)
#define MATHLIB_WARNING2(fmt,x,x2)      printf(fmt,x,x2)
#define MATHLIB_WARNING3(fmt,x,x2,x3)   printf(fmt,x,x2,x3)
#define MATHLIB_WARNING4(fmt,x,x2,x3,x4) printf(fmt,x,x2,x3,x4)

#define ISNAN(x) (isnan(x)!=0)
#define R_FINITE(x)    R_finite(x)
int R_finite(double);

#define ML_POSINF       (1.0 / 0.0)
#define ML_NEGINF       ((-1.0) / 0.0)
#define ML_NAN          (0.0 / 0.0)

#define _(String) String
#endif /* standalone */

#define ML_VALID(x)     (!ISNAN(x))

#define ME_NONE         0
/*      no error */
#define ME_DOMAIN       1
/*      argument out of domain */
#define ME_RANGE        2
/*      value out of range */
#define ME_NOCONV       4
/*      process did not converge */
#define ME_PRECISION    8
/*      does not have "full" precision */
#define ME_UNDERFLOW    16
/*      and underflow occured (important for IEEE)*/

#define ML_ERR_return_NAN { ML_ERROR(ME_DOMAIN, ""); return ML_NAN; }

/* For a long time prior to R 2.3.0 ML_ERROR did nothing.
   We don't report ME_DOMAIN errors as the callers collect ML_NANs into
   a single warning.
 */
#define ML_ERROR(x, s) { \
   if(x > ME_DOMAIN) { \
       char *msg = ""; \
       switch(x) { \
       case ME_DOMAIN: \
           msg = "argument out of domain in '%s'\n"; \
           break; \
       case ME_RANGE: \
           msg = "value out of range in '%s'\n"; \
           break; \
       case ME_NOCONV: \
           msg = "convergence failed in '%s'\n"; \
           break; \
       case ME_PRECISION: \
           msg = "full precision was not achieved in '%s'\n"; \
           break; \
       case ME_UNDERFLOW: \
           msg = "underflow occurred in '%s'\n"; \
           break; \
       } \
       MATHLIB_WARNING(msg, s); \
   } \
}

#ifdef HAVE_VISIBILITY_ATTRIBUTE
# define attribute_hidden __attribute__ ((visibility ("hidden")))
#else
# define attribute_hidden
#endif

/* Formerly private part of Mathlib.h */

/* always remap internal functions */
#define bd0             Rf_bd0
#define chebyshev_eval  Rf_chebyshev_eval
#define chebyshev_init  Rf_chebyshev_init
#define gammalims       Rf_gammalims
#define lfastchoose     Rf_lfastchoose
#define lgammacor       Rf_lgammacor
#define stirlerr        Rf_stirlerr
/* in Rmath.h
#define gamma_cody      Rf_gamma_cody
*/

        /* Chebyshev Series */

int     attribute_hidden chebyshev_init(double*, int, double);
double  attribute_hidden chebyshev_eval(double, const double *, const int);

        /* Gamma and Related Functions */

void    attribute_hidden gammalims(double*, double*);
double  attribute_hidden lgammacor(double); /* log(gamma) correction */
double  attribute_hidden stirlerr(double);  /* Stirling expansion "error" */

double  attribute_hidden lfastchoose(double, double);

double  attribute_hidden bd0(double, double);

double  attribute_hidden dbinom_raw(double, double, double, double, int);
double  attribute_hidden dpois_raw (double, double, int);
double  attribute_hidden pnchisq_raw(double, double, double, double, double, 
int, Rboolean);
double  attribute_hidden pgamma_raw(double, double, int, int);
double  attribute_hidden pbeta_raw(double, double, double, int, int);
double  attribute_hidden qchisq_appr(double, double, double, int, int, double 
tol);
LDOUBLE attribute_hidden pnbeta_raw(double, double, double, double, double);
double  attribute_hidden pnbeta2(double, double, double, double, double, int, 
int);

int     Rf_i1mach(int);

/* From toms708.c */
void attribute_hidden bratio(double a, double b, double x, double y,
                             double *w, double *w1, int *ierr, int log_p);


#endif /* MATHLIB_PRIVATE_H */

/*
  Mathlib : A C Library of Special Functions
  Copyright (C) 1999-2007  The R Development Core Team

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or (at
  your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, a copy is available at
  http://www.r-project.org/Licenses/

  SYNOPSIS

    #include <Rmath.h>
    double dwilcox(double x, double m, double n, int give_log)
    double pwilcox(double x, double m, double n, int lower_tail, int log_p)
    double qwilcox(double x, double m, double n, int lower_tail, int log_p);
    double rwilcox(double m, double n)

  DESCRIPTION

    dwilcox     The density of the Wilcoxon distribution.
    pwilcox     The distribution function of the Wilcoxon distribution.
    qwilcox     The quantile function of the Wilcoxon distribution.
    rwilcox     Random variates from the Wilcoxon distribution.

*/

/* 
   Note: the checks here for R_CheckInterrupt also do stack checking.

   calloc/free are remapped for use in R, so allocation checks are done there.
   freeing is completed by an on.exit action in the R wrappers.
*/

#include "nmath.h"
#include "dpq.h"

#ifndef MATHLIB_STANDALONE
#include <R_ext/Utils.h>
#endif

static double *w; 
static int allocated_m, allocated_n;

static void
w_free()
{
    if(!w) return;

    free((void *) w);
    w = 0;
    allocated_m = allocated_n = 0;
}

void wilcox_free()
{
    w_free();
}


static void
w_init_maybe(int m, int n)
{
    int c= (m*n+1)/2;


    if( w){
        if( m == allocated_m && n == allocated_n ) return;
        else w_free();
    }

    if( !w){
        w = (double *) calloc(c + 1, sizeof(double));
#ifdef MATHLIB_STANDALONE
        if (!w) MATHLIB_ERROR(_("wilcox allocation error %d"), 1);
#endif
        allocated_m = m; allocated_n = n;
    }
}

/*
   This counts the number of choices with statistic = k

   More correctly, it returns the number of choices with statistic = k
   but it also evaluates number of choices with for every statistic k

   This is because the function implements Harding's algorithm.
   (Harding, E.F. (1984): An Efficient, Minimal-storage Procedure
   for Calculating the Mann-Whitney U, Generalized U and Similar
   Distributions, App. Statist., 33, 1-6)
*/
static double
cwilcox(int k, int m, int n)
{
    int c, u, i, j;
    int q, step;

#ifndef MATHLIB_STANDALONE
    R_CheckUserInterrupt();
#endif

    u = m * n;
    if (k < 0 || k > u)
        return 0;

    c = (int)(u / 2);
    if (k > c)
        k = u - k;

    if( w[k] != 0 )
        return w[k];

    for( i=0; i < n+1; ++i)
        w[i]=1.;

    for( i=2; i < m+1; ++i){
        step = n+i;
        q = c;
        while( q-step > -1){
            w[q] = w[q] - w[q-step];
            --q;
        }
        for( j=i; j<c+1; ++j)
            w[j] += w[j-i];
    }

    return w[k];
}

double dwilcox(double x, double m, double n, int give_log)
{
    double d;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(m) || ISNAN(n))
        return(x + m + n);
#endif
    m = floor(m + 0.5);
    n = floor(n + 0.5);
    if (m <= 0 || n <= 0)
        ML_ERR_return_NAN;

    if (fabs(x - floor(x + 0.5)) > 1e-7)
        return(R_D__0);

    x = floor(x + 0.5);
    if ((x < 0) || (x > m * n))
        return(R_D__0);

    w_init_maybe(m, n);
    d = give_log ?
        log(cwilcox(x, m, n)) - lchoose(m + n, n) :
        cwilcox(x, m, n)  / choose(m + n, n);

    return(d);
}

/* args have the same meaning as R function pwilcox */
double pwilcox(double q, double m, double n, int lower_tail, int log_p)
{
    int i;
    double c, p;

#ifdef IEEE_754
    if (ISNAN(q) || ISNAN(m) || ISNAN(n))
        return(q + m + n);
#endif
    if (!R_FINITE(m) || !R_FINITE(n))
        ML_ERR_return_NAN;
    m = floor(m + 0.5);
    n = floor(n + 0.5);
    if (m <= 0 || n <= 0)
        ML_ERR_return_NAN;

    q = floor(q + 1e-7);

    if (q < 0.0)
        return(R_DT_0);
    if (q >= m * n)
        return(R_DT_1);

    w_init_maybe(m, n);
    c = choose(m + n, n);
    p = 0;
    /* Use summation of probs over the shorter range */
    if (q <= (m * n / 2)) {
        for (i = q; i > -1; i--)
            p += cwilcox(i, m, n) / c;
    }
    else {
        q = m * n - q;
        for (i = q-1; i > -1; i--)
            p += cwilcox(i, m, n) / c;
        lower_tail = !lower_tail; /* p = 1 - p; */
    }

    return(R_DT_val(p));
} /* pwilcox */

/* x is 'p' in R function qwilcox */

double qwilcox(double x, double m, double n, int lower_tail, int log_p)
{
    double c, p, q;

#ifdef IEEE_754
    if (ISNAN(x) || ISNAN(m) || ISNAN(n))
        return(x + m + n);
#endif
    if(!R_FINITE(x) || !R_FINITE(m) || !R_FINITE(n))
        ML_ERR_return_NAN;
    R_Q_P01_check(x);

    m = floor(m + 0.5);
    n = floor(n + 0.5);
    if (m <= 0 || n <= 0)
        ML_ERR_return_NAN;

    if (x == R_DT_0)
        return(0);
    if (x == R_DT_1)
        return(m * n);

    if(log_p || !lower_tail)
        x = R_DT_qIv(x); /* lower_tail,non-log "p" */

    w_init_maybe(m, n);
    c = choose(m + n, n);
    p = 0;
    q = 0;
    if (x <= 0.5) {
        x = x - 10 * DBL_EPSILON;
        for (;;) {
            p += cwilcox(q, m, n) / c;
            if (p >= x)
                break;
            q++;
        }
    }
    else {
        x = 1 - x + 10 * DBL_EPSILON;
        for (;;) {
            p += cwilcox(q, m, n) / c;
            if (p > x) {
                q = m * n - q;
                break;
            }
            q++;
        }
    }

    return(q);
}

double rwilcox(double m, double n)
{
    int i, j, k, *x;
    double r;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(m) || ISNAN(n))
        return(m + n);
#endif
    m = floor(m + 0.5);
    n = floor(n + 0.5);
    if ((m < 0) || (n < 0))
        ML_ERR_return_NAN;

    if ((m == 0) || (n == 0))
        return(0);

    r = 0.0;
    k = (int) (m + n);
    x = (int *) calloc(k, sizeof(int));
#ifdef MATHLIB_STANDALONE
    if (!x) MATHLIB_ERROR(_("wilcox allocation error %d"), 4);
#endif
    for (i = 0; i < k; i++)
        x[i] = i;
    for (i = 0; i < n; i++) {
        j = floor(k * unif_rand());
        r += x[j];
        x[j] = x[--k];
    }
    free(x);
    return(r - n * (n - 1) / 2);
}

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

[Rd] Improvement of [dpq]wilcox functions

Reply via email to