[racket] Number-crunching...

William Cushing Tue, 12 Nov 2013 04:55:05 -0800

I'm in the process of benchmarking various systems for their ability to
handle the inner loop of compilations of MCMC from a higher level
probabilistic modeling language (BLOG).


Attached are examples of the kind of output (instantiations of MCMC) we'd
like to be able to generate given the tried-and-true Bayes Net linking the
probabilities of earthquake, burglary, john calling, and mary calling.
Classic AI fare.

Predictably, C is fastest.  But Stalin and SBCL also put in a very fine
showing.

For 50,000,000 iterations on my machine, I get the following semi-serious,
semi-bogus timings for a random assortment of systems (petite chez just for
fun):

1 SBCL 2.20
2 Bigloo 5.25
3 Chicken 11.49
4 Stalin 2.15
5 Racket 16.67
6 Petite-Chez 30.42
7 GCC 0.82
8 Clang 0.92

There is much about the comparison that isn't fair.  Chiefly, this is
primarily a test of speed of random number generation, but I haven't
controlled for the algorithm being used to generate random numbers.  Stalin
may very well be using GLIBC random number generation, for example.  I know
SBCL uses MT; in fact, the SBCL experts responded to my little encoding by
halving the shown runtime figure via dynamically linking in a new and
improved (SIMD-aware) implementation of MT.  Which is quite impressive, as
that puts it within 30% of my C version...and my C version is "totally
cheating" by employing a simple little LCPRNG.

Porting the same generator to Racket in the obvious way more than triples
its running time. (!)
(Is it the global var?  Is the conversion to double being compiled as a
real division?)

In any case, even just sticking with Racket's default PRNG, it seems the
performance of a simple number crunching loop leaves something to be
desired.  I have done what I can to try and shave off time here and there.
Using unsafe ops, and threading most of the state through the parameters of
a tail-recursive loop (thus avoiding set!), I managed to shave off 3
seconds from my original encoding (so about 15%), which isn't too shabby.
Of course, for even less effort, Stalin is already 800% times faster.

I couldn't get typed/racket to type check, and /no-check doesn't seem to
help performance at all (not surprising).  I suspect that getting
typed/racket to be able to process this file might help substantially...but
I don't really understand its type theory enough to help it figure this
benchmark out.

I though Racket experts might have some suggestions for how to convince
Racket to run a simple number crunching loop without quite so much in the
way of constant-factor slowdown.

-Will

mcmc.rkt
Description: Binary data

(include "QobiScheme")

(define (compiled-mcmc N)
    (let ((Ncounts (vector 0 0))
	  (burglary (random-integer 2)) 
	  (p_burglary_0 0)
	  (earthquake (random-integer 2)) 
	  (p_earthquake_0 0)
	  (alarm (random-integer 2)) 
	  (p_alarm_0 0)
	  (cpt_alarm_*_0_* (vector (vector 0.999d0 0.001d0)
				   (vector 0.060d0 0.940d0)))
	  (cpt_alarm_0_*_* (vector (vector 0.999d0 0.001d0)
				   (vector 0.710d0 0.290d0)))
	  (cpt_alarm_*_*_0 (vector (vector 0.999d0 0.710d0)
				   (vector 0.060d0 0.050d0)))
	  (cpt_johncalls_*_1 (vector 0.050d0 0.900d0))
	  (cpt_marycalls_*_1 (vector 0.010d0 0.700d0)))
      (vector-fill! Ncounts 0)
      (do ((j 1 (+ j 1))) ((> j N))
      ;;; sample burglary                                                                                                                                                                                 
	(vector-set! Ncounts burglary (+ 1 (vector-ref Ncounts burglary)))
	(set! p_burglary_0 (* 0.999d0 (vector-ref (vector-ref cpt_alarm_0_*_* earthquake) alarm)))
	(set! burglary (if (> (random-real) p_burglary_0) 1 0))
      ;;; sample earthquake                                                                                                                                                                               
	(vector-set! Ncounts burglary (+ 1 (vector-ref Ncounts burglary)))
	(set! p_earthquake_0 (* 0.998d0 (vector-ref (vector-ref cpt_alarm_*_0_* burglary) earthquake)))
	(set! earthquake (if (> (random-real) p_earthquake_0) 1 0))
      ;;; sample alarm                                                                                                                                                                                    
	(vector-set! Ncounts burglary (+ 1 (vector-ref Ncounts burglary)))
	(set! p_alarm_0 (* (vector-ref (vector-ref cpt_alarm_*_*_0 burglary) earthquake)
			   (vector-ref cpt_johncalls_*_1 alarm)
			   (vector-ref cpt_marycalls_*_1 alarm)))
	(set! alarm (if (> (random-real) p_alarm_0) 1 0))
	)
      (let ((sum (apply + (vector->list Ncounts))))
	(let loop ((i 0))
	     (vector-set! Ncounts i (/ (vector-ref Ncounts i) sum))
	     (when (< (+ i 1) (vector-length Ncounts)) (loop (+ i 1)))
	     ))
      Ncounts))

(display (compiled-mcmc (string->number (vector-ref ARGV 1))))
(newline)

#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <inttypes.h>
#include <errno.h>
#include <math.h>
#include <string.h>

//Knuth
//6364136223846793005 	
//1442695040888963407
//64
//-1ull;
//Java
//0x5DEECE66Dul;
//0xBul;
//48
//(1ul<<48)-1;
static const uint64_t multiplier = 6364136223846793005; 	
static const uint64_t addend = 1442695040888963407;
static const uint8_t width = 64;
static const uint64_t mask = -1;
static const uint64_t RANDOM_MAX = -1;
static const double pow_2_52 = 0x10000000000000p0;

typedef struct {
    uint64_t seed;
} generator_state_t;


uint64_t random_next_uint64_t(generator_state_t *r)
{
    return r->seed = (r->seed * multiplier + addend) & mask;
}


uint64_t random_next(generator_state_t *r, uint8_t bits) 
{
    return random_next_uint64_t(r) >> (width-bits);
}


double random_next_0_1_double_with_possible_bias_from_rounding(generator_state_t *r) 
{
    return random_next_uint64_t(r) / (((double) (mask))+1);
}


double random_next_0_1_double(generator_state_t *r) 
{
    return (random_next(r,52)) / pow_2_52;
    //return (random_next(r,52)) / 0x10000000000000p0;
}


static generator_state_t r = {
  .seed= 8682522807148012ull// & mask,
};


// misnomers...


double uniform_real() { return random_next_0_1_double(&r); }
//double uniform_real() { return drand48(); }
//double uniform_real() { return rand() / ((double)RAND_MAX+1); }

// all of the following are statistically terrible ways to generate natural numbers,
// especially booleans, because the lowest bit just alternates...(for all of them)

uint64_t uniform_nat(uint64_t m) { return random_next_uint64_t(&r)%m; }
//uint64_t uniform_nat(uint64_t m) { return lrand48()%m; }
//uint64_t uniform_nat(uint64_t m) { return rand()%m; }


typedef double prob_t;
typedef prob_t cpt_2_2_t[2][2];
typedef prob_t cpt_2_t[2];

static const cpt_2_2_t cpt_alarm_any_false_any = {{0.999, 0.001},
						  {0.060, 0.940}},
    cpt_alarm_false_any_any 
    = {{0.999, 0.001},
       {0.710, 0.290}},
    cpt_alarm_any_any_false
    = {{0.999, 0.710},
       {0.060, 0.050}};
static const cpt_2_t cpt_johncalls_any_true = {0.050, 0.900},
    cpt_marycalls_any_true = {0.010, 0.700};


typedef struct {
    unsigned burglary : 1;
    unsigned earthquake : 1;
    unsigned alarm : 1;
} model_state_t;
/* typedef struct { */
/*     uint8_t burglary ; */
/*     uint8_t earthquake ; */
/*     uint8_t alarm; */
/* } model_state_t; */

typedef struct {
    prob_t p_burglary_false; 
    prob_t p_earthquake_false; 
    prob_t p_alarm_false; 
} mcmc_state_t;


void compiled_mcmc(uint64_t counts[2],uint64_t count) 
{
    uint64_t i;
    model_state_t s;
    mcmc_state_t m;

    // doing the memset --- thus double-initializing --- makes this go *faster*.
    memset(&s,0,sizeof(s));
    // this memset makes no difference, however
    //memset(&m,0,sizeof(m));

    s.burglary = uniform_nat(2);
    s.earthquake = uniform_nat(2);
    s.alarm = uniform_nat(2);

    // what's with the magic numbers 0.999 and 0.998?
    // (I introduced the symmetric 1.0...)

    for(i=0;i<count;i+=1) {
	// sample burglary

	counts[s.burglary] = 1 + counts[s.burglary];
	m.p_burglary_false = 0.999 * cpt_alarm_false_any_any[s.earthquake][s.alarm];
	// if the probability were set to 1, uniform_real would never equal it,
	// thus always going to the false branch, as desired.
	// conversely, if the probability were set to 0, uniform_real would always be >= 0,
	// so always it would go to the true branch, again as desired.
	s.burglary = uniform_real() >= m.p_burglary_false ? 1 : 0;

	// sample earthquake
	counts[s.burglary] = 1 + counts[s.burglary];
	m.p_earthquake_false = 0.998 * cpt_alarm_any_false_any[s.burglary][s.earthquake];
	s.earthquake = uniform_real() >= m.p_earthquake_false ? 1 : 0;

	// sample alarm
	// doesn't this just double count burglary?? the above assignments can't change it...
	counts[s.burglary] = 1 + counts[s.burglary];
	m.p_alarm_false = 1.0  
	    * cpt_alarm_any_any_false[s.burglary][s.earthquake]
	    * cpt_johncalls_any_true[s.alarm]
	    * cpt_marycalls_any_true[s.alarm];
	s.alarm = uniform_real() >= m.p_alarm_false ? 1 : 0;
    }
}


void 
normalize(double *weights, const uint64_t *counts, size_t size) 
{
    int i;
    double sum=0;
    for(i=0;i<size;++i)
	sum += counts[i];
    for(i=0;i<size;++i)
	weights[i] = counts[i]/sum;
}


void 
normalize_with_sum(double *weights, const uint64_t *counts, size_t size, uint64_t count) 
{
    int i;
    double sum=count;
    for(i=0;i<size;++i)
	weights[i] = counts[i]/sum;
}


int main(int argc, char ** args) 
{
    if (argc < 2) {
	fprintf(stderr,"Call this program with an integer argument.");
	exit(EXIT_FAILURE);
    }
    errno=0;
    uint64_t count = atoi(args[1]);
    if (errno) {
	perror("Not a representable integer");
	exit(EXIT_FAILURE);
    }

    uint64_t counts[2]={0,0};
    //= calloc(2,sizeof(uint64_t));
    compiled_mcmc(counts,count);
    double weights[2]={0,0};
    //= calloc(2,sizeof(double));
    //fprintf(stdout,"#(%lu, %lu)\n",counts[0],counts[1]);
    //normalize_with_sum(weights,counts,2,count*3);
    normalize(weights,counts,2);

    printf("#(%lf, %lf)\n",weights[0],weights[1]);
    //free(counts);
    //free(weights);
    return EXIT_SUCCESS;
}

____________________
  Racket Users list:
  http://lists.racket-lang.org/users

[racket] Number-crunching...

Reply via email to