Changeset: 10700761ef2a for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=10700761ef2a Modified Files: common/utils/mtwist.c gdk/gdk_sample.c monetdb5/modules/kernel/microbenchmark.c monetdb5/modules/mal/sample.c Branch: stratified_sampling Log Message:
fix memory leak, make mtwist global diffs (233 lines): diff --git a/common/utils/mtwist.c b/common/utils/mtwist.c --- a/common/utils/mtwist.c +++ b/common/utils/mtwist.c @@ -35,8 +35,10 @@ * @a Dave Beckett, Abe Wits * @* Mersenne Twister (MT19937) algorithm * - * This random number generator has very good statistical properties, - * and outperforms most stl implementations of rand() in terms of speed + * This random number generator outperforms most stl implementations + * of rand() in terms of speed. Dieharder tests confirm that the + * numbers generated are statistically much closer to true random + * numbers than those generated by a typical LCG (including the gcc rand()). * * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html * http://en.wikipedia.org/wiki/Mersenne_twister diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -86,6 +86,8 @@ struct oidtreenode { struct oidtreenode *right; }; +mtwist *mt_rng = NULL; + static int OIDTreeMaybeInsert(struct oidtreenode *tree, oid o, BUN allocated) { @@ -156,7 +158,7 @@ BATsample(BAT *b, BUN n) BUN cnt, slen; BUN rescnt; struct oidtreenode *tree = NULL; - mtwist *mt_rng; + unsigned int range; BATcheck(b, "BATsample", NULL); @@ -201,10 +203,12 @@ BATsample(BAT *b, BUN n) return NULL; } - /* create and seed Mersenne Twister */ - mt_rng = mtwist_new(); + if(!mt_rng) { + /* create and seed Mersenne Twister */ + mt_rng = mtwist_new(); - mtwist_seed(mt_rng, rand()); + mtwist_seed(mt_rng, rand()); + } range = maxoid - minoid; @@ -249,7 +253,6 @@ BATweightedsample(BAT *b, BUN n, BAT *w) dbl* w_ptr;//TODO types of w dbl* keys;/* keys as defined in Alg-A-exp */ BUN cnt, i, j; - mtwist *mt_rng; BUN pos, childpos; oid item; dbl r, xw, r2, key, tw; @@ -266,7 +269,7 @@ BATweightedsample(BAT *b, BUN n, BAT *w) //TODO: handle NULL values in w_ptr cnt = BATcount(b); - keys = (dbl*) malloc(sizeof(dbl)*n); + keys = (dbl*) GDKmalloc(sizeof(dbl)*n); if(keys == NULL) return NULL; @@ -279,8 +282,10 @@ BATweightedsample(BAT *b, BUN n, BAT *w) oids = (oid *) Tloc(sample, 0); w_ptr = (dbl*) Tloc(w, 0); - mt_rng = mtwist_new(); - mtwist_seed(mt_rng, rand()); + if(!mt_rng) { + mt_rng = mtwist_new(); + mtwist_seed(mt_rng, rand()); + } BATsetcount(sample, n); /* obtain sample */ @@ -295,19 +300,25 @@ BATweightedsample(BAT *b, BUN n, BAT *w) keys[i] = pow(mtwist_drand(mt_rng),1.0/w_ptr[j]);//TODO cast 1.0 to dbl? i++; } - if(i < n)/* not enough non-zero weights: cannot take sample */ + if(i < n) {/* not enough non-zero weights: cannot take sample */ + BBPunfix(sample->batCacheid);//TODO why not unfix? + GDKfree(keys); return NULL; + } heapify(compKeysGT, SWAP3); while(true) { r = mtwist_drand(mt_rng); xw = log(r)/log(keys[0]); - for(;j<cnt && xw > w_ptr[j]; j++) + for(;j<cnt && xw >= w_ptr[j]; j++) xw -= w_ptr[j]; if(j >= cnt) break; - /* At this point, w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1] < xw < w_ptr[c]+w_ptr[c+1]+...+w_ptr[i] */ + /* At this point: + * w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1] + * < xw (the initial value, log(r)/log(keys[0])) + * <= w_ptr[c]+w_ptr[c+1]+...+w_ptr[i] */ tw = pow(keys[0], w_ptr[j]); r2 = mtwist_drand(mt_rng)*(1-tw)+tw; key = pow(r2, 1/w_ptr[j]); @@ -320,7 +331,7 @@ BATweightedsample(BAT *b, BUN n, BAT *w) j++;/* Increment j so j=c (c is defined in Alg-A-exp) */ } - free(keys); + GDKfree(keys); sample->trevsorted = sample->batCount <= 1; sample->tsorted = sample->batCount <= 1; diff --git a/monetdb5/modules/kernel/microbenchmark.c b/monetdb5/modules/kernel/microbenchmark.c --- a/monetdb5/modules/kernel/microbenchmark.c +++ b/monetdb5/modules/kernel/microbenchmark.c @@ -22,6 +22,8 @@ #include "microbenchmark.h" #include "mtwist.h" +mtwist *mt_rng = NULL; + static gdk_return BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed) { @@ -29,7 +31,7 @@ BATrandom(BAT **bn, oid *base, lng *size BUN i; BAT *b = NULL; int *restrict val; - mtwist *mt_rng; + if (*size > (lng)BUN_MAX) { GDKerror("BATrandom: size must not exceed BUN_MAX"); @@ -55,8 +57,10 @@ BATrandom(BAT **bn, oid *base, lng *size val = (int *) Tloc(b, 0); /* create BUNs with random distribution */ - mt_rng = mtwist_new(); - mtwist_seed(mt_rng, seed); + if(!mt_rng) { + mt_rng = mtwist_new(); + mtwist_seed(mt_rng, rand()); + } if (seed != int_nil) mtwist_seed(mt_rng, seed); @@ -88,7 +92,6 @@ BATuniform(BAT **bn, oid *base, lng *siz BAT *b = NULL; int *restrict val; int v; - mtwist *mt_rng; if (*size > (lng)BUN_MAX) { @@ -121,7 +124,10 @@ BATuniform(BAT **bn, oid *base, lng *siz v = 0; } - mt_rng = mtwist_new(); + if(!mt_rng) { + mt_rng = mtwist_new(); + mtwist_seed(mt_rng, rand()); + } /* mix BUNs randomly */ for (r = 0, i = 0; i < n; i++) { @@ -151,7 +157,6 @@ BATskewed(BAT **bn, oid *base, lng *size int *restrict val; const BUN skewedSize = ((*skew) * n) / 100; const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100; - mtwist *mt_rng; if (*size > (lng)BUN_MAX) { @@ -182,7 +187,10 @@ BATskewed(BAT **bn, oid *base, lng *size } val = (int *) Tloc(b, 0); - mt_rng = mtwist_new(); + if(!mt_rng) { + mt_rng = mtwist_new(); + mtwist_seed(mt_rng, rand()); + } /* create BUNs with skewed distribution */ for (i = 0; i < skewedSize; i++) @@ -368,15 +376,16 @@ MBMmix(bat *bn, bat *batid) { BUN n, r, i; BAT *b; - mtwist *mt_rng; if ((b = BATdescriptor(*batid)) == NULL) throw(MAL, "microbenchmark.mix", RUNTIME_OBJECT_MISSING); n = BATcount(b); - mt_rng = mtwist_new(); - + if(!mt_rng) { + mt_rng = mtwist_new(); + mtwist_seed(mt_rng, rand()); + } /* mix BUNs randomly */ for (r = i = 0; i < n; i++) { BUN idx = i + ((r += (BUN) mtwist_u32rand(mt_rng)) % (n - i)); diff --git a/monetdb5/modules/mal/sample.c b/monetdb5/modules/mal/sample.c --- a/monetdb5/modules/mal/sample.c +++ b/monetdb5/modules/mal/sample.c @@ -113,12 +113,14 @@ SAMPLEweighted(bat *r, bat *b, lng *s, b throw(MAL, "sample.subweighted", INTERNAL_BAT_ACCESS); } if ((bb = BATdescriptor(*b)) == NULL ) { + BBPunfix(bw->batCacheid); throw(MAL, "sample.subweighted", INTERNAL_BAT_ACCESS); } br = BATweightedsample(bb, (BUN) *s, bw); if (br == NULL) throw(MAL, "sample.subweighted", OPERATION_FAILED); + BBPunfix(bw->batCacheid); BBPunfix(bb->batCacheid); BBPkeepref(*r = br->batCacheid); return MAL_SUCCEED; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list