Changeset: 10700761ef2a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=10700761ef2a
Modified Files:
        common/utils/mtwist.c
        gdk/gdk_sample.c
        monetdb5/modules/kernel/microbenchmark.c
        monetdb5/modules/mal/sample.c
Branch: stratified_sampling
Log Message:

fix memory leak, make mtwist global


diffs (233 lines):

diff --git a/common/utils/mtwist.c b/common/utils/mtwist.c
--- a/common/utils/mtwist.c
+++ b/common/utils/mtwist.c
@@ -35,8 +35,10 @@
  * @a Dave Beckett, Abe Wits
  * @* Mersenne Twister (MT19937) algorithm
  * 
- * This random number generator has very good statistical properties,
- * and outperforms most stl implementations of rand() in terms of speed
+ * This random number generator outperforms most stl implementations
+ * of rand() in terms of speed. Dieharder tests confirm that the
+ * numbers generated are statistically much closer to true random
+ * numbers than those generated by a typical LCG (including the gcc rand()).
  *
  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
  * http://en.wikipedia.org/wiki/Mersenne_twister
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -86,6 +86,8 @@ struct oidtreenode {
        struct oidtreenode *right;
 };
 
+mtwist *mt_rng = NULL;
+
 static int
 OIDTreeMaybeInsert(struct oidtreenode *tree, oid o, BUN allocated)
 {
@@ -156,7 +158,7 @@ BATsample(BAT *b, BUN n)
        BUN cnt, slen;
        BUN rescnt;
        struct oidtreenode *tree = NULL;
-       mtwist *mt_rng;
+
        unsigned int range;
 
        BATcheck(b, "BATsample", NULL);
@@ -201,10 +203,12 @@ BATsample(BAT *b, BUN n)
                        return NULL;
                }
                
-               /* create and seed Mersenne Twister */
-               mt_rng = mtwist_new();
+               if(!mt_rng) {
+                       /* create and seed Mersenne Twister */
+                       mt_rng = mtwist_new();
 
-               mtwist_seed(mt_rng, rand());
+                       mtwist_seed(mt_rng, rand());
+               }
                
                range = maxoid - minoid;
                
@@ -249,7 +253,6 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
        dbl* w_ptr;//TODO types of w
        dbl* keys;/* keys as defined in Alg-A-exp */
        BUN cnt, i, j;
-       mtwist *mt_rng;
        BUN pos, childpos;
        oid item;
        dbl r, xw, r2, key, tw;
@@ -266,7 +269,7 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
        //TODO: handle NULL values in w_ptr
        cnt = BATcount(b);
 
-       keys = (dbl*) malloc(sizeof(dbl)*n);
+       keys = (dbl*) GDKmalloc(sizeof(dbl)*n);
        if(keys == NULL)
                return NULL;
 
@@ -279,8 +282,10 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
        oids = (oid *) Tloc(sample, 0);
        w_ptr = (dbl*) Tloc(w, 0);
 
-       mt_rng = mtwist_new();
-       mtwist_seed(mt_rng, rand());
+       if(!mt_rng) {
+               mt_rng = mtwist_new();
+               mtwist_seed(mt_rng, rand());
+       }
 
        BATsetcount(sample, n);
                /* obtain sample */
@@ -295,19 +300,25 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
                keys[i] = pow(mtwist_drand(mt_rng),1.0/w_ptr[j]);//TODO cast 
1.0 to dbl?
                i++;
        }
-       if(i < n)/* not enough non-zero weights: cannot take sample */
+       if(i < n) {/* not enough non-zero weights: cannot take sample */
+               BBPunfix(sample->batCacheid);//TODO why not unfix?
+               GDKfree(keys);
                return NULL;
+       }
 
        heapify(compKeysGT, SWAP3);
 
        while(true) {
                r = mtwist_drand(mt_rng);
                xw = log(r)/log(keys[0]);
-               for(;j<cnt && xw > w_ptr[j]; j++)
+               for(;j<cnt && xw >= w_ptr[j]; j++)
                        xw -= w_ptr[j];
                if(j >= cnt) break;
 
-               /* At this point, w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1] < xw < 
w_ptr[c]+w_ptr[c+1]+...+w_ptr[i] */
+               /* At this point:
+                *              w_ptr[c]+w_ptr[c+1]+...+w_ptr[i-1]
+                *   <  xw (the initial value, log(r)/log(keys[0]))
+                *       <= w_ptr[c]+w_ptr[c+1]+...+w_ptr[i] */
                tw = pow(keys[0], w_ptr[j]);
                r2 = mtwist_drand(mt_rng)*(1-tw)+tw;
                key = pow(r2, 1/w_ptr[j]);
@@ -320,7 +331,7 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
                j++;/* Increment j so j=c (c is defined in Alg-A-exp) */
        }
 
-       free(keys);
+       GDKfree(keys);
 
        sample->trevsorted = sample->batCount <= 1;
        sample->tsorted = sample->batCount <= 1;
diff --git a/monetdb5/modules/kernel/microbenchmark.c 
b/monetdb5/modules/kernel/microbenchmark.c
--- a/monetdb5/modules/kernel/microbenchmark.c
+++ b/monetdb5/modules/kernel/microbenchmark.c
@@ -22,6 +22,8 @@
 #include "microbenchmark.h"
 #include "mtwist.h"
 
+mtwist *mt_rng = NULL;
+
 static gdk_return
 BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed)
 {
@@ -29,7 +31,7 @@ BATrandom(BAT **bn, oid *base, lng *size
        BUN i;
        BAT *b = NULL;
        int *restrict val;
-       mtwist *mt_rng;
+
 
        if (*size > (lng)BUN_MAX) {
                GDKerror("BATrandom: size must not exceed BUN_MAX");
@@ -55,8 +57,10 @@ BATrandom(BAT **bn, oid *base, lng *size
        val = (int *) Tloc(b, 0);
 
        /* create BUNs with random distribution */
-       mt_rng = mtwist_new();
-       mtwist_seed(mt_rng, seed);
+       if(!mt_rng) {
+               mt_rng = mtwist_new();
+               mtwist_seed(mt_rng, rand());
+       }
 
        if (seed != int_nil)
                mtwist_seed(mt_rng, seed);
@@ -88,7 +92,6 @@ BATuniform(BAT **bn, oid *base, lng *siz
        BAT *b = NULL;
        int *restrict val;
        int v;
-       mtwist *mt_rng;
 
 
        if (*size > (lng)BUN_MAX) {
@@ -121,7 +124,10 @@ BATuniform(BAT **bn, oid *base, lng *siz
                        v = 0;
        }
 
-       mt_rng = mtwist_new();
+       if(!mt_rng) {
+               mt_rng = mtwist_new();
+               mtwist_seed(mt_rng, rand());
+       }
 
        /* mix BUNs randomly */
        for (r = 0, i = 0; i < n; i++) {
@@ -151,7 +157,6 @@ BATskewed(BAT **bn, oid *base, lng *size
        int *restrict val;
        const BUN skewedSize = ((*skew) * n) / 100;
        const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100;
-       mtwist *mt_rng;
 
 
        if (*size > (lng)BUN_MAX) {
@@ -182,7 +187,10 @@ BATskewed(BAT **bn, oid *base, lng *size
        }
        val = (int *) Tloc(b, 0);
 
-       mt_rng = mtwist_new();
+       if(!mt_rng) {
+               mt_rng = mtwist_new();
+               mtwist_seed(mt_rng, rand());
+       }
 
        /* create BUNs with skewed distribution */
        for (i = 0; i < skewedSize; i++)
@@ -368,15 +376,16 @@ MBMmix(bat *bn, bat *batid)
 {
        BUN n, r, i;
        BAT *b;
-       mtwist *mt_rng;
 
        if ((b = BATdescriptor(*batid)) == NULL)
                throw(MAL, "microbenchmark.mix", RUNTIME_OBJECT_MISSING);
 
        n = BATcount(b);
 
-       mt_rng = mtwist_new();
-
+       if(!mt_rng) {
+               mt_rng = mtwist_new();
+               mtwist_seed(mt_rng, rand());
+       }
        /* mix BUNs randomly */
        for (r = i = 0; i < n; i++) {
                BUN idx = i + ((r += (BUN) mtwist_u32rand(mt_rng)) % (n - i));
diff --git a/monetdb5/modules/mal/sample.c b/monetdb5/modules/mal/sample.c
--- a/monetdb5/modules/mal/sample.c
+++ b/monetdb5/modules/mal/sample.c
@@ -113,12 +113,14 @@ SAMPLEweighted(bat *r, bat *b, lng *s, b
                throw(MAL, "sample.subweighted", INTERNAL_BAT_ACCESS);
        }
        if ((bb = BATdescriptor(*b)) == NULL ) {
+               BBPunfix(bw->batCacheid);
                throw(MAL, "sample.subweighted", INTERNAL_BAT_ACCESS);
        }
        br = BATweightedsample(bb, (BUN) *s, bw);
        if (br == NULL)
                throw(MAL, "sample.subweighted", OPERATION_FAILED);
 
+       BBPunfix(bw->batCacheid);
        BBPunfix(bb->batCacheid);
        BBPkeepref(*r = br->batCacheid);
        return MAL_SUCCEED;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to