Changeset: 354674570bd5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=354674570bd5
Modified Files:
        gdk/gdk.h
        gdk/gdk_sample.c
Branch: default
Log Message:

BATsample_ implements BATsample, but expects a void head and produces the oids
of the sampled BUNs.


diffs (89 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3165,6 +3165,7 @@ gdk_export BAT *BATintersectcand(BAT *a,
  *
  */
 gdk_export BAT *BATsample(BAT *b, BUN n);
+gdk_export BAT *BATsample_(BAT *b, BUN n); /* variant of BATsample that expects a void (dense) head and returns the oids of the sampled BUNs */
 
 /* generic n-ary multijoin beast, with defines to interpret retval */
 #define MULTIJOIN_SORTED(r)    ((char*) &r)[0]
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -114,3 +114,74 @@ bunins_failed:
        BBPreclaim(bn);
        return NULL;
 }
+
+/*
+ * BATsample_ -- uniform random sample of n BUNs from a void-headed BAT.
+ *
+ * b  input BAT; its head must be dense (asserted).
+ * n  requested sample size.
+ *
+ * Returns a new BAT with a dense head starting at 0 whose tail holds the
+ * head oids of the sampled BUNs in strictly ascending order:
+ *   - n == 0:            an empty void/void BAT;
+ *   - n >= BATcount(b):  all oids of b, as a void tail based at b's head
+ *                        seqbase;
+ *   - otherwise:         n distinct oids picked by sequential skip sampling
+ *                        (cf. Vitter's Algorithm A).
+ * Returns NULL when the result BAT cannot be allocated.
+ */
+BAT *
+BATsample_(BAT *b, BUN n)
+{
+       BAT *bn;
+       BUN cnt;
+
+       BATcheck(b, "BATsample");
+       assert(BAThdense(b));
+       ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n");
+       ALGODEBUG fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n", n);
+
+       cnt = BATcount(b);
+       if (n == 0) {
+               /* empty sample requested */
+               bn = BATnew(TYPE_void, TYPE_void, 0);
+               if (bn == NULL) {
+                       GDKerror("#BATsample: memory allocation error");
+                       return NULL;
+               }
+               BATsetcount(bn, 0);
+               BATseqbase(bn, 0);
+               BATseqbase(BATmirror(bn), 0);
+       } else if (cnt <= n) {
+               /* sample is at least as large as the input: return all oids */
+               bn = BATnew(TYPE_void, TYPE_void, cnt);
+               if (bn == NULL) {
+                       GDKerror("#BATsample: memory allocation error");
+                       return NULL;
+               }
+               BATsetcount(bn, cnt);
+               BATseqbase(bn, 0);
+               BATseqbase(BATmirror(bn), b->H->seq);
+       } else {
+               BUN smp = 0;    /* number of oids emitted so far */
+               /* top counts the BUNs that will NOT be sampled; it is a
+                * pure count, so the head seqbase must not be added */
+               wrd top = (wrd) (cnt - n);
+               /* p is the oid of the last emitted BUN; signed because it
+                * starts one before the first oid, i.e. -1 for seqbase 0 */
+               wrd p = ((wrd) b->hseqbase) - 1;
+               oid *o;
+
+               /* reserve room for all n oids that are written below */
+               bn = BATnew(TYPE_void, TYPE_oid, n);
+               if (bn == NULL) {
+                       GDKerror("#BATsample: memory allocation error");
+                       return NULL;
+               }
+               o = (oid *) Tloc(bn, BUNfirst(bn));
+               /* invariant: the cnt remaining candidates are p+1 .. p+cnt */
+               while (smp < n - 1) { /* loop until all but 1 values are sampled */
+                       double v = DRAND;
+                       /* top/cnt: probability that the next candidate is skipped */
+                       double quot = (double) top / (double) cnt;
+                       BUN jump = 0;
+
+                       while (quot > v) { /* determine how many positions to jump */
+                               jump++;
+                               top--;
+                               cnt--;
+                               quot *= (double) top / (double) cnt;
+                       }
+                       p += (wrd) (jump + 1);
+                       cnt--;
+                       o[smp++] = (oid) p;
+               }
+               /* 1 left: pick uniformly among the remaining p+1 .. p+cnt;
+                * the "+ 1" keeps it distinct from the previous pick and,
+                * for n == 1, inside the oid range of b */
+               p += 1 + (wrd) ((BUN) rand() % cnt);
+               o[smp] = (oid) p;
+
+               /* property management */
+               BATsetcount(bn, n);
+               bn->trevsorted = bn->U->count <= 1;
+               bn->tkey = 1;
+               bn->tdense = bn->U->count <= 1;
+               if (bn->U->count == 1)
+                       bn->tseqbase = * (oid *) Tloc(bn, BUNfirst(bn));
+               bn->hdense = 1;
+               bn->hseqbase = 0;
+               bn->hkey = 1;
+               bn->hrevsorted = bn->U->count <= 1;
+       }
+
+       return bn;
+}
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to