Changeset: 7a616de6829a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7a616de6829a
Modified Files:
        gdk/ChangeLog
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_batop.mx
        gdk/gdk_calc.c
        gdk/gdk_relop.mx
        gdk/gdk_scanselect.mx
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/modules/kernel/aggr_be_minmax.mx
        monetdb5/modules/kernel/aggr_bge_minmax.mx
        monetdb5/modules/kernel/algebra.mx
        monetdb5/modules/mal/batExtensions.c
        sql/backends/monet5/vaults/mseed.c
        sql/storage/bat/bat_utils.c
Branch: default
Log Message:

Disallow BAT-of-BATs.


diffs (truncated from 1091 to 300 lines):

diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,3 +1,8 @@
 # ChangeLog file for MonetDB
 # This file is updated with Maddlog
 
+* Tue Jul 17 2012 Sjoerd Mullender <sjo...@acm.org>
+- BAT-of-BATs is no longer allowed.  It was already not allowed to
+  make these types of BATs persistent, but now they can't be created at
+  all anymore.
+
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2133,12 +2133,6 @@ gdk_export oid OIDnew(oid inc);
  * @item BAT*
  * @tab
  *  BAThash (BAT *b, BUN masksize)
- * @item BAT *
- * @tab
- *  BAThashsplit (BAT *b, BUN n, int unary)
- * @item BAT *
- * @tab
- *  BATrangesplit  (BAT *b, int n)
  * @end multitable
  *
  * The current BAT implementation supports one search accelerator:
@@ -2147,14 +2141,8 @@ gdk_export oid OIDnew(oid inc);
  * failure to create the supportive structures.
  *
  * The hash data structures are currently maintained during update operations.
- *
- * A BAT can be redistributed over n buckets using a hash
- * function with BAThashsplit. The return value is a list of BAT
- * pointers.  Similarly, a range partitioning based is supported.
  */
 gdk_export BAT *BAThash(BAT *b, BUN masksize);
-gdk_export BAT *BAThashsplit(BAT *b, BUN n, int unary);
-gdk_export BAT *BATrangesplit(BAT *b, BUN n, int unary);
 gdk_export BAT *BAThashjoin(BAT *l, BAT *r, BUN estimate);
 
 /* low level functions */
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -299,6 +299,8 @@ BATnew(int ht, int tt, BUN cap)
        BATstore *bs;
 
        assert(cap <= BUN_MAX);
+       assert(ht != TYPE_bat);
+       assert(tt != TYPE_bat);
        ERRORcheck((ht < 0) || (ht > GDKatomcnt), "BATnew:ht error\n");
        ERRORcheck((tt < 0) || (tt > GDKatomcnt), "BATnew:tt error\n");
 
@@ -763,6 +765,8 @@ BATcopy(BAT *b, int ht, int tt, int writ
        BAT *bn = NULL;
 
        BATcheck(b, "BATcopy");
+       assert(ht != TYPE_bat);
+       assert(tt != TYPE_bat);
        cnt = b->batCount;
 
        /* maybe a bit ugly to change the requested bat types?? */
@@ -2730,7 +2734,6 @@ BATgetaccess(BAT *b)
 #define check_type(tp)                                                 \
        do {                                                            \
                if (ATOMisdescendant((tp), TYPE_ptr) ||                 \
-                   ATOMisdescendant((tp), TYPE_bat) ||                 \
                    BATatoms[tp].atomUnfix ||                           \
                    BATatoms[tp].atomFix) {                             \
                        GDKerror("BATmode: %s type implies that %s[%s,%s] " \
@@ -2817,6 +2820,7 @@ BATassertHeadProps(BAT *b)
        assert(b != NULL);
        assert(b->htype >= TYPE_void);
        assert(b->htype < GDKatomcnt);
+       assert(b->htype != TYPE_bat);
 
        cmpf = BATatoms[b->htype].atomCmp;
        nilp = ATOMnilptr(b->htype);
diff --git a/gdk/gdk_batop.mx b/gdk/gdk_batop.mx
--- a/gdk/gdk_batop.mx
+++ b/gdk/gdk_batop.mx
@@ -763,9 +763,8 @@ BATreplace(BAT *b, BAT *n, bit force)
  * void-columns (in this case, the seqbase has to be recomputed in the result).
  *
  * Note that the BATslice() is used indirectly as well as a special
- * case for BATselect (range selection on sorted column), BATrangesplit
- * (fragmentation on sorted column) and BATsemijoin (when two dense columns
- * are semijoined).
+ * case for BATselect (range selection on sorted column) and
+ * BATsemijoin (when two dense columns are semijoined).
  *
  * NOTE new semantics, the selected range is excluding the high value.
  */
@@ -2002,320 +2001,6 @@ BATrevert(BAT *b)
 }
 
 /*
- * @+ BAT partitioning
- * For distributed processing we support hash and range
- * partitioning operators: BATsplithash and BATsplitrange.
- *
- * The part_bat function creates a partition BAT.
- */
-static BAT *
-part_bat(BAT *b, int ht, int tt, BUN expected_size, int respect_order)
-{
-       BAT *bn = BATnew(ht, tt, (BUN) ((double) expected_size * BATMARGIN));
-
-       if (bn) {
-               BATkey(bn, BAThkey(b));
-               BATkey(BATmirror(bn), BATtkey(b));
-               bn->hsorted = respect_order && BAThordered(b);
-               bn->tsorted = respect_order && BATtordered(b);
-               bn->hrevsorted = respect_order && BAThrevordered(b);
-               bn->trevsorted = respect_order && BATtrevordered(b);
-               bn->H->nonil = b->H->nonil;
-               bn->T->nonil = b->T->nonil;
-       }
-       return bn;
-}
-
-/*
- * @- hash partitioning
- */
-#define BUNhash(bx,hx,tx)\
-       /* assert(n <= 0x40000000); */\
-       i = (int)(HASHprobe(&h,tx)%n); \
-       if ((r = BUNfnd(bx, &i)) != BUN_NONE){\
-               bat bid = *(bat*)BUNtloc(metabati,r); \
-               bunfastins(BBPdescriptor(bid), hx, tx);\
-       }
-
-BAT *
-BAThashsplit(BAT *b, BUN n, int unary)
-{
-       BAT *metabat, *bn, *bf;
-       BUN r;
-       Hash h;
-       BUN cnt;
-       int i = 0;
-
-       /* assert(n <= 0x40000000); */
-       BATcheck(b, "BAThashsplit");
-       if (n > BATcount(b)) {
-               PROPDEBUG THRprintf(GDKout, "#BAThashsplit: reduced number of ranges (" BUNFMT ") to number of tuples (" BUNFMT ").\n", n, BATcount(b));
-               n = BATcount(b);
-       }
-       if (n < 1) {
-               GDKerror("BAThashsplit: number of ranges must not be less than 1!\n");
-               return 0;
-       }
-
-       metabat = BATnew(TYPE_int, TYPE_bat, n);
-       if (metabat == NULL)
-               return NULL;
-       bn = unary ? VIEWhead_(b, b->batRestricted) : b;
-       if (n <= 1) {
-               if (BUNins(metabat, &i, &bn->batCacheid, FALSE) == NULL)
-                       goto bunins_failed;
-       } else {
-               BUN p, q;
-               BATiter metabati;
-
-               for (i = 2; i < (int) n; i *= 2)
-                       ;
-               h.mask = i - 1;
-               h.type = BATttype(b);
-               cnt = (BUN) (BATMARGIN * (double) BATbuncount(b) / (double) n);
-               for (i = 0; i < (int) n; i++) {
-                       bf = part_bat(bn, BAThtype(bn), BATttype(bn), cnt, TRUE);
-                       if (bf == NULL) {
-                               BBPreclaim(metabat);
-                               return NULL;
-                       }
-                       if (BUNins(metabat, &i, &bf->batCacheid, FALSE) == NULL)
-                               goto bunins_failed;
-               }
-               metabati = bat_iterator(metabat);
-               updateloop(metabat, b, BUNhash);
-               BATloop(metabat, p, q) {
-                       bat bt = *(bat *) BUNtail(metabati, p);
-
-                       BBPunfix(bt);
-               }
-       }
-       return metabat;
-      bunins_failed:
-       BBPreclaim(metabat);
-       return NULL;
-}
-
-/*
- * Range partitioning ensures that identical values appear in one
- * partition only. The routine also tries to deliver partitions of
- * uniform size.
- */
-BAT *
-BATrangesplit(BAT *b, BUN n, int unary)
-{
-       BAT *metabat, *slice, *histo, *bf = NULL, *bn, *m;
-       int target, tpe, *sizes = NULL;
-       int zz = 0;
-       BUN yy = 0;
-       ptr *seps = NULL, nilval;
-       BUN r, s;
-       BATiter histoi, bi = bat_iterator(b);
-       dbl scale;
-       BUN thorough = (n <= 1 || BATtvoid(b) || BATtordered(b)) ? 1 : 10;
-
-       BATcheck(b, "BATrangesplit");
-       if (n > BATcount(b)) {
-               PROPDEBUG THRprintf(GDKout, "#BATrangesplit: reduced number of ranges (" BUNFMT ") to number of tuples (" BUNFMT ").\n", n, BATcount(b));
-               n = BATcount(b);
-       }
-       if (n < 1) {
-               GDKerror("BAThashsplit: number of ranges must not be less than 1!\n");
-               return 0;
-       }
-
-       /* assert(BATcount(b)/n <= 0x7fffffff); */
-       bn = unary ? VIEWhead_(b, b->batRestricted) : b;
-       m = BATmirror(b);
-       metabat = BATnew(BATttype(b), TYPE_bat, n);
-       BATcheck(metabat, "BATrangesplit 2");
-       nilval = ATOMnilptr(BATttype(b));
-
-       /*
-        * We use sampling to determine bucket sizes.
-        * Uniform bucket sizes are the ideal to be achieved.
-        * If necessary though, we deliver less than n buckets.
-        */
-       slice = BATsample(b, MIN(MAX(30 * n * thorough, 100 * thorough), BATcount(b)));
-       histo = BAThistogram(slice);
-       target = (int) (BATcount(b) / n);       /* see assert above */
-       scale = ((dbl) BATcount(b)) / ((dbl) BATcount(slice));
-       BBPreclaim(slice);
-       if ((sizes = (int *) GDKmalloc(2 * n * sizeof(int))) == NULL)
-               goto bunins_failed;
-       if ((seps = (ptr *) GDKmalloc(2 * n * sizeof(ptr))) == NULL)
-               goto bunins_failed;
-       /*
-        * Use the histogram to determine good split boundaries on b.
-        */
-       BATorder(histo);
-       histoi = bat_iterator(histo);
-       BATloop(histo, r, s) {
-               int cnt = *(int *) BUNtloc(histoi, r);
-               int add = (int) (scale * cnt);
-
-               if (zz + add > target) {
-                       if ((zz + add - target) < (target - zz)) {
-                               sizes[yy] = zz + add;
-                               seps[yy] = ATOMdup(histo->htype, BUNhead(histoi, r));
-                               add = 0;
-                       } else {
-                               sizes[yy] = zz;
-                               seps[yy] = ATOMdup(histo->htype, BUNhead(histoi, (r - 1)));
-                       }
-                       zz = 0;
-                       yy++;
-               }
-               zz += add;
-       }
-       if (yy) {
-               if ((sizes[yy - 1] + zz - target) > (target - zz)) {
-                       sizes[yy] = zz;
-               } else {
-                       yy--;   /* join with the last */
-               }
-       }
-       seps[yy] = nilval;
-       BBPreclaim(histo);
-
-       if (n > 1 && n != yy + 1) {
-               PROPDEBUG THRprintf(GDKout, "#rangesplit: delivering " BUNFMT " instead of " BUNFMT " fragments\n", yy + 1, n);
-               n = yy + 1;
-       }
-       /*
-        * CASE 1: just one bucket.
-        * This is done without copying b.
-        */
-       if (n <= 1) {
-               if (BUNins(metabat, nilval, &bn->batCacheid, FALSE) == NULL)
-                       goto bunins_failed;
-
-       /*
-        * CASE 2: sorted on fragmentation column.
-        * We can again avoid copying, by giving slices (views) on the source BAT.
-        * Virtual oids (void) is a special subcase with positional lookup instead
-        * of binary search.
-        */
-       } else if (BATtvoid(b) || BATtordered(b)) {
-               BUN l, h = 0, o = BUNfirst(b);
-
-               for (yy = 0; yy < n; yy++) {
-                       l = h;
-                       if (yy == n - 1) {
-                               r = BUNlast(m);
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to