Changeset: 7a616de6829a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7a616de6829a
Modified Files:
    gdk/ChangeLog
    gdk/gdk.h
    gdk/gdk_bat.c
    gdk/gdk_batop.mx
    gdk/gdk_calc.c
    gdk/gdk_relop.mx
    gdk/gdk_scanselect.mx
    monetdb5/extras/rdf/rdf_shredder.mx
    monetdb5/modules/kernel/aggr_be_minmax.mx
    monetdb5/modules/kernel/aggr_bge_minmax.mx
    monetdb5/modules/kernel/algebra.mx
    monetdb5/modules/mal/batExtensions.c
    sql/backends/monet5/vaults/mseed.c
    sql/storage/bat/bat_utils.c
Branch: default
Log Message:
Disallow BAT-of-BATs. diffs (truncated from 1091 to 300 lines): diff --git a/gdk/ChangeLog b/gdk/ChangeLog --- a/gdk/ChangeLog +++ b/gdk/ChangeLog @@ -1,3 +1,8 @@ # ChangeLog file for MonetDB # This file is updated with Maddlog +* Tue Jul 17 2012 Sjoerd Mullender <sjo...@acm.org> +- BAT-of-BATs is no longer allowed. It was already not allowed to + make these types of BATs persistent, but now they can't be created at + all anymore. + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2133,12 +2133,6 @@ gdk_export oid OIDnew(oid inc); * @item BAT* * @tab * BAThash (BAT *b, BUN masksize) - * @item BAT * - * @tab - * BAThashsplit (BAT *b, BUN n, int unary) - * @item BAT * - * @tab - * BATrangesplit (BAT *b, int n) * @end multitable * * The current BAT implementation supports one search accelerator: @@ -2147,14 +2141,8 @@ gdk_export oid OIDnew(oid inc); * failure to create the supportive structures. * * The hash data structures are currently maintained during update operations. - * - * A BAT can be redistributed over n buckets using a hash - * function with BAThashsplit. The return value is a list of BAT - * pointers. Similarly, a range partitioning based is supported. 
*/ gdk_export BAT *BAThash(BAT *b, BUN masksize); -gdk_export BAT *BAThashsplit(BAT *b, BUN n, int unary); -gdk_export BAT *BATrangesplit(BAT *b, BUN n, int unary); gdk_export BAT *BAThashjoin(BAT *l, BAT *r, BUN estimate); /* low level functions */ diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -299,6 +299,8 @@ BATnew(int ht, int tt, BUN cap) BATstore *bs; assert(cap <= BUN_MAX); + assert(ht != TYPE_bat); + assert(tt != TYPE_bat); ERRORcheck((ht < 0) || (ht > GDKatomcnt), "BATnew:ht error\n"); ERRORcheck((tt < 0) || (tt > GDKatomcnt), "BATnew:tt error\n"); @@ -763,6 +765,8 @@ BATcopy(BAT *b, int ht, int tt, int writ BAT *bn = NULL; BATcheck(b, "BATcopy"); + assert(ht != TYPE_bat); + assert(tt != TYPE_bat); cnt = b->batCount; /* maybe a bit ugly to change the requested bat types?? */ @@ -2730,7 +2734,6 @@ BATgetaccess(BAT *b) #define check_type(tp) \ do { \ if (ATOMisdescendant((tp), TYPE_ptr) || \ - ATOMisdescendant((tp), TYPE_bat) || \ BATatoms[tp].atomUnfix || \ BATatoms[tp].atomFix) { \ GDKerror("BATmode: %s type implies that %s[%s,%s] " \ @@ -2817,6 +2820,7 @@ BATassertHeadProps(BAT *b) assert(b != NULL); assert(b->htype >= TYPE_void); assert(b->htype < GDKatomcnt); + assert(b->htype != TYPE_bat); cmpf = BATatoms[b->htype].atomCmp; nilp = ATOMnilptr(b->htype); diff --git a/gdk/gdk_batop.mx b/gdk/gdk_batop.mx --- a/gdk/gdk_batop.mx +++ b/gdk/gdk_batop.mx @@ -763,9 +763,8 @@ BATreplace(BAT *b, BAT *n, bit force) * void-columns (in this case, the seqbase has to be recomputed in the result). * * Note that the BATslice() is used indirectly as well as a special - * case for BATselect (range selection on sorted column), BATrangesplit - * (fragmentation on sorted column) and BATsemijoin (when two dense columns - * are semijoined). + * case for BATselect (range selection on sorted column) and + * BATsemijoin (when two dense columns are semijoined). * * NOTE new semantics, the selected range is excluding the high value. 
*/ @@ -2002,320 +2001,6 @@ BATrevert(BAT *b) } /* - * @+ BAT partitioning - * For distributed processing we support hash and range - * partitioning operators: BATsplithash and BATsplitrange. - * - * The part_bat function creates a partition BAT. - */ -static BAT * -part_bat(BAT *b, int ht, int tt, BUN expected_size, int respect_order) -{ - BAT *bn = BATnew(ht, tt, (BUN) ((double) expected_size * BATMARGIN)); - - if (bn) { - BATkey(bn, BAThkey(b)); - BATkey(BATmirror(bn), BATtkey(b)); - bn->hsorted = respect_order && BAThordered(b); - bn->tsorted = respect_order && BATtordered(b); - bn->hrevsorted = respect_order && BAThrevordered(b); - bn->trevsorted = respect_order && BATtrevordered(b); - bn->H->nonil = b->H->nonil; - bn->T->nonil = b->T->nonil; - } - return bn; -} - -/* - * @- hash partitioning - */ -#define BUNhash(bx,hx,tx)\ - /* assert(n <= 0x40000000); */\ - i = (int)(HASHprobe(&h,tx)%n); \ - if ((r = BUNfnd(bx, &i)) != BUN_NONE){\ - bat bid = *(bat*)BUNtloc(metabati,r); \ - bunfastins(BBPdescriptor(bid), hx, tx);\ - } - -BAT * -BAThashsplit(BAT *b, BUN n, int unary) -{ - BAT *metabat, *bn, *bf; - BUN r; - Hash h; - BUN cnt; - int i = 0; - - /* assert(n <= 0x40000000); */ - BATcheck(b, "BAThashsplit"); - if (n > BATcount(b)) { - PROPDEBUG THRprintf(GDKout, "#BAThashsplit: reduced number of ranges (" BUNFMT ") to number of tuples (" BUNFMT ").\n", n, BATcount(b)); - n = BATcount(b); - } - if (n < 1) { - GDKerror("BAThashsplit: number of ranges must not be less than 1!\n"); - return 0; - } - - metabat = BATnew(TYPE_int, TYPE_bat, n); - if (metabat == NULL) - return NULL; - bn = unary ? 
VIEWhead_(b, b->batRestricted) : b; - if (n <= 1) { - if (BUNins(metabat, &i, &bn->batCacheid, FALSE) == NULL) - goto bunins_failed; - } else { - BUN p, q; - BATiter metabati; - - for (i = 2; i < (int) n; i *= 2) - ; - h.mask = i - 1; - h.type = BATttype(b); - cnt = (BUN) (BATMARGIN * (double) BATbuncount(b) / (double) n); - for (i = 0; i < (int) n; i++) { - bf = part_bat(bn, BAThtype(bn), BATttype(bn), cnt, TRUE); - if (bf == NULL) { - BBPreclaim(metabat); - return NULL; - } - if (BUNins(metabat, &i, &bf->batCacheid, FALSE) == NULL) - goto bunins_failed; - } - metabati = bat_iterator(metabat); - updateloop(metabat, b, BUNhash); - BATloop(metabat, p, q) { - bat bt = *(bat *) BUNtail(metabati, p); - - BBPunfix(bt); - } - } - return metabat; - bunins_failed: - BBPreclaim(metabat); - return NULL; -} - -/* - * Range partitioning ensures that identical values appear in one - * partition only. The routine also tries to deliver partitions of - * uniform size. - */ -BAT * -BATrangesplit(BAT *b, BUN n, int unary) -{ - BAT *metabat, *slice, *histo, *bf = NULL, *bn, *m; - int target, tpe, *sizes = NULL; - int zz = 0; - BUN yy = 0; - ptr *seps = NULL, nilval; - BUN r, s; - BATiter histoi, bi = bat_iterator(b); - dbl scale; - BUN thorough = (n <= 1 || BATtvoid(b) || BATtordered(b)) ? 1 : 10; - - BATcheck(b, "BATrangesplit"); - if (n > BATcount(b)) { - PROPDEBUG THRprintf(GDKout, "#BATrangesplit: reduced number of ranges (" BUNFMT ") to number of tuples (" BUNFMT ").\n", n, BATcount(b)); - n = BATcount(b); - } - if (n < 1) { - GDKerror("BAThashsplit: number of ranges must not be less than 1!\n"); - return 0; - } - - /* assert(BATcount(b)/n <= 0x7fffffff); */ - bn = unary ? VIEWhead_(b, b->batRestricted) : b; - m = BATmirror(b); - metabat = BATnew(BATttype(b), TYPE_bat, n); - BATcheck(metabat, "BATrangesplit 2"); - nilval = ATOMnilptr(BATttype(b)); - - /* - * We use sampling to determine bucket sizes. - * Uniform bucket sizes are the ideal to be achieved. 
- * If necessary though, we deliver less than n buckets. - */ - slice = BATsample(b, MIN(MAX(30 * n * thorough, 100 * thorough), BATcount(b))); - histo = BAThistogram(slice); - target = (int) (BATcount(b) / n); /* see assert above */ - scale = ((dbl) BATcount(b)) / ((dbl) BATcount(slice)); - BBPreclaim(slice); - if ((sizes = (int *) GDKmalloc(2 * n * sizeof(int))) == NULL) - goto bunins_failed; - if ((seps = (ptr *) GDKmalloc(2 * n * sizeof(ptr))) == NULL) - goto bunins_failed; - /* - * Use the histogram to determine good split boundaries on b. - */ - BATorder(histo); - histoi = bat_iterator(histo); - BATloop(histo, r, s) { - int cnt = *(int *) BUNtloc(histoi, r); - int add = (int) (scale * cnt); - - if (zz + add > target) { - if ((zz + add - target) < (target - zz)) { - sizes[yy] = zz + add; - seps[yy] = ATOMdup(histo->htype, BUNhead(histoi, r)); - add = 0; - } else { - sizes[yy] = zz; - seps[yy] = ATOMdup(histo->htype, BUNhead(histoi, (r - 1))); - } - zz = 0; - yy++; - } - zz += add; - } - if (yy) { - if ((sizes[yy - 1] + zz - target) > (target - zz)) { - sizes[yy] = zz; - } else { - yy--; /* join with the last */ - } - } - seps[yy] = nilval; - BBPreclaim(histo); - - if (n > 1 && n != yy + 1) { - PROPDEBUG THRprintf(GDKout, "#rangesplit: delivering " BUNFMT " instead of " BUNFMT " fragments\n", yy + 1, n); - n = yy + 1; - } - /* - * CASE 1: just one bucket. - * This is done without copying b. - */ - if (n <= 1) { - if (BUNins(metabat, nilval, &bn->batCacheid, FALSE) == NULL) - goto bunins_failed; - - /* - * CASE 2: sorted on fragmentation column. - * We can again avoid copying, by giving slices (views) on the source BAT. - * Virtual oids (void) is a special subcase with positional lookup instead - * of binary search. 
- */ - } else if (BATtvoid(b) || BATtordered(b)) { - BUN l, h = 0, o = BUNfirst(b); - - for (yy = 0; yy < n; yy++) { - l = h; - if (yy == n - 1) { - r = BUNlast(m); _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list