Changeset: b52ed41e9e9a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b52ed41e9e9a
Modified Files:
    gdk/gdk.h
    gdk/gdk_batop.c
    gdk/gdk_bbp.c
    monetdb5/modules/atoms/mtime.c
    sql/backends/monet5/sql.c
    sql/storage/bat/bat_storage.c
    sql/storage/sql_storage.h
    sql/storage/store.c
Branch: properties
Log Message:
Merged with default diffs (truncated from 7920 to 300 lines): diff --git a/gdk/ChangeLog.Jan2022 b/gdk/ChangeLog.Jan2022 --- a/gdk/ChangeLog.Jan2022 +++ b/gdk/ChangeLog.Jan2022 @@ -1,3 +1,7 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Tue Mar 29 2022 Sjoerd Mullender <sjo...@acm.org> +- Improved speed of projection (BATproject) on varsized bats by sharing + the data heap (vheap). + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -947,6 +947,7 @@ typedef struct BATiter { void *base; Heap *vh; BUN count; + BUN baseoff; uint16_t width; uint8_t shift; int8_t type; @@ -954,6 +955,13 @@ typedef struct BATiter { BUN hfree, vhfree; BUN minpos, maxpos; double unique_est; + bool key:1, + nonil:1, + nil:1, + sorted:1, + revsorted:1, + hdirty:1, + vhdirty:1; union { oid tvid; bool tmsk; @@ -973,6 +981,7 @@ bat_iterator_nolock(BAT *b) .b = b, .h = b->theap, .base = b->theap->base ? b->theap->base + (b->tbaseoff << b->tshift) : NULL, + .baseoff = b->tbaseoff, .vh = b->tvheap, .count = b->batCount, .width = b->twidth, @@ -989,6 +998,13 @@ bat_iterator_nolock(BAT *b) .minpos = isview ? BUN_NONE : b->tminpos, .maxpos = isview ? 
BUN_NONE : b->tmaxpos, .unique_est = b->tunique_est, + .key = b->tkey, + .nonil = b->tnonil, + .nil = b->tnil, + .sorted = b->tsorted, + .revsorted = b->trevsorted, + .hdirty = b->theap->dirty, + .vhdirty = b->tvheap && b->tvheap->dirty, #ifndef NDEBUG .locked = false, #endif @@ -1023,6 +1039,20 @@ bat_iterator(BAT *b) return bi; } +/* return a copy of a BATiter instance; needs to be released with + * bat_iterator_end */ +static inline BATiter +bat_iterator_copy(BATiter *bip) +{ + assert(bip); + assert(bip->locked); + if (bip->h) + HEAPincref(bip->h); + if (bip->vh) + HEAPincref(bip->vh); + return *bip; +} + static inline void bat_iterator_end(BATiter *bip) { @@ -1359,8 +1389,6 @@ gdk_export gdk_return BATsort(BAT **sort gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast); -#define BATtordered(b) ((b)->tsorted) -#define BATtrevordered(b) ((b)->trevsorted) /* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL) */ #define BATtdense(b) (!is_oid_nil((b)->tseqbase) && \ ((b)->tvheap == NULL || (b)->tvheap->free == 0)) diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -961,8 +961,8 @@ BATgroupsum(BAT *b, BAT *g, BAT *e, BAT gids = (const oid *) Tloc(g, 0); BATiter bi = bat_iterator(b); - nils = dosum(bi.base, b->tnonil, b->hseqbase, &ci, - Tloc(bn, 0), ngrp, b->ttype, tp, gids, min, max, + nils = dosum(bi.base, bi.nonil, b->hseqbase, &ci, + Tloc(bn, 0), ngrp, bi.type, tp, gids, min, max, skip_nils, abort_on_error, true, __func__, &algo); bat_iterator_end(&bi); @@ -1191,8 +1191,8 @@ BATsum(void *res, int tp, BAT *b, BAT *s if (ci.ncand == 0) return GDK_SUCCEED; BATiter bi = bat_iterator(b); - BUN nils = dosum(bi.base, b->tnonil, b->hseqbase, &ci, - res, true, b->ttype, tp, &min, min, max, + BUN nils = dosum(bi.base, bi.nonil, b->hseqbase, &ci, + res, true, bi.type, tp, &min, min, max, skip_nils, abort_on_error, 
nil_if_empty, __func__, &algo); bat_iterator_end(&bi); if (algo) @@ -1649,7 +1649,7 @@ BATgroupprod(BAT *b, BAT *g, BAT *e, BAT BATiter bi = bat_iterator(b); nils = doprod(bi.base, b->hseqbase, &ci, Tloc(bn, 0), ngrp, - b->ttype, tp, gids, true, min, max, skip_nils, + bi.type, tp, gids, true, min, max, skip_nils, abort_on_error, true, __func__); bat_iterator_end(&bi); @@ -1724,7 +1724,7 @@ BATprod(void *res, int tp, BAT *b, BAT * return GDK_SUCCEED; BATiter bi = bat_iterator(b); nils = doprod(bi.base, b->hseqbase, &ci, res, true, - b->ttype, tp, &min, false, min, max, + bi.type, tp, &min, false, min, max, skip_nils, abort_on_error, nil_if_empty, __func__); bat_iterator_end(&bi); TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT "; " @@ -1956,7 +1956,7 @@ BATgroupavg(BAT **bnp, BAT **cntsp, BAT AGGR_AVG_FLOAT(dbl); break; default: - GDKerror("type (%s) not supported.\n", ATOMname(b->ttype)); + GDKerror("type (%s) not supported.\n", ATOMname(bi.type)); goto bailout; } bat_iterator_end(&bi); @@ -3043,7 +3043,7 @@ BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN break; default: GDKerror("average of type %s unsupported.\n", - ATOMname(b->ttype)); + ATOMname(bi.type)); goto bailout; } bat_iterator_end(&bi); @@ -3705,7 +3705,7 @@ BATmin_skipnil(BAT *b, void *aggr, bit s } BATiter bi = bat_iterator(b); if (bi.count == 0) { - res = ATOMnilptr(b->ttype); + res = ATOMnilptr(bi.type); } else if (bi.minpos != BUN_NONE) { res = BUNtail(bi, bi.minpos); } else { @@ -3727,7 +3727,7 @@ BATmin_skipnil(BAT *b, void *aggr, bit s /* no lock on b needed since it's a view */ MT_lock_set(&pb->batIdxLock); MT_lock_set(&pb->theaplock); - if (pb->tbaseoff == b->tbaseoff && + if (pb->tbaseoff == bi.baseoff && BATcount(pb) == bi.count && pb->hseqbase == b->hseqbase && (oidxh = pb->torderidx) != NULL) { @@ -3739,14 +3739,14 @@ BATmin_skipnil(BAT *b, void *aggr, bit s if (oidxh != NULL) { const oid *ords = (const oid *) oidxh->base + ORDERIDXOFF; BUN r; - if (!b->tnonil) { + if (!bi.nonil) { 
MT_thread_setalgorithm(pb ? "binsearch on parent oidx" : "binsearch on oidx"); - r = binsearch(ords, 0, b->ttype, bi.base, + r = binsearch(ords, 0, bi.type, bi.base, bi.vh ? bi.vh->base : NULL, bi.width, 0, bi.count, - ATOMnilptr(b->ttype), 1, 1); + ATOMnilptr(bi.type), 1, 1); if (r == 0) { - b->tnonil = true; + bi.nonil = true; b->batDirtydesc = true; } } else { @@ -3805,30 +3805,28 @@ BATmin_skipnil(BAT *b, void *aggr, bit s } } if (is_oid_nil(pos)) { - res = ATOMnilptr(b->ttype); + res = ATOMnilptr(bi.type); } else { bi.minpos = pos - b->hseqbase; res = BUNtail(bi, bi.minpos); MT_lock_set(&b->theaplock); if (bi.count == BATcount(b) && bi.h == b->theap) b->tminpos = bi.minpos; - MT_lock_unset(&b->theaplock); bat pbid = VIEWtparent(b); if (pbid) { BAT *pb = BBP_cache(pbid); - MT_lock_set(&pb->theaplock); if (bi.count == BATcount(pb) && bi.h == pb->theap) pb->tminpos = bi.minpos; - MT_lock_unset(&pb->theaplock); } + MT_lock_unset(&b->theaplock); } } if (aggr == NULL) { - s = ATOMlen(b->ttype, res); + s = ATOMlen(bi.type, res); aggr = GDKmalloc(s); } else { - s = ATOMsize(ATOMtype(b->ttype)); + s = ATOMsize(ATOMtype(bi.type)); } if (aggr != NULL) /* else: malloc error */ memcpy(aggr, res, s); @@ -3868,7 +3866,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s } bi = bat_iterator(b); if (bi.count == 0) { - res = ATOMnilptr(b->ttype); + res = ATOMnilptr(bi.type); } else if (bi.maxpos != BUN_NONE) { res = BUNtail(bi, bi.maxpos); } else { @@ -3890,7 +3888,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s /* no lock on b needed since it's a view */ MT_lock_set(&pb->batIdxLock); MT_lock_set(&pb->theaplock); - if (pb->tbaseoff == b->tbaseoff && + if (pb->tbaseoff == bi.baseoff && BATcount(pb) == bi.count && pb->hseqbase == b->hseqbase && (oidxh = pb->torderidx) != NULL) { @@ -3910,7 +3908,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s res = BUNtail(bi, z - b->hseqbase); - if (ATOMcmp(b->ttype, res, ATOMnilptr(b->ttype)) == 0) + if (ATOMcmp(bi.type, res, ATOMnilptr(bi.type)) == 0) 
pos = z; } HEAPdecref(oidxh, false); @@ -3959,30 +3957,28 @@ BATmax_skipnil(BAT *b, void *aggr, bit s } } if (is_oid_nil(pos)) { - res = ATOMnilptr(b->ttype); + res = ATOMnilptr(bi.type); } else { bi.maxpos = pos - b->hseqbase; res = BUNtail(bi, bi.maxpos); MT_lock_set(&b->theaplock); if (bi.count == BATcount(b) && bi.h == b->theap) b->tmaxpos = bi.maxpos; - MT_lock_unset(&b->theaplock); bat pbid = VIEWtparent(b); if (pbid) { BAT *pb = BBP_cache(pbid); - MT_lock_set(&pb->theaplock); if (bi.count == BATcount(pb) && bi.h == pb->theap) pb->tmaxpos = bi.maxpos; - MT_lock_unset(&pb->theaplock); } + MT_lock_unset(&b->theaplock); } } if (aggr == NULL) { - s = ATOMlen(b->ttype, res); + s = ATOMlen(bi.type, res); aggr = GDKmalloc(s); } else { - s = ATOMsize(ATOMtype(b->ttype)); + s = ATOMsize(ATOMtype(bi.type)); } if (aggr != NULL) /* else: malloc error */ memcpy(aggr, res, s); @@ -4163,7 +4159,7 @@ doBATgroupquantile(BAT *b, BAT *g, BAT * /* search for end of current group (grps is * sorted so we can use binary search) */ p = binsearch_oid(NULL, 0, grps, r, q - 1, prev, 1, 1); - if (skip_nils && !b->tnonil) { + if (skip_nils && !bi.nonil) { /* within group, locate start of non-nils */ r = binsearch(NULL, 0, tp, bi.base, bi.vh ? bi.vh->base : NULL, @@ -4211,7 +4207,7 @@ doBATgroupquantile(BAT *b, BAT *g, BAT * /* be a little paranoid about the index */ assert(qindex >= r && qindex < p); v = BUNtail(bi, qindex); - if (!skip_nils && !b->tnonil) + if (!skip_nils && !bi.nonil) nils += (*atomcmp)(v, dnil) == 0; } _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org