Changeset: b52ed41e9e9a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b52ed41e9e9a
Modified Files:
        gdk/gdk.h
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        monetdb5/modules/atoms/mtime.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_storage.c
        sql/storage/sql_storage.h
        sql/storage/store.c
Branch: properties
Log Message:

Merged with default


diffs (truncated from 7920 to 300 lines):

diff --git a/gdk/ChangeLog.Jan2022 b/gdk/ChangeLog.Jan2022
--- a/gdk/ChangeLog.Jan2022
+++ b/gdk/ChangeLog.Jan2022
@@ -1,3 +1,7 @@
 # ChangeLog file for GDK
 # This file is updated with Maddlog
 
+* Tue Mar 29 2022 Sjoerd Mullender <sjo...@acm.org>
+- Improved speed of projection (BATproject) on varsized bats by sharing
+  the data heap (vheap).
+
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -947,6 +947,7 @@ typedef struct BATiter {
        void *base;
        Heap *vh;
        BUN count;
+       BUN baseoff;
        uint16_t width;
        uint8_t shift;
        int8_t type;
@@ -954,6 +955,13 @@ typedef struct BATiter {
        BUN hfree, vhfree;
        BUN minpos, maxpos;
        double unique_est;
+       bool key:1,
+               nonil:1,
+               nil:1,
+               sorted:1,
+               revsorted:1,
+               hdirty:1,
+               vhdirty:1;
        union {
                oid tvid;
                bool tmsk;
@@ -973,6 +981,7 @@ bat_iterator_nolock(BAT *b)
                        .b = b,
                        .h = b->theap,
                        .base = b->theap->base ? b->theap->base + (b->tbaseoff << b->tshift) : NULL,
+                       .baseoff = b->tbaseoff,
                        .vh = b->tvheap,
                        .count = b->batCount,
                        .width = b->twidth,
@@ -989,6 +998,13 @@ bat_iterator_nolock(BAT *b)
                        .minpos = isview ? BUN_NONE : b->tminpos,
                        .maxpos = isview ? BUN_NONE : b->tmaxpos,
                        .unique_est = b->tunique_est,
+                       .key = b->tkey,
+                       .nonil = b->tnonil,
+                       .nil = b->tnil,
+                       .sorted = b->tsorted,
+                       .revsorted = b->trevsorted,
+                       .hdirty = b->theap->dirty,
+                       .vhdirty = b->tvheap && b->tvheap->dirty,
 #ifndef NDEBUG
                        .locked = false,
 #endif
@@ -1023,6 +1039,20 @@ bat_iterator(BAT *b)
        return bi;
 }
 
+/* return a copy of a BATiter instance; needs to be released with
+ * bat_iterator_end */
+static inline BATiter
+bat_iterator_copy(BATiter *bip)
+{
+       assert(bip);
+       assert(bip->locked);
+       if (bip->h)
+               HEAPincref(bip->h);
+       if (bip->vh)
+               HEAPincref(bip->vh);
+       return *bip;
+}
+
 static inline void
 bat_iterator_end(BATiter *bip)
 {
@@ -1359,8 +1389,6 @@ gdk_export gdk_return BATsort(BAT **sort
 
 gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast);
 
-#define BATtordered(b) ((b)->tsorted)
-#define BATtrevordered(b) ((b)->trevsorted)
 /* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL) */
 #define BATtdense(b)   (!is_oid_nil((b)->tseqbase) &&                  \
                         ((b)->tvheap == NULL || (b)->tvheap->free == 0))
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -961,8 +961,8 @@ BATgroupsum(BAT *b, BAT *g, BAT *e, BAT 
                gids = (const oid *) Tloc(g, 0);
 
        BATiter bi = bat_iterator(b);
-       nils = dosum(bi.base, b->tnonil, b->hseqbase, &ci,
-                    Tloc(bn, 0), ngrp, b->ttype, tp, gids, min, max,
+       nils = dosum(bi.base, bi.nonil, b->hseqbase, &ci,
+                    Tloc(bn, 0), ngrp, bi.type, tp, gids, min, max,
                     skip_nils, abort_on_error, true, __func__, &algo);
        bat_iterator_end(&bi);
 
@@ -1191,8 +1191,8 @@ BATsum(void *res, int tp, BAT *b, BAT *s
        if (ci.ncand == 0)
                return GDK_SUCCEED;
        BATiter bi = bat_iterator(b);
-       BUN nils = dosum(bi.base, b->tnonil, b->hseqbase, &ci,
-                        res, true, b->ttype, tp, &min, min, max,
+       BUN nils = dosum(bi.base, bi.nonil, b->hseqbase, &ci,
+                        res, true, bi.type, tp, &min, min, max,
                         skip_nils, abort_on_error, nil_if_empty, __func__, &algo);
        bat_iterator_end(&bi);
        if (algo)
@@ -1649,7 +1649,7 @@ BATgroupprod(BAT *b, BAT *g, BAT *e, BAT
 
        BATiter bi = bat_iterator(b);
        nils = doprod(bi.base, b->hseqbase, &ci, Tloc(bn, 0), ngrp,
-                     b->ttype, tp, gids, true, min, max, skip_nils,
+                     bi.type, tp, gids, true, min, max, skip_nils,
                      abort_on_error, true, __func__);
        bat_iterator_end(&bi);
 
@@ -1724,7 +1724,7 @@ BATprod(void *res, int tp, BAT *b, BAT *
                return GDK_SUCCEED;
        BATiter bi = bat_iterator(b);
        nils = doprod(bi.base, b->hseqbase, &ci, res, true,
-                     b->ttype, tp, &min, false, min, max,
+                     bi.type, tp, &min, false, min, max,
                      skip_nils, abort_on_error, nil_if_empty, __func__);
        bat_iterator_end(&bi);
        TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT "; "
@@ -1956,7 +1956,7 @@ BATgroupavg(BAT **bnp, BAT **cntsp, BAT 
                AGGR_AVG_FLOAT(dbl);
                break;
        default:
-               GDKerror("type (%s) not supported.\n", ATOMname(b->ttype));
+               GDKerror("type (%s) not supported.\n", ATOMname(bi.type));
                goto bailout;
        }
        bat_iterator_end(&bi);
@@ -3043,7 +3043,7 @@ BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN
                break;
        default:
                GDKerror("average of type %s unsupported.\n",
-                        ATOMname(b->ttype));
+                        ATOMname(bi.type));
                goto bailout;
        }
        bat_iterator_end(&bi);
@@ -3705,7 +3705,7 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
        }
        BATiter bi = bat_iterator(b);
        if (bi.count == 0) {
-               res = ATOMnilptr(b->ttype);
+               res = ATOMnilptr(bi.type);
        } else if (bi.minpos != BUN_NONE) {
                res = BUNtail(bi, bi.minpos);
        } else {
@@ -3727,7 +3727,7 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
                        /* no lock on b needed since it's a view */
                        MT_lock_set(&pb->batIdxLock);
                        MT_lock_set(&pb->theaplock);
-                       if (pb->tbaseoff == b->tbaseoff &&
+                       if (pb->tbaseoff == bi.baseoff &&
                            BATcount(pb) == bi.count &&
                            pb->hseqbase == b->hseqbase &&
                            (oidxh = pb->torderidx) != NULL) {
@@ -3739,14 +3739,14 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
                if (oidxh != NULL) {
                        const oid *ords = (const oid *) oidxh->base + ORDERIDXOFF;
                        BUN r;
-                       if (!b->tnonil) {
+                       if (!bi.nonil) {
                                MT_thread_setalgorithm(pb ? "binsearch on parent oidx" : "binsearch on oidx");
-                               r = binsearch(ords, 0, b->ttype, bi.base,
+                               r = binsearch(ords, 0, bi.type, bi.base,
                                              bi.vh ? bi.vh->base : NULL,
                                              bi.width, 0, bi.count,
-                                             ATOMnilptr(b->ttype), 1, 1);
+                                             ATOMnilptr(bi.type), 1, 1);
                                if (r == 0) {
-                                       b->tnonil = true;
+                                       bi.nonil = true;
                                        b->batDirtydesc = true;
                                }
                        } else {
@@ -3805,30 +3805,28 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
                        }
                }
                if (is_oid_nil(pos)) {
-                       res = ATOMnilptr(b->ttype);
+                       res = ATOMnilptr(bi.type);
                } else {
                        bi.minpos = pos - b->hseqbase;
                        res = BUNtail(bi, bi.minpos);
                        MT_lock_set(&b->theaplock);
                        if (bi.count == BATcount(b) && bi.h == b->theap)
                                b->tminpos = bi.minpos;
-                       MT_lock_unset(&b->theaplock);
                        bat pbid = VIEWtparent(b);
                        if (pbid) {
                                BAT *pb = BBP_cache(pbid);
-                               MT_lock_set(&pb->theaplock);
                                if (bi.count == BATcount(pb) &&
                                    bi.h == pb->theap)
                                        pb->tminpos = bi.minpos;
-                               MT_lock_unset(&pb->theaplock);
                        }
+                       MT_lock_unset(&b->theaplock);
                }
        }
        if (aggr == NULL) {
-               s = ATOMlen(b->ttype, res);
+               s = ATOMlen(bi.type, res);
                aggr = GDKmalloc(s);
        } else {
-               s = ATOMsize(ATOMtype(b->ttype));
+               s = ATOMsize(ATOMtype(bi.type));
        }
        if (aggr != NULL)       /* else: malloc error */
                memcpy(aggr, res, s);
@@ -3868,7 +3866,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
        }
        bi = bat_iterator(b);
        if (bi.count == 0) {
-               res = ATOMnilptr(b->ttype);
+               res = ATOMnilptr(bi.type);
        } else if (bi.maxpos != BUN_NONE) {
                res = BUNtail(bi, bi.maxpos);
        } else {
@@ -3890,7 +3888,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
                        /* no lock on b needed since it's a view */
                        MT_lock_set(&pb->batIdxLock);
                        MT_lock_set(&pb->theaplock);
-                       if (pb->tbaseoff == b->tbaseoff &&
+                       if (pb->tbaseoff == bi.baseoff &&
                            BATcount(pb) == bi.count &&
                            pb->hseqbase == b->hseqbase &&
                            (oidxh = pb->torderidx) != NULL) {
@@ -3910,7 +3908,7 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
 
                                res = BUNtail(bi, z - b->hseqbase);
 
-                               if (ATOMcmp(b->ttype, res, ATOMnilptr(b->ttype)) == 0)
+                               if (ATOMcmp(bi.type, res, ATOMnilptr(bi.type)) == 0)
                                        pos = z;
                        }
                        HEAPdecref(oidxh, false);
@@ -3959,30 +3957,28 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
                        }
                }
                if (is_oid_nil(pos)) {
-                       res = ATOMnilptr(b->ttype);
+                       res = ATOMnilptr(bi.type);
                } else {
                        bi.maxpos = pos - b->hseqbase;
                        res = BUNtail(bi, bi.maxpos);
                        MT_lock_set(&b->theaplock);
                        if (bi.count == BATcount(b) && bi.h == b->theap)
                                b->tmaxpos = bi.maxpos;
-                       MT_lock_unset(&b->theaplock);
                        bat pbid = VIEWtparent(b);
                        if (pbid) {
                                BAT *pb = BBP_cache(pbid);
-                               MT_lock_set(&pb->theaplock);
                                if (bi.count == BATcount(pb) &&
                                    bi.h == pb->theap)
                                        pb->tmaxpos = bi.maxpos;
-                               MT_lock_unset(&pb->theaplock);
                        }
+                       MT_lock_unset(&b->theaplock);
                }
        }
        if (aggr == NULL) {
-               s = ATOMlen(b->ttype, res);
+               s = ATOMlen(bi.type, res);
                aggr = GDKmalloc(s);
        } else {
-               s = ATOMsize(ATOMtype(b->ttype));
+               s = ATOMsize(ATOMtype(bi.type));
        }
        if (aggr != NULL)       /* else: malloc error */
                memcpy(aggr, res, s);
@@ -4163,7 +4159,7 @@ doBATgroupquantile(BAT *b, BAT *g, BAT *
                        /* search for end of current group (grps is
                         * sorted so we can use binary search) */
                        p = binsearch_oid(NULL, 0, grps, r, q - 1, prev, 1, 1);
-                       if (skip_nils && !b->tnonil) {
+                       if (skip_nils && !bi.nonil) {
                                /* within group, locate start of non-nils */
                                r = binsearch(NULL, 0, tp, bi.base,
                                              bi.vh ? bi.vh->base : NULL,
@@ -4211,7 +4207,7 @@ doBATgroupquantile(BAT *b, BAT *g, BAT *
                                /* be a little paranoid about the index */
                                assert(qindex >= r && qindex <  p);
                                v = BUNtail(bi, qindex);
-                               if (!skip_nils && !b->tnonil)
+                               if (!skip_nils && !bi.nonil)
                                        nils += (*atomcmp)(v, dnil) == 0;
                        }
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to