Changeset: b49728c3b1f1 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b49728c3b1f1
Modified Files:
        sql/include/sql_catalog.h
        sql/storage/store.c
Branch: iso
Log Message:

Merged with Jul2021


diffs (truncated from 1627 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -539,8 +539,8 @@ gdk_return log_bat_transient(logger *lg,
 gdk_return log_constant(logger *lg, int type, ptr val, log_id id, lng offset, lng cnt);
 gdk_return log_delta(logger *lg, BAT *uid, BAT *uval, log_id id);
 gdk_return log_sequence(logger *lg, int seq, lng id);
-gdk_return log_tend(logger *lg);
-gdk_return log_tstart(logger *lg, ulng commit_ts, bool flush);
+gdk_return log_tend(logger *lg, ulng commit_ts);
+gdk_return log_tstart(logger *lg, bool flush);
 gdk_return logger_activate(logger *lg);
 lng logger_changes(logger *lg);
 logger *logger_create(int debug, const char *fn, const char *logdir, int version, preversionfix_fptr prefuncp, postversionfix_fptr postfuncp, void *funcdata);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -809,12 +809,12 @@ COLcopy(BAT *b, int tt, bool writable, r
 
        /* first try case (1); create a view, possibly with different
         * atom-types */
-       if (role == b->batRole &&
+       if (!writable &&
+           role == b->batRole &&
            b->batRestricted == BAT_READ &&
            ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
            (!VIEWtparent(b) ||
-            BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ) &&
-           !writable) {
+            BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) {
                bn = VIEWcreate(b->hseqbase, b);
                if (bn == NULL)
                        return NULL;
@@ -875,8 +875,8 @@ COLcopy(BAT *b, int tt, bool writable, r
                        strconcat_len(thp.filename, sizeof(thp.filename),
                                      BBP_physical(bn->batCacheid),
                                      ".theap", NULL);
-                       if ((b->ttype && HEAPcopy(&bthp, b->theap) != GDK_SUCCEED) ||
-                           (bn->tvheap && HEAPcopy(&thp, b->tvheap) != GDK_SUCCEED)) {
+                       if ((b->ttype && HEAPcopy(&bthp, b->theap, b->tbaseoff << b->tshift) != GDK_SUCCEED) ||
+                           (bn->tvheap && HEAPcopy(&thp, b->tvheap, 0) != GDK_SUCCEED)) {
                                HEAPfree(&thp, true);
                                HEAPfree(&bthp, true);
                                BBPreclaim(bn);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -32,7 +32,7 @@ unshare_varsized_heap(BAT *b)
                h->farmid = BBPselectfarm(b->batRole, TYPE_str, varheap);
                strconcat_len(h->filename, sizeof(h->filename),
                              BBP_physical(b->batCacheid), ".theap", NULL);
-               if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
+               if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
                        HEAPfree(h, true);
                        GDKfree(h);
                        return GDK_FAIL;
@@ -496,7 +496,7 @@ append_varsized_bat(BAT *b, BAT *n, stru
                h->farmid = BBPselectfarm(b->batRole, b->ttype, varheap);
                strconcat_len(h->filename, sizeof(h->filename),
                              BBP_physical(b->batCacheid), ".theap", NULL);
-               if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
+               if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
                        HEAPfree(h, true);
                        GDKfree(h);
                        return GDK_FAIL;
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -437,41 +437,43 @@ rev(oid x)
        return x;
 }
 
-/* population count: count number of 1 bits in a value */
-static inline int
-pop(oid x)
+/* count trailing zeros, also see candmask_lobit in gdk_cand.h */
+static inline int __attribute__((__const__))
+ctz(oid x)
 {
-#ifdef __GNUC__
+#if defined(__GNUC__)
 #if SIZEOF_OID == SIZEOF_INT
-       return __builtin_popcount(x);
+       return __builtin_ctz(x);
 #else
-       return __builtin_popcountl(x);
+       return __builtin_ctzl(x);
 #endif
-#else
-#ifdef _MSC_VER
+#elif defined(_MSC_VER)
 #if SIZEOF_OID == SIZEOF_INT
-       return (int) __popcnt((unsigned int) (x));
-#else
-       return (int) __popcnt64((unsigned __int64) (x));
-#endif
+       unsigned long idx;
+       if (_BitScanForward(&idx, (unsigned long) x))
+               return (int) idx;
 #else
-       /* divide and conquer implementation */
-#if SIZEOF_OID == 8
-       x = (x & 0x5555555555555555) + ((x >>  1) & 0x5555555555555555);
-       x = (x & 0x3333333333333333) + ((x >>  2) & 0x3333333333333333);
-       x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >>  4) & 0x0F0F0F0F0F0F0F0F);
-       x = (x & 0x00FF00FF00FF00FF) + ((x >>  8) & 0x00FF00FF00FF00FF);
-       x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
-       x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
+       unsigned long idx;
+       if (_BitScanForward64(&idx, (unsigned __int64) x))
+               return (int) idx;
+#endif
+       return -1;
 #else
-       x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
-       x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
-       x = (x & 0x0F0F0F0F) + ((x >>  4) & 0x0F0F0F0F);
-       x = (x & 0x00FF00FF) + ((x >>  8) & 0x00FF00FF);
-       x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+       /* use binary search for the lowest set bit */
+       int n = 1;
+#if SIZEOF_OID == SIZEOF_INT
+       if ((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; }
+       if ((x & 0x000000FF) == 0) { n +=  8; x >>=  8; }
+       if ((x & 0x0000000F) == 0) { n +=  4; x >>=  4; }
+       if ((x & 0x00000003) == 0) { n +=  2; x >>=  2; }
+#else
+       if ((x & UINT64_C(0x00000000FFFFFFFF)) == 0) { n += 32; x >>= 32; }
+       if ((x & UINT64_C(0x000000000000FFFF)) == 0) { n += 16; x >>= 16; }
+       if ((x & UINT64_C(0x00000000000000FF)) == 0) { n +=  8; x >>=  8; }
+       if ((x & UINT64_C(0x000000000000000F)) == 0) { n +=  4; x >>=  4; }
+       if ((x & UINT64_C(0x0000000000000003)) == 0) { n +=  2; x >>=  2; }
 #endif
-       return (int) x;
-#endif
+       return n - (x & 1);
 #endif
 }
 
@@ -1091,9 +1093,9 @@ BATgroup_internal(BAT **groups, BAT **ex
                        nbucket |= nbucket >> 32;
 #endif
                        nbucket++;
-                       /* nbucket is a power of two, so pop(nbucket - 1)
+                       /* nbucket is a power of two, so ctz(nbucket)
                         * tells us which power of two */
-                       bits = 8 * SIZEOF_OID - pop(nbucket - 1);
+                       bits = 8 * SIZEOF_OID - ctz(nbucket);
                } else {
                        nbucket = MAX(HASHmask(cnt), 1 << 16);
                }
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -651,11 +651,13 @@ GDKupgradevarheap(BAT *b, var_t v, BUN c
  * dst->filename (or NULL), which might be used in HEAPalloc().
  */
 gdk_return
-HEAPcopy(Heap *dst, Heap *src)
+HEAPcopy(Heap *dst, Heap *src, size_t offset)
 {
-       if (HEAPalloc(dst, src->size, 1, 1) == GDK_SUCCEED) {
-               dst->free = src->free;
-               memcpy(dst->base, src->base, src->free);
+       if (offset > src->free)
+               offset = src->free;
+       if (HEAPalloc(dst, src->free - offset, 1, 1) == GDK_SUCCEED) {
+               dst->free = src->free - offset;
+               memcpy(dst->base, src->base + offset, src->free - offset);
                dst->hashash = src->hashash;
                dst->cleanhash = src->cleanhash;
                dst->dirty = true;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2570,11 +2570,10 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
        Hash *restrict hsh = NULL;
        bool locked = false;
 
-       assert(!BATtvoid(r));
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
 
        int t = ATOMbasetype(r->ttype);
-       if (r->ttype == TYPE_void || l->ttype == TYPE_void)
+       if (BATtvoid(r) || BATtvoid(l))
                t = TYPE_void;
 
        lwidth = l->twidth;
@@ -2644,6 +2643,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                          "existing hash%s\n",
                          ALGOBATPAR(r),
                          swapped ? " (swapped)" : "");
+       } else if (BATtdense(r)) {
+               /* no hash, just dense lookup */
+               MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense");
        } else {
                /* we need to create a hash on r */
                MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
@@ -2654,7 +2656,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                        goto bailout;
                hsh = r->thash;
        }
-       assert(hsh != NULL);
+       assert(hsh != NULL || BATtdense(r));
 
        ri = bat_iterator(r);
 
@@ -2674,7 +2676,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                                       false, false, __func__, t0);
                                }
                        }
-               } else {
+               } else if (!BATtdense(r)) {
                        for (rb = HASHget(hsh, HASHprobe(hsh, nil));
                             rb != HASHnil(hsh);
                             rb = HASHgetlink(hsh, rb)) {
@@ -2715,19 +2717,17 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                HASHJOIN(uuid);
                break;
        default:
-               if (!hash_cand) {
+               if (!hash_cand && hsh) {
                        MT_rwlock_rdlock(&r->thashlock);
                        locked = true;  /* in case we abandon */
                        hsh = r->thash; /* re-initialize inside lock */
                }
                while (lci->next < lci->ncand) {
                        lo = canditer_next(lci);
-                       if (BATtvoid(l)) {
-                               if (BATtdense(l))
-                                       lval = lo - l->hseqbase + l->tseqbase;
-                       } else {
+                       if (BATtdense(l))
+                               lval = lo - l->hseqbase + l->tseqbase;
+                       else if (l->ttype != TYPE_void)
                                v = VALUE(l, lo - l->hseqbase);
-                       }
                        nr = 0;
                        if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
                                /* no match */
@@ -2750,6 +2750,23 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                        if (semi && !max_one)
                                                break;
                                }
+                       } else if (hsh == NULL) {
+                               assert(BATtdense(r));
+                               ro = *(const oid *) v;
+                               if (ro >= r->tseqbase &&
+                                   ro < r->tseqbase + r->batCount) {
+                                       ro -= r->tseqbase;
+                                       ro += rseq;
+                                       if (canditer_contains(rci, ro)) {
+                                               if (only_misses) {
+                                                       nr++;
+                                                       break;
+                                               }
+                                               HASHLOOPBODY();
+                                               if (semi && !max_one)
+                                                       break;
+                                       }
+                               }
                        } else if (rci->tpe != cand_dense) {
                                for (rb = HASHget(hsh, HASHprobe(hsh, v));
                                     rb != HASHnil(hsh);
@@ -2824,7 +2841,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                        if (nr > 0 && BATcount(r1) > nr)
                                r1->trevsorted = false;
                }
-               if (!hash_cand) {
+               if (!hash_cand && hsh) {
                        locked = false;
                        MT_rwlock_rdunlock(&r->thashlock);
                }
@@ -2897,35 +2914,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
        return GDK_FAIL;
 }
 
-/* population count: count number of 1 bits in a value */
-static inline uint32_t __attribute__((__const__))
-pop(uint32_t x)
-{
-#if defined(__GNUC__)
-       return (uint32_t) __builtin_popcount(x);
-#elif defined(_MSC_VER)
-       return (uint32_t) __popcnt((unsigned int) (x));
-#else
-       /* divide and conquer implementation (the two versions are
-        * essentially equivalent, but the first version is written a
-        * bit smarter) */
-#if 1
-       x -= (x >> 1) & ~0U/3 /* 0x55555555 */; /* 3-1=2; 2-1=1; 1-0=1; 0-0=0 */
-       x = (x & ~0U/5) + ((x >> 2) & ~0U/5) /* 0x33333333 */;
-       x = (x + (x >> 4)) & ~0UL/0x11 /* 0x0F0F0F0F */;
-       x = (x + (x >> 8)) & ~0UL/0x101 /* 0x00FF00FF */;
-       x = (x + (x >> 16)) & 0xFFFF /* ~0UL/0x10001 */;
-#else
-       x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
-       x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
-       x = (x & 0x0F0F0F0F) + ((x >>  4) & 0x0F0F0F0F);
-       x = (x & 0x00FF00FF) + ((x >>  8) & 0x00FF00FF);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to