Changeset: ed13daf4af23 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ed13daf4af23
Modified Files:
	sql/backends/monet5/rel_bin.c
	sql/include/sql_catalog.h
	sql/storage/store.c
Branch: iso
Log Message:
merged with jul2021 diffs (truncated from 3335 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -539,8 +539,8 @@ gdk_return log_bat_transient(logger *lg, gdk_return log_constant(logger *lg, int type, ptr val, log_id id, lng offset, lng cnt); gdk_return log_delta(logger *lg, BAT *uid, BAT *uval, log_id id); gdk_return log_sequence(logger *lg, int seq, lng id); -gdk_return log_tend(logger *lg); -gdk_return log_tstart(logger *lg, ulng commit_ts, bool flush); +gdk_return log_tend(logger *lg, ulng commit_ts); +gdk_return log_tstart(logger *lg, bool flush); gdk_return logger_activate(logger *lg); lng logger_changes(logger *lg); logger *logger_create(int debug, const char *fn, const char *logdir, int version, preversionfix_fptr prefuncp, postversionfix_fptr postfuncp, void *funcdata); diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -809,12 +809,12 @@ COLcopy(BAT *b, int tt, bool writable, r /* first try case (1); create a view, possibly with different * atom-types */ - if (role == b->batRole && + if (!writable && + role == b->batRole && b->batRestricted == BAT_READ && ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */ (!VIEWtparent(b) || - BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ) && - !writable) { + BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) { bn = VIEWcreate(b->hseqbase, b); if (bn == NULL) return NULL; @@ -875,8 +875,8 @@ COLcopy(BAT *b, int tt, bool writable, r strconcat_len(thp.filename, sizeof(thp.filename), BBP_physical(bn->batCacheid), ".theap", NULL); - if ((b->ttype && HEAPcopy(&bthp, b->theap) != GDK_SUCCEED) || - (bn->tvheap && HEAPcopy(&thp, b->tvheap) != GDK_SUCCEED)) { + if ((b->ttype && HEAPcopy(&bthp, b->theap, b->tbaseoff << b->tshift) != GDK_SUCCEED) || + (bn->tvheap && HEAPcopy(&thp, b->tvheap, 0) != GDK_SUCCEED)) { HEAPfree(&thp, true); HEAPfree(&bthp, 
true); BBPreclaim(bn); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -32,7 +32,7 @@ unshare_varsized_heap(BAT *b) h->farmid = BBPselectfarm(b->batRole, TYPE_str, varheap); strconcat_len(h->filename, sizeof(h->filename), BBP_physical(b->batCacheid), ".theap", NULL); - if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) { + if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) { HEAPfree(h, true); GDKfree(h); return GDK_FAIL; @@ -496,7 +496,7 @@ append_varsized_bat(BAT *b, BAT *n, stru h->farmid = BBPselectfarm(b->batRole, b->ttype, varheap); strconcat_len(h->filename, sizeof(h->filename), BBP_physical(b->batCacheid), ".theap", NULL); - if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) { + if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) { HEAPfree(h, true); GDKfree(h); return GDK_FAIL; diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -437,41 +437,43 @@ rev(oid x) return x; } -/* population count: count number of 1 bits in a value */ -static inline int -pop(oid x) +/* count trailing zeros, also see candmask_lobit in gdk_cand.h */ +static inline int __attribute__((__const__)) +ctz(oid x) { -#ifdef __GNUC__ +#if defined(__GNUC__) #if SIZEOF_OID == SIZEOF_INT - return __builtin_popcount(x); + return __builtin_ctz(x); #else - return __builtin_popcountl(x); + return __builtin_ctzl(x); #endif -#else -#ifdef _MSC_VER +#elif defined(_MSC_VER) #if SIZEOF_OID == SIZEOF_INT - return (int) __popcnt((unsigned int) (x)); -#else - return (int) __popcnt64((unsigned __int64) (x)); -#endif + unsigned long idx; + if (_BitScanForward(&idx, (unsigned long) x)) + return (int) idx; #else - /* divide and conquer implementation */ -#if SIZEOF_OID == 8 - x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555); - x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); - x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F); - x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF); - x = (x & 
0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF); - x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF); + unsigned long idx; + if (_BitScanForward64(&idx, (unsigned __int64) x)) + return (int) idx; +#endif + return -1; #else - x = (x & 0x55555555) + ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); - x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); - x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF); + /* use binary search for the lowest set bit */ + int n = 1; +#if SIZEOF_OID == SIZEOF_INT + if ((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if ((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if ((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if ((x & 0x00000003) == 0) { n += 2; x >>= 2; } +#else + if ((x & UINT64_C(0x00000000FFFFFFFF)) == 0) { n += 32; x >>= 32; } + if ((x & UINT64_C(0x000000000000FFFF)) == 0) { n += 16; x >>= 16; } + if ((x & UINT64_C(0x00000000000000FF)) == 0) { n += 8; x >>= 8; } + if ((x & UINT64_C(0x000000000000000F)) == 0) { n += 4; x >>= 4; } + if ((x & UINT64_C(0x0000000000000003)) == 0) { n += 2; x >>= 2; } #endif - return (int) x; -#endif + return n - (x & 1); #endif } @@ -1091,9 +1093,9 @@ BATgroup_internal(BAT **groups, BAT **ex nbucket |= nbucket >> 32; #endif nbucket++; - /* nbucket is a power of two, so pop(nbucket - 1) + /* nbucket is a power of two, so ctz(nbucket) * tells us which power of two */ - bits = 8 * SIZEOF_OID - pop(nbucket - 1); + bits = 8 * SIZEOF_OID - ctz(nbucket); } else { nbucket = MAX(HASHmask(cnt), 1 << 16); } diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c --- a/gdk/gdk_heap.c +++ b/gdk/gdk_heap.c @@ -651,11 +651,13 @@ GDKupgradevarheap(BAT *b, var_t v, BUN c * dst->filename (or NULL), which might be used in HEAPalloc(). 
*/ gdk_return -HEAPcopy(Heap *dst, Heap *src) +HEAPcopy(Heap *dst, Heap *src, size_t offset) { - if (HEAPalloc(dst, src->size, 1, 1) == GDK_SUCCEED) { - dst->free = src->free; - memcpy(dst->base, src->base, src->free); + if (offset > src->free) + offset = src->free; + if (HEAPalloc(dst, src->free - offset, 1, 1) == GDK_SUCCEED) { + dst->free = src->free - offset; + memcpy(dst->base, src->base + offset, src->free - offset); dst->hashash = src->hashash; dst->cleanhash = src->cleanhash; dst->dirty = true; diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -2570,11 +2570,10 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B Hash *restrict hsh = NULL; bool locked = false; - assert(!BATtvoid(r)); assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); int t = ATOMbasetype(r->ttype); - if (r->ttype == TYPE_void || l->ttype == TYPE_void) + if (BATtvoid(r) || BATtvoid(l)) t = TYPE_void; lwidth = l->twidth; @@ -2644,6 +2643,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B "existing hash%s\n", ALGOBATPAR(r), swapped ? " (swapped)" : ""); + } else if (BATtdense(r)) { + /* no hash, just dense lookup */ + MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense"); } else { /* we need to create a hash on r */ MT_thread_setalgorithm(swapped ? 
"hashjoin using new hash (swapped)" : "hashjoin using new hash"); @@ -2654,7 +2656,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B goto bailout; hsh = r->thash; } - assert(hsh != NULL); + assert(hsh != NULL || BATtdense(r)); ri = bat_iterator(r); @@ -2674,7 +2676,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B false, false, __func__, t0); } } - } else { + } else if (!BATtdense(r)) { for (rb = HASHget(hsh, HASHprobe(hsh, nil)); rb != HASHnil(hsh); rb = HASHgetlink(hsh, rb)) { @@ -2715,19 +2717,17 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B HASHJOIN(uuid); break; default: - if (!hash_cand) { + if (!hash_cand && hsh) { MT_rwlock_rdlock(&r->thashlock); locked = true; /* in case we abandon */ hsh = r->thash; /* re-initialize inside lock */ } while (lci->next < lci->ncand) { lo = canditer_next(lci); - if (BATtvoid(l)) { - if (BATtdense(l)) - lval = lo - l->hseqbase + l->tseqbase; - } else { + if (BATtdense(l)) + lval = lo - l->hseqbase + l->tseqbase; + else if (l->ttype != TYPE_void) v = VALUE(l, lo - l->hseqbase); - } nr = 0; if ((!nil_matches || not_in) && cmp(v, nil) == 0) { /* no match */ @@ -2750,6 +2750,23 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B if (semi && !max_one) break; } + } else if (hsh == NULL) { + assert(BATtdense(r)); + ro = *(const oid *) v; + if (ro >= r->tseqbase && + ro < r->tseqbase + r->batCount) { + ro -= r->tseqbase; + ro += rseq; + if (canditer_contains(rci, ro)) { + if (only_misses) { + nr++; + break; + } + HASHLOOPBODY(); + if (semi && !max_one) + break; + } + } } else if (rci->tpe != cand_dense) { for (rb = HASHget(hsh, HASHprobe(hsh, v)); rb != HASHnil(hsh); @@ -2824,7 +2841,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B if (nr > 0 && BATcount(r1) > nr) r1->trevsorted = false; } - if (!hash_cand) { + if (!hash_cand && hsh) { locked = false; MT_rwlock_rdunlock(&r->thashlock); } @@ -2897,35 +2914,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B return GDK_FAIL; } -/* population count: count number of 1 bits in a value */ -static inline uint32_t 
__attribute__((__const__)) -pop(uint32_t x) -{ -#if defined(__GNUC__) - return (uint32_t) __builtin_popcount(x); -#elif defined(_MSC_VER) - return (uint32_t) __popcnt((unsigned int) (x)); -#else - /* divide and conquer implementation (the two versions are - * essentially equivalent, but the first version is written a - * bit smarter) */ -#if 1 - x -= (x >> 1) & ~0U/3 /* 0x55555555 */; /* 3-1=2; 2-1=1; 1-0=1; 0-0=0 */ - x = (x & ~0U/5) + ((x >> 2) & ~0U/5) /* 0x33333333 */; - x = (x + (x >> 4)) & ~0UL/0x11 /* 0x0F0F0F0F */; - x = (x + (x >> 8)) & ~0UL/0x101 /* 0x00FF00FF */; - x = (x + (x >> 16)) & 0xFFFF /* ~0UL/0x10001 */; -#else - x = (x & 0x55555555) + ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); - x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list