Changeset: 0b3db1241854 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/0b3db1241854 Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_batop.c gdk/gdk_bbp.c gdk/gdk_join.c gdk/gdk_select.c gdk/gdk_unique.c monetdb5/mal/mal_profiler.c monetdb5/mal/mal_resource.c monetdb5/mal/mal_runtime.c sql/backends/monet5/sql.c Branch: default Log Message:
Merge with Jul2021 branch. diffs (truncated from 843 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -156,7 +156,6 @@ BAT *BATintersectcand(BAT *a, BAT *b); BAT *BATintersectcand(BAT *a, BAT *b); gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATkey(BAT *b, bool onoff); -bool BATkeyed(BAT *b); gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected); void *BATmax(BAT *b, void *aggr); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -1380,7 +1380,6 @@ gdk_export gdk_return BATprint(stream *s * ordered. The result is returned and stored in the tsorted field of * the BAT. */ -gdk_export bool BATkeyed(BAT *b); gdk_export bool BATordered(BAT *b); gdk_export bool BATordered_rev(BAT *b); gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable) diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -385,9 +385,11 @@ insert_string_bat(BAT *b, BAT *n, struct assert(b->batCapacity >= b->batCount); b->theap->dirty = true; /* maintain hash */ + MT_rwlock_wrlock(&b->thashlock); for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) { - HASHappend(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth)); + HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth)); } + MT_rwlock_wrunlock(&b->thashlock); return GDK_SUCCEED; } @@ -494,6 +496,7 @@ append_varsized_bat(BAT *b, BAT *n, stru } /* copy data from n to b */ r = BUNlast(b); + MT_rwlock_wrlock(&b->thashlock); while (cnt > 0) { cnt--; BUN p = canditer_next(ci) - hseq; @@ -503,9 +506,10 @@ append_varsized_bat(BAT *b, BAT *n, stru return GDK_FAIL; } if (b->thash) - HASHappend(b, r, t); + HASHappend_locked(b, r, t); r++; } + MT_rwlock_wrunlock(&b->thashlock); BATsetcount(b, r); bat_iterator_end(&ni); b->theap->dirty = true; @@ -1843,157 +1847,6 @@ BATslice(BAT *b, BUN l, BUN h) return bn; } -/* Return whether the BAT has all unique values or not. It we don't - * know, invest in a proper check and record the results in the bat - * descriptor. */ -bool -BATkeyed(BAT *b) -{ - lng t0 = GDKusec(); - int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype); - BUN p, q, hb; - Hash *hs = NULL; - - if (b->ttype == TYPE_void) - return BATtdense(b) || BATcount(b) <= 1; - if (BATcount(b) <= 1) - return true; - if (ATOMstorage(b->ttype) == TYPE_msk) { - if (BATcount(b) > 2) - return false; - /* there are exactly two values */ - return mskGetVal(b, 0) != mskGetVal(b, 1); - } - if (b->twidth < SIZEOF_BUN && - BATcount(b) > (BUN) 1 << (8 << b->tshift)) { - /* more rows than possible bit combinations in the atom */ - assert(!b->tkey); - return false; - } - - b->batDirtydesc = true; - BATiter bi = bat_iterator(b); - if (!b->tkey && b->tnokey[0] == 0 && b->tnokey[1] == 0) { - if (b->tsorted || b->trevsorted) { - const void *prev = BUNtail(bi, 0); - const void *cur; - for (q = BUNlast(b), p = 1; p < q; p++) { - cur = BUNtail(bi, p); - if ((*cmpf)(prev, cur) == 0) { - b->tnokey[0] = p - 1; - b->tnokey[1] = p; - TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p - 1, p, ALGOBATPAR(b), GDKusec() - t0); - goto doreturn; - } - prev = cur; - } - /* we completed the scan: no duplicates */ - b->tkey = true; - } else if (BATcheckhash(b) || - (!b->batTransient && - BAThash(b) == GDK_SUCCEED) || - (/* DISABLES CODE */ (0) && - VIEWtparent(b) != 0 && - BATcheckhash(BBP_cache(VIEWtparent(b))))) { - /* we already have a hash table on b, or b is - * persistent and we could create a hash - * table, or b is a view on a bat that already - * has a hash table */ - BUN lo = 0; - - MT_rwlock_rdlock(&b->thashlock); - hs = b->thash; - if (hs == NULL && VIEWtparent(b) != 0) { - BAT *b2 = BBP_cache(VIEWtparent(b)); - lo = b->tbaseoff - b2->tbaseoff; - hs = b2->thash; - } - if (hs == NULL) { - /* between checking and locking, the - * hash was destroyed */ - MT_rwlock_rdunlock(&b->thashlock); - goto lost_hash; - } - for (q = BUNlast(b), p = 0; p < q; p++) { - const void *v = BUNtail(bi, p); - for (hb = HASHgetlink(hs, p + lo); - hb != BUN_NONE && hb >= lo; - hb = HASHgetlink(hs, hb)) { - assert(hb < p + lo); - if ((*cmpf)(v, BUNtail(bi, hb - lo)) == 0) { - b->tnokey[0] = hb - lo; - b->tnokey[1] = p; - TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", hb - lo, p, ALGOBATPAR(b), GDKusec() - t0); - MT_rwlock_rdunlock(&b->thashlock); - goto doreturn; - } - } - } - MT_rwlock_rdunlock(&b->thashlock); - /* we completed the scan: no duplicates */ - b->tkey = true; - } else { - const char *nme; - BUN prb; - BUN mask; - - lost_hash: - GDKclrerr(); /* not interested in BAThash errors */ - nme = BBP_physical(b->batCacheid); - if (ATOMbasetype(b->ttype) == TYPE_bte) { - mask = (BUN) 1 << 8; - cmpf = NULL; /* no compare needed, "hash" is perfect */ - } else if (ATOMbasetype(b->ttype) == TYPE_sht) { - mask = (BUN) 1 << 16; - cmpf = NULL; /* no compare needed, "hash" is perfect */ - } else { - mask = HASHmask(b->batCount); - if (mask < ((BUN) 1 << 16)) - mask = (BUN) 1 << 16; - } - if ((hs = GDKzalloc(sizeof(Hash))) == NULL) - goto doreturn; - if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshkeyl%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heaplink.filename) || - snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshkeyb%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heapbckt.filename) || - HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) { - GDKfree(hs); - /* err on the side of caution: not keyed */ - goto doreturn; - } - for (q = BUNlast(b), p = 0; p < q; p++) { - const void *v = BUNtail(bi, p); - prb = HASHprobe(hs, v); - for (hb = HASHget(hs, prb); - hb != BUN_NONE; - hb = HASHgetlink(hs, hb)) { - if (cmpf == NULL || - (*cmpf)(v, BUNtail(bi, hb)) == 0) { - b->tnokey[0] = hb; - b->tnokey[1] = p; - TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", hb, p, ALGOBATPAR(b), GDKusec() - t0); - goto doreturn_free; - } - } - /* enter into hash table */ - HASHputlink(hs, p, HASHget(hs, prb)); - HASHput(hs, prb, p); - } - doreturn_free: - HEAPfree(&hs->heaplink, true); - HEAPfree(&hs->heapbckt, true); - GDKfree(hs); - if (p == q) { - /* we completed the complete scan: no - * duplicates */ - b->tkey = true; - } - } - } - doreturn: - bat_iterator_end(&bi); - return b->tkey; -} - #define BAT_ORDERED(TPE) \ do { \ const TPE *restrict vals = Tloc(b, 0); \ diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -1972,19 +1972,22 @@ BBPdump(void) } } } - if (b->thash && b->thash != (Hash *) 1) { - size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt); - size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt); - fprintf(stderr, " Thash=[%zu,%zu,f=%d/%d]", m, v, - b->thash->heaplink.farmid, - b->thash->heapbckt.farmid); - if (BBP_logical(i) && BBP_logical(i)[0] == '.') { - cmem += m; - cvm += v; - } else { - mem += m; - vm += v; + if (MT_rwlock_rdtry(&b->thashlock)) { + if (b->thash && b->thash != (Hash *) 1) { + size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt); + size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt); + fprintf(stderr, " Thash=[%zu,%zu,f=%d/%d]", m, v, + b->thash->heaplink.farmid, + b->thash->heapbckt.farmid); + if (BBP_logical(i) && BBP_logical(i)[0] == '.') { + cmem += m; + cvm += v; + } else { + mem += m; + vm += v; + } } + MT_rwlock_rdunlock(&b->thashlock); } fprintf(stderr, " role: %s\n", b->batRole == PERSISTENT ? "persistent" : "transient"); @@ -2890,9 +2893,11 @@ BBPsave(BAT *b) if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirtydata(b)) { /* do nothing */ + MT_rwlock_rdlock(&b->thashlock); if (b->thash && b->thash != (Hash *) 1 && (b->thash->heaplink.dirty || b->thash->heapbckt.dirty)) BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0); + MT_rwlock_rdunlock(&b->thashlock); return GDK_SUCCEED; } if (lock) diff --git a/gdk/gdk_delta.c b/gdk/gdk_delta.c --- a/gdk/gdk_delta.c +++ b/gdk/gdk_delta.c @@ -93,8 +93,7 @@ BATundo(BAT *b) gdk_return (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix; void (*tatmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel; - if (b->thash) - HASHdestroy(b); + HASHdestroy(b); if (tunfix || tatmdel) { for (p = bunfirst; p <= bunlast; p++, i++) { if (tunfix) diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -2520,11 +2520,6 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, TYPE *rvals = ri.base; \ TYPE *lvals = li.base; \ TYPE v; \ - if (!hash_cand) { \ - MT_rwlock_rdlock(&r->thashlock); \ - locked = true; /* in case we abandon */ \ - hsh = r->thash; /* re-initialize inside lock */ \ - } \ while (lci->next < lci->ncand) { \ GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \ lo = canditer_next(lci); \ @@ -2626,10 +2621,6 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, if (nr > 0 && BATcount(r1) > nr) \ r1->trevsorted = false; \ } \ - if (!hash_cand) { \ - locked = false; \ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list