Changeset: 8811b0f7e1c7 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8811b0f7e1c7
Modified Files:
	gdk/gdk_bbp.c
	gdk/gdk_join.c
	gdk/gdk_private.h
	monetdb5/modules/mal/tablet.c
	sql/storage/objectset.c
	sql/storage/store.c
Branch: default
Log Message:
Merge with Jun2023 branch. diffs (truncated from 417 to 300 lines): diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -3844,10 +3844,10 @@ BBPsync(int cnt, bat *restrict subcommit assert(sizes == NULL || bi.width == 0 || (bi.type == TYPE_msk ? ((size + 31) / 32) * 4 : size << bi.shift) <= bi.hfree); if (size > bi.count) /* includes sizes==NULL */ size = bi.count; + MT_lock_set(&bi.b->theaplock); bi.b->batInserted = size; if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) { /* see epilogue() for other part of this */ - MT_lock_set(&bi.b->theaplock); /* remember the tail we're saving */ if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) { GDKerror("setprop failed\n"); @@ -3857,8 +3857,8 @@ BBPsync(int cnt, bat *restrict subcommit bi.b->oldtail = (Heap *) 1; HEAPincref(bi.h); } - MT_lock_unset(&bi.b->theaplock); } + MT_lock_unset(&bi.b->theaplock); if (ret == GDK_SUCCEED && b && size != 0) { /* wait for BBPSAVING so that we * can set it, wait for diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c --- a/gdk/gdk_cand.c +++ b/gdk/gdk_cand.c @@ -405,6 +405,14 @@ void canditer_init(struct canditer *ci, BAT *b, BAT *s) { assert(ci != NULL); + BUN batcount = 0; + oid hseq = 0; + if (b) { + MT_lock_set(&b->theaplock); + batcount = BATcount(b); + hseq = b->hseqbase; + MT_lock_unset(&b->theaplock); + } if (s == NULL) { if (b == NULL) { @@ -417,9 +425,9 @@ canditer_init(struct canditer *ci, BAT * /* every row is a candidate */ *ci = (struct canditer) { .tpe = cand_dense, - .seq = b->hseqbase, - .hseq = b->hseqbase, - .ncand = BATcount(b), + .seq = hseq, + .hseq = hseq, + .ncand = batcount, }; return; } @@ -432,7 +440,7 @@ canditer_init(struct canditer *ci, BAT * BUN cnt = BATcount(s); - if (cnt == 0 || (b != NULL && BATcount(b) == 0)) { + if (cnt == 0 || (b != NULL && batcount == 0)) { /* candidate list for empty BAT or empty candidate list */ *ci = (struct canditer) { .tpe = cand_dense, @@ -488,7 +496,7 @@ canditer_init(struct 
canditer *ci, BAT * switch (ci->tpe) { case cand_materialized: if (b != NULL) { - BUN p = binsearchcand(ci->oids, cnt - 1U, b->hseqbase); + BUN p = binsearchcand(ci->oids, cnt - 1U, hseq); /* p == cnt means candidate list is completely * before b */ ci->offset = p; @@ -496,7 +504,7 @@ canditer_init(struct canditer *ci, BAT * cnt -= p; if (cnt > 0) { cnt = binsearchcand(ci->oids, cnt - 1U, - b->hseqbase + BATcount(b)); + hseq + batcount); /* cnt == 0 means candidate list is * completely after b */ } @@ -532,8 +540,8 @@ canditer_init(struct canditer *ci, BAT * ci->oids[ci->nvals - 1U] == ci->seq + cnt + ci->nvals - 1U) ci->nvals--; if (b != NULL) { - if (ci->seq + cnt + ci->nvals <= b->hseqbase || - ci->seq >= b->hseqbase + BATcount(b)) { + if (ci->seq + cnt + ci->nvals <= hseq || + ci->seq >= hseq + batcount) { /* candidate list does not overlap with b */ *ci = (struct canditer) { .tpe = cand_dense, @@ -546,33 +554,33 @@ canditer_init(struct canditer *ci, BAT * if (b == NULL) break; BUN p; - p = binsearchcand(ci->oids, ci->nvals - 1U, b->hseqbase); + p = binsearchcand(ci->oids, ci->nvals - 1U, hseq); if (p == ci->nvals) { /* all exceptions before start of b */ - ci->offset = b->hseqbase - ci->seq - ci->nvals; - cnt = ci->seq + cnt + ci->nvals - b->hseqbase; - ci->seq = b->hseqbase; + ci->offset = hseq - ci->seq - ci->nvals; + cnt = ci->seq + cnt + ci->nvals - hseq; + ci->seq = hseq; ci->nvals = 0; ci->tpe = cand_dense; ci->oids = NULL; break; } - assert(b->hseqbase > ci->seq || p == 0); - if (b->hseqbase > ci->seq) { + assert(hseq > ci->seq || p == 0); + if (hseq > ci->seq) { /* skip candidates, possibly including * exceptions */ ci->oids += p; ci->nvals -= p; - p = b->hseqbase - ci->seq - p; + p = hseq - ci->seq - p; cnt -= p; ci->offset += p; - ci->seq = b->hseqbase; + ci->seq = hseq; } - if (ci->seq + cnt + ci->nvals > b->hseqbase + BATcount(b)) { + if (ci->seq + cnt + ci->nvals > hseq + batcount) { p = binsearchcand(ci->oids, ci->nvals - 1U, - b->hseqbase + 
BATcount(b)); + hseq + batcount); ci->nvals = p; - cnt = b->hseqbase + BATcount(b) - ci->seq - ci->nvals; + cnt = hseq + batcount - ci->seq - ci->nvals; } while (ci->nvals > 0 && ci->oids[0] == ci->seq) { ci->nvals--; @@ -590,8 +598,8 @@ canditer_init(struct canditer *ci, BAT * /* fall through */ case cand_dense: if (b != NULL) { - if (ci->seq + cnt <= b->hseqbase || - ci->seq >= b->hseqbase + BATcount(b)) { + if (ci->seq + cnt <= hseq || + ci->seq >= hseq + batcount) { /* no overlap */ *ci = (struct canditer) { .tpe = cand_dense, @@ -599,20 +607,20 @@ canditer_init(struct canditer *ci, BAT * }; return; } - if (b->hseqbase > ci->seq) { - cnt -= b->hseqbase - ci->seq; - ci->offset += b->hseqbase - ci->seq; - ci->seq = b->hseqbase; + if (hseq > ci->seq) { + cnt -= hseq - ci->seq; + ci->offset += hseq - ci->seq; + ci->seq = hseq; } - if (ci->seq + cnt > b->hseqbase + BATcount(b)) - cnt = b->hseqbase + BATcount(b) - ci->seq; + if (ci->seq + cnt > hseq + batcount) + cnt = hseq + batcount - ci->seq; } break; case cand_mask: assert(s->tseqbase != oid_nil); if (b != NULL) { - if (ci->seq + cnt <= b->hseqbase || - ci->seq >= b->hseqbase + BATcount(b)) { + if (ci->seq + cnt <= hseq || + ci->seq >= hseq + batcount) { /* no overlap */ *ci = (struct canditer) { .tpe = cand_dense, @@ -620,15 +628,15 @@ canditer_init(struct canditer *ci, BAT * }; return; } - if (b->hseqbase > ci->seq) { - cnt = b->hseqbase - ci->seq; + if (hseq > ci->seq) { + cnt = hseq - ci->seq; ci->mask += cnt / 32U; ci->firstbit = (uint8_t) (cnt % 32U); cnt = BATcount(s) - cnt; - ci->seq = b->hseqbase; + ci->seq = hseq; } - if (ci->seq + cnt > b->hseqbase + BATcount(b)) { - cnt = b->hseqbase + BATcount(b) - ci->seq; + if (ci->seq + cnt > hseq + batcount) { + cnt = hseq + batcount - ci->seq; } ci->nvals = (ci->firstbit + cnt + 31U) / 32U; } diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3288,22 +3288,26 @@ guess_uniques(BAT *b, struct canditer *c BUN cnt1, cnt2; BAT 
*s1; - if (b->tkey) + MT_lock_set(&b->theaplock); + bool key = b->tkey; + double unique_est = b->tunique_est; + BUN batcount = BATcount(b); + MT_lock_unset(&b->theaplock); + if (key) return (double) ci->ncand; if (ci->s == NULL || - (ci->tpe == cand_dense && ci->ncand == BATcount(b))) { - MT_lock_set(&b->theaplock); - double unique_est = b->tunique_est; - MT_lock_unset(&b->theaplock); + (ci->tpe == cand_dense && ci->ncand == batcount)) { if (unique_est != 0) { TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n", ALGOBATPAR(b)); return unique_est; } - s1 = BATsample_with_seed(b, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid); + s1 = BATcreatesample(b->hseqbase, batcount, 1000, + (uint64_t) GDKusec() * (uint64_t) b->batCacheid); } else { - BAT *s2 = BATsample_with_seed(ci->s, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid); + BAT *s2 = BATcreatesample(ci->s->hseqbase, ci->ncand, 1000, + (uint64_t) GDKusec() * (uint64_t) b->batCacheid); if (s2 == NULL) return -1; s1 = BATproject(s2, ci->s); @@ -3323,13 +3327,13 @@ guess_uniques(BAT *b, struct canditer *c double B = cnt1 - n1 * A; B += A * ci->ncand; + MT_lock_set(&b->theaplock); if (ci->s == NULL || - (ci->tpe == cand_dense && ci->ncand == BATcount(b))) { - MT_lock_set(&b->theaplock); + (ci->tpe == cand_dense && ci->ncand == BATcount(b) && ci->ncand == batcount)) { if (b->tunique_est == 0) b->tunique_est = B; - MT_lock_unset(&b->theaplock); } + MT_lock_unset(&b->theaplock); return B; } diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -58,6 +58,8 @@ gdk_return BATcheckmodes(BAT *b, bool pe __attribute__((__visibility__("hidden"))); BAT *BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width) __attribute__((__visibility__("hidden"))); +BAT *BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed) + __attribute__((__visibility__("hidden"))); void BATdelete(BAT *b) __attribute__((__visibility__("hidden"))); void BATdestroy(BAT *b) 
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -98,26 +98,24 @@ OIDTreeToBATAntiset(struct oidtreenode * } static BAT * -do_batsample(BAT *b, BUN n, random_state_engine rse, MT_Lock *lock) +do_batsample(oid hseq, BUN cnt, BUN n, random_state_engine rse, MT_Lock *lock) { BAT *bn; - BUN cnt, slen; + BUN slen; BUN rescnt; struct oidtreenode *tree = NULL; - BATcheck(b, NULL); ERRORcheck(n > BUN_MAX, "sample size larger than BUN_MAX\n", NULL); - cnt = BATcount(b); /* empty sample size */ if (n == 0) { bn = BATdense(0, 0, 0); } else if (cnt <= n) { /* sample size is larger than the input BAT, return * all oids */ - bn = BATdense(0, b->hseqbase, cnt); + bn = BATdense(0, hseq, cnt); } else { - oid minoid = b->hseqbase; - oid maxoid = b->hseqbase + cnt; + oid minoid = hseq; + oid maxoid = hseq + cnt; /* if someone samples more than half of our tree, we _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org