Changeset: 8811b0f7e1c7 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8811b0f7e1c7
Modified Files:
        gdk/gdk_bbp.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        monetdb5/modules/mal/tablet.c
        sql/storage/objectset.c
        sql/storage/store.c
Branch: default
Log Message:

Merge with Jun2023 branch.


diffs (truncated from 417 to 300 lines):

diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -3844,10 +3844,10 @@ BBPsync(int cnt, bat *restrict subcommit
                        assert(sizes == NULL || bi.width == 0 || (bi.type == TYPE_msk ? ((size + 31) / 32) * 4 : size << bi.shift) <= bi.hfree);
                        if (size > bi.count) /* includes sizes==NULL */
                                size = bi.count;
+                       MT_lock_set(&bi.b->theaplock);
                        bi.b->batInserted = size;
                        if (bi.b->ttype >= 0 && ATOMvarsized(bi.b->ttype)) {
                                /* see epilogue() for other part of this */
-                               MT_lock_set(&bi.b->theaplock);
                                /* remember the tail we're saving */
                                if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) {
                                        GDKerror("setprop failed\n");
@@ -3857,8 +3857,8 @@ BBPsync(int cnt, bat *restrict subcommit
                                                bi.b->oldtail = (Heap *) 1;
                                        HEAPincref(bi.h);
                                }
-                               MT_lock_unset(&bi.b->theaplock);
                        }
+                       MT_lock_unset(&bi.b->theaplock);
                        if (ret == GDK_SUCCEED && b && size != 0) {
                                /* wait for BBPSAVING so that we
                                 * can set it, wait for
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -405,6 +405,14 @@ void
 canditer_init(struct canditer *ci, BAT *b, BAT *s)
 {
        assert(ci != NULL);
+       BUN batcount = 0;
+       oid hseq = 0;
+       if (b) {
+               MT_lock_set(&b->theaplock);
+               batcount = BATcount(b);
+               hseq = b->hseqbase;
+               MT_lock_unset(&b->theaplock);
+       }
 
        if (s == NULL) {
                if (b == NULL) {
@@ -417,9 +425,9 @@ canditer_init(struct canditer *ci, BAT *
                /* every row is a candidate */
                *ci = (struct canditer) {
                        .tpe = cand_dense,
-                       .seq = b->hseqbase,
-                       .hseq = b->hseqbase,
-                       .ncand = BATcount(b),
+                       .seq = hseq,
+                       .hseq = hseq,
+                       .ncand = batcount,
                };
                return;
        }
@@ -432,7 +440,7 @@ canditer_init(struct canditer *ci, BAT *
 
        BUN cnt = BATcount(s);
 
-       if (cnt == 0 || (b != NULL && BATcount(b) == 0)) {
+       if (cnt == 0 || (b != NULL && batcount == 0)) {
                /* candidate list for empty BAT or empty candidate list */
                *ci = (struct canditer) {
                        .tpe = cand_dense,
@@ -488,7 +496,7 @@ canditer_init(struct canditer *ci, BAT *
        switch (ci->tpe) {
        case cand_materialized:
                if (b != NULL) {
-                       BUN p = binsearchcand(ci->oids, cnt - 1U, b->hseqbase);
+                       BUN p = binsearchcand(ci->oids, cnt - 1U, hseq);
                        /* p == cnt means candidate list is completely
                         * before b */
                        ci->offset = p;
@@ -496,7 +504,7 @@ canditer_init(struct canditer *ci, BAT *
                        cnt -= p;
                        if (cnt > 0) {
                                cnt = binsearchcand(ci->oids, cnt  - 1U,
-                                                   b->hseqbase + BATcount(b));
+                                                   hseq + batcount);
                                /* cnt == 0 means candidate list is
                                 * completely after b */
                        }
@@ -532,8 +540,8 @@ canditer_init(struct canditer *ci, BAT *
                       ci->oids[ci->nvals - 1U] == ci->seq + cnt + ci->nvals - 1U)
                        ci->nvals--;
                if (b != NULL) {
-                       if (ci->seq + cnt + ci->nvals <= b->hseqbase ||
-                           ci->seq >= b->hseqbase + BATcount(b)) {
+                       if (ci->seq + cnt + ci->nvals <= hseq ||
+                           ci->seq >= hseq + batcount) {
                                /* candidate list does not overlap with b */
                                *ci = (struct canditer) {
                                        .tpe = cand_dense,
@@ -546,33 +554,33 @@ canditer_init(struct canditer *ci, BAT *
                        if (b == NULL)
                                break;
                        BUN p;
-                       p = binsearchcand(ci->oids, ci->nvals - 1U, b->hseqbase);
+                       p = binsearchcand(ci->oids, ci->nvals - 1U, hseq);
                        if (p == ci->nvals) {
                                /* all exceptions before start of b */
-                               ci->offset = b->hseqbase - ci->seq - ci->nvals;
-                               cnt = ci->seq + cnt + ci->nvals - b->hseqbase;
-                               ci->seq = b->hseqbase;
+                               ci->offset = hseq - ci->seq - ci->nvals;
+                               cnt = ci->seq + cnt + ci->nvals - hseq;
+                               ci->seq = hseq;
                                ci->nvals = 0;
                                ci->tpe = cand_dense;
                                ci->oids = NULL;
                                break;
                        }
-                       assert(b->hseqbase > ci->seq || p == 0);
-                       if (b->hseqbase > ci->seq) {
+                       assert(hseq > ci->seq || p == 0);
+                       if (hseq > ci->seq) {
                                /* skip candidates, possibly including
                                 * exceptions */
                                ci->oids += p;
                                ci->nvals -= p;
-                               p = b->hseqbase - ci->seq - p;
+                               p = hseq - ci->seq - p;
                                cnt -= p;
                                ci->offset += p;
-                               ci->seq = b->hseqbase;
+                               ci->seq = hseq;
                        }
-                       if (ci->seq + cnt + ci->nvals > b->hseqbase + BATcount(b)) {
+                       if (ci->seq + cnt + ci->nvals > hseq + batcount) {
                                p = binsearchcand(ci->oids, ci->nvals - 1U,
-                                                 b->hseqbase + BATcount(b));
+                                                 hseq + batcount);
                                ci->nvals = p;
-                               cnt = b->hseqbase + BATcount(b) - ci->seq - ci->nvals;
+                               cnt = hseq + batcount - ci->seq - ci->nvals;
                        }
                        while (ci->nvals > 0 && ci->oids[0] == ci->seq) {
                                ci->nvals--;
@@ -590,8 +598,8 @@ canditer_init(struct canditer *ci, BAT *
                /* fall through */
        case cand_dense:
                if (b != NULL) {
-                       if (ci->seq + cnt <= b->hseqbase ||
-                           ci->seq >= b->hseqbase + BATcount(b)) {
+                       if (ci->seq + cnt <= hseq ||
+                           ci->seq >= hseq + batcount) {
                                /* no overlap */
                                *ci = (struct canditer) {
                                        .tpe = cand_dense,
@@ -599,20 +607,20 @@ canditer_init(struct canditer *ci, BAT *
                                };
                                return;
                        }
-                       if (b->hseqbase > ci->seq) {
-                               cnt -= b->hseqbase - ci->seq;
-                               ci->offset += b->hseqbase - ci->seq;
-                               ci->seq = b->hseqbase;
+                       if (hseq > ci->seq) {
+                               cnt -= hseq - ci->seq;
+                               ci->offset += hseq - ci->seq;
+                               ci->seq = hseq;
                        }
-                       if (ci->seq + cnt > b->hseqbase + BATcount(b))
-                               cnt = b->hseqbase + BATcount(b) - ci->seq;
+                       if (ci->seq + cnt > hseq + batcount)
+                               cnt = hseq + batcount - ci->seq;
                }
                break;
        case cand_mask:
                assert(s->tseqbase != oid_nil);
                if (b != NULL) {
-                       if (ci->seq + cnt <= b->hseqbase ||
-                           ci->seq >= b->hseqbase + BATcount(b)) {
+                       if (ci->seq + cnt <= hseq ||
+                           ci->seq >= hseq + batcount) {
                                /* no overlap */
                                *ci = (struct canditer) {
                                        .tpe = cand_dense,
@@ -620,15 +628,15 @@ canditer_init(struct canditer *ci, BAT *
                                };
                                return;
                        }
-                       if (b->hseqbase > ci->seq) {
-                               cnt = b->hseqbase - ci->seq;
+                       if (hseq > ci->seq) {
+                               cnt = hseq - ci->seq;
                                ci->mask += cnt / 32U;
                                ci->firstbit = (uint8_t) (cnt % 32U);
                                cnt = BATcount(s) - cnt;
-                               ci->seq = b->hseqbase;
+                               ci->seq = hseq;
                        }
-                       if (ci->seq + cnt > b->hseqbase + BATcount(b)) {
-                               cnt = b->hseqbase + BATcount(b) - ci->seq;
+                       if (ci->seq + cnt > hseq + batcount) {
+                               cnt = hseq + batcount - ci->seq;
                        }
                        ci->nvals = (ci->firstbit + cnt + 31U) / 32U;
                }
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3288,22 +3288,26 @@ guess_uniques(BAT *b, struct canditer *c
        BUN cnt1, cnt2;
        BAT *s1;
 
-       if (b->tkey)
+       MT_lock_set(&b->theaplock);
+       bool key = b->tkey;
+       double unique_est = b->tunique_est;
+       BUN batcount = BATcount(b);
+       MT_lock_unset(&b->theaplock);
+       if (key)
                return (double) ci->ncand;
 
        if (ci->s == NULL ||
-           (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
-               MT_lock_set(&b->theaplock);
-               double unique_est = b->tunique_est;
-               MT_lock_unset(&b->theaplock);
+           (ci->tpe == cand_dense && ci->ncand == batcount)) {
                if (unique_est != 0) {
                        TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
                                  ALGOBATPAR(b));
                        return unique_est;
                }
-               s1 = BATsample_with_seed(b, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
+               s1 = BATcreatesample(b->hseqbase, batcount, 1000,
+                                    (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
        } else {
-               BAT *s2 = BATsample_with_seed(ci->s, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
+               BAT *s2 = BATcreatesample(ci->s->hseqbase, ci->ncand, 1000,
+                                         (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
                if (s2 == NULL)
                        return -1;
                s1 = BATproject(s2, ci->s);
@@ -3323,13 +3327,13 @@ guess_uniques(BAT *b, struct canditer *c
        double B = cnt1 - n1 * A;
 
        B += A * ci->ncand;
+       MT_lock_set(&b->theaplock);
        if (ci->s == NULL ||
-           (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
-               MT_lock_set(&b->theaplock);
+           (ci->tpe == cand_dense && ci->ncand == BATcount(b) && ci->ncand == batcount)) {
                if (b->tunique_est == 0)
                        b->tunique_est = B;
-               MT_lock_unset(&b->theaplock);
        }
+       MT_lock_unset(&b->theaplock);
        return B;
 }
 
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -58,6 +58,8 @@ gdk_return BATcheckmodes(BAT *b, bool pe
        __attribute__((__visibility__("hidden")));
 BAT *BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
        __attribute__((__visibility__("hidden")));
+BAT *BATcreatesample(oid hseq, BUN cnt, BUN n, uint64_t seed)
+       __attribute__((__visibility__("hidden")));
 void BATdelete(BAT *b)
        __attribute__((__visibility__("hidden")));
 void BATdestroy(BAT *b)
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -98,26 +98,24 @@ OIDTreeToBATAntiset(struct oidtreenode *
 }
 
 static BAT *
-do_batsample(BAT *b, BUN n, random_state_engine rse, MT_Lock *lock)
+do_batsample(oid hseq, BUN cnt, BUN n, random_state_engine rse, MT_Lock *lock)
 {
        BAT *bn;
-       BUN cnt, slen;
+       BUN slen;
        BUN rescnt;
        struct oidtreenode *tree = NULL;
 
-       BATcheck(b, NULL);
        ERRORcheck(n > BUN_MAX, "sample size larger than BUN_MAX\n", NULL);
-       cnt = BATcount(b);
        /* empty sample size */
        if (n == 0) {
                bn = BATdense(0, 0, 0);
        } else if (cnt <= n) {
                /* sample size is larger than the input BAT, return
                 * all oids */
-               bn = BATdense(0, b->hseqbase, cnt);
+               bn = BATdense(0, hseq, cnt);
        } else {
-               oid minoid = b->hseqbase;
-               oid maxoid = b->hseqbase + cnt;
+               oid minoid = hseq;
+               oid maxoid = hseq + cnt;
 
                /* if someone samples more than half of our tree, we
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to