Changeset: 93ff376d3437 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/93ff376d3437 Modified Files: gdk/gdk_cand.c gdk/gdk_private.h gdk/gdk_select.c Branch: Aug2024 Log Message:
Do some smarter things for anti-equi-select. In cases where the equi select could use some smart algorithm, we now implement anti-equi select using the equi select and we invert the result (also taking nils into account). This fixes #7449. diffs (truncated from 551 to 300 lines): diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c --- a/gdk/gdk_cand.c +++ b/gdk/gdk_cand.c @@ -1293,7 +1293,7 @@ canditer_slice2val(const struct canditer } BAT * -BATnegcands(BUN nr, BAT *odels) +BATnegcands2(oid tseq, BUN nr, BAT *odels) { const char *nme; Heap *dels; @@ -1301,16 +1301,20 @@ BATnegcands(BUN nr, BAT *odels) ccand_t *c; BAT *bn; - bn = BATdense(0, 0, nr); + bn = BATdense(0, tseq, nr); if (bn == NULL) return NULL; if (BATcount(odels) == 0) - return bn; + goto doreturn; lo = SORTfndfirst(odels, &bn->tseqbase); hi = SORTfndfirst(odels, &(oid) {bn->tseqbase + BATcount(bn)}); if (lo == hi) return bn; + if (lo + nr == hi) { + BATsetcount(bn, 0); + goto doreturn; + } nme = BBP_physical(bn->batCacheid); if ((dels = GDKmalloc(sizeof(Heap))) == NULL){ @@ -1350,10 +1354,7 @@ BATnegcands(BUN nr, BAT *odels) assert(bn->tvheap == NULL); bn->tvheap = dels; BATsetcount(bn, bn->batCount - (hi - lo)); - TRC_DEBUG(ALGO, "BATnegcands(cands=" ALGOBATFMT "," - "dels=" ALGOBATFMT ")\n", - ALGOBATPAR(bn), - ALGOBATPAR(odels)); + doreturn: TRC_DEBUG(ALGO, "nr=" BUNFMT ", odels=" ALGOBATFMT " -> " ALGOBATFMT "\n", nr, ALGOBATPAR(odels), @@ -1362,6 +1363,12 @@ BATnegcands(BUN nr, BAT *odels) } BAT * +BATnegcands(BUN nr, BAT *odels) +{ + return BATnegcands2(0, nr, odels); +} + +BAT * BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected) { const char *nme; diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -87,6 +87,8 @@ BAT *BATload_intern(bat bid, bool lock) gdk_return BATmaterialize(BAT *b, BUN cap) __attribute__((__warn_unused_result__)) __attribute__((__visibility__("hidden"))); +BAT *BATnegcands2(oid hseq, BUN nr, BAT *odels) + __attribute__((__visibility__("hidden"))); gdk_return BATsave_iter(BAT *bd, BATiter *bi, BUN size) __attribute__((__visibility__("hidden"))); void BATsetdims(BAT *b, uint16_t width) diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -544,10 +544,6 @@ NAME##_##TYPE(BATiter *bi, struct candit (void) hi; \ (void) lval; \ (void) hval; \ - assert(li == !anti); \ - assert(hi == !anti); \ - assert(lval); \ - assert(hval); \ size_t counter = 0; \ QryCtx *qry_ctx = MT_thread_get_qry_ctx(); \ if (imprints && imprints->imprints.parentid != bi->b->batCacheid) { \ @@ -564,6 +560,104 @@ NAME##_##TYPE(BATiter *bi, struct candit } else { \ basesrc = src; \ } \ + /* Normalize the variables li, hi, lval, hval, possibly */ \ + /* changing anti in the process. This works for all */ \ + /* (and only) numeric types. */ \ + \ + /* Note that the expression x < v is equivalent to x <= */ \ + /* v' where v' is the next smaller value in the domain */ \ + /* of v (similarly for x > v). Also note that for */ \ + /* floating point numbers there actually is such a */ \ + /* value. In fact, there is a function in standard C */ \ + /* that calculates that value. */ \ + \ + /* The result is: */ \ + /* li == !anti, hi == !anti, lval == true, hval == true */ \ + /* This means that all ranges that we check for are */ \ + /* closed ranges. If a range is one-sided, we fill in */ \ + /* the minimum resp. maximum value in the domain so that */ \ + /* we create a closed range. */ \ + if (anti && li) { \ + /* -inf < x < vl === -inf < x <= vl-1 */ \ + if (vl == MINVALUE##TYPE) { \ + /* -inf < x < MIN || *th <[=] x < +inf */ \ + /* degenerates into half range */ \ + /* *th <[=] x < +inf */ \ + anti = false; \ + vl = vh; \ + li = !hi; \ + hval = false; \ + /* further dealt with below */ \ + } else { \ + vl = PREVVALUE##TYPE(vl); \ + li = false; \ + } \ + } \ + if (anti && hi) { \ + /* vl < x < +inf === vl+1 <= x < +inf */ \ + if (vh == MAXVALUE##TYPE) { \ + /* -inf < x <[=] *tl || MAX > x > +inf */ \ + /* degenerates into half range */ \ + /* -inf < x <[=] *tl */ \ + anti = false; \ + vh = vl; \ + hi = !li; \ + lval = false; \ + /* further dealt with below */ \ + } else { \ + vh = NEXTVALUE##TYPE(vh); \ + hi = false; \ + } \ + } \ + if (!anti) { \ + if (lval) { \ + /* range bounded on left */ \ + if (!li) { \ + /* open range on left */ \ + if (vl == MAXVALUE##TYPE) { \ + return 0; \ + } \ + /* vl < x === vl+1 <= x */ \ + vl = NEXTVALUE##TYPE(vl); \ + li = true; \ + } \ + } else { \ + /* -inf, i.e. smallest value */ \ + vl = MINVALUE##TYPE; \ + li = true; \ + lval = true; \ + } \ + if (hval) { \ + /* range bounded on right */ \ + if (!hi) { \ + /* open range on right */ \ + if (vh == MINVALUE##TYPE) { \ + return 0; \ + } \ + /* x < vh === x <= vh-1 */ \ + vh = PREVVALUE##TYPE(vh); \ + hi = true; \ + } \ + } else { \ + /* +inf, i.e. largest value */ \ + vh = MAXVALUE##TYPE; \ + hi = true; \ + hval = true; \ + } \ + if (vl > vh) { \ + return 0; \ + } \ + } \ + /* if anti is set, we can now check */ \ + /* (x <= vl || x >= vh) && x != nil */ \ + /* if anti is not set, we can check just */ \ + /* vl <= x && x <= vh */ \ + /* if equi==true, the check is x == vl */ \ + /* note that this includes the check for != nil */ \ + assert(li == !anti); \ + assert(hi == !anti); \ + assert(lval); \ + assert(hval); \ w = canditer_last(ci); \ if (equi) { \ assert(imprints == NULL); \ @@ -1220,121 +1314,6 @@ scanselect(BATiter *bi, struct canditer return bn; } -/* Normalize the variables li, hi, lval, hval, possibly changing anti - * in the process. This works for all (and only) numeric types. - * - * Note that the expression x < v is equivalent to x <= v' where v' is - * the next smaller value in the domain of v (similarly for x > v). - * Also note that for floating point numbers there actually is such a - * value. In fact, there is a function in standard C that calculates - * that value. - * - * The result of this macro is: - * li == !anti, hi == !anti, lval == true, hval == true - * This means that all ranges that we check for are closed ranges. If - * a range is one-sided, we fill in the minimum resp. maximum value in - * the domain so that we create a closed range. */ -#define NORMALIZE(TYPE) \ - do { \ - if (anti && li) { \ - /* -inf < x < vl === -inf < x <= vl-1 */ \ - if (*(TYPE*)tl == MINVALUE##TYPE) { \ - /* -inf < x < MIN || *th <[=] x < +inf */ \ - /* degenerates into half range */ \ - /* *th <[=] x < +inf */ \ - anti = false; \ - tl = th; \ - li = !hi; \ - hval = false; \ - /* further dealt with below */ \ - } else { \ - vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \ - tl = &vl.v_##TYPE; \ - li = false; \ - } \ - } \ - if (anti && hi) { \ - /* vl < x < +inf === vl+1 <= x < +inf */ \ - if (*(TYPE*)th == MAXVALUE##TYPE) { \ - /* -inf < x <[=] *tl || MAX > x > +inf */ \ - /* degenerates into half range */ \ - /* -inf < x <[=] *tl */ \ - anti = false; \ - if (tl == &vl.v_##TYPE) { \ - vh.v_##TYPE = vl.v_##TYPE; \ - th = &vh.v_##TYPE; \ - } else { \ - th = tl; \ - } \ - hi = !li; \ - lval = false; \ - /* further dealt with below */ \ - } else { \ - vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \ - th = &vh.v_##TYPE; \ - hi = false; \ - } \ - } \ - if (!anti) { \ - if (lval) { \ - /* range bounded on left */ \ - if (!li) { \ - /* open range on left */ \ - if (*(TYPE*)tl == MAXVALUE##TYPE) { \ - bat_iterator_end(&bi); \ - return BATdense(0, 0, 0); \ - } \ - /* vl < x === vl+1 <= x */ \ - vl.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)tl); \ - li = true; \ - tl = &vl.v_##TYPE; \ - } \ - } else { \ - /* -inf, i.e. smallest value */ \ - vl.v_##TYPE = MINVALUE##TYPE; \ - li = true; \ - tl = &vl.v_##TYPE; \ - lval = true; \ - } \ - if (hval) { \ - /* range bounded on right */ \ - if (!hi) { \ - /* open range on right */ \ - if (*(TYPE*)th == MINVALUE##TYPE) { \ - bat_iterator_end(&bi); \ - return BATdense(0, 0, 0); \ - } \ - /* x < vh === x <= vh-1 */ \ - vh.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)th); \ - hi = true; \ - th = &vh.v_##TYPE; \ - } \ - } else { \ - /* +inf, i.e. largest value */ \ - vh.v_##TYPE = MAXVALUE##TYPE; \ - hi = true; \ - th = &vh.v_##TYPE; \ - hval = true; \ - } \ - if (*(TYPE*)tl > *(TYPE*)th) { \ - bat_iterator_end(&bi); \ - return BATdense(0, 0, 0); \ - } \ - } \ - assert(lval); \ - assert(hval); \ - assert(li != anti); \ - assert(hi != anti); \ _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org