Changeset: 0d2d8a07cd9b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0d2d8a07cd9b
Modified Files: gdk/gdk_select.c
Branch: Dec2023
Log Message:
Handle anti select on duplicate-eliminated string bats without nils more efficiently. diffs (truncated from 435 to 300 lines): diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -778,11 +778,35 @@ fullscan_str(BATiter *bi, struct candite timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0; } - if (!equi || !GDK_ELIMDOUBLES(bi->vh)) + if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) && + strcmp(tl, str_nil) != 0 && + strLocate(bi->vh, str_nil) == (var_t) -2) { + /* anti-equi select for non-nil value, and there are no + * nils, so we can use fast path; trigger by setting + * nonil */ + bi->nonil = true; + } + if (!((equi || + (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) && + GDK_ELIMDOUBLES(bi->vh))) return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil, cnt, hseq, dst, maximum, imprints, algo); if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) { + if (anti) { + /* return the whole shebang */ + *algo = "select: fullscan anti-equi strelim (all)"; + if (BATextend(bn, ncand) != GDK_SUCCEED) { + BBPreclaim(bn); + return BUN_NONE; + } + dst = Tloc(bn, 0); + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + dst[p] = canditer_next(ci); + } + TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); + return ncand; + } *algo = "select: fullscan equi strelim (nomatch)"; return 0; } @@ -790,40 +814,74 @@ fullscan_str(BATiter *bi, struct candite BBPreclaim(bn); return BUN_NONE; } - *algo = "select: fullscan equi strelim"; + *algo = anti ? 
"select: fullscan anti-equi strelim" : "select: fullscan equi strelim"; assert(pos >= GDK_VAROFFSET); switch (bi->width) { case 1: { const unsigned char *ptr = (const unsigned char *) bi->base; pos -= GDK_VAROFFSET; if (ci->tpe == cand_dense) { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next_dense(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; + } + } else { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] == pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; + } } } } else { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; + } + } else { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next(ci); + if (ptr[o - hseq] == pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 
1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; + } } } } @@ -833,33 +891,67 @@ fullscan_str(BATiter *bi, struct candite const unsigned short *ptr = (const unsigned short *) bi->base; pos -= GDK_VAROFFSET; if (ci->tpe == cand_dense) { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next_dense(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; + } + } else { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] == pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; + } } } } else { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 
1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; + } + } else { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next(ci); + if (ptr[o - hseq] == pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; + } } } } @@ -869,33 +961,67 @@ fullscan_str(BATiter *bi, struct candite case 4: { const unsigned int *ptr = (const unsigned int *) bi->base; if (ci->tpe == cand_dense) { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next_dense(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; + } + } else { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next_dense(ci); + if (ptr[o - hseq] == pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; + } } } } else { - TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { - o = canditer_next(ci); - if (ptr[o - hseq] == pos) { - dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 
1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); - if (dst == NULL) { - BBPreclaim(bn); - return BUN_NONE; + if (anti) { + TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { + o = canditer_next(ci); + if (ptr[o - hseq] != pos) { + dst = buninsfix(bn, dst, cnt, o, + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); + if (dst == NULL) { + BBPreclaim(bn); + return BUN_NONE; + } + cnt++; } - cnt++; _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org