Changeset: 0d2d8a07cd9b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0d2d8a07cd9b
Modified Files:
        gdk/gdk_select.c
Branch: Dec2023
Log Message:

Handle anti-select on duplicate-eliminated string BATs without nils more
efficiently.


diffs (truncated from 435 to 300 lines):

diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -778,11 +778,35 @@ fullscan_str(BATiter *bi, struct candite
                timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? 
(qry_ctx->starttime + qry_ctx->querytimeout) : 0;
        }
 
-       if (!equi || !GDK_ELIMDOUBLES(bi->vh))
+       if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
+           strcmp(tl, str_nil) != 0 &&
+           strLocate(bi->vh, str_nil) == (var_t) -2) {
+               /* anti-equi select for non-nil value, and there are no
+                * nils, so we can use fast path; trigger by setting
+                * nonil */
+               bi->nonil = true;
+       }
+       if (!((equi ||
+              (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
+             GDK_ELIMDOUBLES(bi->vh)))
                return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
                                    lval, hval, lnil, cnt, hseq, dst,
                                    maximum, imprints, algo);
        if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
+               if (anti) {
+                       /* return the whole shebang */
+                       *algo = "select: fullscan anti-equi strelim (all)";
+                       if (BATextend(bn, ncand) != GDK_SUCCEED) {
+                               BBPreclaim(bn);
+                               return BUN_NONE;
+                       }
+                       dst = Tloc(bn, 0);
+                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                               dst[p] = canditer_next(ci);
+                       }
+                       TIMEOUT_CHECK(timeoffset, 
GOTO_LABEL_TIMEOUT_HANDLER(bailout));
+                       return ncand;
+               }
                *algo = "select: fullscan equi strelim (nomatch)";
                return 0;
        }
@@ -790,40 +814,74 @@ fullscan_str(BATiter *bi, struct candite
                BBPreclaim(bn);
                return BUN_NONE;
        }
-       *algo = "select: fullscan equi strelim";
+       *algo = anti ? "select: fullscan anti-equi strelim" : "select: fullscan 
equi strelim";
        assert(pos >= GDK_VAROFFSET);
        switch (bi->width) {
        case 1: {
                const unsigned char *ptr = (const unsigned char *) bi->base;
                pos -= GDK_VAROFFSET;
                if (ci->tpe == cand_dense) {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next_dense(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
+                               }
+                       } else {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] == pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
+                                       }
                                }
                        }
                } else {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
+                               }
+                       } else {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next(ci);
+                                       if (ptr[o - hseq] == pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
+                                       }
                                }
                        }
                }
@@ -833,33 +891,67 @@ fullscan_str(BATiter *bi, struct candite
                const unsigned short *ptr = (const unsigned short *) bi->base;
                pos -= GDK_VAROFFSET;
                if (ci->tpe == cand_dense) {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next_dense(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
+                               }
+                       } else {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] == pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
+                                       }
                                }
                        }
                } else {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
+                               }
+                       } else {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next(ci);
+                                       if (ptr[o - hseq] == pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
+                                       }
                                }
                        }
                }
@@ -869,33 +961,67 @@ fullscan_str(BATiter *bi, struct candite
        case 4: {
                const unsigned int *ptr = (const unsigned int *) bi->base;
                if (ci->tpe == cand_dense) {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next_dense(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
+                               }
+                       } else {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next_dense(ci);
+                                       if (ptr[o - hseq] == pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
+                                       }
                                }
                        }
                } else {
-                       TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
-                               o = canditer_next(ci);
-                               if (ptr[o - hseq] == pos) {
-                                       dst = buninsfix(bn, dst, cnt, o,
-                                                       (BUN) ((dbl) cnt / 
(dbl) (p == 0 ? 1 : p)
-                                                              * (dbl) 
(ncand-p) * 1.1 + 1024),
-                                                       maximum);
-                                       if (dst == NULL) {
-                                               BBPreclaim(bn);
-                                               return BUN_NONE;
+                       if (anti) {
+                               TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
+                                       o = canditer_next(ci);
+                                       if (ptr[o - hseq] != pos) {
+                                               dst = buninsfix(bn, dst, cnt, o,
+                                                               (BUN) ((dbl) 
cnt / (dbl) (p == 0 ? 1 : p)
+                                                                      * (dbl) 
(ncand-p) * 1.1 + 1024),
+                                                               maximum);
+                                               if (dst == NULL) {
+                                                       BBPreclaim(bn);
+                                                       return BUN_NONE;
+                                               }
+                                               cnt++;
                                        }
-                                       cnt++;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to