Changeset: 93ff376d3437 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/93ff376d3437
Modified Files:
        gdk/gdk_cand.c
        gdk/gdk_private.h
        gdk/gdk_select.c
Branch: Aug2024
Log Message:

Do some smarter things for anti-equi-select.
In cases where the equi select could use some smart algorithm, we now
implement anti-equi select using the equi select and we invert the
result (also taking nils into account).
This fixes #7449.


diffs (truncated from 551 to 300 lines):

diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1293,7 +1293,7 @@ canditer_slice2val(const struct canditer
 }
 
 BAT *
-BATnegcands(BUN nr, BAT *odels)
+BATnegcands2(oid tseq, BUN nr, BAT *odels)
 {
        const char *nme;
        Heap *dels;
@@ -1301,16 +1301,20 @@ BATnegcands(BUN nr, BAT *odels)
        ccand_t *c;
        BAT *bn;
 
-       bn = BATdense(0, 0, nr);
+       bn = BATdense(0, tseq, nr);
        if (bn == NULL)
                return NULL;
        if (BATcount(odels) == 0)
-               return bn;
+               goto doreturn;
 
        lo = SORTfndfirst(odels, &bn->tseqbase);
        hi = SORTfndfirst(odels, &(oid) {bn->tseqbase + BATcount(bn)});
        if (lo == hi)
                return bn;
+       if (lo + nr == hi) {
+               BATsetcount(bn, 0);
+               goto doreturn;
+       }
 
        nme = BBP_physical(bn->batCacheid);
        if ((dels = GDKmalloc(sizeof(Heap))) == NULL){
@@ -1350,10 +1354,7 @@ BATnegcands(BUN nr, BAT *odels)
        assert(bn->tvheap == NULL);
        bn->tvheap = dels;
        BATsetcount(bn, bn->batCount - (hi - lo));
-       TRC_DEBUG(ALGO, "BATnegcands(cands=" ALGOBATFMT ","
-                 "dels=" ALGOBATFMT ")\n",
-                 ALGOBATPAR(bn),
-                 ALGOBATPAR(odels));
+  doreturn:
        TRC_DEBUG(ALGO, "nr=" BUNFMT ", odels=" ALGOBATFMT
                  " -> " ALGOBATFMT "\n",
                  nr, ALGOBATPAR(odels),
@@ -1362,6 +1363,12 @@ BATnegcands(BUN nr, BAT *odels)
 }
 
 BAT *
+BATnegcands(BUN nr, BAT *odels)
+{
+       return BATnegcands2(0, nr, odels);
+}
+
+BAT *
 BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected)
 {
        const char *nme;
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -87,6 +87,8 @@ BAT *BATload_intern(bat bid, bool lock)
 gdk_return BATmaterialize(BAT *b, BUN cap)
        __attribute__((__warn_unused_result__))
        __attribute__((__visibility__("hidden")));
+BAT *BATnegcands2(oid hseq, BUN nr, BAT *odels)
+       __attribute__((__visibility__("hidden")));
 gdk_return BATsave_iter(BAT *bd, BATiter *bi, BUN size)
        __attribute__((__visibility__("hidden")));
 void BATsetdims(BAT *b, uint16_t width)
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -544,10 +544,6 @@ NAME##_##TYPE(BATiter *bi, struct candit
        (void) hi;                                                      \
        (void) lval;                                                    \
        (void) hval;                                                    \
-       assert(li == !anti);                                            \
-       assert(hi == !anti);                                            \
-       assert(lval);                                                   \
-       assert(hval);                                                   \
        size_t counter = 0;                                             \
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();                      \
        if (imprints && imprints->imprints.parentid != bi->b->batCacheid) { \
@@ -564,6 +560,104 @@ NAME##_##TYPE(BATiter *bi, struct candit
        } else {                                                        \
                basesrc = src;                                          \
        }                                                               \
+       /* Normalize the variables li, hi, lval, hval, possibly */      \
+       /* changing anti in the process.  This works for all */         \
+       /* (and only) numeric types. */                                 \
+                                                                       \
+       /* Note that the expression x < v is equivalent to x <= */      \
+       /* v' where v' is the next smaller value in the domain */       \
+       /* of v (similarly for x > v).  Also note that for */           \
+       /* floating point numbers there actually is such a */           \
+       /* value.  In fact, there is a function in standard C */        \
+       /* that calculates that value. */                               \
+                                                                       \
+       /* The result is: */                                            \
+       /* li == !anti, hi == !anti, lval == true, hval == true */      \
+       /* This means that all ranges that we check for are */          \
+       /* closed ranges.  If a range is one-sided, we fill in */       \
+       /* the minimum resp. maximum value in the domain so that */     \
+       /* we create a closed range. */                                 \
+       if (anti && li) {                                               \
+               /* -inf < x < vl === -inf < x <= vl-1 */                \
+               if (vl == MINVALUE##TYPE) {                             \
+                       /* -inf < x < MIN || *th <[=] x < +inf */       \
+                       /* degenerates into half range */               \
+                       /* *th <[=] x < +inf */                         \
+                       anti = false;                                   \
+                       vl = vh;                                        \
+                       li = !hi;                                       \
+                       hval = false;                                   \
+                       /* further dealt with below */                  \
+               } else {                                                \
+                       vl = PREVVALUE##TYPE(vl);                       \
+                       li = false;                                     \
+               }                                                       \
+       }                                                               \
+       if (anti && hi) {                                               \
+               /* vl < x < +inf === vl+1 <= x < +inf */                \
+               if (vh == MAXVALUE##TYPE) {                             \
+                       /* -inf < x <[=] *tl || MAX > x > +inf */       \
+                       /* degenerates into half range */               \
+                       /* -inf < x <[=] *tl */                         \
+                       anti = false;                                   \
+                       vh = vl;                                        \
+                       hi = !li;                                       \
+                       lval = false;                                   \
+                       /* further dealt with below */                  \
+               } else {                                                \
+                       vh = NEXTVALUE##TYPE(vh);                       \
+                       hi = false;                                     \
+               }                                                       \
+       }                                                               \
+       if (!anti) {                                                    \
+               if (lval) {                                             \
+                       /* range bounded on left */                     \
+                       if (!li) {                                      \
+                               /* open range on left */                \
+                               if (vl == MAXVALUE##TYPE) {             \
+                                       return 0;                       \
+                               }                                       \
+                               /* vl < x === vl+1 <= x */              \
+                               vl = NEXTVALUE##TYPE(vl);               \
+                               li = true;                              \
+                       }                                               \
+               } else {                                                \
+                       /* -inf, i.e. smallest value */                 \
+                       vl = MINVALUE##TYPE;                            \
+                       li = true;                                      \
+                       lval = true;                                    \
+               }                                                       \
+               if (hval) {                                             \
+                       /* range bounded on right */                    \
+                       if (!hi) {                                      \
+                               /* open range on right */               \
+                               if (vh == MINVALUE##TYPE) {             \
+                                       return 0;                       \
+                               }                                       \
+                               /* x < vh === x <= vh-1 */              \
+                               vh = PREVVALUE##TYPE(vh);               \
+                               hi = true;                              \
+                       }                                               \
+               } else {                                                \
+                       /* +inf, i.e. largest value */                  \
+                       vh = MAXVALUE##TYPE;                            \
+                       hi = true;                                      \
+                       hval = true;                                    \
+               }                                                       \
+               if (vl > vh) {                                          \
+                       return 0;                                       \
+               }                                                       \
+       }                                                               \
+       /* if anti is set, we can now check */                          \
+       /* (x <= vl || x >= vh) && x != nil */                          \
+       /* if anti is not set, we can check just */                     \
+       /* vl <= x && x <= vh */                                        \
+       /* if equi==true, the check is x == vl */                       \
+       /* note that this includes the check for != nil */              \
+       assert(li == !anti);                                            \
+       assert(hi == !anti);                                            \
+       assert(lval);                                                   \
+       assert(hval);                                                   \
        w = canditer_last(ci);                                          \
        if (equi) {                                                     \
                assert(imprints == NULL);                               \
@@ -1220,121 +1314,6 @@ scanselect(BATiter *bi, struct canditer 
        return bn;
 }
 
-/* Normalize the variables li, hi, lval, hval, possibly changing anti
- * in the process.  This works for all (and only) numeric types.
- *
- * Note that the expression x < v is equivalent to x <= v' where v' is
- * the next smaller value in the domain of v (similarly for x > v).
- * Also note that for floating point numbers there actually is such a
- * value.  In fact, there is a function in standard C that calculates
- * that value.
- *
- * The result of this macro is:
- * li == !anti, hi == !anti, lval == true, hval == true
- * This means that all ranges that we check for are closed ranges.  If
- * a range is one-sided, we fill in the minimum resp. maximum value in
- * the domain so that we create a closed range. */
-#define NORMALIZE(TYPE)                                                        
\
-       do {                                                            \
-               if (anti && li) {                                       \
-                       /* -inf < x < vl === -inf < x <= vl-1 */        \
-                       if (*(TYPE*)tl == MINVALUE##TYPE) {             \
-                               /* -inf < x < MIN || *th <[=] x < +inf */ \
-                               /* degenerates into half range */       \
-                               /* *th <[=] x < +inf */                 \
-                               anti = false;                           \
-                               tl = th;                                \
-                               li = !hi;                               \
-                               hval = false;                           \
-                               /* further dealt with below */          \
-                       } else {                                        \
-                               vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \
-                               tl = &vl.v_##TYPE;                      \
-                               li = false;                             \
-                       }                                               \
-               }                                                       \
-               if (anti && hi) {                                       \
-                       /* vl < x < +inf === vl+1 <= x < +inf */        \
-                       if (*(TYPE*)th == MAXVALUE##TYPE) {             \
-                               /* -inf < x <[=] *tl || MAX > x > +inf */ \
-                               /* degenerates into half range */       \
-                               /* -inf < x <[=] *tl */                 \
-                               anti = false;                           \
-                               if (tl == &vl.v_##TYPE) {               \
-                                       vh.v_##TYPE = vl.v_##TYPE;      \
-                                       th = &vh.v_##TYPE;              \
-                               } else {                                \
-                                       th = tl;                        \
-                               }                                       \
-                               hi = !li;                               \
-                               lval = false;                           \
-                               /* further dealt with below */          \
-                       } else {                                        \
-                               vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \
-                               th = &vh.v_##TYPE;                      \
-                               hi = false;                             \
-                       }                                               \
-               }                                                       \
-               if (!anti) {                                            \
-                       if (lval) {                                     \
-                               /* range bounded on left */             \
-                               if (!li) {                              \
-                                       /* open range on left */        \
-                                       if (*(TYPE*)tl == MAXVALUE##TYPE) { \
-                                               bat_iterator_end(&bi);  \
-                                               return BATdense(0, 0, 0); \
-                                       }                               \
-                                       /* vl < x === vl+1 <= x */      \
-                                       vl.v_##TYPE = 
NEXTVALUE##TYPE(*(TYPE*)tl); \
-                                       li = true;                      \
-                                       tl = &vl.v_##TYPE;              \
-                               }                                       \
-                       } else {                                        \
-                               /* -inf, i.e. smallest value */         \
-                               vl.v_##TYPE = MINVALUE##TYPE;           \
-                               li = true;                              \
-                               tl = &vl.v_##TYPE;                      \
-                               lval = true;                            \
-                       }                                               \
-                       if (hval) {                                     \
-                               /* range bounded on right */            \
-                               if (!hi) {                              \
-                                       /* open range on right */       \
-                                       if (*(TYPE*)th == MINVALUE##TYPE) { \
-                                               bat_iterator_end(&bi);  \
-                                               return BATdense(0, 0, 0); \
-                                       }                               \
-                                       /* x < vh === x <= vh-1 */      \
-                                       vh.v_##TYPE = 
PREVVALUE##TYPE(*(TYPE*)th); \
-                                       hi = true;                      \
-                                       th = &vh.v_##TYPE;              \
-                               }                                       \
-                       } else {                                        \
-                               /* +inf, i.e. largest value */          \
-                               vh.v_##TYPE = MAXVALUE##TYPE;           \
-                               hi = true;                              \
-                               th = &vh.v_##TYPE;                      \
-                               hval = true;                            \
-                       }                                               \
-                       if (*(TYPE*)tl > *(TYPE*)th) {                  \
-                               bat_iterator_end(&bi);                  \
-                               return BATdense(0, 0, 0);               \
-                       }                                               \
-               }                                                       \
-               assert(lval);                                           \
-               assert(hval);                                           \
-               assert(li != anti);                                     \
-               assert(hi != anti);                                     \
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to