Changeset: db3e875b98dc for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/db3e875b98dc Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_firstn.c gdk/gdk_join.c gdk/gdk_select.c monetdb5/modules/kernel/algebra.c sql/backends/monet5/dict.c sql/backends/monet5/sql.c sql/storage/bat/bat_logger.c sql/storage/bat/bat_storage.c sql/storage/bat/bat_table.c Branch: default Log Message:
Implemented extra argument in BATselect to choose how to handle nil. If new last argument is false, nil is handled how it was always handled. If it is true, nil is not a special value for equi and anti-equi selects, and so e.g. low==nil, high==nil selects for nil (or !=nil if anti is set). diffs (truncated from 1190 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -188,7 +188,7 @@ gdk_return BATrtree(BAT *wkb, BAT *mbr); BAT *BATsample(BAT *b, BUN n); BAT *BATsample_with_seed(BAT *b, BUN n, uint64_t seed); gdk_return BATsave(BAT *b) __attribute__((__warn_unused_result__)); -BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti); +BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti, bool nil_matches); gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate) __attribute__((__warn_unused_result__)); BAT *BATsetaccess(BAT *b, restrict_t mode) __attribute__((__warn_unused_result__)); void BATsetcapacity(BAT *b, BUN cnt); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2265,7 +2265,7 @@ gdk_export ValPtr BATsetprop_nolock(BAT #define JOIN_BAND 3 #define JOIN_NE (-3) -gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti); +gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti, bool nil_matches); gdk_export BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op); gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role); diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c --- a/gdk/gdk_firstn.c +++ b/gdk/gdk_firstn.c @@ -1067,7 +1067,7 @@ BATfirstn_grouped(BAT **topn, BAT **gids BAT *bn1, *bn2; bn1 = bn; - bn2 = BATselect(bi->b, s, BUNtail(*bi, last - bi->b->hseqbase), NULL, true, false, false); + bn2 = BATselect(bi->b, s, BUNtail(*bi, last - bi->b->hseqbase), NULL, true, false, false, false); if (bn2 == NULL) { BBPunfix(bn1->batCacheid); return GDK_FAIL; @@ -1193,7 +1193,7 @@ BATfirstn_grouped_with_groups(BAT **topn BAT *bn1, *bn2, *bn3, *bn4; bn1 = bn; - bn2 = BATselect(g, NULL, &lastg, NULL, true, false, false); + bn2 = BATselect(g, NULL, &lastg, NULL, true, false, false, false); if (bn2 == NULL) { BBPunfix(bn1->batCacheid); return GDK_FAIL; @@ -1204,7 +1204,7 @@ BATfirstn_grouped_with_groups(BAT **topn BBPunfix(bn1->batCacheid); return GDK_FAIL; } - bn4 = BATselect(bi->b, bn3, BUNtail(*bi, last - hseq), NULL, true, false, false); + bn4 = BATselect(bi->b, bn3, BUNtail(*bi, last - hseq), NULL, true, false, false, false); BBPunfix(bn3->batCacheid); if (bn4 == NULL) { BBPunfix(bn1->batCacheid); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -391,7 +391,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT **r return rc; } - bn = BATselect(r, rci->s, v, NULL, true, true, false); + bn = BATselect(r, rci->s, v, NULL, true, true, false, false); bat_iterator_end(&li); if (bn == NULL) { return GDK_FAIL; @@ -518,7 +518,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT **r mark = 0; } else { /* no match, search for NIL in r */ - BAT *n = BATselect(r, rci->s, ATOMnilptr(r->ttype), NULL, true, true, false); + BAT *n = BATselect(r, rci->s, ATOMnilptr(r->ttype), NULL, true, true, false, false); if (n == NULL) goto bailout; mark = BATcount(n) == 0 ? 0 : bit_nil; @@ -605,12 +605,12 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT /* at this point, the matchable values in r are [lo..hi) */ if (!nil_on_miss) { assert(r3p == NULL); - r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses); + r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses, false); if (r1 == NULL) return GDK_FAIL; if (only_misses && !l->tnonil) { /* also look for NILs */ - r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false); + r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false, false); if (r2 == NULL) { BBPreclaim(r1); return GDK_FAIL; diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -460,8 +460,8 @@ quickins(oid *dst, BUN cnt, oid o, BAT * /* argument list for type-specific core scan select function call */ #define scanargs \ - bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil, \ - cnt, bi->b->hseqbase, dst, maximum, imprints, algo + bi, ci, bn, tl, th, li, hi, equi, anti, nil_matches, lval, hval, \ + lnil, cnt, bi->b->hseqbase, dst, maximum, imprints, algo #define PREVVALUEbte(x) ((x) - 1) #define PREVVALUEsht(x) ((x) - 1) @@ -521,9 +521,10 @@ quickins(oid *dst, BUN cnt, oid o, BAT * static BUN \ NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn, \ const TYPE *tl, const TYPE *th, bool li, bool hi, \ - bool equi, bool anti, bool lval, bool hval, \ - bool lnil, BUN cnt, const oid hseq, oid *restrict dst, \ - BUN maximum, Imprints *imprints, const char **algo) \ + bool equi, bool anti, bool nil_matches, bool lval, \ + bool hval, bool lnil, BUN cnt, const oid hseq, \ + oid *restrict dst, BUN maximum, Imprints *imprints, \ + const char **algo) \ { \ TYPE vl = *tl; \ TYPE vh = *th; \ @@ -574,6 +575,8 @@ NAME##_##TYPE(BATiter *bi, struct candit } else if (anti) { \ if (bi->nonil) { \ choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \ + } else if (nil_matches) { \ + choose(NAME, ISDENSE, is_##TYPE##_nil(v) || v <= vl || v >= vh, TYPE); \ } else { \ choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \ } \ @@ -597,9 +600,10 @@ NAME##_##TYPE(BATiter *bi, struct candit static BUN fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn, const void *tl, const void *th, - bool li, bool hi, bool equi, bool anti, bool lval, bool hval, - bool lnil, BUN cnt, const oid hseq, oid *restrict dst, - BUN maximum, Imprints *imprints, const char **algo) + bool li, bool hi, bool equi, bool anti, bool nil_matches, + bool lval, bool hval, bool lnil, BUN cnt, const oid hseq, + oid *restrict dst, BUN maximum, Imprints *imprints, + const char **algo) { const void *v; const void *restrict nil = ATOMnilptr(bi->type); @@ -637,9 +641,9 @@ fullscan_any(BATiter *bi, struct candite v = BUNtail(*bi, o-hseq); if ((*cmp)(tl, v) == 0) { dst = buninsfix(bn, dst, cnt, o, - (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) - * (dbl) (ncand-p) * 1.1 + 1024), - maximum); + (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) + * (dbl) (ncand-p) * 1.1 + 1024), + maximum); if (dst == NULL) { BBPreclaim(bn); return BUN_NONE; @@ -654,13 +658,15 @@ fullscan_any(BATiter *bi, struct candite TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) { o = canditer_next_dense(ci); v = BUNtail(*bi, o-hseq); - if ((nil == NULL || (*cmp)(v, nil) != 0) && - ((lval && - ((c = (*cmp)(tl, v)) > 0 || + bool isnil = nil != NULL && (*cmp)(v, nil) == 0; + if ((nil_matches && isnil) || + (!isnil && + ((lval && + ((c = (*cmp)(tl, v)) > 0 || (!li && c == 0))) || - (hval && - ((c = (*cmp)(th, v)) < 0 || - (!hi && c == 0))))) { + (hval && + ((c = (*cmp)(th, v)) < 0 || + (!hi && c == 0)))))) { dst = buninsfix(bn, dst, cnt, o, (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) * (dbl) (ncand-p) * 1.1 + 1024), @@ -676,13 +682,15 @@ fullscan_any(BATiter *bi, struct candite TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) { o = canditer_next(ci); v = BUNtail(*bi, o-hseq); - if ((nil == NULL || (*cmp)(v, nil) != 0) && - ((lval && - ((c = (*cmp)(tl, v)) > 0 || + bool isnil = nil != NULL && (*cmp)(v, nil) == 0; + if ((nil_matches && isnil) || + (!isnil && + ((lval && + ((c = (*cmp)(tl, v)) > 0 || (!li && c == 0))) || - (hval && - ((c = (*cmp)(th, v)) < 0 || - (!hi && c == 0))))) { + (hval && + ((c = (*cmp)(th, v)) < 0 || + (!hi && c == 0)))))) { dst = buninsfix(bn, dst, cnt, o, (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) * (dbl) (ncand-p) * 1.1 + 1024), @@ -702,12 +710,12 @@ fullscan_any(BATiter *bi, struct candite o = canditer_next_dense(ci); v = BUNtail(*bi, o-hseq); if ((nil == NULL || (*cmp)(v, nil) != 0) && - ((!lval || - (c = cmp(tl, v)) < 0 || - (li && c == 0)) && - (!hval || - (c = cmp(th, v)) > 0 || - (hi && c == 0)))) { + ((!lval || + (c = cmp(tl, v)) < 0 || + (li && c == 0)) && + (!hval || + (c = cmp(th, v)) > 0 || + (hi && c == 0)))) { dst = buninsfix(bn, dst, cnt, o, (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) * (dbl) (ncand-p) * 1.1 + 1024), @@ -724,12 +732,12 @@ fullscan_any(BATiter *bi, struct candite o = canditer_next(ci); v = BUNtail(*bi, o-hseq); if ((nil == NULL || (*cmp)(v, nil) != 0) && - ((!lval || - (c = cmp(tl, v)) < 0 || - (li && c == 0)) && - (!hval || - (c = cmp(th, v)) > 0 || - (hi && c == 0)))) { + ((!lval || + (c = cmp(tl, v)) < 0 || + (li && c == 0)) && + (!hval || + (c = cmp(th, v)) > 0 || + (hi && c == 0)))) { dst = buninsfix(bn, dst, cnt, o, (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) * (dbl) (ncand-p) * 1.1 + 1024), @@ -753,9 +761,10 @@ fullscan_any(BATiter *bi, struct candite static BUN fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn, const char *tl, const char *th, - bool li, bool hi, bool equi, bool anti, bool lval, bool hval, - bool lnil, BUN cnt, const oid hseq, oid *restrict dst, - BUN maximum, Imprints *imprints, const char **algo) + bool li, bool hi, bool equi, bool anti, bool nil_matches, + bool lval, bool hval, bool lnil, BUN cnt, const oid hseq, + oid *restrict dst, BUN maximum, Imprints *imprints, + const char **algo) { var_t pos; BUN p, ncand = ci->ncand; @@ -764,8 +773,8 @@ fullscan_str(BATiter *bi, struct candite if (!equi || !GDK_ELIMDOUBLES(bi->vh)) return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti, - lval, hval, lnil, cnt, hseq, dst, - maximum, imprints, algo); + nil_matches, lval, hval, lnil, cnt, hseq, + dst, maximum, imprints, algo); if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) { *algo = "select: fullscan equi strelim (nomatch)"; return 0; @@ -953,8 +962,9 @@ scan_sel(densescan, _dense) static BAT * scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn, const void *tl, const void *th, - bool li, bool hi, bool equi, bool anti, bool lval, bool hval, - bool lnil, BUN maximum, Imprints *imprints, const char **algo) + bool li, bool hi, bool equi, bool anti, bool nil_matches, + bool lval, bool hval, bool lnil, + BUN maximum, Imprints *imprints, const char **algo) { #ifndef NDEBUG int (*cmp)(const void *, const void *); @@ -1341,26 +1351,30 @@ BATrange(BATiter *bi, const void *tl, co * * If s is non-NULL, it is a list of candidates. s must be sorted. * - * tl may not be NULL, li, hi, and anti must be either 0 or 1. + * tl may not be NULL, li, hi, and anti must be either false or true. * * If th is NULL, hi is ignored. * - * If anti is 0, qualifying tuples are those whose value is between tl - * and th (as in x >[=] tl && x <[=] th, where equality depends on li + * If anti is false, qualifying tuples are those whose value is between + * tl and th (as in x >[=] tl && x <[=] th, where equality depends on li * and hi--so if tl > th, nothing will be returned). If li or hi is - * 1, the respective boundary is inclusive, otherwise exclusive. If - * th is NULL it is taken to be equal to tl, turning this into an - * equi- or point-select. Note that for a point select to return - * anything, li (and hi if th was not NULL) must be 1. There is a - * special case if tl is nil and th is NULL. This is the only way to - * select for nil values. + * true, the respective boundary is inclusive, otherwise exclusive. If + * th is NULL it is taken to be equal to tl, turning this into an equi- + * or point-select. Note that for a point select to return anything, li + * (and hi if th was not NULL) must be true. There is a special case if + * tl is nil and th is NULL. This is the only way to select for nil + * values. * _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org