Changeset: 59824dd68f98 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=59824dd68f98 Modified Files: gdk/ChangeLog gdk/gdk.h gdk/gdk_aggr.c gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_bbp.c gdk/gdk_group.c gdk/gdk_private.h sql/storage/bat/bat_logger.c Branch: default Log Message:
Save (positions of) min and max values in bats. diffs (truncated from 645 to 300 lines): diff --git a/gdk/ChangeLog b/gdk/ChangeLog --- a/gdk/ChangeLog +++ b/gdk/ChangeLog @@ -1,3 +1,6 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Tue Dec 1 2020 Sjoerd Mullender <sjo...@acm.org> +- We now save the location of the min and max values when known. + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -712,7 +712,10 @@ typedef struct { #define GDKLIBRARY_BLOB_SORT 061040U /* first in Mar2018: blob compare changed */ #define GDKLIBRARY_OLDDATE 061041U /* first in Apr2019: time/date in old format */ -#define GDKLIBRARY 061042U /* first in Nov2019 */ +#define GDKLIBRARY_MINMAX_POS 061042U /* first in Nov2019 */ + +/* if the version number is updated, also fix snapshot_bats() in bat_logger.c */ +#define GDKLIBRARY 061043U /* first after Oct2020 */ typedef struct BAT { /* static bat properties */ @@ -2020,7 +2023,9 @@ gdk_export void VIEWbounds(BAT *b, BAT * */ enum prop_t { GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */ + GDK_MIN_POS, /* BUN position of smallest value */ GDK_MAX_VALUE, /* largest non-nil value in BAT */ + GDK_MAX_POS, /* BUN position of largest value */ GDK_HASH_BUCKETS, /* last used hash bucket size */ GDK_NUNIQUE, /* number of unique values */ }; diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -3534,6 +3534,7 @@ BATmin_skipnil(BAT *b, void *aggr, bit s bi = bat_iterator(b); res = BUNtail(bi, pos - b->hseqbase); BATsetprop(b, GDK_MIN_VALUE, b->ttype, res); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){pos - b->hseqbase}); } } if (aggr == NULL) { @@ -3633,8 +3634,10 @@ BATmax_skipnil(BAT *b, void *aggr, bit s } else { bi = bat_iterator(b); res = BUNtail(bi, pos - b->hseqbase); - if (b->tnonil) + if (b->tnonil) { BATsetprop(b, GDK_MAX_VALUE, b->ttype, res); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){pos - b->hseqbase}); + } } } if (aggr == NULL) { diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -925,6 +925,8 @@ setcolprops(BAT *b, const void *x) if (!isnil && ATOMlinear(b->ttype)) { BATsetprop(b, GDK_MAX_VALUE, b->ttype, x); BATsetprop(b, GDK_MIN_VALUE, b->ttype, x); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){0}); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){0}); } } return; @@ -977,11 +979,13 @@ setcolprops(BAT *b, const void *x) } else if (cmp < 0 && !isnil) { /* new largest value */ BATsetprop(b, GDK_MAX_VALUE, b->ttype, x); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){BATcount(b)}); } } else if (!isnil && (prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL && ATOMcmp(b->ttype, VALptr(&prop->v), x) < 0) { BATsetprop(b, GDK_MAX_VALUE, b->ttype, x); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){BATcount(b)}); } if (b->trevsorted) { if (cmp < 0) { @@ -997,15 +1001,18 @@ setcolprops(BAT *b, const void *x) (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL && ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) { BATsetprop(b, GDK_MIN_VALUE, b->ttype, x); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){BATcount(b)}); } } else if (cmp > 0 && !isnil) { /* new smallest value */ BATsetprop(b, GDK_MIN_VALUE, b->ttype, x); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){BATcount(b)}); } } else if (!isnil && (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL && ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) { BATsetprop(b, GDK_MIN_VALUE, b->ttype, x); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){BATcount(b)}); } if (BATtdense(b) && (cmp >= 0 || * (const oid *) prv + 1 != * (const oid *) x)) { assert(b->ttype == TYPE_oid); @@ -1119,11 +1126,15 @@ BUNdelete(BAT *b, oid o) val = BUNtail(bi, p); if (ATOMcmp(b->ttype, ATOMnilptr(b->ttype), val) != 0) { if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL - && ATOMcmp(b->ttype, VALptr(&prop->v), val) >= 0) + && ATOMcmp(b->ttype, VALptr(&prop->v), val) >= 0) { BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); + } if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL - && ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) + && ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) { BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); + } } if (ATOMunfix(b->ttype, val) != GDK_SUCCEED) return GDK_FAIL; @@ -1223,6 +1234,7 @@ BUNinplace(BAT *b, BUN p, const void *t, /* new value is larger than previous * largest */ BATsetprop(b, GDK_MAX_VALUE, b->ttype, t); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){p}); } else if (ATOMcmp(b->ttype, t, val) != 0 && ATOMcmp(b->ttype, VALptr(&prop->v), val) == 0) { /* old value is equal to largest and @@ -1230,6 +1242,7 @@ BUNinplace(BAT *b, BUN p, const void *t, * so we don't know anymore which is * the largest */ BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); } } if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) { @@ -1238,6 +1251,7 @@ BUNinplace(BAT *b, BUN p, const void *t, /* new value is smaller than previous * smallest */ BATsetprop(b, GDK_MIN_VALUE, b->ttype, t); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){p}); } else if (ATOMcmp(b->ttype, t, val) != 0 && ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) { /* old value is equal to smallest and @@ -1245,6 +1259,7 @@ BUNinplace(BAT *b, BUN p, const void *t, * we don't know anymore which is the * smallest */ BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); } } BATrmprop(b, GDK_NUNIQUE); @@ -2307,6 +2322,22 @@ BATassertProps(BAT *b) maxval = VALptr(&prop->v); if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) minval = VALptr(&prop->v); + if ((prop = BATgetprop(b, GDK_MAX_POS)) != NULL) { + if (maxval) { + assert(prop->v.vtype == TYPE_oid); + assert(prop->v.val.oval < b->batCount); + valp = BUNtail(bi, prop->v.val.oval); + assert(cmpf(maxval, valp) == 0); + } + } + if ((prop = BATgetprop(b, GDK_MIN_POS)) != NULL) { + if (minval) { + assert(prop->v.vtype == TYPE_oid); + assert(prop->v.val.oval < b->batCount); + valp = BUNtail(bi, prop->v.val.oval); + assert(cmpf(minval, valp) == 0); + } + } if (b->tsorted || b->trevsorted || !b->tkey) { /* if sorted (either way), or we don't have to * prove uniqueness, we can do a simple diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -551,25 +551,39 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) { if ((nprop = BATgetprop(n, GDK_MAX_VALUE)) != NULL) { if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) < 0) { - if (s == NULL) + if (s == NULL) { BATsetprop(b, GDK_MAX_VALUE, b->ttype, VALptr(&nprop->v)); - else + if ((nprop = BATgetprop(n, GDK_MAX_POS)) != NULL) + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){nprop->v.val.oval + BATcount(b)}); + else + BATrmprop(b, GDK_MAX_POS); + } else { BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); + } } } else { BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); } } if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) { if ((nprop = BATgetprop(n, GDK_MIN_VALUE)) != NULL) { if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) > 0) { - if (s == NULL) + if (s == NULL) { BATsetprop(b, GDK_MIN_VALUE, b->ttype, VALptr(&nprop->v)); - else + if ((nprop = BATgetprop(n, GDK_MIN_POS)) != NULL) + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){nprop->v.val.oval + BATcount(b)}); + else + BATrmprop(b, GDK_MIN_POS); + } else { BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); + } } } else { BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); } } BATrmprop(b, GDK_NUNIQUE); @@ -940,8 +954,8 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool atomcmp(VALptr(&maxprop->v), new) < 0) { /* new value is larger than * previous largest */ - BATsetprop(b, GDK_MAX_VALUE, b->ttype, new); - maxprop = BATgetprop(b, GDK_MAX_VALUE); + maxprop = BATsetprop(b, GDK_MAX_VALUE, b->ttype, new); + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){updid}); } else if (atomcmp(VALptr(&maxprop->v), old) == 0 && atomcmp(new, old) != 0) { /* old value is equal to @@ -950,6 +964,7 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool * anymore which is the * largest */ BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); maxprop = NULL; } } @@ -958,8 +973,8 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool atomcmp(VALptr(&minprop->v), new) > 0) { /* new value is smaller than * previous smallest */ - BATsetprop(b, GDK_MIN_VALUE, b->ttype, new); - minprop = BATgetprop(b, GDK_MIN_VALUE); + minprop = BATsetprop(b, GDK_MIN_VALUE, b->ttype, new); + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){updid}); } else if (atomcmp(VALptr(&minprop->v), old) == 0 && atomcmp(new, old) != 0) { /* old value is equal to @@ -968,6 +983,7 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool * anymore which is the * smallest */ BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); minprop = NULL; } } @@ -1042,6 +1058,8 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool * min/max values */ BATrmprop(b, GDK_MAX_VALUE); BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MAX_POS); + BATrmprop(b, GDK_MIN_POS); for (BUN i = 0, j = BATcount(p); i < j; i++) o[i] = oid_nil; b->tnil = true; @@ -1050,16 +1068,22 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool /* we know min/max of n, so we know * the new min/max of b if those of n * are smaller/larger than the old */ - if (minprop && v <= minprop->v.val.oval) + if (minprop && v <= minprop->v.val.oval) { BATsetprop(b, GDK_MIN_VALUE, TYPE_oid, &v); - else + BATsetprop(b, GDK_MIN_POS, TYPE_oid, &(oid){updid}); + } else { BATrmprop(b, GDK_MIN_VALUE); + BATrmprop(b, GDK_MIN_POS); + } for (BUN i = 0, j = BATcount(p); i < j; i++) o[i] = v++; - if (maxprop && --v >= maxprop->v.val.oval) + if (maxprop && --v >= maxprop->v.val.oval) { BATsetprop(b, GDK_MAX_VALUE, TYPE_oid, &v); - else + BATsetprop(b, GDK_MAX_POS, TYPE_oid, &(oid){updid + BATcount(p) - 1}); + } else { BATrmprop(b, GDK_MAX_VALUE); + BATrmprop(b, GDK_MAX_POS); + } } } else { /* if the extremes of n are at least as @@ -1069,16 +1093,28 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool PROPrec *prop; if (maxprop != NULL && _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list