Changeset: 97134b6f6871 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=97134b6f6871
Modified Files:
	gdk/gdk.h
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_hash.c
Branch: linear-hashing
Log Message:
Maintain a NUNIQUE property (number of unique values). diffs (113 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2626,6 +2626,7 @@ enum prop_t { GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */ GDK_MAX_VALUE, /* largest non-nil value in BAT */ GDK_HASH_BUCKETS, /* last used hash bucket size */ + GDK_NUNIQUE, /* number of unique values */ }; /* diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -1073,6 +1073,7 @@ BUNappend(BAT *b, const void *t, bool fo IMPSdestroy(b); /* no support for inserts in imprints yet */ OIDXdestroy(b); + BATrmprop(b, GDK_NUNIQUE); #if 0 /* enable if we have more properties than just min/max */ PROPrec *prop; do { @@ -1087,6 +1088,9 @@ BUNappend(BAT *b, const void *t, bool fo #endif if (b->thash) { HASHins(b, p, t); + if (b->thash) + BATsetprop(b, GDK_NUNIQUE, + TYPE_oid, &(oid){b->thash->nunique}); if (tsize && tsize != b->tvheap->size) HEAPwarm(b->tvheap); } @@ -1156,6 +1160,7 @@ BUNdelete(BAT *b, oid o) IMPSdestroy(b); OIDXdestroy(b); HASHdestroy(b); + BATrmprop(b, GDK_NUNIQUE); #if 0 /* enable if we have more properties than just min/max */ do { for (prop = b->tprops; prop; prop = prop->next) @@ -1244,6 +1249,7 @@ BUNinplace(BAT *b, BUN p, const void *t, BATrmprop(b, GDK_MIN_VALUE); } } + BATrmprop(b, GDK_NUNIQUE); #if 0 /* enable if we have more properties than just min/max */ do { for (prop = b->tprops; prop; prop = prop->next) diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -570,6 +570,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f BATrmprop(b, GDK_MIN_VALUE); } } + BATrmprop(b, GDK_NUNIQUE); #if 0 /* enable if we have more properties than just min/max */ do { for (prop = b->tprops; prop; prop = prop->next) @@ -716,6 +717,8 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f } b->theap.dirty = true; } + if (b->thash) + BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique}); return GDK_SUCCEED; bunins_failed: @@ 
-869,6 +872,7 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool HASHdestroy(b); OIDXdestroy(b); IMPSdestroy(b); + BATrmprop(b, GDK_NUNIQUE); b->tsorted = b->trevsorted = false; b->tnosorted = b->tnorevsorted = 0; diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -377,6 +377,11 @@ BATcheckhash(BAT *b) GDK_HASH_BUCKETS, TYPE_oid, &(oid){NHASHBUCKETS(h)}); + BATsetprop_nolock( + b, + GDK_NUNIQUE, + TYPE_oid, + &(oid){h->nunique}); b->thash = h; ACCELDEBUG fprintf(stderr, "#BATcheckhash: reusing persisted hash %s\n", BATgetId(b)); MT_lock_unset(&b->batIdxLock); @@ -601,6 +606,9 @@ BAThash_impl(BAT *b, BAT *s, const char /* if key, or if small, don't bother dynamically * adjusting the hash mask */ mask = HASHmask(cnt); + } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) { + assert(prop->v.vtype == TYPE_oid); + mask = prop->v.val.oval * 4 / 3; } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) != NULL) { assert(prop->v.vtype == TYPE_oid); mask = prop->v.val.oval; @@ -750,8 +758,10 @@ BAThash_impl(BAT *b, BAT *s, const char } break; } - if (s == NULL) + if (s == NULL) { BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){NHASHBUCKETS(h)}); + BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){h->nunique}); + } h->heapbckt.parentid = b->batCacheid; h->heaplink.parentid = b->batCacheid; /* if the number of unique values is equal to the bat count, _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list