Changeset: 89ade6900146 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/89ade6900146
Modified Files:
	gdk/gdk_align.c
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_group.c
	gdk/gdk_hash.c
	gdk/gdk_join.c
	gdk/gdk_private.h
	gdk/gdk_unique.c
Branch: default
Log Message:
Only maintain GDK_NUNIQUE and GDK_HASH_BUCKETS properties when freeing hash. diffs (270 lines): diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c --- a/gdk/gdk_align.c +++ b/gdk/gdk_align.c @@ -236,6 +236,7 @@ BATmaterialize(BAT *b) x[p++] = t++; } BATsetcount(b, b->batCount); + BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){is_oid_nil(t) ? 1 : b->batCount}); return GDK_SUCCEED; } diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -604,6 +604,10 @@ BATfree(BAT *b) if (b->tident && !default_ident(b->tident)) GDKfree(b->tident); b->tident = BATstring_t; + if (b->thash && b->thash != (Hash *) 1) { + BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique}); + BATsetprop(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){b->thash->nbucket}); + } HASHfree(b); IMPSfree(b); OIDXfree(b); @@ -1182,7 +1186,6 @@ BUNappendmulti(BAT *b, const void *value return rc; } - BATrmprop(b, GDK_NUNIQUE); BATrmprop(b, GDK_UNIQUE_ESTIMATE); for (BUN i = 0; i < count; i++) { void *t = b->ttype && b->tvarsized ? 
((void **) values)[i] : @@ -1197,23 +1200,8 @@ BUNappendmulti(BAT *b, const void *value p++; } - if (b->thash) - BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique}); - IMPSdestroy(b); /* no support for inserts in imprints yet */ OIDXdestroy(b); -#if 0 /* enable if we have more properties than just min/max */ - PROPrec *prop; - do { - for (prop = b->tprops; prop; prop = prop->next) - if (prop->id != GDK_MAX_VALUE && - prop->id != GDK_MIN_VALUE && - prop->id != GDK_HASH_BUCKETS) { - BATrmprop(b, prop->id); - break; - } - } while (prop); -#endif return GDK_SUCCEED; } @@ -1303,19 +1291,7 @@ BUNdelete(BAT *b, oid o) } IMPSdestroy(b); OIDXdestroy(b); - BATrmprop(b, GDK_NUNIQUE); BATrmprop(b, GDK_UNIQUE_ESTIMATE); -#if 0 /* enable if we have more properties than just min/max */ - do { - for (prop = b->tprops; prop; prop = prop->next) - if (prop->id != GDK_MAX_VALUE && - prop->id != GDK_MIN_VALUE && - prop->id != GDK_HASH_BUCKETS) { - BATrmprop(b, prop->id); - break; - } - } while (prop); -#endif return GDK_SUCCEED; } @@ -1401,19 +1377,7 @@ BUNinplacemulti(BAT *b, const oid *posit BATrmprop(b, GDK_MIN_POS); } } - BATrmprop(b, GDK_NUNIQUE); BATrmprop(b, GDK_UNIQUE_ESTIMATE); -#if 0 /* enable if we have more properties than just min/max */ - do { - for (prop = b->tprops; prop; prop = prop->next) - if (prop->id != GDK_MAX_VALUE && - prop->id != GDK_MIN_VALUE && - prop->id != GDK_HASH_BUCKETS) { - BATrmprop(b, prop->id); - break; - } - } while (prop); -#endif } else { PROPdestroy(b); } diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -763,19 +763,7 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool BATrmprop(b, GDK_MIN_POS); } } - BATrmprop(b, GDK_NUNIQUE); BATrmprop(b, GDK_UNIQUE_ESTIMATE); -#if 0 /* enable if we have more properties than just min/max */ - do { - for (prop = b->tprops; prop; prop = prop->next) - if (prop->id != GDK_MAX_VALUE && - prop->id != GDK_MIN_VALUE && - prop->id != GDK_HASH_BUCKETS) { - BATrmprop(b, 
prop->id); - break; - } - } while (prop); -#endif /* load hash so that we can maintain it */ (void) BATcheckhash(b); @@ -915,8 +903,6 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool } b->theap->dirty = true; } - if (b->thash) - BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique}); doreturn: TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT @@ -1112,7 +1098,6 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool HASHdestroy(b); OIDXdestroy(b); IMPSdestroy(b); - BATrmprop(b, GDK_NUNIQUE); BATrmprop(b, GDK_UNIQUE_ESTIMATE); b->tsorted = b->trevsorted = false; @@ -2666,7 +2651,7 @@ BATgetprop_nolock(BAT *b, enum prop_t id return p; } -static void +void BATrmprop_nolock(BAT *b, enum prop_t idx) { PROPrec *prop = b->tprops, *prev = NULL; diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -768,10 +768,14 @@ BATgroup_internal(BAT **groups, BAT **ex if (gn == NULL) goto error; ngrps = (oid *) Tloc(gn, 0); - if ((prop = BATgetprop(b, GDK_NUNIQUE)) != NULL) + MT_rwlock_rdlock(&b->batIdxLock); + if (b->thash && b->thash != (Hash *) 1) + maxgrps = b->thash->nunique; + else if ((prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) maxgrps = prop->v.val.oval; else maxgrps = cnt / 10; + MT_rwlock_rdunlock(&b->batIdxLock); if (!is_oid_nil(maxgrp) && maxgrps < maxgrp) maxgrps += maxgrp; if (e && maxgrps < BATcount(e)) diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -398,8 +398,6 @@ HASHgrowbucket(BAT *b) HASHput(h, old, HASHnil(h)); else HASHputlink(h, lold, HASHnil(h)); - BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid, - &(oid){h->nbucket}); } TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) { TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT @@ -523,16 +521,6 @@ BATcheckhash(BAT *b) h->heapbckt.parentid = b->batCacheid; h->heaplink.dirty = false; h->heapbckt.dirty = false; - BATsetprop_nolock( - b, - GDK_HASH_BUCKETS, - TYPE_oid, - &(oid){h->nbucket}); - BATsetprop_nolock( - b, - 
GDK_NUNIQUE, - TYPE_oid, - &(oid){h->nunique}); b->thash = h; TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b)); @@ -976,8 +964,8 @@ BAThash_impl(BAT *restrict b, struct can break; } if (!hascand) { - BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){h->nbucket}); - BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){h->nunique}); + BATrmprop_nolock(b, GDK_HASH_BUCKETS); + BATrmprop_nolock(b, GDK_NUNIQUE); } h->heapbckt.parentid = b->batCacheid; h->heaplink.parentid = b->batCacheid; diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3184,14 +3184,18 @@ joincost(BAT *r, struct canditer *lci, s * since the searching of the candidate list * (canditer_idx) will kill us */ double rccost; - PROPrec *prop = BATgetprop(r, GDK_NUNIQUE); - if (prop) { - /* we know number of unique values, assume some - * chains */ - rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval); + if (rhash && !prhash) { + rccost = (double) BATcount(r) / r->thash->nheads; } else { - /* guess number of unique value and work with that */ - rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci)); + PROPrec *prop = BATgetprop(r, GDK_NUNIQUE); + if (prop) { + /* we know number of unique values, assume some + * chains */ + rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval); + } else { + /* guess number of unique value and work with that */ + rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci)); + } } rccost *= lci->ncand; rccost += rci->ncand * 2.0; /* cost of building the hash */ diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -81,6 +81,8 @@ gdk_return BATmaterialize(BAT *b) __attribute__((__visibility__("hidden"))); void BATrmprop(BAT *b, enum prop_t idx) __attribute__((__visibility__("hidden"))); +void BATrmprop_nolock(BAT *b, enum prop_t idx) + __attribute__((__visibility__("hidden"))); void BATsetdims(BAT *b) __attribute__((__visibility__("hidden"))); 
PROPrec *BATsetprop(BAT *b, enum prop_t idx, int type, const void *v) diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c --- a/gdk/gdk_unique.c +++ b/gdk/gdk_unique.c @@ -76,10 +76,16 @@ BATunique(BAT *b, BAT *s) assert(b->ttype != TYPE_void); - if (s == NULL && (prop = BATgetprop(b, GDK_NUNIQUE)) != NULL) - bn = COLnew(0, TYPE_oid, prop->v.val.oval, TRANSIENT); - else - bn = COLnew(0, TYPE_oid, 1024, TRANSIENT); + BUN initsize = 1024; + if (s == NULL) { + MT_rwlock_rdlock(&b->batIdxLock); + if (b->thash != NULL && b->thash != (Hash *) 1) + initsize = b->thash->nunique; + else if ((prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) + initsize = prop->v.val.oval; + MT_rwlock_rdunlock(&b->batIdxLock); + } + bn = COLnew(0, TYPE_oid, initsize, TRANSIENT); if (bn == NULL) return NULL; vals = Tloc(b, 0); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list