Changeset: 89ade6900146 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/89ade6900146
Modified Files:
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_group.c
        gdk/gdk_hash.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_unique.c
Branch: default
Log Message:

Only maintain GDK_NUNIQUE and GDK_HASH_BUCKETS properties when freeing hash.


diffs (270 lines):

diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -236,6 +236,7 @@ BATmaterialize(BAT *b)
                        x[p++] = t++;
        }
        BATsetcount(b, b->batCount);
+       BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){is_oid_nil(t) ? 1 : b->batCount});
 
        return GDK_SUCCEED;
 }
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -604,6 +604,10 @@ BATfree(BAT *b)
        if (b->tident && !default_ident(b->tident))
                GDKfree(b->tident);
        b->tident = BATstring_t;
+       if (b->thash && b->thash != (Hash *) 1) {
+               BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique});
+               BATsetprop(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){b->thash->nbucket});
+       }
        HASHfree(b);
        IMPSfree(b);
        OIDXfree(b);
@@ -1182,7 +1186,6 @@ BUNappendmulti(BAT *b, const void *value
                        return rc;
        }
 
-       BATrmprop(b, GDK_NUNIQUE);
        BATrmprop(b, GDK_UNIQUE_ESTIMATE);
        for (BUN i = 0; i < count; i++) {
                void *t = b->ttype && b->tvarsized ? ((void **) values)[i] :
@@ -1197,23 +1200,8 @@ BUNappendmulti(BAT *b, const void *value
                p++;
        }
 
-       if (b->thash)
-               BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique});
-
        IMPSdestroy(b); /* no support for inserts in imprints yet */
        OIDXdestroy(b);
-#if 0          /* enable if we have more properties than just min/max */
-       PROPrec *prop;
-       do {
-               for (prop = b->tprops; prop; prop = prop->next)
-                       if (prop->id != GDK_MAX_VALUE &&
-                           prop->id != GDK_MIN_VALUE &&
-                           prop->id != GDK_HASH_BUCKETS) {
-                               BATrmprop(b, prop->id);
-                               break;
-                       }
-       } while (prop);
-#endif
        return GDK_SUCCEED;
 }
 
@@ -1303,19 +1291,7 @@ BUNdelete(BAT *b, oid o)
        }
        IMPSdestroy(b);
        OIDXdestroy(b);
-       BATrmprop(b, GDK_NUNIQUE);
        BATrmprop(b, GDK_UNIQUE_ESTIMATE);
-#if 0          /* enable if we have more properties than just min/max */
-       do {
-               for (prop = b->tprops; prop; prop = prop->next)
-                       if (prop->id != GDK_MAX_VALUE &&
-                           prop->id != GDK_MIN_VALUE &&
-                           prop->id != GDK_HASH_BUCKETS) {
-                               BATrmprop(b, prop->id);
-                               break;
-                       }
-       } while (prop);
-#endif
        return GDK_SUCCEED;
 }
 
@@ -1401,19 +1377,7 @@ BUNinplacemulti(BAT *b, const oid *posit
                                        BATrmprop(b, GDK_MIN_POS);
                                }
                        }
-                       BATrmprop(b, GDK_NUNIQUE);
                        BATrmprop(b, GDK_UNIQUE_ESTIMATE);
-#if 0          /* enable if we have more properties than just min/max */
-                       do {
-                               for (prop = b->tprops; prop; prop = prop->next)
-                                       if (prop->id != GDK_MAX_VALUE &&
-                                           prop->id != GDK_MIN_VALUE &&
-                                           prop->id != GDK_HASH_BUCKETS) {
-                                               BATrmprop(b, prop->id);
-                                               break;
-                                       }
-                       } while (prop);
-#endif
                } else {
                        PROPdestroy(b);
                }
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -763,19 +763,7 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool 
                        BATrmprop(b, GDK_MIN_POS);
                }
        }
-       BATrmprop(b, GDK_NUNIQUE);
        BATrmprop(b, GDK_UNIQUE_ESTIMATE);
-#if 0          /* enable if we have more properties than just min/max */
-       do {
-               for (prop = b->tprops; prop; prop = prop->next)
-                       if (prop->id != GDK_MAX_VALUE &&
-                           prop->id != GDK_MIN_VALUE &&
-                           prop->id != GDK_HASH_BUCKETS) {
-                               BATrmprop(b, prop->id);
-                               break;
-                       }
-       } while (prop);
-#endif
        /* load hash so that we can maintain it */
        (void) BATcheckhash(b);
 
@@ -915,8 +903,6 @@ BATappend2(BAT *b, BAT *n, BAT *s, bool 
                }
                b->theap->dirty = true;
        }
-       if (b->thash)
-               BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique});
 
   doreturn:
        TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
@@ -1112,7 +1098,6 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool 
        HASHdestroy(b);
        OIDXdestroy(b);
        IMPSdestroy(b);
-       BATrmprop(b, GDK_NUNIQUE);
        BATrmprop(b, GDK_UNIQUE_ESTIMATE);
 
        b->tsorted = b->trevsorted = false;
@@ -2666,7 +2651,7 @@ BATgetprop_nolock(BAT *b, enum prop_t id
        return p;
 }
 
-static void
+void
 BATrmprop_nolock(BAT *b, enum prop_t idx)
 {
        PROPrec *prop = b->tprops, *prev = NULL;
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -768,10 +768,14 @@ BATgroup_internal(BAT **groups, BAT **ex
        if (gn == NULL)
                goto error;
        ngrps = (oid *) Tloc(gn, 0);
-       if ((prop = BATgetprop(b, GDK_NUNIQUE)) != NULL)
+       MT_rwlock_rdlock(&b->batIdxLock);
+       if (b->thash && b->thash != (Hash *) 1)
+               maxgrps = b->thash->nunique;
+       else if ((prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL)
                maxgrps = prop->v.val.oval;
        else
                maxgrps = cnt / 10;
+       MT_rwlock_rdunlock(&b->batIdxLock);
        if (!is_oid_nil(maxgrp) && maxgrps < maxgrp)
                maxgrps += maxgrp;
        if (e && maxgrps < BATcount(e))
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -398,8 +398,6 @@ HASHgrowbucket(BAT *b)
                        HASHput(h, old, HASHnil(h));
                else
                        HASHputlink(h, lold, HASHnil(h));
-               BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid,
-                                 &(oid){h->nbucket});
        }
        TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
                TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
@@ -523,16 +521,6 @@ BATcheckhash(BAT *b)
                                                                h->heapbckt.parentid = b->batCacheid;
                                                                h->heaplink.dirty = false;
                                                                h->heapbckt.dirty = false;
-                                                               BATsetprop_nolock(
-                                                                       b,
-                                                                       GDK_HASH_BUCKETS,
-                                                                       TYPE_oid,
-                                                                       &(oid){h->nbucket});
-                                                               BATsetprop_nolock(
-                                                                       b,
-                                                                       GDK_NUNIQUE,
-                                                                       TYPE_oid,
-                                                                       &(oid){h->nunique});
                                                                b->thash = h;
                                                                TRC_DEBUG(ACCELERATOR,
                                                                          ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
@@ -976,8 +964,8 @@ BAThash_impl(BAT *restrict b, struct can
                break;
        }
        if (!hascand) {
-               BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){h->nbucket});
-               BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){h->nunique});
+               BATrmprop_nolock(b, GDK_HASH_BUCKETS);
+               BATrmprop_nolock(b, GDK_NUNIQUE);
        }
        h->heapbckt.parentid = b->batCacheid;
        h->heaplink.parentid = b->batCacheid;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3184,14 +3184,18 @@ joincost(BAT *r, struct canditer *lci, s
                 * since the searching of the candidate list
                 * (canditer_idx) will kill us */
                double rccost;
-               PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
-               if (prop) {
-                       /* we know number of unique values, assume some
-                        * chains */
-                       rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval);
+               if (rhash && !prhash) {
+                       rccost = (double) BATcount(r) / r->thash->nheads;
                } else {
-                       /* guess number of unique value and work with that */
-                       rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci));
+                       PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
+                       if (prop) {
+                               /* we know number of unique values, assume some
+                                * chains */
+                               rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval);
+                       } else {
+                               /* guess number of unique value and work with that */
+                               rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci));
+                       }
                }
                rccost *= lci->ncand;
                rccost += rci->ncand * 2.0; /* cost of building the hash */
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -81,6 +81,8 @@ gdk_return BATmaterialize(BAT *b)
        __attribute__((__visibility__("hidden")));
 void BATrmprop(BAT *b, enum prop_t idx)
        __attribute__((__visibility__("hidden")));
+void BATrmprop_nolock(BAT *b, enum prop_t idx)
+       __attribute__((__visibility__("hidden")));
 void BATsetdims(BAT *b)
        __attribute__((__visibility__("hidden")));
 PROPrec *BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -76,10 +76,16 @@ BATunique(BAT *b, BAT *s)
 
        assert(b->ttype != TYPE_void);
 
-       if (s == NULL && (prop = BATgetprop(b, GDK_NUNIQUE)) != NULL)
-               bn = COLnew(0, TYPE_oid, prop->v.val.oval, TRANSIENT);
-       else
-               bn = COLnew(0, TYPE_oid, 1024, TRANSIENT);
+       BUN initsize = 1024;
+       if (s == NULL) {
+               MT_rwlock_rdlock(&b->batIdxLock);
+               if (b->thash != NULL && b->thash != (Hash *) 1)
+                       initsize = b->thash->nunique;
+               else if ((prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL)
+                       initsize = prop->v.val.oval;
+               MT_rwlock_rdunlock(&b->batIdxLock);
+       }
+       bn = COLnew(0, TYPE_oid, initsize, TRANSIENT);
        if (bn == NULL)
                return NULL;
        vals = Tloc(b, 0);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to