Changeset: 97134b6f6871 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=97134b6f6871
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_hash.c
Branch: linear-hashing
Log Message:

Maintain a NUNIQUE property (number of unique values).


diffs (113 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2626,6 +2626,7 @@ enum prop_t {
        GDK_MIN_VALUE = 3,      /* smallest non-nil value in BAT */
        GDK_MAX_VALUE,          /* largest non-nil value in BAT */
        GDK_HASH_BUCKETS,       /* last used hash bucket size */
+       GDK_NUNIQUE,            /* number of unique values */
 };
 
 /*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1073,6 +1073,7 @@ BUNappend(BAT *b, const void *t, bool fo
 
        IMPSdestroy(b); /* no support for inserts in imprints yet */
        OIDXdestroy(b);
+       BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
        PROPrec *prop;
        do {
@@ -1087,6 +1088,9 @@ BUNappend(BAT *b, const void *t, bool fo
 #endif
        if (b->thash) {
                HASHins(b, p, t);
+               if (b->thash)
+                       BATsetprop(b, GDK_NUNIQUE,
+                                  TYPE_oid, &(oid){b->thash->nunique});
                if (tsize && tsize != b->tvheap->size)
                        HEAPwarm(b->tvheap);
        }
@@ -1156,6 +1160,7 @@ BUNdelete(BAT *b, oid o)
        IMPSdestroy(b);
        OIDXdestroy(b);
        HASHdestroy(b);
+       BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
        do {
                for (prop = b->tprops; prop; prop = prop->next)
@@ -1244,6 +1249,7 @@ BUNinplace(BAT *b, BUN p, const void *t,
                                BATrmprop(b, GDK_MIN_VALUE);
                        }
                }
+               BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
                do {
                        for (prop = b->tprops; prop; prop = prop->next)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -570,6 +570,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                        BATrmprop(b, GDK_MIN_VALUE);
                }
        }
+       BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
        do {
                for (prop = b->tprops; prop; prop = prop->next)
@@ -716,6 +717,8 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                }
                b->theap.dirty = true;
        }
+       if (b->thash)
+               BATsetprop(b, GDK_NUNIQUE, TYPE_oid, &(oid){b->thash->nunique});
        return GDK_SUCCEED;
 
       bunins_failed:
@@ -869,6 +872,7 @@ BATreplace(BAT *b, BAT *p, BAT *n, bool 
        HASHdestroy(b);
        OIDXdestroy(b);
        IMPSdestroy(b);
+       BATrmprop(b, GDK_NUNIQUE);
 
        b->tsorted = b->trevsorted = false;
        b->tnosorted = b->tnorevsorted = 0;
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -377,6 +377,11 @@ BATcheckhash(BAT *b)
                                                        GDK_HASH_BUCKETS,
                                                        TYPE_oid,
                                                        &(oid){NHASHBUCKETS(h)});
+                                               BATsetprop_nolock(
+                                                       b,
+                                                       GDK_NUNIQUE,
+                                                       TYPE_oid,
+                                                       &(oid){h->nunique});
                                                b->thash = h;
                                                ACCELDEBUG fprintf(stderr, "#BATcheckhash: reusing persisted hash %s\n", BATgetId(b));
                                                MT_lock_unset(&b->batIdxLock);
@@ -601,6 +606,9 @@ BAThash_impl(BAT *b, BAT *s, const char 
                /* if key, or if small, don't bother dynamically
                 * adjusting the hash mask */
                mask = HASHmask(cnt);
+       } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) {
+               assert(prop->v.vtype == TYPE_oid);
+               mask = prop->v.val.oval * 4 / 3;
        } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) != NULL) {
                assert(prop->v.vtype == TYPE_oid);
                mask = prop->v.val.oval;
@@ -750,8 +758,10 @@ BAThash_impl(BAT *b, BAT *s, const char 
                }
                break;
        }
-       if (s == NULL)
+       if (s == NULL) {
                BATsetprop_nolock(b, GDK_HASH_BUCKETS, TYPE_oid, &(oid){NHASHBUCKETS(h)});
+               BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){h->nunique});
+       }
        h->heapbckt.parentid = b->batCacheid;
        h->heaplink.parentid = b->batCacheid;
        /* if the number of unique values is equal to the bat count,
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to