Changeset: fce06d888f5b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fce06d888f5b
Modified Files:
        gdk/gdk.h
        gdk/gdk_hash.c
        gdk/gdk_hash.h
Branch: leftmart
Log Message:

prime number hash function for lng, int, oid etc.

This is a test implementation of the FNV hashing function.


diffs (134 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -671,6 +671,7 @@ typedef struct {
        BUN nil;                /* nil representation */
        BUN lim;                /* collision list size */
        BUN mask;               /* number of hash buckets-1 (power of 2) */
+       int n;                  /* the power of 2 (aka log_2(mask+1)) */
        void *Hash;             /* hash table */
        void *Link;             /* collision list */
        Heap *heap;             /* heap where the hash is stored */
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -87,6 +87,17 @@ HASHclear(Hash *h)
        memset(h->Hash, 0xFF, (h->mask + 1) * h->width);
 }
 
+static int
+log2mask(BUN m) {
+       int n = 0;
+
+       while (m) {
+               m >>= 1;
+               n++;
+       }
+       return n;
+}
+
 #define HASH_VERSION           1
 #define HASH_HEADER_SIZE       5 /* nr of size_t fields in header */
 
@@ -104,6 +115,7 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
                return NULL;
        h->lim = size;
        h->mask = mask - 1;
+       h->n = log2mask(mask);
        h->width = width;
        switch (width) {
        case BUN2:
@@ -230,6 +242,7 @@ BATcheckhash(BAT *b)
                                        h->lim = (BUN) hdata[1];
                                        h->type = ATOMtype(b->ttype);
                                        h->mask = (BUN) (hdata[2] - 1);
+                                       h->n = log2mask(h->mask+1);
                                        h->heap = hp;
                                        h->width = (int) hdata[3];
                                        switch (h->width) {
@@ -254,6 +267,8 @@ BATcheckhash(BAT *b)
                                        b->T->hash = h;
                                        ALGODEBUG fprintf(stderr, 
"#BATcheckhash: reusing persisted hash %s\n", BATgetId(b));
                                        
MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
+                                       IDXACCESS fprintf(stderr, 
"[%4d->%4d]:%s (" BUNFMT ") #BATcheckhash: load persistent hash index (ms=" 
LLFMT
+                                                                 ")\n", 
b->batCacheid,-VIEWtparent(b), "type", BATcount(b), GDKusec() - t);
                                        return 1;
                                }
                                GDKfree(h);
@@ -514,9 +529,10 @@ BAThash(BAT *b, BUN masksize)
                b->T->hash = h;
                t1 = GDKusec();
                ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT 
" usec\n", t1 - t0);
-               ALGODEBUG HASHcollisions(b, b->T->hash);
+               HASHcollisions(b, b->T->hash);
        }
        MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
+       IDXACCESS fprintf(stderr, "[%4d->%4d]:%s (" BUNFMT ") #BAThash: create 
hash index (ms=" LLFMT ")\n", b->batCacheid,-VIEWtparent(b), "type", 
BATcount(b), t1 - t0);
        return GDK_SUCCEED;
 }
 
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -128,6 +128,49 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
        } while (0)
 #endif
 
+
+/* FNV hash */
+
+#if SIZEOF_BUN == 8
+#define FNV_PRIME 0x100000001b3ULL
+#define FNV_INIT 0xcbf29ce484222325ULL
+#else
+#define FNV_PRIME 0x01000193
+#define FNV_INIT 0x811c9dc5
+#endif
+
+static inline BUN fnvhash_int(const void *v) {
+       unsigned int v_int = *(unsigned int *) v;
+       BUN r = FNV_INIT;
+#if SIZEOF_INT == 8
+       int octets = 8;
+#else
+       int octets = 4;
+#endif
+       while (octets--) {
+               r ^= (v_int & 0xff);
+               v_int >>= 8;
+               r *= FNV_PRIME;
+       }
+       return r;
+}
+
+static inline BUN fnvhash_lng(const void *v) {
+       ulng v_int = *(ulng *) v;
+       BUN r = FNV_INIT;
+       int octets = 8;
+       while (octets--) {
+               r ^= (v_int & 0xff);
+               v_int >>= 8;
+               r *= FNV_PRIME;
+       }
+       return r;
+}
+
+#define hash_int2(H,R) ((H)->n > 32 ? ((BUN) ((R) >> (H)->n) ^ ((R) & 
(H)->mask)) : ((BUN) ((((R) >> (H)->n) ^ (R)) & (H)->mask)))
+#define hash_int(H,V)  hash_int2(H, fnvhash_int(V))
+#define hash_lng(H,V)  hash_int2(H, fnvhash_lng(V))
+
 #define mix_bte(X)     ((unsigned int) (unsigned char) (X))
 #define mix_sht(X)     ((unsigned int) (unsigned short) (X))
 #define mix_int(X)     (((unsigned int) (X) >> 7) ^    \
@@ -145,9 +188,9 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
 #define heap_hash_any(hp,H,V)  ((hp) && (hp)->hashash ? ((BUN *) (V))[-1] & 
(H)->mask : hash_any(H,V))
 #define hash_bte(H,V)  (assert(((H)->mask & 0xFF) == 0xFF), (BUN) 
mix_bte(*(const unsigned char*) (V)))
 #define hash_sht(H,V)  (assert(((H)->mask & 0xFFFF) == 0xFFFF), (BUN) 
mix_sht(*(const unsigned short*) (V)))
-#define hash_int(H,V)  ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask)
+/*#define hash_int(H,V)        ((BUN) mix_int(*(const unsigned int *) (V)) & 
(H)->mask)*/
 /* XXX return size_t-sized value for 8-byte oid? */
-#define hash_lng(H,V)  ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask)
+/*#define hash_lng(H,V)        ((BUN) mix_lng(*(const ulng *) (V)) & 
(H)->mask)*/
 #ifdef HAVE_HGE
 #define hash_hge(H,V)  ((BUN) mix_hge(*(const uhge *) (V)) & (H)->mask)
 #endif
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to