Changeset: fce06d888f5b for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fce06d888f5b Modified Files: gdk/gdk.h gdk/gdk_hash.c gdk/gdk_hash.h Branch: leftmart Log Message:
prime number hash function for lng, int, oid etc. This is a test implementation of the FNV hashing function. diffs (134 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -671,6 +671,7 @@ typedef struct { BUN nil; /* nil representation */ BUN lim; /* collision list size */ BUN mask; /* number of hash buckets-1 (power of 2) */ + int n; /* the power of 2 (aka log_2(mask+1)) */ void *Hash; /* hash table */ void *Link; /* collision list */ Heap *heap; /* heap where the hash is stored */ diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -87,6 +87,17 @@ HASHclear(Hash *h) memset(h->Hash, 0xFF, (h->mask + 1) * h->width); } +static int +log2mask(BUN m) { + int n = 0; + + while (m) { + m >>= 1; + n++; + } + return n; +} + #define HASH_VERSION 1 #define HASH_HEADER_SIZE 5 /* nr of size_t fields in header */ @@ -104,6 +115,7 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN return NULL; h->lim = size; h->mask = mask - 1; + h->n = log2mask(mask); h->width = width; switch (width) { case BUN2: @@ -230,6 +242,7 @@ BATcheckhash(BAT *b) h->lim = (BUN) hdata[1]; h->type = ATOMtype(b->ttype); h->mask = (BUN) (hdata[2] - 1); + h->n = log2mask(h->mask+1); h->heap = hp; h->width = (int) hdata[3]; switch (h->width) { @@ -254,6 +267,8 @@ BATcheckhash(BAT *b) b->T->hash = h; ALGODEBUG fprintf(stderr, "#BATcheckhash: reusing persisted hash %s\n", BATgetId(b)); MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); + IDXACCESS fprintf(stderr, "[%4d->%4d]:%s (" BUNFMT ") #BATcheckhash: load persistent hash index (ms=" LLFMT + ")\n", b->batCacheid,-VIEWtparent(b), "type", BATcount(b), GDKusec() - t); return 1; } GDKfree(h); @@ -514,9 +529,10 @@ BAThash(BAT *b, BUN masksize) b->T->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0); - ALGODEBUG HASHcollisions(b, b->T->hash); + HASHcollisions(b, b->T->hash); } MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); + IDXACCESS fprintf(stderr, "[%4d->%4d]:%s (" BUNFMT ") #BAThash: create hash index (ms=" LLFMT ")\n", b->batCacheid,-VIEWtparent(b), "type", BATcount(b), t1 - t0); return GDK_SUCCEED; } diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h --- a/gdk/gdk_hash.h +++ b/gdk/gdk_hash.h @@ -128,6 +128,49 @@ gdk_export BUN HASHlist(Hash *h, BUN i); } while (0) #endif + +/* FNV hash */ + +#if SIZEOF_BUN == 8 +#define FNV_PRIME 0x100000001b3ULL +#define FNV_INIT 0xcbf29ce484222325ULL +#else +#define FNV_PRIME 0x01000193 +#define FNV_INIT 0x811c9dc5 +#endif + +static inline BUN fnvhash_int(const void *v) { + unsigned int v_int = *(unsigned int *) v; + BUN r = FNV_INIT; +#if SIZEOF_INT == 8 + int octets = 8; +#else + int octets = 4; +#endif + while (octets--) { + r ^= (v_int & 0xff); + v_int >>= 8; + r *= FNV_PRIME; + } + return r; +} + +static inline BUN fnvhash_lng(const void *v) { + ulng v_int = *(ulng *) v; + BUN r = FNV_INIT; + int octets = 8; + while (octets--) { + r ^= (v_int & 0xff); + v_int >>= 8; + r *= FNV_PRIME; + } + return r; +} + +#define hash_int2(H,R) ((H)->n > 32 ? ((BUN) ((R) >> (H)->n) ^ ((R) & (H)->mask)) : ((BUN) ((((R) >> (H)->n) ^ (R)) & (H)->mask))) +#define hash_int(H,V) hash_int2(H, fnvhash_int(V)) +#define hash_lng(H,V) hash_int2(H, fnvhash_lng(V)) + #define mix_bte(X) ((unsigned int) (unsigned char) (X)) #define mix_sht(X) ((unsigned int) (unsigned short) (X)) #define mix_int(X) (((unsigned int) (X) >> 7) ^ \ @@ -145,9 +188,9 @@ gdk_export BUN HASHlist(Hash *h, BUN i); #define heap_hash_any(hp,H,V) ((hp) && (hp)->hashash ? ((BUN *) (V))[-1] & (H)->mask : hash_any(H,V)) #define hash_bte(H,V) (assert(((H)->mask & 0xFF) == 0xFF), (BUN) mix_bte(*(const unsigned char*) (V))) #define hash_sht(H,V) (assert(((H)->mask & 0xFFFF) == 0xFFFF), (BUN) mix_sht(*(const unsigned short*) (V))) -#define hash_int(H,V) ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask) +/*#define hash_int(H,V) ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask)*/ /* XXX return size_t-sized value for 8-byte oid? */ -#define hash_lng(H,V) ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask) +/*#define hash_lng(H,V) ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask)*/ #ifdef HAVE_HGE #define hash_hge(H,V) ((BUN) mix_hge(*(const uhge *) (V)) & (H)->mask) #endif _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list