Changeset: 932837855b0a for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=932837855b0a Modified Files: gdk/gdk.h gdk/gdk_imprints.c Branch: default Log Message:
First take on implementing a bloom filter for a column. diffs (134 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2196,6 +2196,8 @@ gdk_export void IMPSdestroy(BAT *b); gdk_export BAT *BATimprints(BAT *b); gdk_export lng IMPSimprintsize(BAT *b); +gdk_export BAT *BATbloom(BAT *b); + /* * @- Multilevel Storage Modes * diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c --- a/gdk/gdk_imprints.c +++ b/gdk/gdk_imprints.c @@ -902,3 +902,118 @@ do { \ free(s); } #endif + +/* round hashing */ + +#define smpl_xor_rng(R,X) {\ +R = X; \ +R ^= (R<<13); \ +R ^= (R>>17); \ +R ^= (R<<5); \ +} + +#define hash_init(S,X,Y,Z) {\ +smpl_xor_rng(X,S); \ +smpl_xor_rng(Y,X); \ +smpl_xor_rng(Z,Y); \ +} + +#define next_hash(N,X,Y,Z) {\ +N = (X^(X<<3))^(Y^(Y>>19))^(Z^(Z<<6)); \ +X = Y; \ +Y = Z; \ +Z = N; \ +} + +#define hash_mod(V,MOD) ((V) % (MOD)) + +BAT * +BATbloom(BAT *b) { + BAT *bn; + BUN cnt; + BUN mn; + BUN p; + bit *o; + + assert(BAThdense(b)); /* assert void head */ + + switch (ATOMstorage(b->T->type)) { + case TYPE_bte: + case TYPE_sht: + case TYPE_int: + case TYPE_lng: + case TYPE_flt: + case TYPE_dbl: + break; + default: /* type not supported */ + GDKerror("#BATbloom: col type not " + "suitable for bloom filters.\n"); + return b; /* do nothing */ + } + + BATcheck(b, "BATblooms"); + + cnt = BATcount(b); + mn = 4 * cnt; /* make it power of 2 for faster modulo */ + + bn = BATnew(TYPE_void, TYPE_bit, mn); + if (bn == NULL) { + GDKerror("#BATbloom: memory allocation error"); + return NULL; + } + + o = (bit *) Tloc(bn, BUNfirst(bn)); + for (p = 0; (o[p] = 0) && (p < mn); p++); + +#define BLOOM_BUILD(TYPE) \ +do { \ + oid key,hv,x,y,z; /* for hashing */ \ + TYPE *ob = (TYPE *)Tloc(b, BUNfirst(b)); \ + for (p = 0; p < cnt; p++) { \ + key = (oid) ob[p]; \ + hash_init(key, x,y,z); \ + next_hash(hv, x,y,z); \ + o[hash_mod(hv,mn)] = 1; \ + next_hash(hv, x,y,z); \ + o[hash_mod(hv,mn)] = 1; \ + next_hash(hv, x,y,z); \ + o[hash_mod(hv,mn)] = 1; \ + } \ +} while (0) + switch (ATOMstorage(b->T->type)) { + case TYPE_bte: + BLOOM_BUILD(bte); + break; + case TYPE_sht: + BLOOM_BUILD(sht); + break; + case TYPE_int: + BLOOM_BUILD(int); + break; + case TYPE_lng: + BLOOM_BUILD(lng); + break; + case TYPE_flt: + BLOOM_BUILD(flt); + break; + case TYPE_dbl: + BLOOM_BUILD(dbl); + break; + default: + /* should never reach here */ + assert(0); + } + + /* property management */ + BATsetcount(bn, mn); + bn->trevsorted = 0; + bn->tsorted = 0; + bn->tkey = 0; + bn->tdense = 0; + bn->hdense = 1; + bn->hseqbase = 0; + bn->hkey = 1; + bn->hrevsorted = bn->batCount <= 1; + + return bn; +} _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list