Changeset: a53f3b4db482 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a53f3b4db482
Modified Files:
	gdk/gdk.h
	gdk/gdk_imprints.c
	monetdb5/modules/mal/batExtensions.c
	monetdb5/modules/mal/batExtensions.h
	monetdb5/modules/mal/batExtensions.mal
Branch: transaction-replication
Log Message:
Merge with default branch diffs (250 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2197,6 +2197,7 @@ gdk_export BAT *BATimprints(BAT *b); gdk_export lng IMPSimprintsize(BAT *b); gdk_export BAT *BATbloom(BAT *b); +gdk_export BAT *BLOOMselect(BAT *b, BAT *s, BAT *bf); /* * @- Multilevel Storage Modes diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c --- a/gdk/gdk_imprints.c +++ b/gdk/gdk_imprints.c @@ -28,6 +28,7 @@ #include "gdk.h" #include "gdk_private.h" #include "gdk_imprints.h" +#include "gdk_calc_private.h" #define BINSIZE(B, FUNC, T) do { \ switch (B) { \ @@ -1019,3 +1020,123 @@ do { \ return bn; } + +BAT * +BLOOMselect(BAT *b, BAT *s, BAT *bf) { + BAT *bn; + BUN start, end, cnt, mn; + const oid *cand = NULL, *candend = NULL; + const bit *bloom; + + assert(BAThdense(b)); /* assert void head */ + assert(BAThdense(bf)); /* assert void head*/ + + switch (ATOMstorage(b->T->type)) { + case TYPE_bte: + case TYPE_sht: + case TYPE_int: + case TYPE_lng: + case TYPE_flt: + case TYPE_dbl: + break; + default: /* type not supported */ + GDKerror("#BATbloom: b col type not " + "suitable for bloom filters.\n"); + return NULL; /* do nothing */ + } + + if (BATttype(bf) != TYPE_bit) { + GDKerror("#BATbloom: bf col type not " + "a bloom filters.\n"); + return NULL; /* do nothing */ + } + + bloom = (bit *) Tloc(bf, BUNfirst(bf)); + mn = BATcount(bf); + + CANDINIT(b, s, start, end, cnt, cand, candend); + + if (start == end) { + /* trivial: empty result */ + bn = BATnew(TYPE_void, TYPE_void, 0); + if (bn == NULL) { + return NULL; + } + BATsetcount(bn, 0); + BATseqbase(bn, 0); + BATseqbase(BATmirror(bn), b->hseqbase); + return bn; + } + + bn = BATnew(TYPE_void, TYPE_oid, 1024); + if (bn == NULL) { + return NULL; + } + +#define TEST_BLOOM(TYPE) \ +do { \ + oid key,hv,x,y,z; /* for hashing */ \ + oid i, o; \ + TYPE *ob = (TYPE *)Tloc(b, BUNfirst(b)); \ + for (;;) { \ + if (cand) { \ + if (cand == candend) \ + break; \ + i = *cand++ - b->hseqbase; \ 
+ if (i >= end) \ + break; \ + } else { \ + i = start++; \ + if (i == end) \ + break; \ + } \ + key = ob[i]; \ + hash_init(key, x,y,z); \ + next_hash(hv, x,y,z); \ + if (bloom[hash_mod(hv,mn)]) { \ + next_hash(hv, x,y,z); \ + if (bloom[hash_mod(hv,mn)]) { \ + next_hash(hv, x,y,z); \ + if (bloom[hash_mod(hv,mn)]) {\ + o = i + b->hseqbase; \ + bunfastapp(bn, &o); \ + } \ + } \ + } \ + } \ +} while (0) + switch (ATOMstorage(b->T->type)) { + case TYPE_bte: + TEST_BLOOM(bte); + break; + case TYPE_sht: + TEST_BLOOM(sht); + break; + case TYPE_int: + TEST_BLOOM(int); + break; + case TYPE_lng: + TEST_BLOOM(lng); + break; + case TYPE_flt: + TEST_BLOOM(flt); + break; + case TYPE_dbl: + TEST_BLOOM(dbl); + break; + default: + /* should never reach here */ + assert(0); + } + + bn->tsorted = 1; + bn->trevsorted = BATcount(bn) <= 1; + bn->tkey = 1; + bn->T->nil = 0; + bn->T->nonil = 1; + return bn; + +bunins_failed: + BBPreclaim(bn); + return NULL; +} diff --git a/monetdb5/modules/mal/batExtensions.c b/monetdb5/modules/mal/batExtensions.c --- a/monetdb5/modules/mal/batExtensions.c +++ b/monetdb5/modules/mal/batExtensions.c @@ -283,6 +283,7 @@ CMDBATimprints(int *ret, int *bid) BBPkeepref(*ret = b->batCacheid); return MAL_SUCCEED; } + str CMDBATimprintsize(lng *ret, int *bid) { @@ -295,3 +296,45 @@ CMDBATimprintsize(lng *ret, int *bid) BBPreleaseref(b->batCacheid); return MAL_SUCCEED; } + +str +CMDBATbloom(int *ret, int *bid) +{ + BAT *b, *bn; + + if ((b = BATdescriptor(*bid)) == NULL) + throw(MAL, "bat.bloom", INTERNAL_BAT_ACCESS); + + bn = BATbloom(b); + BBPkeepref(*ret = bn->batCacheid); + BBPreleaseref(b->batCacheid); + return MAL_SUCCEED; +} + +str +CMDBLOOMselect(int *ret, int *bid, int *sid, int *bfid) +{ + BAT *b, *s, *bf, *bn; + + if ((b = BATdescriptor(*bid)) == NULL) + throw(MAL, "bat.bloom", INTERNAL_BAT_ACCESS); + + if ((s = BATdescriptor(*sid)) == NULL) + throw(MAL, "bat.bloom", INTERNAL_BAT_ACCESS); + + if ((bf = BATdescriptor(*bfid)) == NULL) + throw(MAL, 
"bat.bloom", INTERNAL_BAT_ACCESS); + + bn = BLOOMselect(b, s, bf); + if (bn == NULL) { + BBPunfix(b->batCacheid); + BBPunfix(s->batCacheid); + BBPunfix(bf->batCacheid); + throw(MAL,"bat.bloom", INTERNAL_OBJ_CREATE); + } + BBPkeepref(*ret = bn->batCacheid); + BBPreleaseref(b->batCacheid); + BBPreleaseref(s->batCacheid); + BBPreleaseref(bf->batCacheid); + return MAL_SUCCEED; +} diff --git a/monetdb5/modules/mal/batExtensions.h b/monetdb5/modules/mal/batExtensions.h --- a/monetdb5/modules/mal/batExtensions.h +++ b/monetdb5/modules/mal/batExtensions.h @@ -44,5 +44,8 @@ be_export str CMDBATpartition(Client cnt be_export str CMDBATpartition2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); be_export str CMDBATimprints(int *ret, int *bid); be_export str CMDBATimprintsize(lng *ret, int *bid); +be_export str CMDBATbloom(int *ret, int *bid); +be_export str CMDBLOOMselect(int *ret, int *bid, int *sid, int *bfid); + #endif /* _BAT_EXTENSIONS_ */ diff --git a/monetdb5/modules/mal/batExtensions.mal b/monetdb5/modules/mal/batExtensions.mal --- a/monetdb5/modules/mal/batExtensions.mal +++ b/monetdb5/modules/mal/batExtensions.mal @@ -72,3 +72,31 @@ command bat.imprintsize(b:bat[:oid,:dbl] address CMDBATimprintsize comment "Return the size of the imprints"; +command bat.bloom(b:bat[:oid,:bte]):bat[:oid,:bit] +address CMDBATbloom; +command bat.bloom(b:bat[:oid,:sht]):bat[:oid,:bit] +address CMDBATbloom; +command bat.bloom(b:bat[:oid,:int]):bat[:oid,:bit] +address CMDBATbloom; +command bat.bloom(b:bat[:oid,:lng]):bat[:oid,:bit] +address CMDBATbloom; +command bat.bloom(b:bat[:oid,:flt]):bat[:oid,:bit] +address CMDBATbloom; +command bat.bloom(b:bat[:oid,:dbl]):bat[:oid,:bit] +address CMDBATbloom +comment "Create a Bloom filter on the BAT"; + +command bat.bloomselect(b:bat[:oid,:bte], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address CMDBLOOMselect; +command bat.bloomselect(b:bat[:oid,:sht], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address 
CMDBLOOMselect; +command bat.bloomselect(b:bat[:oid,:int], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address CMDBLOOMselect; +command bat.bloomselect(b:bat[:oid,:lng], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address CMDBLOOMselect; +command bat.bloomselect(b:bat[:oid,:flt], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address CMDBLOOMselect; +command bat.bloomselect(b:bat[:oid,:dbl], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid] +address CMDBLOOMselect +comment "Select using a Bloom filter"; + _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list