Changeset: 9e5542ae6ee1 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9e5542ae6ee1 Removed Files: gdk/gdk_bloom.c gdk/gdk_bloom.h Modified Files: gdk/Makefile.ag gdk/gdk.h gdk/gdk_hash.c Branch: orderidx Log Message:
Remove bloom filters implementation diffs (truncated from 508 to 300 lines): diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag --- a/gdk/Makefile.ag +++ b/gdk/Makefile.ag @@ -28,7 +28,6 @@ lib_gdk = { gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \ gdk_aggr.c gdk_group.c \ gdk_imprints.c gdk_imprints.h \ - gdk_bloom.c gdk_bloom.h \ gdk_join.c gdk_project.c \ gdk_unique.c \ gdk_firstn.c \ diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -673,7 +673,6 @@ typedef struct { BUN nil; /* nil representation */ BUN lim; /* collision list size */ BUN mask; /* number of hash buckets-1 (power of 2) */ - int n; /* the power of 2 (aka log_2(mask+1)) */ void *Hash; /* hash table */ void *Link; /* collision list */ Heap *heap; /* heap where the hash is stored */ @@ -804,7 +803,6 @@ gdk_export int VALisnil(const ValRecord * Hash *hhash; // linear chained hash table on head * Imprints *himprints; // column imprints index on head * orderidx horderidx; // order oid index on head - * bloomfilter hbloom; // bloomfilter on head * // Tail properties * int ttype; // Tail type number * str tident; // name for tail column @@ -819,7 +817,6 @@ gdk_export int VALisnil(const ValRecord * Hash *thash; // linear chained hash table on tail * Imprints *timprints; // column imprints index on tail * orderidx torderidx; // order oid index on tail - * bloomfilter tbloom; // bloom filter on tail * } BAT; * @end verbatim * @@ -896,7 +893,6 @@ typedef struct { Hash *hash; /* hash table */ Imprints *imprints; /* column imprints index */ Heap *orderidx; /* order oid index */ - Bloomfilter *bloom; /* a bloomfilter */ PROPrec *props; /* list of dynamic properties stored in the bat descriptor */ } COLrec; @@ -971,8 +967,6 @@ typedef int (*GDKfcn) (); #define talign T->align #define horderidx H->orderidx #define torderidx T->orderidx -#define hbloom H->bloom -#define tbloom T->bloom @@ -2038,9 +2032,6 @@ gdk_export lng IMPSimprintsize(BAT *b); gdk_export gdk_return BATorderidx(BAT *b, int stable); gdk_export gdk_return GDKmergeidx(BAT *b, BAT**a, int n_ar); -/* The bloom filters */ -gdk_export gdk_return BATbloom(BAT *b); - /* * @- Multilevel Storage Modes * diff --git a/gdk/gdk_bloom.c b/gdk/gdk_bloom.c deleted file mode 100644 --- a/gdk/gdk_bloom.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V. - */ - -/* - * Implementation of bloom filters. - * L.Sidirourgos - */ - -#include "monetdb_config.h" -#include "gdk.h" -#include "gdk_private.h" -#include "gdk_bloom.h" - -#define BLOOM_VERSION 1 -#define BLOOM_HEADER_SIZE 4 /* nr of size_t fields in header */ - -int -BATcheckbloom(BAT *b) -{ - int ret; - - MT_lock_set(&GDKhashLock(abs(b->batCacheid))); - - /* if b does not have a bloom filter, but the parent has, then use it */ - if ((b->T->bloom == NULL) && VIEWtparent(b)) { - BAT *pb = BATmirror(BATdescriptor(VIEWtparent(b))); - b->T->bloom = pb->T->bloom; - BBPunfix(pb->batCacheid); - } - ret = b->T->bloom != NULL; - MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); - - ALGODEBUG if (ret) fprintf(stderr, "#BATcheckbloom: already has a bloom %d\n", b->batCacheid); - return ret; -} - -static BUN -BLOOMsize(BUN cnt) { - BUN m = cnt; - - /* smaller power of 2 that is greater or equal to cnt */ - --m; - m |= m >> 1; - m |= m >> 2; - m |= m >> 4; - m |= m >> 8; - m |= m >> 16; -#if SIZEOF_BUN == 8 - m |= m >> 32; -#endif - m++; - - /* double it */ - m <<= 3; - - /* if m is almost 2*cnt, double again */ - if (m / cnt == 2) - m <<= 1; - - return m; -} - -static void bloom_stats(BAT *b, Bloomfilter *bloom) { - BUN on = 0, off=0; - size_t i, j; - unsigned char *filter = (unsigned char *) bloom->filter->base; - - - for (i=0; i < bloom->bytes; i++) - for (j=0;j<8;j++) - if (filter[i] & (1 << j)) - on++; - else - off++; - fprintf(stderr, "#BATbloom(b=%s#" BUNFMT ") %s: bits on = " BUNFMT ", bits off = " BUNFMT "\n", - BATgetId(b), BATcount(b), b->T->heap.filename, on, off); -} - -#define BLOOM_BUILD(TYPE) \ -do { \ - const TYPE *restrict col = (TYPE *) Tloc(b, b->batFirst); \ - BUN p; \ - BUN key,mv,hv,x,y,z; \ - for (p=0; p<cnt; p++) { \ - key = (BUN) col[p]; \ - hash_init(key, x,y,z); \ - next_hash(hv, x,y,z); \ - mv = modulor(hv,bloom->mask); \ - filter[quotient8(mv)] |= (1 << remainder8(mv)); \ - next_hash(hv, x,y,z); \ - mv = modulor(hv,bloom->mask); \ - filter[quotient8(mv)] |= (1 << remainder8(mv)); \ - next_hash(hv, x,y,z); \ - mv = modulor(hv,bloom->mask); \ - filter[quotient8(mv)] |= (1 << remainder8(mv)); \ - next_hash(hv, x,y,z); \ - mv = modulor(hv,bloom->mask); \ - filter[quotient8(mv)] |= (1 << remainder8(mv)); \ - } \ -} while (0) - -gdk_return -BATbloom(BAT *b) -{ - lng t0 = 0, t1 = 0; - - assert(BAThdense(b)); /* assert void head */ - - /* we only create bloom filters for types that look like types we know */ - switch (ATOMbasetype(b->T->type)) { - case TYPE_bte: - case TYPE_sht: - case TYPE_int: - case TYPE_lng: -#ifdef HAVE_HGE - case TYPE_hge: -#endif - case TYPE_flt: - case TYPE_dbl: - break; - default: /* type not supported */ - /* doesn't look enough like base type: do nothing */ - /* GDKerror("BATbloom: unsupported type\n"); */ - return GDK_FAIL; - } - - BATcheck(b, "BATbloom", GDK_FAIL); - - if (BATcheckbloom(b)) - return GDK_SUCCEED; - assert(b->T->bloom == NULL); - - MT_lock_set(&GDKhashLock(abs(b->batCacheid))); - t0 = GDKusec(); - - if (b->T->bloom == NULL) { - BUN cnt; - Bloomfilter *bloom; - unsigned char *filter; - size_t i; - - bloom = (Bloomfilter *) GDKzalloc(sizeof(Bloomfilter)); - if (bloom == NULL) { - GDKerror("#BATbloom: memory allocation error"); - MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); - return GDK_FAIL; - } - bloom->filter = (Heap *) GDKzalloc(sizeof(Heap)); - if (bloom->filter == NULL) { - GDKerror("#BATbloom: memory allocation error"); - GDKfree(bloom); - MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); - return GDK_FAIL; - } - - cnt = BATcount(b); - /* TODO: check also if max-min < mbits and use identiry hash */ - if ( (ATOMbasetype(b->T->type) == TYPE_bte && (bloom->mbits = (1 << 8))) || - (ATOMbasetype(b->T->type) == TYPE_sht && (bloom->mbits = (1 << 16))) ) { - bloom->kfunc = 1; - bloom->mask = bloom->mbits-1; - bloom->bytes = quotient8(bloom->mbits); - } else { - bloom->mbits = BLOOMsize(cnt); - /* 2 functions if the ratio is close to 3, 3 otherwise */ - bloom->kfunc = bloom->mbits/cnt == 3 ? 2 : 3; - bloom->mask = bloom->mbits-1; - bloom->bytes = quotient8(bloom->mbits) + 1; - } - - if (HEAPalloc(bloom->filter, bloom->bytes, 1) != GDK_SUCCEED) { - GDKerror("#BATbloom: memory allocation error"); - GDKfree(bloom->filter); - GDKfree(bloom); - MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); - return GDK_FAIL; - } - - ALGODEBUG fprintf(stderr, "#BATbloom(b=%s#" BUNFMT ") %s: " - "create bloom filter: mbits = " BUNFMT ", ratio = " BUNFMT - ", kfunc = %d, bytes = " SZFMT "\n", BATgetId(b), - BATcount(b), b->T->heap.filename, - bloom->mbits, bloom->mbits/BATcount(b), - bloom->kfunc, bloom->bytes); - - filter = (unsigned char *) bloom->filter->base; - - for (i=0; i < bloom->bytes; i++) - filter[i] = 0; - - switch (ATOMbasetype(b->T->type)) { - case TYPE_bte: - { - const unsigned char *restrict col = (unsigned char *) Tloc(b, b->batFirst); - BUN p; - assert(bloom->kfunc == 1); - for (p=0; p<cnt; p++) - filter[quotient8(col[p])] |= (1 << remainder8(col[p])); - } - break; - case TYPE_sht: - { - const unsigned short *restrict col = (unsigned short *) Tloc(b, b->batFirst); - BUN p; - assert(bloom->kfunc == 1); - for (p=0; p<cnt; p++) - filter[quotient8(col[p])] |= (1 << remainder8(col[p])); - } - break; - case TYPE_int: BLOOM_BUILD(int); break; - case TYPE_lng: BLOOM_BUILD(lng); break; -#ifdef HAVE_HGE - case TYPE_hge: BLOOM_BUILD(hge); break; -#endif - case TYPE_flt: BLOOM_BUILD(flt); break; - case TYPE_dbl: BLOOM_BUILD(dbl); break; - default: - /* should never reach here */ - assert(0); - HEAPfree(bloom->filter,1); - MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); - return GDK_FAIL; - } - - b->T->bloom = bloom; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list