Changeset: 9e5542ae6ee1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9e5542ae6ee1
Removed Files:
        gdk/gdk_bloom.c
        gdk/gdk_bloom.h
Modified Files:
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_hash.c
Branch: orderidx
Log Message:

Remove bloom filters implementation


diffs (truncated from 508 to 300 lines):

diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -28,7 +28,6 @@ lib_gdk = {
                gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \
                gdk_aggr.c gdk_group.c \
                gdk_imprints.c gdk_imprints.h \
-               gdk_bloom.c gdk_bloom.h \
                gdk_join.c gdk_project.c \
                gdk_unique.c \
                gdk_firstn.c \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -673,7 +673,6 @@ typedef struct {
        BUN nil;                /* nil representation */
        BUN lim;                /* collision list size */
        BUN mask;               /* number of hash buckets-1 (power of 2) */
-       int n;                  /* the power of 2 (aka log_2(mask+1)) */
        void *Hash;             /* hash table */
        void *Link;             /* collision list */
        Heap *heap;             /* heap where the hash is stored */
@@ -804,7 +803,6 @@ gdk_export int VALisnil(const ValRecord 
  *           Hash   *hhash;           // linear chained hash table on head
  *           Imprints *himprints;     // column imprints index on head
  *           orderidx horderidx;      // order oid index on head
- *           bloomfilter hbloom;      // bloomfilter on head
  *           // Tail properties
  *           int    ttype;            // Tail type number
  *           str    tident;           // name for tail column
@@ -819,7 +817,6 @@ gdk_export int VALisnil(const ValRecord 
  *           Hash   *thash;           // linear chained hash table on tail
  *           Imprints *timprints;     // column imprints index on tail
  *           orderidx torderidx;      // order oid index on tail
- *           bloomfilter tbloom;      // bloom filter on tail
  *  } BAT;
  * @end verbatim
  *
@@ -896,7 +893,6 @@ typedef struct {
        Hash *hash;             /* hash table */
        Imprints *imprints;     /* column imprints index */
        Heap *orderidx;         /* order oid index */
-       Bloomfilter *bloom;     /* a bloomfilter */
 
        PROPrec *props;         /* list of dynamic properties stored in the bat 
descriptor */
 } COLrec;
@@ -971,8 +967,6 @@ typedef int (*GDKfcn) ();
 #define talign         T->align
 #define horderidx      H->orderidx
 #define torderidx      T->orderidx
-#define hbloom         H->bloom
-#define tbloom         T->bloom
 
 
 
@@ -2038,9 +2032,6 @@ gdk_export lng IMPSimprintsize(BAT *b);
 gdk_export gdk_return BATorderidx(BAT *b, int stable);
 gdk_export gdk_return GDKmergeidx(BAT *b, BAT**a, int n_ar);
 
-/* The bloom filters */
-gdk_export gdk_return BATbloom(BAT *b);
-
 /*
  * @- Multilevel Storage Modes
  *
diff --git a/gdk/gdk_bloom.c b/gdk/gdk_bloom.c
deleted file mode 100644
--- a/gdk/gdk_bloom.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0.  If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V.
- */
-
-/*
- * Implementation of bloom filters.
- * L.Sidirourgos
- */
-
-#include "monetdb_config.h"
-#include "gdk.h"
-#include "gdk_private.h"
-#include "gdk_bloom.h"
-
-#define BLOOM_VERSION  1
-#define BLOOM_HEADER_SIZE      4 /* nr of size_t fields in header */
-
-int
-BATcheckbloom(BAT *b)
-{
-       int ret;
-
-       MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
-
-       /* if b does not have a bloom filter, but the parent has, then use it */
-       if ((b->T->bloom == NULL) && VIEWtparent(b)) {
-               BAT *pb = BATmirror(BATdescriptor(VIEWtparent(b)));
-               b->T->bloom = pb->T->bloom;
-               BBPunfix(pb->batCacheid);
-       }
-       ret = b->T->bloom != NULL;
-       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
-
-       ALGODEBUG if (ret) fprintf(stderr, "#BATcheckbloom: already has a bloom 
%d\n", b->batCacheid);
-       return ret;
-}
-
-static BUN
-BLOOMsize(BUN cnt) {
-       BUN m = cnt;
-
-       /* smaller power of 2 that is greater or equal to cnt */
-       --m;
-       m |= m >> 1;
-       m |= m >> 2;
-       m |= m >> 4;
-       m |= m >> 8;
-       m |= m >> 16;
-#if SIZEOF_BUN == 8
-       m |= m >> 32;
-#endif
-       m++;
-
-       /* double it */
-       m <<= 3;
-
-       /* if m is almost 2*cnt, double again */
-       if (m / cnt == 2)
-               m <<= 1;
-
-       return m;
-}
-
-static void bloom_stats(BAT *b, Bloomfilter *bloom) {
-       BUN on = 0, off=0;
-       size_t i, j;
-       unsigned char *filter = (unsigned char *) bloom->filter->base;
-
-
-       for (i=0; i < bloom->bytes; i++)
-               for (j=0;j<8;j++)
-                       if (filter[i] & (1 << j))
-                               on++;
-                       else
-                               off++;
-       fprintf(stderr, "#BATbloom(b=%s#" BUNFMT ") %s: bits on = " BUNFMT ", 
bits off = " BUNFMT "\n",
-               BATgetId(b), BATcount(b), b->T->heap.filename, on, off);
-}
-
-#define BLOOM_BUILD(TYPE)                                                      
\
-do {                                                                           
\
-       const TYPE *restrict col = (TYPE *) Tloc(b, b->batFirst);               
\
-       BUN p;                                                                  
\
-       BUN key,mv,hv,x,y,z;                                                    
\
-       for (p=0; p<cnt; p++) {                                                 
\
-               key = (BUN) col[p];                                             
\
-               hash_init(key, x,y,z);                                          
\
-               next_hash(hv, x,y,z);                                           
\
-               mv = modulor(hv,bloom->mask);                                   
\
-               filter[quotient8(mv)] |= (1 << remainder8(mv));                 
\
-               next_hash(hv, x,y,z);                                           
\
-               mv = modulor(hv,bloom->mask);                                   
\
-               filter[quotient8(mv)] |= (1 << remainder8(mv));                 
\
-               next_hash(hv, x,y,z);                                           
\
-               mv = modulor(hv,bloom->mask);                                   
\
-               filter[quotient8(mv)] |= (1 << remainder8(mv));                 
\
-               next_hash(hv, x,y,z);                                           
\
-               mv = modulor(hv,bloom->mask);                                   
\
-               filter[quotient8(mv)] |= (1 << remainder8(mv));                 
\
-       }                                                                       
\
-} while (0)
-
-gdk_return
-BATbloom(BAT *b)
-{
-       lng t0 = 0, t1 = 0;
-
-       assert(BAThdense(b));   /* assert void head */
-
-       /* we only create bloom filters for types that look like types we know 
*/
-       switch (ATOMbasetype(b->T->type)) {
-       case TYPE_bte:
-       case TYPE_sht:
-       case TYPE_int:
-       case TYPE_lng:
-#ifdef HAVE_HGE
-       case TYPE_hge:
-#endif
-       case TYPE_flt:
-       case TYPE_dbl:
-               break;
-       default:                /* type not supported */
-               /* doesn't look enough like base type: do nothing */
-               /* GDKerror("BATbloom: unsupported type\n"); */
-               return GDK_FAIL;
-       }
-
-       BATcheck(b, "BATbloom", GDK_FAIL);
-
-       if (BATcheckbloom(b))
-               return GDK_SUCCEED;
-       assert(b->T->bloom == NULL);
-
-       MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
-       t0 = GDKusec();
-
-       if (b->T->bloom == NULL) {
-               BUN cnt;
-               Bloomfilter *bloom;
-               unsigned char *filter;
-               size_t i;
-
-               bloom = (Bloomfilter *) GDKzalloc(sizeof(Bloomfilter));
-               if (bloom == NULL) {
-                       GDKerror("#BATbloom: memory allocation error");
-                       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
-                       return GDK_FAIL;
-               }
-               bloom->filter = (Heap *) GDKzalloc(sizeof(Heap));
-               if (bloom->filter == NULL) {
-                       GDKerror("#BATbloom: memory allocation error");
-                       GDKfree(bloom);
-                       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
-                       return GDK_FAIL;
-               }
-
-               cnt = BATcount(b);
-               /* TODO: check also if max-min < mbits and use identiry hash */
-               if ( (ATOMbasetype(b->T->type) == TYPE_bte && (bloom->mbits = 
(1 << 8))) ||
-                    (ATOMbasetype(b->T->type) == TYPE_sht && (bloom->mbits = 
(1 << 16))) ) {
-                       bloom->kfunc = 1;
-                       bloom->mask = bloom->mbits-1;
-                       bloom->bytes = quotient8(bloom->mbits);
-               } else {
-                       bloom->mbits = BLOOMsize(cnt);
-                       /* 2 functions if the ratio is close to 3, 3 otherwise 
*/
-                       bloom->kfunc = bloom->mbits/cnt == 3 ? 2 : 3;
-                       bloom->mask = bloom->mbits-1;
-                       bloom->bytes = quotient8(bloom->mbits) + 1;
-               }
-
-               if (HEAPalloc(bloom->filter, bloom->bytes, 1) != GDK_SUCCEED) {
-                       GDKerror("#BATbloom: memory allocation error");
-                       GDKfree(bloom->filter);
-                       GDKfree(bloom);
-                       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
-                       return GDK_FAIL;
-               }
-
-               ALGODEBUG fprintf(stderr, "#BATbloom(b=%s#" BUNFMT ") %s: "
-                               "create bloom filter: mbits = " BUNFMT ", ratio 
= " BUNFMT
-                               ", kfunc = %d, bytes = " SZFMT "\n", 
BATgetId(b),
-                               BATcount(b), b->T->heap.filename,
-                               bloom->mbits,  bloom->mbits/BATcount(b),
-                               bloom->kfunc, bloom->bytes);
-
-               filter = (unsigned char *) bloom->filter->base;
-
-               for (i=0; i < bloom->bytes; i++)
-                       filter[i] = 0;
-
-               switch (ATOMbasetype(b->T->type)) {
-               case TYPE_bte:
-               {
-                       const unsigned char *restrict col = (unsigned char *) 
Tloc(b, b->batFirst);
-                       BUN p;
-                       assert(bloom->kfunc == 1);
-                       for (p=0; p<cnt; p++)
-                               filter[quotient8(col[p])] |= (1 << 
remainder8(col[p]));
-               }
-               break;
-               case TYPE_sht:
-               {
-                       const unsigned short *restrict col = (unsigned short *) 
Tloc(b, b->batFirst);
-                       BUN p;
-                       assert(bloom->kfunc == 1);
-                       for (p=0; p<cnt; p++)
-                               filter[quotient8(col[p])] |= (1 << 
remainder8(col[p]));
-               }
-               break;
-               case TYPE_int: BLOOM_BUILD(int); break;
-               case TYPE_lng: BLOOM_BUILD(lng); break;
-#ifdef HAVE_HGE
-               case TYPE_hge: BLOOM_BUILD(hge); break;
-#endif
-               case TYPE_flt: BLOOM_BUILD(flt); break;
-               case TYPE_dbl: BLOOM_BUILD(dbl); break;
-               default:
-                       /* should never reach here */
-                       assert(0);
-                       HEAPfree(bloom->filter,1);
-                       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
-                       return GDK_FAIL;
-               }
-
-               b->T->bloom = bloom;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to