Changeset: 1279200883a7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1279200883a7
Modified Files:
        gdk/gdk_bloom.c
        gdk/gdk_join.c
Branch: leftmart
Log Message:

Use larger bloom filters to reduce false positives.


diffs (61 lines):

diff --git a/gdk/gdk_bloom.c b/gdk/gdk_bloom.c
--- a/gdk/gdk_bloom.c
+++ b/gdk/gdk_bloom.c
@@ -56,7 +56,7 @@ BLOOMsize(BUN cnt) {
        m++;
 
        /* double it */
-       m <<= 1;
+       m <<= 3;
 
        /* if m is almost 2*cnt, double again */
        if (m / cnt == 2)
@@ -95,11 +95,12 @@ do {                                                        \
                next_hash(hv, x,y,z);                                           \
                mv = modulor(hv,bloom->mask);                                   \
                filter[quotient8(mv)] |= (1 << remainder8(mv));                 \
-               if (bloom->kfunc == 3) {                                        \
-                       next_hash(hv, x,y,z);                                   \
-                       mv = modulor(hv,bloom->mask);                           \
-                       filter[quotient8(mv)] |= (1 << remainder8(mv));         \
-               }                                                               \
+               next_hash(hv, x,y,z);                                           \
+               mv = modulor(hv,bloom->mask);                                   \
+               filter[quotient8(mv)] |= (1 << remainder8(mv));                 \
+               next_hash(hv, x,y,z);                                           \
+               mv = modulor(hv,bloom->mask);                                   \
+               filter[quotient8(mv)] |= (1 << remainder8(mv));                 \
        }                                                                       \
 } while (0)
 
@@ -258,10 +259,15 @@ int BLOOMask(BUN v, Bloomfilter *bloom)
                next_hash(hv, x,y,z);
                mv = modulor(hv, bloom->mask);
                ret = (filter[quotient8(mv)] & (1 << remainder8(mv)));
-               if (bloom->kfunc == 3 && ret) {
+               if (ret) {
                        next_hash(hv, x,y,z);
                        mv = modulor(hv, bloom->mask);
                        ret = (filter[quotient8(mv)] & (1 << remainder8(mv)));
+                       if (ret) {
+                               next_hash(hv, x,y,z);
+                               mv = modulor(hv, bloom->mask);
+                               ret = (filter[quotient8(mv)] & (1 << remainder8(mv)));
+                       }
                }
        }
 
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3990,8 +3990,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
                /* both sorted */
                return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0, maxsize, t0, 0);
        } else if (lhash && rhash) {
-               /* both have hash, smallest on left (TODO) */
-               swap = lcount > rcount;
+               /* both have hash, smallest on right */
+               swap = lcount < rcount;
                reason = "both have hash";
        } else if (lhash) {
                /* only left has hash, swap */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to