Changeset: 237093560788 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/237093560788
Modified Files:
        gdk/gdk_select.c
Branch: Aug2024
Log Message:

Use binary search instead of hash on sorted bat with available hash table.
It turns out, even with large bats, binary search is faster than using a
hash, even if the hash chain has length 1.


diffs (30 lines):

diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1873,11 +1873,12 @@ BATselect(BAT *b, BAT *s, const void *tl
        else
                pb = NULL;
        pbi = bat_iterator(pb);
-       /* use hash only for equi-join, and then only if b or its
-        * parent already has a hash, or if b or its parent is
-        * persistent and the total size wouldn't be too large; check
-        * for existence of hash last since that may involve I/O */
-       if (equi || antiequi) {
+       /* use hash only for equi-join if the bat is not sorted, but
+        * only if b or its parent already has a hash, or if b or its
+        * parent is persistent and the total size wouldn't be too
+        * large; check for existence of hash last since that may
+        * involve I/O */
+       if ((equi || antiequi) && !bi.sorted && !bi.revsorted) {
                double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
                if (cost > 0 && cost < ci.ncand) {
                        wanthash = true;
@@ -1980,7 +1981,7 @@ BATselect(BAT *b, BAT *s, const void *tl
                }
        }
 
-       if (!havehash && (bi.sorted || bi.revsorted || oidxh != NULL)) {
+       if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) {
                BUN low = 0;
                BUN high = bi.count;
 
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to