Changeset: 237093560788 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/237093560788 Modified Files: gdk/gdk_select.c Branch: Aug2024 Log Message:
Use binary search instead of hash on sorted bat with available hash table. It turns out, even with large bats, binary search is faster than using a hash, even if the hash chain has length 1. diffs (30 lines): diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -1873,11 +1873,12 @@ BATselect(BAT *b, BAT *s, const void *tl else pb = NULL; pbi = bat_iterator(pb); - /* use hash only for equi-join, and then only if b or its - * parent already has a hash, or if b or its parent is - * persistent and the total size wouldn't be too large; check - * for existence of hash last since that may involve I/O */ - if (equi || antiequi) { + /* use hash only for equi-join if the bat is not sorted, but + * only if b or its parent already has a hash, or if b or its + * parent is persistent and the total size wouldn't be too + * large; check for existence of hash last since that may + * involve I/O */ + if ((equi || antiequi) && !bi.sorted && !bi.revsorted) { double cost = joincost(b, 1, &ci, &havehash, &phash, NULL); if (cost > 0 && cost < ci.ncand) { wanthash = true; @@ -1980,7 +1981,7 @@ BATselect(BAT *b, BAT *s, const void *tl } } - if (!havehash && (bi.sorted || bi.revsorted || oidxh != NULL)) { + if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) { BUN low = 0; BUN high = bi.count; _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org