Changeset: 6980f2d26c8a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/6980f2d26c8a
Modified Files:
        gdk/gdk_group.c
Branch: Oct2020
Log Message:

Don't use small hash size when subgrouping.
This fixes a problem when subgrouping a large column of width 1 or 2,
which would take forever with the small hash size due to collisions.
Note that when subgrouping, we add the subgroup to the hash value.
When not subgrouping, a small hash is fine: it is then a perfect hash.


diffs (43 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -658,6 +658,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                        maxgrp = g->tseqbase + BATcount(g);
                else if (BATtordered(g))
                        maxgrp = * (oid *) Tloc(g, BATcount(g) - 1);
+               else if (BATtrevordered(g))
+                       maxgrp = * (oid *) Tloc(g, 0);
                else {
                        prop = BATgetprop(g, GDK_MAX_VALUE);
                        if (prop)
@@ -1073,15 +1075,21 @@ BATgroup_internal(BAT **groups, BAT **ex
                } else {
                        nbucket = MAX(HASHmask(cnt), 1 << 16);
                }
-               switch (t) {
-               case TYPE_bte:
-                       nbucket = 256;
-                       break;
-               case TYPE_sht:
-                       nbucket = 65536;
-                       break;
-               default:
-                       break;
+               if (grps == NULL || is_oid_nil(maxgrp)
+#if SIZEOF_OID == SIZEOF_LNG
+                   || maxgrp >= ((oid) 1 << (SIZEOF_LNG * 8 - 8))
+#endif
+                       ) {
+                       switch (t) {
+                       case TYPE_bte:
+                               nbucket = 256;
+                               break;
+                       case TYPE_sht:
+                               nbucket = 65536;
+                               break;
+                       default:
+                               break;
+                       }
                }
                if ((hs = GDKzalloc(sizeof(Hash))) == NULL ||
                    (hs->heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to