Changeset: 88acc1bec9c7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=88acc1bec9c7
Modified Files:
        gdk/gdk_group.c
Branch: Feb2013
Log Message:

We can't exploit hash links being in reverse order for existing hash tables.
We tried to exploit the supposed fact that links in the collision
lists of our hash tables were in reverse order of the BUN number.  It
seems this assumption was not correct, at least not in all cases.
This should fix bug 3237.


diffs (105 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -64,9 +64,9 @@
  *
  * Otherwise we build a partial hash table on the fly.
  *
- * A decision should be made on the order in which grouping occurs Let
- * |b| have << different values than |g| then the linked lists gets
- * extremely long, leading to a n^2 algorithm.
+ * A decision should be made on the order in which grouping occurs.
+ * Let |b| have << different values than |g| then the linked lists
+ * gets extremely long, leading to a n^2 algorithm.
  * At the MAL level, the multigroup function would perform the dynamic
  * optimization.
  */
@@ -448,20 +448,17 @@ BATgroup_internal(BAT **groups, BAT **ex
                        GRPnotfound();
                }
        } else if (b->T->hash) {
-               bit gc = g && (g->tsorted || g->trevsorted);
-
-               /* we already have a hash table on b;
-                * we also exploit if g is clustered */
+               /* we already have a hash table on b */
                ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
-                                 "use existing hash table%s\n",
+                                 "use existing hash table\n",
                                  BATgetId(b), BATcount(b),
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
                                  h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
-                                 subsorted, gc ? " (g clustered)" : "");
+                                 subsorted);
                hs = b->T->hash;
                gn->tsorted = 1; /* be optimistic */
                for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) {
@@ -470,45 +467,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                         * HASHloop: the difference is that we only
                         * consider BUNs smaller than the one we're
                         * looking up (p), and that we also consider
-                        * the input groups;
-                        * we also exploit if g is clustered */
-                       /* skip irrelevant BUNs after the current
-                        * BUNs; exploit that hash-table links
-                        * backwards through BAT */
-                       for (hb = hs->hash[HASHprobe(hs, v)];
-                            hb != BUN_NONE && hb >= p;
-                            hb = hs->link[hb]) {
-                               assert(hs->link[hb] == BUN_NONE
-                                      || hs->link[hb] < hb);
-                       }
-                       if (gc) {
-                               for (;
-                                    hb != BUN_NONE && grps[hb - r] == grps[p - 
r];
-                                    hb = hs->link[hb]) {
-                                       assert(hs->link[hb] == BUN_NONE
-                                              || hs->link[hb] < hb);
-                                       if (cmp(v, BUNtail(bi, hb)) == 0) {
-                                               oid grp = ngrps[hb - r];
-                                               ngrps[p - r] = grp;
-                                               if (histo)
-                                                       cnts[grp]++;
-                                               if (gn->tsorted &&
-                                                   grp != ngrp - 1)
-                                                       gn->tsorted = 0;
-                                               break;
-                                       }
-                               }
-                               if (hb != BUN_NONE &&
-                                   grps[hb - r] != grps[p - r]) {
-                                       /* we didn't assign a group
-                                        * yet */
-                                       hb = BUN_NONE;
-                               }
-                       } else if (grps) {
-                               for (;
+                        * the input groups */
+                       if (grps) {
+                               for (hb = hs->hash[HASHprobe(hs, v)];
                                     hb != BUN_NONE;
                                     hb = hs->link[hb]) {
-                                       if (grps[hb - r] == grps[p - r] &&
+                                       if (hb < p &&
+                                           grps[hb - r] == grps[p - r] &&
                                            cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
                                                ngrps[p - r] = grp;
@@ -521,10 +486,11 @@ BATgroup_internal(BAT **groups, BAT **ex
                                        }
                                }
                        } else {
-                               for (;
+                               for (hb = hs->hash[HASHprobe(hs, v)];
                                     hb != BUN_NONE;
                                     hb = hs->link[hb]) {
-                                       if (cmp(v, BUNtail(bi, hb)) == 0) {
+                                       if (hb < p &&
+                                           cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
                                                ngrps[p - r] = grp;
                                                if (histo)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to