Changeset: 88acc1bec9c7 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=88acc1bec9c7 Modified Files: gdk/gdk_group.c Branch: Feb2013 Log Message:
We can't exploit hash links being in reverse order for existing hash tables. We tried to exploit the supposed fact that links in the collision lists of our hash tables were in reverse order of the BUN number. It seems this assumption was not correct, at least not in all cases. This should fix bug 3237. diffs (105 lines): diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -64,9 +64,9 @@ * * Otherwise we build a partial hash table on the fly. * - * A decision should be made on the order in which grouping occurs Let - * |b| have << different values than |g| then the linked lists gets - * extremely long, leading to a n^2 algorithm. + * A decision should be made on the order in which grouping occurs. + * Let |b| have << different values than |g| then the linked lists + * gets extremely long, leading to a n^2 algorithm. * At the MAL level, the multigroup function would perform the dynamic * optimization. */ @@ -448,20 +448,17 @@ BATgroup_internal(BAT **groups, BAT **ex GRPnotfound(); } } else if (b->T->hash) { - bit gc = g && (g->tsorted || g->trevsorted); - - /* we already have a hash table on b; - * we also exploit if g is clustered */ + /* we already have a hash table on b */ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " - "use existing hash table%s\n", + "use existing hash table\n", BATgetId(b), BATcount(b), g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0, - subsorted, gc ? " (g clustered)" : ""); + subsorted); hs = b->T->hash; gn->tsorted = 1; /* be optimistic */ for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) { @@ -470,45 +467,13 @@ BATgroup_internal(BAT **groups, BAT **ex * HASHloop: the difference is that we only * consider BUNs smaller than the one we're * looking up (p), and that we also consider - * the input groups; - * we also exploit if g is clustered */ - /* skip irrelevant BUNs after the current - * BUNs; exploit that hash-table links - * backwards through BAT */ - for (hb = hs->hash[HASHprobe(hs, v)]; - hb != BUN_NONE && hb >= p; - hb = hs->link[hb]) { - assert(hs->link[hb] == BUN_NONE - || hs->link[hb] < hb); - } - if (gc) { - for (; - hb != BUN_NONE && grps[hb - r] == grps[p - r]; - hb = hs->link[hb]) { - assert(hs->link[hb] == BUN_NONE - || hs->link[hb] < hb); - if (cmp(v, BUNtail(bi, hb)) == 0) { - oid grp = ngrps[hb - r]; - ngrps[p - r] = grp; - if (histo) - cnts[grp]++; - if (gn->tsorted && - grp != ngrp - 1) - gn->tsorted = 0; - break; - } - } - if (hb != BUN_NONE && - grps[hb - r] != grps[p - r]) { - /* we didn't assign a group - * yet */ - hb = BUN_NONE; - } - } else if (grps) { - for (; + * the input groups */ + if (grps) { + for (hb = hs->hash[HASHprobe(hs, v)]; hb != BUN_NONE; hb = hs->link[hb]) { - if (grps[hb - r] == grps[p - r] && + if (hb < p && + grps[hb - r] == grps[p - r] && cmp(v, BUNtail(bi, hb)) == 0) { oid grp = ngrps[hb - r]; ngrps[p - r] = grp; @@ -521,10 +486,11 @@ BATgroup_internal(BAT **groups, BAT **ex } } } else { - for (; + for (hb = hs->hash[HASHprobe(hs, v)]; hb != BUN_NONE; hb = hs->link[hb]) { - if (cmp(v, BUNtail(bi, hb)) == 0) { + if (hb < p && + cmp(v, BUNtail(bi, hb)) == 0) { oid grp = ngrps[hb - r]; ngrps[p - r] = grp; if (histo) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list