Changeset: 155b1ef95b0e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/155b1ef95b0e Modified Files: gdk/gdk_batop.c gdk/gdk_group.c gdk/gdk_join.c gdk/gdk_project.c sql/backends/monet5/sql.c Branch: default Log Message:
Propagate tunique_est a bit more and use it in append (strings). When only a limited number of (unique) strings is expected, don't reuse the (largish) vheap. In group by, set tunique_est based on the group result. diffs (118 lines): diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -125,10 +125,13 @@ insert_string_bat(BAT *b, BATiter *ni, s r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12); /* r is estimate of number of strings in * double-eliminated area */ - if (r < ci->ncand) - len = GDK_ELIMLIMIT + (ci->ncand - r) * len; + BUN ecnt = ci->ncand; + if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est) + ecnt = ni->b->tunique_est; + if (r < ecnt) + len = GDK_ELIMLIMIT + (ecnt - r) * len; else - len = GDK_STRHASHSIZE + ci->ncand * (len + 12); + len = GDK_STRHASHSIZE + ecnt * (len + 12); /* len is total estimated expected size of vheap */ if (len > ni->vhfree / 2) { diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -1282,6 +1282,7 @@ BATgroup_internal(BAT **groups, BAT **ex en->trevsorted = ngrp == 1; en->tnonil = true; en->tnil = false; + en->tunique_est = ngrp; *extents = virtualize(en); } if (histo) { @@ -1304,7 +1305,11 @@ BATgroup_internal(BAT **groups, BAT **ex gn->tnonil = true; gn->tnil = false; gn->tmaxpos = maxgrppos; + gn->tunique_est = ngrp; *groups = gn; + if (!g && !e && !s) { + b->tunique_est = ngrp; + } TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT ",g=" ALGOOPTBATFMT ",e=" ALGOOPTBATFMT ",h=" ALGOOPTBATFMT ",subsorted=%s -> groups=" diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3259,6 +3259,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p } /* also set other bits of heap to correct value to indicate size */ BATsetcount(r1, BATcount(r1)); + r1->tunique_est = MIN(l->tunique_est, r->tunique_est); if (BATcount(r1) <= 1) { r1->tsorted = true; r1->trevsorted = true; @@ -3274,11 +3275,13 @@ 
hashjoin(BAT **r1p, BAT **r2p, BAT **r3p r2->tkey = true; r2->tseqbase = 0; } + r2->tunique_est = MIN(l->tunique_est, r->tunique_est); } if (r3) { r3->tnonil = !r3->tnil; BATsetcount(r3, BATcount(r3)); assert(BATcount(r1) == BATcount(r3)); + r3->tunique_est = MIN(l->tunique_est, r->tunique_est); } if (BATcount(r1) > 0) { if (BATtdense(r1)) diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c --- a/gdk/gdk_project.c +++ b/gdk/gdk_project.c @@ -571,6 +571,9 @@ project_str(BATiter *restrict li, struct bn->tnil = false; bn->tnonil = r1i->nonil & r2i->nonil; bn->tkey = false; + bn->tunique_est = + MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b), + r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b)); TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT " -> " ALGOBATFMT "%s " LLFMT "us\n", ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b), @@ -820,6 +823,9 @@ BATproject2(BAT *restrict l, BAT *restri bn->tascii = r1i.ascii; } + bn->tunique_est = + MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b), + r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b)); if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i))) BATtseqbase(bn, oid_nil); @@ -1138,6 +1144,15 @@ BATprojectchain(BAT **bats) bn->tnonil = nonil & b->tnonil; bn->tseqbase = oid_nil; bn->tkey = (ba[0].cnt <= 1); + double est = 0; + for (int i = 0; i < n; i++) { + double nest = ba[i].b->tunique_est?ba[i].b->tunique_est:BATcount(ba[i].b); + if (est) + est = MIN(est, nest); + else + est = nest; + } + bn->tunique_est = est; /* note, b may point to one of the bats in tobedeleted, so * reclaim after the last use of b */ while (ndelete-- > 0) diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -2430,6 +2430,7 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt nr_parts = *getArgReference_int(stk, pci, 5); } BAT *b = store->storage_api.bind_cands(tr, t, nr_parts, part_nr); + b->tunique_est = BATcount(b); if (b) 
{ *res = b->batCacheid; BBPkeepref(b); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org