Changeset: 2e019d46b9e3 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2e019d46b9e3 Modified Files: gdk/gdk_group.c gdk/gdk_hash.c monetdb5/optimizer/opt_deadcode.c sql/backends/monet5/sql_optimizer.c Branch: default Log Message:
Merge with Jul2017 branch. diffs (293 lines): diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -331,6 +331,48 @@ /* COMP */ cmp(v, BUNtail(bi, hb)) == 0 \ ) +/* reverse the bits of an OID value */ +static inline oid +rev(oid x) +{ +#if SIZEOF_OID == 8 + x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555); + x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333); + x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F); + x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF); + x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF); + x = ((x & 0x00000000FFFFFFFF) << 32) | ((x >> 32) & 0x00000000FFFFFFFF); +#else + x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); + x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); + x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); + x = ((x & 0x00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF); + x = ((x & 0x0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF); +#endif + return x; +} + +/* population count: count number of 1 bits in a value */ +static inline int +pop(oid x) +{ + /* divide and conquer implementation */ +#if SIZEOF_OID == 8 + x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555); + x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); + x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F); + x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF); + x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF); + x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF); +#else + x = (x & 0x55555555) + ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); + x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); + x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF); +#endif + return (int) x; +} #define GRP_create_partial_hash_table(INIT_0,INIT_1,HASH,COMP) \ do { \ @@ -396,7 +438,11 @@ assert(p < end); \ INIT_1; \ prb = HASH; \ - prb = (prb ^ (BUN) grps[r] << bits) & hs->mask; \ + /* cleverly combine group ID with */ \ + /* hash value: group ID in top-most */ \ + /* bits of hash by reversing the */ \ + /* bits and shifting them in place */ \ + prb = (prb ^ rev(grps[r]) >> bits) & hs->mask; \ for (hb = HASHget(hs, prb); \ hb != HASHnil(hs) && hb >= start; \ hb = HASHgetlink(hs, hb)) { \ @@ -552,13 +598,13 @@ BATgroup_internal(BAT **groups, BAT **ex } if (b->tkey || cnt <= 1 || (g && (g->tkey || BATtdense(g)))) { /* grouping is trivial: 1 element per group */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " "trivial case: 1 element per group\n", - BATgetId(b), BATcount(b), + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -593,13 +639,13 @@ BATgroup_internal(BAT **groups, BAT **ex /* all values are equal */ if (g == NULL || (BATordered(g) && BATordered_rev(g))) { /* there's only a single group: 0 */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " - "trivial case: single output group\n", - BATgetId(b), BATcount(b), + "trivial case: single output group\n", + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -635,13 +681,13 @@ BATgroup_internal(BAT **groups, BAT **ex * e/h available in order to copy them, * otherwise we will need to calculate them * which we will do using the "normal" case */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " - "trivial case: copy input groups\n", - BATgetId(b), BATcount(b), + "trivial case: copy input groups\n", + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -731,13 +777,13 @@ BATgroup_internal(BAT **groups, BAT **ex ((BATordered(b) || BATordered_rev(b)) && (g == NULL || BATordered(g) || BATordered_rev(g)))) { /* we only need to compare each entry with the previous */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " "compare consecutive values\n", - BATgetId(b), BATcount(b), + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -788,13 +834,13 @@ BATgroup_internal(BAT **groups, BAT **ex * last time we saw that group, so if the last time we * saw the old group of the current value is within * this range, we can reuse the new group */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " "subscan old groups\n", - BATgetId(b), BATcount(b), + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -943,13 +989,13 @@ BATgroup_internal(BAT **groups, BAT **ex /* we already have a hash table on b, or b is * persistent and we could create a hash table, or b * is a view on a bat that already has a hash table */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " "use existing hash table\n", - BATgetId(b), BATcount(b), + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -1009,8 +1055,8 @@ BATgroup_internal(BAT **groups, BAT **ex size_t nmelen; Heap *hp = NULL; BUN prb; + int bits; BUN mask; - int bits; GDKclrerr(); /* not interested in BAThash errors */ @@ -1018,13 +1064,13 @@ BATgroup_internal(BAT **groups, BAT **ex * build an incomplete hash table on the fly--also see * BATassertProps for similar code; we also exploit if * g is clustered */ - ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," + ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s]," "s=%s#" BUNFMT "," "g=%s#" BUNFMT "," "e=%s#" BUNFMT "," "h=%s#" BUNFMT ",subsorted=%d): " "create partial hash table%s\n", - BATgetId(b), BATcount(b), + BATgetId(b), BATcount(b), ATOMname(b->ttype), s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0, g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0, e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, @@ -1032,25 +1078,10 @@ BATgroup_internal(BAT **groups, BAT **ex subsorted, gc ? " (g clustered)" : ""); nme = BBP_physical(b->batCacheid); nmelen = strlen(nme); - if (ATOMsize(t) == 1) { - mask = 1 << 16; - bits = 8; - } else if (ATOMsize(t) == 2) { - mask = 1 << 16; - bits = 8; - } else { - /* when combining value and group-id hashes, - * we left-shift one of them by half the - * hash-mask width to better spread bits and - * use the entire hash-mask, and thus reduce - * collisions */ - mask = HASHmask(cnt) >> 3; - bits = 3; - while (mask >>= 1) - bits++; - bits /= 2; - mask = HASHmask(cnt); - } + mask = MAX(HASHmask(cnt), 1 << 16); + /* mask is a power of two, so pop(mask - 1) tells us + * which power of two */ + bits = 8 * SIZEOF_OID - pop(mask - 1); if ((hp = GDKzalloc(sizeof(Heap))) == NULL || (hp->farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 || (hp->filename = GDKmalloc(nmelen + 30)) == NULL || @@ -1058,7 +1089,7 @@ BATgroup_internal(BAT **groups, BAT **ex "%s.hash" SZFMT, nme, MT_getpid()) < 0 || (ext = GDKstrdup(hp->filename + nmelen + 1)) == NULL || (hs = HASHnew(hp, b->ttype, BUNlast(b), - MAX(HASHmask(cnt), 1 << 16), BUN_NONE)) == NULL) { + mask, BUN_NONE)) == NULL) { if (hp) { if (hp->filename) GDKfree(hp->filename); diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -130,7 +130,7 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN ((size_t *) hp->base)[2] = mask; ((size_t *) hp->base)[3] = width; ((size_t *) hp->base)[4] = count; - ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ",width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, width, h->nil, (size + mask) * width); + ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, width, h->nil, (size + mask) * width); return h; } diff --git a/monetdb5/optimizer/opt_deadcode.c b/monetdb5/optimizer/opt_deadcode.c --- a/monetdb5/optimizer/opt_deadcode.c +++ b/monetdb5/optimizer/opt_deadcode.c @@ -58,7 +58,7 @@ OPTdeadcodeImplementation(Client cntxt, varused[getArg(p,0)]++; // force keeping continue; } - if ( getModuleId(p) == batRef && isUpdateInstruction(p)){ + if ( getModuleId(p) == batRef && isUpdateInstruction(p) && !p->barrier){ /* bat.append and friends are intermediates that need not be retained * unless they are used */ } else diff --git a/sql/backends/monet5/sql_optimizer.c b/sql/backends/monet5/sql_optimizer.c --- a/sql/backends/monet5/sql_optimizer.c +++ b/sql/backends/monet5/sql_optimizer.c @@ -60,11 +60,24 @@ SQLgetColumnSize(sql_trans *tr, sql_colu return size; } +/* + * The maximal space occupied by a query is calculated + * under the assumption that the complete database should fit in memory. + * The assumption is that the plan does not contain duplicate bind operations. + * Calculation of the precise footprint is much more complex + * and can not deal with intermediate structures, or fast + * access using sorted probing. + * + * A run where we only take the size of a table only once, + * caused major degration on SF100 Q3 with SSD(>6x) + */ + static lng SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare) { sql_trans *tr = m->session->tr; lng size,space = 0, i; + str lasttable = 0; for (i = 0; i < mb->stop; i++) { InstrPtr p = mb->stmt[i]; @@ -90,9 +103,10 @@ SQLgetSpace(mvc *m, MalBlkPtr mb, int pr continue; /* we have to sum the cost of all three components of a BAT */ - if (c && (!isRemote(c->t) && !isMergeTable(c->t))) { + if (c && (!isRemote(c->t) && !isMergeTable(c->t)) && (lasttable == 0 || strcmp(lasttable,tname)==0)) { size = SQLgetColumnSize(tr, c, access); - space += size; // accumulate once + space += size; // accumulate once per table + //lasttable = tname; invalidate this attempt if( !prepare && size == 0 && ! t->system){ //mnstr_printf(GDKout,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size); setFunctionId(p, emptybindRef); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list