Changeset: 2e019d46b9e3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2e019d46b9e3
Modified Files:
        gdk/gdk_group.c
        gdk/gdk_hash.c
        monetdb5/optimizer/opt_deadcode.c
        sql/backends/monet5/sql_optimizer.c
Branch: default
Log Message:

Merge with Jul2017 branch.


diffs (293 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -331,6 +331,48 @@
        /* COMP   */    cmp(v, BUNtail(bi, hb)) == 0            \
        )
 
+/* reverse the bits of an OID value */
+static inline oid
+rev(oid x)
+{
+#if SIZEOF_OID == 8
+       x = ((x & 0x5555555555555555) <<  1) | ((x >>  1) & 0x5555555555555555);
+       x = ((x & 0x3333333333333333) <<  2) | ((x >>  2) & 0x3333333333333333);
+       x = ((x & 0x0F0F0F0F0F0F0F0F) <<  4) | ((x >>  4) & 0x0F0F0F0F0F0F0F0F);
+       x = ((x & 0x00FF00FF00FF00FF) <<  8) | ((x >>  8) & 0x00FF00FF00FF00FF);
+       x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF);
+       x = ((x & 0x00000000FFFFFFFF) << 32) | ((x >> 32) & 0x00000000FFFFFFFF);
+#else
+       x = ((x & 0x55555555) <<  1) | ((x >>  1) & 0x55555555);
+       x = ((x & 0x33333333) <<  2) | ((x >>  2) & 0x33333333);
+       x = ((x & 0x0F0F0F0F) <<  4) | ((x >>  4) & 0x0F0F0F0F);
+       x = ((x & 0x00FF00FF) <<  8) | ((x >>  8) & 0x00FF00FF);
+       x = ((x & 0x0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF);
+#endif
+       return x;
+}
+
+/* population count: count number of 1 bits in a value */
+static inline int
+pop(oid x)
+{
+       /* divide and conquer implementation */
+#if SIZEOF_OID == 8
+       x = (x & 0x5555555555555555) + ((x >>  1) & 0x5555555555555555);
+       x = (x & 0x3333333333333333) + ((x >>  2) & 0x3333333333333333);
+       x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >>  4) & 0x0F0F0F0F0F0F0F0F);
+       x = (x & 0x00FF00FF00FF00FF) + ((x >>  8) & 0x00FF00FF00FF00FF);
+       x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
+       x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
+#else
+       x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
+       x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
+       x = (x & 0x0F0F0F0F) + ((x >>  4) & 0x0F0F0F0F);
+       x = (x & 0x00FF00FF) + ((x >>  8) & 0x00FF00FF);
+       x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+#endif
+       return (int) x;
+}
 
 #define GRP_create_partial_hash_table(INIT_0,INIT_1,HASH,COMP)         \
        do {                                                            \
@@ -396,7 +438,11 @@
                                assert(p < end);                        \
                                INIT_1;                                 \
                                prb = HASH;                             \
-                               prb = (prb ^ (BUN) grps[r] << bits) & hs->mask; 
\
+                               /* cleverly combine group ID with */    \
+                               /* hash value: group ID in top-most */  \
+                               /* bits of hash by reversing the */     \
+                               /* bits and shifting them in place */   \
+                               prb = (prb ^ rev(grps[r]) >> bits) & hs->mask; \
                                for (hb = HASHget(hs, prb);             \
                                     hb != HASHnil(hs) && hb >= start;  \
                                     hb = HASHgetlink(hs, hb)) {        \
@@ -552,13 +598,13 @@ BATgroup_internal(BAT **groups, BAT **ex
        }
        if (b->tkey || cnt <= 1 || (g && (g->tkey || BATtdense(g)))) {
                /* grouping is trivial: 1 element per group */
-               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
                                  "trivial case: 1 element per group\n",
-                                 BATgetId(b), BATcount(b),
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -593,13 +639,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                /* all values are equal */
                if (g == NULL || (BATordered(g) && BATordered_rev(g))) {
                        /* there's only a single group: 0 */
-                       ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+                       ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT 
"[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
-                                         "trivial case: single output group\n",
-                                 BATgetId(b), BATcount(b),
+                                 "trivial case: single output group\n",
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -635,13 +681,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                         * e/h available in order to copy them,
                         * otherwise we will need to calculate them
                         * which we will do using the "normal" case */
-                       ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+                       ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT 
"[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
-                                         "trivial case: copy input groups\n",
-                                 BATgetId(b), BATcount(b),
+                                 "trivial case: copy input groups\n",
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -731,13 +777,13 @@ BATgroup_internal(BAT **groups, BAT **ex
            ((BATordered(b) || BATordered_rev(b)) &&
             (g == NULL || BATordered(g) || BATordered_rev(g)))) {
                /* we only need to compare each entry with the previous */
-               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
                                  "compare consecutive values\n",
-                                 BATgetId(b), BATcount(b),
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -788,13 +834,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                 * last time we saw that group, so if the last time we
                 * saw the old group of the current value is within
                 * this range, we can reuse the new group */
-               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
                                  "subscan old groups\n",
-                                 BATgetId(b), BATcount(b),
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -943,13 +989,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                /* we already have a hash table on b, or b is
                 * persistent and we could create a hash table, or b
                 * is a view on a bat that already has a hash table */
-               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
                                  "use existing hash table\n",
-                                 BATgetId(b), BATcount(b),
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -1009,8 +1055,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                size_t nmelen;
                Heap *hp = NULL;
                BUN prb;
+               int bits;
                BUN mask;
-               int bits;
 
                GDKclrerr();    /* not interested in BAThash errors */
 
@@ -1018,13 +1064,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                 * build an incomplete hash table on the fly--also see
                 * BATassertProps for similar code; we also exploit if
                 * g is clustered */
-               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+               ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
                                  "s=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
                                  "create partial hash table%s\n",
-                                 BATgetId(b), BATcount(b),
+                                 BATgetId(b), BATcount(b), ATOMname(b->ttype),
                                  s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -1032,25 +1078,10 @@ BATgroup_internal(BAT **groups, BAT **ex
                                  subsorted, gc ? " (g clustered)" : "");
                nme = BBP_physical(b->batCacheid);
                nmelen = strlen(nme);
-               if (ATOMsize(t) == 1) {
-                       mask = 1 << 16;
-                       bits = 8;
-               } else if (ATOMsize(t) == 2) {
-                       mask = 1 << 16;
-                       bits = 8;
-               } else {
-                       /* when combining value and group-id hashes,
-                        * we left-shift one of them by half the
-                        * hash-mask width to better spread bits and
-                        * use the entire hash-mask, and thus reduce
-                        * collisions */
-                       mask = HASHmask(cnt) >> 3;
-                       bits = 3;
-                       while (mask >>= 1)
-                               bits++;
-                       bits /= 2;
-                       mask = HASHmask(cnt);
-               }
+               mask = MAX(HASHmask(cnt), 1 << 16);
+               /* mask is a power of two, so pop(mask - 1) tells us
+                * which power of two */
+               bits = 8 * SIZEOF_OID - pop(mask - 1);
                if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||
                    (hp->farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) 
< 0 ||
                    (hp->filename = GDKmalloc(nmelen + 30)) == NULL ||
@@ -1058,7 +1089,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                             "%s.hash" SZFMT, nme, MT_getpid()) < 0 ||
                    (ext = GDKstrdup(hp->filename + nmelen + 1)) == NULL ||
                    (hs = HASHnew(hp, b->ttype, BUNlast(b),
-                                 MAX(HASHmask(cnt), 1 << 16), BUN_NONE)) == 
NULL) {
+                                 mask, BUN_NONE)) == NULL) {
                        if (hp) {
                                if (hp->filename)
                                        GDKfree(hp->filename);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -130,7 +130,7 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
        ((size_t *) hp->base)[2] = mask;
        ((size_t *) hp->base)[3] = width;
        ((size_t *) hp->base)[4] = count;
-       ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask 
" BUNFMT ",width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, 
width, h->nil, (size + mask) * width);
+       ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask 
" BUNFMT ", width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, 
width, h->nil, (size + mask) * width);
        return h;
 }
 
diff --git a/monetdb5/optimizer/opt_deadcode.c 
b/monetdb5/optimizer/opt_deadcode.c
--- a/monetdb5/optimizer/opt_deadcode.c
+++ b/monetdb5/optimizer/opt_deadcode.c
@@ -58,7 +58,7 @@ OPTdeadcodeImplementation(Client cntxt, 
                        varused[getArg(p,0)]++; // force keeping 
                        continue;
                }
-               if ( getModuleId(p) == batRef && isUpdateInstruction(p)){
+               if ( getModuleId(p) == batRef && isUpdateInstruction(p) && 
!p->barrier){
                        /* bat.append and friends are intermediates that need 
not be retained 
                         * unless they are used */
                } else
diff --git a/sql/backends/monet5/sql_optimizer.c 
b/sql/backends/monet5/sql_optimizer.c
--- a/sql/backends/monet5/sql_optimizer.c
+++ b/sql/backends/monet5/sql_optimizer.c
@@ -60,11 +60,24 @@ SQLgetColumnSize(sql_trans *tr, sql_colu
        return size;
 }
 
+/*
+ * The maximal space occupied by a query is calculated
+ * under the assumption that the complete database should fit in memory.
+ * The assumption is that the plan does not contain duplicate bind operations.
+ * Calculation of the precise footprint is much more complex
+ * and can not deal with intermediate structures, or fast
+ * access using sorted probing.
+ *
+ * A run where we only take the size of a table only once,
+ * caused major degration on SF100 Q3 with SSD(>6x) 
+ */
+
 static lng
 SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare)
 {
        sql_trans *tr = m->session->tr;
        lng size,space = 0, i;
+       str lasttable = 0;
 
        for (i = 0; i < mb->stop; i++) {
                InstrPtr p = mb->stmt[i];
@@ -90,9 +103,10 @@ SQLgetSpace(mvc *m, MalBlkPtr mb, int pr
                                continue;
 
                        /* we have to sum the cost of all three components of a 
BAT */
-                       if (c && (!isRemote(c->t) && !isMergeTable(c->t))) {
+                       if (c && (!isRemote(c->t) && !isMergeTable(c->t)) && 
(lasttable == 0 || strcmp(lasttable,tname)==0)) {
                                size = SQLgetColumnSize(tr, c, access);
-                               space += size;  // accumulate once
+                               space += size;  // accumulate once per table
+                               //lasttable = tname;     invalidate this attempt
                                if( !prepare && size == 0  && ! t->system){
                                        //mnstr_printf(GDKout,"found empty 
column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size);
                                        setFunctionId(p, emptybindRef);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to