Changeset: 45faaeba855a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=45faaeba855a
Added Files:
        MonetDB5/modules/mal/groupby.c
Modified Files:
        MonetDB5/mal/mal_dataflow.c
        MonetDB5/mal/mal_instruction.c
        MonetDB5/mal/mal_listing.c
        MonetDB5/mal/mal_resolve.c
        MonetDB5/modules/mal/mal_mapi.c
        MonetDB5/modules/mal/tablet.c
        MonetDB5/modules/mal/wlc.c
        MonetDB5/optimizer/opt_dataflow.c
        MonetDB5/optimizer/opt_garbageCollector.c
        MonetDB5/optimizer/opt_inline.c
        MonetDB5/optimizer/opt_mergetable.c
        MonetDB5/optimizer/opt_multiplex.c
        MonetDB5/optimizer/opt_projectionpath.c
        MonetDB5/optimizer/opt_remap.c
        MonetDB5/optimizer/opt_reorder.c
        gdk/gdk_tracer.h
Branch: gdk-tracer
Log Message:

Found the bug with debugInstruction - Added comments for if statements and for 
loops


diffs (truncated from 605 to 300 lines):

diff --git a/MonetDB5/mal/mal_dataflow.c b/MonetDB5/mal/mal_dataflow.c
--- a/MonetDB5/mal/mal_dataflow.c
+++ b/MonetDB5/mal/mal_dataflow.c
@@ -30,6 +30,7 @@
 #include "mal_private.h"
 #include "mal_runtime.h"
 #include "mal_resource.h"
+#include "gdk_tracer.h"
 
 #define DFLOWpending 0         /* runnable */
 #define DFLOWrunning 1         /* currently in progress */
@@ -658,6 +659,8 @@ DFLOWinitBlk(DataFlow flow, MalBlkPtr mb
        }
        GDKfree(assign);
 
+       /* CHECK */
+       // The whole for-loop is in PARDEBUG
        for (n = 0; n < flow->stop - flow->start; n++) {
                DEBUG(PAR, "[%d] %d\n", flow->start + n, n);
                debugInstruction(PAR, mb, 0, getInstrPtr(mb, n + flow->start), 
LIST_MAL_ALL);
diff --git a/MonetDB5/mal/mal_instruction.c b/MonetDB5/mal/mal_instruction.c
--- a/MonetDB5/mal/mal_instruction.c
+++ b/MonetDB5/mal/mal_instruction.c
@@ -929,6 +929,8 @@ trimMalVariables_(MalBlkPtr mb, MalStkPt
                cnt++;
        }
 
+       /* CHECK */
+       // The 1st DEBUG message and the for loop are both in DEBUG MAL_REDUCE
        DEBUG(MAL_REDUCE, "Variable reduction %d -> %d\n", mb->vtop, cnt);
        for (i = 0; i < mb->vtop; i++)
                DEBUG(MAL_REDUCE, "map %d -> %d\n", i, alias[i]);
diff --git a/MonetDB5/mal/mal_listing.c b/MonetDB5/mal/mal_listing.c
--- a/MonetDB5/mal/mal_listing.c
+++ b/MonetDB5/mal/mal_listing.c
@@ -644,24 +644,19 @@ debugInstruction(COMPONENT comp, MalBlkP
 {
        /* CHECK */
        // Fix it - Activating this function causes mclient to hang and mserver 
to crash
-
-       // str ps;
+       str ps;
 
-       // ps = instruction2str(mb, stk, p, flg);
-       // /* ps[strlen(ps)-1] = 0; remove '\n' */
-       // if ( ps ){
-       //      // DEBUG(comp, "%s%s\n", (flg & LIST_MAL_MAPI ? "=" : ""), ps);
-       //      GDKfree(ps);
-       // } else {
-       //      // DEBUG(comp, "Failed instruction2str()\n");
-       // }
+       ps = instruction2str(mb, stk, p, flg);
+       /* ps[strlen(ps)-1] = 0; remove '\n' */
+       if ( ps ){
+               DEBUG(comp, "%s%s\n", (flg & LIST_MAL_MAPI ? "=" : ""), ps);
+               GDKfree(ps);
+       } else {
+               DEBUG(comp, "Failed instruction2str()\n");
+       }
 
        /* compiler complains about unused parameter */
        (void) comp; 
-       (void) mb; 
-       (void) stk; 
-       (void) p; 
-       (void) flg; 
 }
 
 void
diff --git a/MonetDB5/mal/mal_resolve.c b/MonetDB5/mal/mal_resolve.c
--- a/MonetDB5/mal/mal_resolve.c
+++ b/MonetDB5/mal/mal_resolve.c
@@ -180,6 +180,8 @@ findFunctionType(Module scope, MalBlkPtr
                                continue;
                        }
 
+                       /* CHECK */
+                       // If is in DEBUG MAL_RESOLVE
                        if (sig->polymorphic || sig->retc == p->retc) {
                                DEBUG(MAL_RESOLVE, "Resolving\n");
                                debugInstruction(MAL_RESOLVE, mb, 0, p, 
LIST_MAL_ALL);
@@ -270,6 +272,8 @@ findFunctionType(Module scope, MalBlkPtr
                                continue;
                        }
 
+                       /* CHECK */
+                       // If is in DEBUG MAL_RESOLVE
                        if (sig->polymorphic || sig->retc == p->retc) {
                                DEBUG(MAL_RESOLVE, "Resolving\n");
                                debugInstruction(MAL_RESOLVE, mb, 0, p, 
LIST_MAL_ALL);
@@ -295,6 +299,8 @@ findFunctionType(Module scope, MalBlkPtr
                 * An optimizer may at a later stage automatically insert such
                 * coercion requests.
                 */
+               /* CHECK */
+               // From here
                DEBUG(MAL_RESOLVE, 
                        "Finished %s.%s unmatched=%d polymorphic=%d %d\n",
                        getModuleId(sig), getFunctionId(sig), unmatched,
@@ -319,7 +325,8 @@ findFunctionType(Module scope, MalBlkPtr
                                unmatched, 
                                getTypeName(getArgType(mb, p, unmatched)), 
                                getTypeName(getArgType(s->def, sig, 
unmatched)));
-               
+               // Till here - in in DEBUG MAL_RESOLVE
+
                if (unmatched) {
                        s = s->peer;
                        continue;
diff --git a/MonetDB5/modules/mal/groupby.c b/MonetDB5/modules/mal/groupby.c
new file mode 100644
--- /dev/null
+++ b/MonetDB5/modules/mal/groupby.c
@@ -0,0 +1,212 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
+ */
+
+/*
+ * (c) Martin Kersten
+ * Multicolumn group-by support
+ * The group-by support module is meant to simplify code analysis and
+ * speedup the kernel on multi-attribute grouping routines.
+ *
+ * The target is to support SQL-like multicolumngroup_by operations, which are 
lists of
+ * attributes and a group aggregate function.
+ * Each group can be represented with an oid into the n-ary table.
+ * Consider the query "select count(*), max(A) from R group by A, B,C." whose 
code
+ * snippet in MAL would become something like:
+ *
+ * @verbatim
+ * _1:bat[:int]  := sql.bind("sys","r","a",0);
+ * _2:bat[:str]  := sql.bind("sys","r","b",0);
+ * _3:bat[:date]  := sql.bind("sys","r","c",0);
+ * ...
+ * _9 := algebra.select(_1,0,100);
+ * ..
+ * (grp_4:bat[:lng], gid:bat[:oid]) := groupby.count(_9,_2);
+ * (grp_5:bat[:lng], gid:bat[:oid]) := groupby.max(_9,_2,_3);
+ * @end verbatim
+ *
+ * The id() function merely becomes the old-fashioned oid-based group 
identification list.
+ * This way related values can be obtained from the attribute columns. It can 
be the input
+ * for the count() function, which saves some re-computation.
+ *
+ * Aside the group ids, we also provide options to return the value based 
aggregate table
+ * to ease development of parallel plans.
+ *
+ * The implementation is optimized for a limited number of groups. The default 
is
+ * to fall back on the old code sequences.
+ *
+ */
+#include "monetdb_config.h"
+#include "groupby.h"
+#include "group.h"
+#include "gdk_tracer.h"
+
+/*
+ * The implementation is based on a two-phase process. In phase 1, we estimate
+ * the number of groups to deal with using column independence.
+ * The grouping is performed in parallel over slices of the tables.
+ * The final pieces are glued together.
+ */
+typedef struct{
+       bat *bid;       /* input bats */
+       BAT *candidate; /* list */
+       BAT **cols;
+       BUN *unique; /* number of different values */
+       int last;
+       BUN size;
+} AGGRtask;
+
+static AGGRtask*
+GROUPcollect( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){
+       AGGRtask *a;
+       int i;
+       BAT *b, *bs, *bh = NULL;
+       BUN sample;
+
+       (void) mb;
+       (void) cntxt;
+       a= (AGGRtask *) GDKzalloc(sizeof(*a));
+       if ( a == NULL)
+               return NULL;
+       a->bid = (bat*) GDKzalloc(pci->argc * sizeof(bat));
+       a->cols = (BAT**) GDKzalloc(pci->argc * sizeof(BAT*));
+       a->unique = (BUN *) GDKzalloc(pci->argc * sizeof(BUN));
+       if ( a->cols == NULL || a->bid == NULL || a->unique == NULL){
+               if(a->cols) GDKfree(a->cols);
+               if(a->bid) GDKfree(a->bid);
+               if(a->unique) GDKfree(a->unique);
+               GDKfree(a);
+               return NULL;
+       }
+       for ( i= pci->retc; i< pci->argc; i++, a->last++) {
+               a->bid[a->last] = *getArgReference_bat(stk,pci,i);
+               b = a->cols[a->last]= BATdescriptor(a->bid[a->last]);
+               if ( a->cols[a->last] == NULL){
+                       for(a->last--; a->last>=0; a->last--)
+                               BBPunfix(a->cols[a->last]->batCacheid);
+                       GDKfree(a->cols);
+                       GDKfree(a->bid);
+                       GDKfree(a->unique);
+                       GDKfree(a);
+                       return NULL;
+               }
+               sample = BATcount(b) < 1000 ? BATcount(b): 1000;
+               bs = BATsample( b, sample);
+               if (bs) {
+                       bh = BATunique(b, bs);
+                       if (bh) {
+                               a->unique[a->last] = BATcount(bh);
+                               BBPunfix(bh->batCacheid);
+                       }
+                       BBPunfix(bs->batCacheid);
+               }
+               if ( b->tsorted)
+                       a->unique[a->last] = 1000; /* sorting helps grouping */
+               a->size = BATcount(b);
+       }
+
+       /* CHECK */
+       // The for-loop is in DEBUG MAL_GROUPBY
+       for(i=0; i<a->last; i++)
+               DEBUG(MAL_GROUPBY, "Group '%d' unique "BUNFMT "\n", i, 
a->unique[i]);
+
+       return a;
+}
+
+static void 
+GROUPcollectSort(AGGRtask *a, int start, int finish)
+{
+       int i,j,k;
+       BAT *b;
+       BUN sample;
+
+       /* sort the columns by decreasing unique */
+       for (i = start; i< finish; i++)
+       for( j = i+1; j<finish; j++)
+       if ( a->unique[i] < a->unique[j]){
+               k =a->bid[i];
+               a->bid[i] = a->bid[j];
+               a->bid[j] = k;
+
+               b= a->cols[i];
+               a->cols[i] = a->cols[j];
+               a->cols[j] = b;
+
+               sample = a->unique[i];
+               a->unique[i] = a->unique[j];
+               a->unique[j] = sample;
+       }
+}
+
+static void
+GROUPdelete(AGGRtask *a){
+       for(a->last--; a->last>=0; a->last--){
+               BBPunfix(a->cols[a->last]->batCacheid);
+       }
+       GDKfree(a->bid);
+       GDKfree(a->cols);
+       GDKfree(a->unique);
+       GDKfree(a);
+}
+
+/*
+ * The groups optimizer takes a grouping sequence and attempts to
+ * minimize the intermediate result.  The choice depends on a good
+ * estimate of intermediate results using properties.
+ */
+
+str
+GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       bat *grp = getArgReference_bat(stk, pci, 0);
+       bat *ext = getArgReference_bat(stk, pci, 1);
+       bat *hist = getArgReference_bat(stk, pci, 2);
+       int i, j;
+       bat oldgrp, oldext, oldhist;
+       str msg = MAL_SUCCEED;
+       BAT *b;
+       BUN count = 0;
+       AGGRtask *aggr;
+
+       aggr = GROUPcollect(cntxt, mb, stk, pci);
+       if( aggr == NULL)
+               throw(MAL,"group.multicolumn", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+       GROUPcollectSort(aggr, 0, aggr->last);
+
+       /* (grp,ext,hist) := group.group(..) */
+       /* use the old pattern to perform the incremental grouping */
+       *grp = 0;
+       *ext = 0;
+       *hist = 0;
+       msg = GRPgroup1(grp, ext, hist, &aggr->bid[0]);
+       i = 1;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to