Changeset: 45faaeba855a for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=45faaeba855a Added Files: MonetDB5/modules/mal/groupby.c Modified Files: MonetDB5/mal/mal_dataflow.c MonetDB5/mal/mal_instruction.c MonetDB5/mal/mal_listing.c MonetDB5/mal/mal_resolve.c MonetDB5/modules/mal/mal_mapi.c MonetDB5/modules/mal/tablet.c MonetDB5/modules/mal/wlc.c MonetDB5/optimizer/opt_dataflow.c MonetDB5/optimizer/opt_garbageCollector.c MonetDB5/optimizer/opt_inline.c MonetDB5/optimizer/opt_mergetable.c MonetDB5/optimizer/opt_multiplex.c MonetDB5/optimizer/opt_projectionpath.c MonetDB5/optimizer/opt_remap.c MonetDB5/optimizer/opt_reorder.c gdk/gdk_tracer.h Branch: gdk-tracer Log Message:
Found the bug with debugInstruction - Added comments for if statements and for loops diffs (truncated from 605 to 300 lines): diff --git a/MonetDB5/mal/mal_dataflow.c b/MonetDB5/mal/mal_dataflow.c --- a/MonetDB5/mal/mal_dataflow.c +++ b/MonetDB5/mal/mal_dataflow.c @@ -30,6 +30,7 @@ #include "mal_private.h" #include "mal_runtime.h" #include "mal_resource.h" +#include "gdk_tracer.h" #define DFLOWpending 0 /* runnable */ #define DFLOWrunning 1 /* currently in progress */ @@ -658,6 +659,8 @@ DFLOWinitBlk(DataFlow flow, MalBlkPtr mb } GDKfree(assign); + /* CHECK */ + // The whole for-loop is in PARDEBUG for (n = 0; n < flow->stop - flow->start; n++) { DEBUG(PAR, "[%d] %d\n", flow->start + n, n); debugInstruction(PAR, mb, 0, getInstrPtr(mb, n + flow->start), LIST_MAL_ALL); diff --git a/MonetDB5/mal/mal_instruction.c b/MonetDB5/mal/mal_instruction.c --- a/MonetDB5/mal/mal_instruction.c +++ b/MonetDB5/mal/mal_instruction.c @@ -929,6 +929,8 @@ trimMalVariables_(MalBlkPtr mb, MalStkPt cnt++; } + /* CHECK */ + // The 1st DEBUG message and the for loop are both in DEBUG MAL_REDUCE DEBUG(MAL_REDUCE, "Variable reduction %d -> %d\n", mb->vtop, cnt); for (i = 0; i < mb->vtop; i++) DEBUG(MAL_REDUCE, "map %d -> %d\n", i, alias[i]); diff --git a/MonetDB5/mal/mal_listing.c b/MonetDB5/mal/mal_listing.c --- a/MonetDB5/mal/mal_listing.c +++ b/MonetDB5/mal/mal_listing.c @@ -644,24 +644,19 @@ debugInstruction(COMPONENT comp, MalBlkP { /* CHECK */ // Fix it - Activating this function causes mclient to hang and mserver to crash - - // str ps; + str ps; - // ps = instruction2str(mb, stk, p, flg); - // /* ps[strlen(ps)-1] = 0; remove '\n' */ - // if ( ps ){ - // // DEBUG(comp, "%s%s\n", (flg & LIST_MAL_MAPI ? "=" : ""), ps); - // GDKfree(ps); - // } else { - // // DEBUG(comp, "Failed instruction2str()\n"); - // } + ps = instruction2str(mb, stk, p, flg); + /* ps[strlen(ps)-1] = 0; remove '\n' */ + if ( ps ){ + DEBUG(comp, "%s%s\n", (flg & LIST_MAL_MAPI ? "=" : ""), ps); + GDKfree(ps); + } else { + DEBUG(comp, "Failed instruction2str()\n"); + } /* compiler complains about unused parameter */ (void) comp; - (void) mb; - (void) stk; - (void) p; - (void) flg; } void diff --git a/MonetDB5/mal/mal_resolve.c b/MonetDB5/mal/mal_resolve.c --- a/MonetDB5/mal/mal_resolve.c +++ b/MonetDB5/mal/mal_resolve.c @@ -180,6 +180,8 @@ findFunctionType(Module scope, MalBlkPtr continue; } + /* CHECK */ + // If is in DEBUG MAL_RESOLVE if (sig->polymorphic || sig->retc == p->retc) { DEBUG(MAL_RESOLVE, "Resolving\n"); debugInstruction(MAL_RESOLVE, mb, 0, p, LIST_MAL_ALL); @@ -270,6 +272,8 @@ findFunctionType(Module scope, MalBlkPtr continue; } + /* CHECK */ + // If is in DEBUG MAL_RESOLVE if (sig->polymorphic || sig->retc == p->retc) { DEBUG(MAL_RESOLVE, "Resolving\n"); debugInstruction(MAL_RESOLVE, mb, 0, p, LIST_MAL_ALL); @@ -295,6 +299,8 @@ findFunctionType(Module scope, MalBlkPtr * An optimizer may at a later stage automatically insert such * coercion requests. */ + /* CHECK */ + // From here DEBUG(MAL_RESOLVE, "Finished %s.%s unmatched=%d polymorphic=%d %d\n", getModuleId(sig), getFunctionId(sig), unmatched, @@ -319,7 +325,8 @@ findFunctionType(Module scope, MalBlkPtr unmatched, getTypeName(getArgType(mb, p, unmatched)), getTypeName(getArgType(s->def, sig, unmatched))); - + // Till here - in in DEBUG MAL_RESOLVE + if (unmatched) { s = s->peer; continue; diff --git a/MonetDB5/modules/mal/groupby.c b/MonetDB5/modules/mal/groupby.c new file mode 100644 --- /dev/null +++ b/MonetDB5/modules/mal/groupby.c @@ -0,0 +1,212 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. + */ + +/* + * (c) Martin Kersten + * Multicolumn group-by support + * The group-by support module is meant to simplify code analysis and + * speedup the kernel on multi-attribute grouping routines. + * + * The target is to support SQL-like multicolumngroup_by operations, which are lists of + * attributes and a group aggregate function. + * Each group can be represented with an oid into the n-ary table. + * Consider the query "select count(*), max(A) from R group by A, B,C." whose code + * snippet in MAL would become something like: + * + * @verbatim + * _1:bat[:int] := sql.bind("sys","r","a",0); + * _2:bat[:str] := sql.bind("sys","r","b",0); + * _3:bat[:date] := sql.bind("sys","r","c",0); + * ... + * _9 := algebra.select(_1,0,100); + * .. + * (grp_4:bat[:lng], gid:bat[:oid]) := groupby.count(_9,_2); + * (grp_5:bat[:lng], gid:bat[:oid]) := groupby.max(_9,_2,_3); + * @end verbatim + * + * The id() function merely becomes the old-fashioned oid-based group identification list. + * This way related values can be obtained from the attribute columns. It can be the input + * for the count() function, which saves some re-computation. + * + * Aside the group ids, we also provide options to return the value based aggregate table + * to ease development of parallel plans. + * + * The implementation is optimized for a limited number of groups. The default is + * to fall back on the old code sequences. + * + */ +#include "monetdb_config.h" +#include "groupby.h" +#include "group.h" +#include "gdk_tracer.h" + +/* + * The implementation is based on a two-phase process. In phase 1, we estimate + * the number of groups to deal with using column independence. + * The grouping is performed in parallel over slices of the tables. + * The final pieces are glued together. + */ +typedef struct{ + bat *bid; /* input bats */ + BAT *candidate; /* list */ + BAT **cols; + BUN *unique; /* number of different values */ + int last; + BUN size; +} AGGRtask; + +static AGGRtask* +GROUPcollect( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ + AGGRtask *a; + int i; + BAT *b, *bs, *bh = NULL; + BUN sample; + + (void) mb; + (void) cntxt; + a= (AGGRtask *) GDKzalloc(sizeof(*a)); + if ( a == NULL) + return NULL; + a->bid = (bat*) GDKzalloc(pci->argc * sizeof(bat)); + a->cols = (BAT**) GDKzalloc(pci->argc * sizeof(BAT*)); + a->unique = (BUN *) GDKzalloc(pci->argc * sizeof(BUN)); + if ( a->cols == NULL || a->bid == NULL || a->unique == NULL){ + if(a->cols) GDKfree(a->cols); + if(a->bid) GDKfree(a->bid); + if(a->unique) GDKfree(a->unique); + GDKfree(a); + return NULL; + } + for ( i= pci->retc; i< pci->argc; i++, a->last++) { + a->bid[a->last] = *getArgReference_bat(stk,pci,i); + b = a->cols[a->last]= BATdescriptor(a->bid[a->last]); + if ( a->cols[a->last] == NULL){ + for(a->last--; a->last>=0; a->last--) + BBPunfix(a->cols[a->last]->batCacheid); + GDKfree(a->cols); + GDKfree(a->bid); + GDKfree(a->unique); + GDKfree(a); + return NULL; + } + sample = BATcount(b) < 1000 ? BATcount(b): 1000; + bs = BATsample( b, sample); + if (bs) { + bh = BATunique(b, bs); + if (bh) { + a->unique[a->last] = BATcount(bh); + BBPunfix(bh->batCacheid); + } + BBPunfix(bs->batCacheid); + } + if ( b->tsorted) + a->unique[a->last] = 1000; /* sorting helps grouping */ + a->size = BATcount(b); + } + + /* CHECK */ + // The for-loop is in DEBUG MAL_GROUPBY + for(i=0; i<a->last; i++) + DEBUG(MAL_GROUPBY, "Group '%d' unique "BUNFMT "\n", i, a->unique[i]); + + return a; +} + +static void +GROUPcollectSort(AGGRtask *a, int start, int finish) +{ + int i,j,k; + BAT *b; + BUN sample; + + /* sort the columns by decreasing unique */ + for (i = start; i< finish; i++) + for( j = i+1; j<finish; j++) + if ( a->unique[i] < a->unique[j]){ + k =a->bid[i]; + a->bid[i] = a->bid[j]; + a->bid[j] = k; + + b= a->cols[i]; + a->cols[i] = a->cols[j]; + a->cols[j] = b; + + sample = a->unique[i]; + a->unique[i] = a->unique[j]; + a->unique[j] = sample; + } +} + +static void +GROUPdelete(AGGRtask *a){ + for(a->last--; a->last>=0; a->last--){ + BBPunfix(a->cols[a->last]->batCacheid); + } + GDKfree(a->bid); + GDKfree(a->cols); + GDKfree(a->unique); + GDKfree(a); +} + +/* + * The groups optimizer takes a grouping sequence and attempts to + * minimize the intermediate result. The choice depends on a good + * estimate of intermediate results using properties. + */ + +str +GROUPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + bat *grp = getArgReference_bat(stk, pci, 0); + bat *ext = getArgReference_bat(stk, pci, 1); + bat *hist = getArgReference_bat(stk, pci, 2); + int i, j; + bat oldgrp, oldext, oldhist; + str msg = MAL_SUCCEED; + BAT *b; + BUN count = 0; + AGGRtask *aggr; + + aggr = GROUPcollect(cntxt, mb, stk, pci); + if( aggr == NULL) + throw(MAL,"group.multicolumn", SQLSTATE(HY001) MAL_MALLOC_FAIL); + GROUPcollectSort(aggr, 0, aggr->last); + + /* (grp,ext,hist) := group.group(..) */ + /* use the old pattern to perform the incremental grouping */ + *grp = 0; + *ext = 0; + *hist = 0; + msg = GRPgroup1(grp, ext, hist, &aggr->bid[0]); + i = 1; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list