Changeset: 735c252d2dff for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=735c252d2dff
Modified Files:
	monetdb5/modules/mal/mal_weld.c
	monetdb5/modules/mal/mal_weld.h
	monetdb5/modules/mal/mal_weld.mal
	monetdb5/modules/mal/mal_weld.mal.sh
	monetdb5/optimizer/opt_prelude.c
	monetdb5/optimizer/opt_prelude.h
	monetdb5/optimizer/opt_weld.c
Branch: mal-weld
Log Message:

weld impl for group.group and group.groupdone

diffs (250 lines):

diff --git a/monetdb5/modules/mal/mal_weld.c b/monetdb5/modules/mal/mal_weld.c
--- a/monetdb5/modules/mal/mal_weld.c
+++ b/monetdb5/modules/mal/mal_weld.c
@@ -117,12 +117,14 @@ static void dumpWeldProgram(weldState *w
 }
 
 str
-WeldInitState(ptr *retval)
+WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
+	(void)cntxt;
 	weldState *wstate = malloc(sizeof(weldState));
 	wstate->programMaxLen = 1;
 	wstate->program = calloc(wstate->programMaxLen, sizeof(char));
-	*retval = wstate;
+	wstate->groupDeps = calloc(mb->vtop, sizeof(InstrPtr));
+	*getArgReference_ptr(stk, pci, 0) = wstate;;
 	return MAL_SUCCEED;
 }
 
@@ -178,6 +180,7 @@ WeldRun(Client cntxt, MalBlkPtr mb, MalS
 	weld_module_t m = weld_module_compile(wstate->program, conf, e);
 	weld_conf_free(conf);
 	free(wstate->program);
+	free(wstate->groupDeps);
 	free(wstate);
 	if (weld_error_code(e)) {
 		throw(MAL, "weld.run", PROGRAM_GENERAL ": %s", weld_error_message(e));
@@ -531,6 +534,90 @@ WeldBatcalcMULsignal(Client cntxt, MalBl
 	return WeldBatcalcBinary(mb, stk, pci, "*", "weld.batcalcmul");
 }
 
+/* Ignore the existing groups and instead use all the columns up to this point to
+ * generate the new group ids. Weld will remove the unnecessary computations. e.g.:
+ * g1, e1, h1 = group.group(col1) -> for(zip(col1), dictmerger[ty1, i64, min]...
+ * g2, e2, h2 = group.grou(col2, g1) -> for(zip(col2, col1), dictmerger[{ty1, ty2}, i64, min]...
+ */
+str
+WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+	(void)cntxt;
+	int groups = getArg(pci, 0); /* bat[:oid] */
+	int extents = getArg(pci, 1); /* bat[:oid] */
+	int histo = getArg(pci, 2); /* bat[:lng] */
+	weldState *wstate;
+	if (pci->argc == 6) {
+		wstate = *getArgReference_ptr(stk, pci, 5); /* has value */
+	} else {
+		wstate = *getArgReference_ptr(stk, pci, 4); /* has value */
+	}
+
+	/* Build zip(col1, col2, ...) */
+	wstate->groupDeps[groups] = pci;
+	InstrPtr dep = pci;
+	char zipStmt[STR_SIZE_INC] = {'\0'};
+	char dictTypeStmt[STR_SIZE_INC] = {'\0'};
+	int count = 0;
+	while (dep != NULL) {
+		++count;
+		int col = getArg(dep, 3);
+		int colType = getBatType(getArgType(mb, dep, 3));
+		sprintf(zipStmt + strlen(zipStmt), "v%d,", col);
+		sprintf(dictTypeStmt + strlen(dictTypeStmt), " %s,", getWeldType(colType));
+		if (dep->argc == 6) {
+			int oldGrps = getArg(dep, 4);
+			dep = wstate->groupDeps[oldGrps];
+		} else {
+			dep = NULL;
+		}
+	}
+	/* Replace the last comma */
+	zipStmt[strlen(zipStmt) - 1] = '\0';
+	if (count == 1) {
+		dictTypeStmt[strlen(dictTypeStmt) - 1] = '\0';
+	} else {
+		dictTypeStmt[0] = '{';
+		dictTypeStmt[strlen(dictTypeStmt) - 1] = '}';
+	}
+
+	char weldStmt[STR_SIZE_INC * 2];
+	sprintf(weldStmt, "\
+	let groupHash = result( \
+		for(zip(%s), dictmerger[%s, i64, min], |b, i, n| \
+			merge(b, {n, i}) \
+		) \
+	); \
+	let groupHashVec = tovec(groupHash); \
+	let groupIdsDict = result( \
+		for(groupHashVec, dictmerger[%s, i64, min], |b, i, n| \
+			merge(b, {n.$0, i}) \
+		) \
+	); \
+	let empty = result( \
+		for(rangeiter(0L, len(groupHashVec), 1L), appender[i64], |b, i, n| \
+			merge(b, 0L) \
+		) \
+	); \
+	let idsAndCounts = for(zip(%s), {appender[i64], vecmerger[i64, +](empty)}, |b, i, n| \
+		let groupId = lookup(groupIdsDict, n); \
+		{merge(b.$0, groupId), merge(b.$1, {groupId, 1L})} \
+	); \
+	let v%d = result(idsAndCounts.$0); \
+	let v%dhseqbase = 0; \
+	let v%d = result(idsAndCounts.$1); \
+	let v%dhseqbase = 0; \
+	let v%d = result( \
+		for(groupHashVec, vecmerger[i64, +](empty), |b, i, n| \
+			merge(b, {i, lookup(groupHash, n.$0)}) \
+		) \
+	); \
+	let v%dhseqbase = 0;",
+	zipStmt, dictTypeStmt, dictTypeStmt, zipStmt, groups, groups, histo, histo, extents, extents);
+	appendWeldStmt(wstate, weldStmt);
+	return MAL_SUCCEED;
+}
+
 str
 WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
diff --git a/monetdb5/modules/mal/mal_weld.h b/monetdb5/modules/mal/mal_weld.h
--- a/monetdb5/modules/mal/mal_weld.h
+++ b/monetdb5/modules/mal/mal_weld.h
@@ -13,10 +13,11 @@
 
 typedef struct {
 	char *program;
+	InstrPtr *groupDeps;
 	size_t programMaxLen;
 } weldState;
 
-mal_export str WeldInitState(ptr *retval);
+mal_export str WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldRun(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldAggrSum(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldAlgebraProjection(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
@@ -27,6 +28,7 @@ mal_export str WeldAlgebraThetaselect2(C
 mal_export str WeldBatcalcADDsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldBatcalcSUBsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldBatcalcMULsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
+mal_export str WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 mal_export str WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 
 #endif
diff --git a/monetdb5/modules/mal/mal_weld.mal b/monetdb5/modules/mal/mal_weld.mal
--- a/monetdb5/modules/mal/mal_weld.mal
+++ b/monetdb5/modules/mal/mal_weld.mal
@@ -9,7 +9,7 @@
 
 module weld;
 
-command initstate():ptr
+pattern initstate():ptr
 address WeldInitState
 comment "Initialize the state structure that is used to build a weld program";
 
@@ -37,6 +37,14 @@ pattern algebrathetaselect(b:bat[:any_1]
 address WeldAlgebraThetaselect2
 comment "algebra.thetaselect";
 
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid], extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid], extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
 pattern aggrsum(b:bat[:bte], wstate:ptr):bte
 address WeldAggrSum
 comment "aggr.sum";
diff --git a/monetdb5/modules/mal/mal_weld.mal.sh b/monetdb5/modules/mal/mal_weld.mal.sh
--- a/monetdb5/modules/mal/mal_weld.mal.sh
+++ b/monetdb5/modules/mal/mal_weld.mal.sh
@@ -19,7 +19,7 @@ alltypes=(bit ${numeric[@]} oid str)
 
 cat <<EOF
 
-command initstate():ptr
+pattern initstate():ptr
 address WeldInitState
 comment "Initialize the state structure that is used to build a weld program";
 
@@ -47,6 +47,14 @@ pattern algebrathetaselect(b:bat[:any_1]
 address WeldAlgebraThetaselect2
 comment "algebra.thetaselect";
 
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid], extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid], extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
 EOF
 
 for tp in ${numeric[@]}; do
diff --git a/monetdb5/optimizer/opt_prelude.c b/monetdb5/optimizer/opt_prelude.c
--- a/monetdb5/optimizer/opt_prelude.c
+++ b/monetdb5/optimizer/opt_prelude.c
@@ -307,6 +307,7 @@ str weldBatcalcAddRef;
 str weldBatcalcSubRef;
 str weldBatcalcMulRef;
 str weldGetResultRef;
+str weldGroupRef;
 str weldInitStateRef;
 str weldLanguagePassRef;
 str weldRef;
@@ -604,6 +605,7 @@ void optimizerInit(void)
 	weldBatcalcSubRef = putName("batcalcsub");
 	weldBatcalcMulRef = putName("batcalcmul");
 	weldGetResultRef = putName("getresult");
+	weldGroupRef = putName("groupgroup");
 	weldInitStateRef = putName("initstate");
 	weldLanguagePassRef = putName("languagepass");
 	weldRef = putName("weld");
diff --git a/monetdb5/optimizer/opt_prelude.h b/monetdb5/optimizer/opt_prelude.h
--- a/monetdb5/optimizer/opt_prelude.h
+++ b/monetdb5/optimizer/opt_prelude.h
@@ -315,6 +315,7 @@ mal_export str weldBatcalcAddRef;
 mal_export str weldBatcalcSubRef;
 mal_export str weldBatcalcMulRef;
 mal_export str weldGetResultRef;
+mal_export str weldGroupRef;
 mal_export str weldInitStateRef;
 mal_export str weldLanguagePassRef;
 mal_export str weldRef;
diff --git a/monetdb5/optimizer/opt_weld.c b/monetdb5/optimizer/opt_weld.c
--- a/monetdb5/optimizer/opt_weld.c
+++ b/monetdb5/optimizer/opt_weld.c
@@ -18,7 +18,7 @@
 #include "mal_instruction.h"
 #include "opt_weld.h"
 
-#define NUM_WELD_INSTR 8
+#define NUM_WELD_INSTR 12
 #define UNMARKED 0
 #define TEMP_MARK 1
 #define PERM_MARK 2
@@ -52,6 +52,10 @@ static void initWeldInstrs(void) {
 	addWeldInstr(batcalcRef, minusRef, weldBatcalcSubRef); /* batcalc.- */
 	addWeldInstr(batcalcRef, mulRef, weldBatcalcMulRef); /* batcalc.* */
 	addWeldInstr(languageRef, passRef, weldLanguagePassRef); /* language.pass */
+	addWeldInstr(groupRef, groupRef, weldGroupRef); /* group.group*/
+	addWeldInstr(groupRef, subgroupRef, weldGroupRef); /* group.subgroup */
+	addWeldInstr(groupRef, groupdoneRef, weldGroupRef); /* group.groupdone */
+	addWeldInstr(groupRef, subgroupdoneRef, weldGroupRef); /* group.subgroupdone */
 }
 
 static str getWeldRef(InstrPtr instr) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list