Changeset: cbc9325c81d9 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cbc9325c81d9 Modified Files: monetdb5/extras/jaql/jaqlgencode.c Branch: Oct2012 Log Message:
jaql: optimise operations by avoiding modifications to BATs. Instead of removing elements or rewriting arrays for the original oid ranges, just append new arrays with the items to use. Because of this, 0@0 is no longer necessarily the start of the document, so make sure we pass the new start oid on to further pipe operations. Doing so, we avoid many expensive kdiff/sdiff operations by just appending some data to the existing BATs. When we store in a variable, or print, we clean up the whole structure using json.extract. diffs (truncated from 716 to 300 lines): diff --git a/monetdb5/extras/jaql/jaqlgencode.c b/monetdb5/extras/jaql/jaqlgencode.c --- a/monetdb5/extras/jaql/jaqlgencode.c +++ b/monetdb5/extras/jaql/jaqlgencode.c @@ -3418,15 +3418,6 @@ dumpvariabletransformation(jc *j, Client q = pushArgument(mb, q, h); c = getArg(q, 0); pushInstruction(mb, q); - dumpbatwritable(j, mb, 1); - q = newInstruction(mb, ASSIGNsymbol); - setModuleId(q, batRef); - setFunctionId(q, insertRef); - q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); - q = pushArgument(mb, q, j->j1); - q = pushArgument(mb, q, c); - j->j1 = getArg(q, 0); - pushInstruction(mb, q); /* prepare return, new ids (head) with elem ids (tail) in * original elems order */ @@ -3450,14 +3441,14 @@ dumpvariabletransformation(jc *j, Client setFunctionId(q, mirrorRef); q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); q = pushArgument(mb, q, c); - c = getArg(q, 0); + d = getArg(q, 0); pushInstruction(mb, q); q = newInstruction(mb, ASSIGNsymbol); setModuleId(q, algebraRef); setFunctionId(q, leftjoinRef); q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); q = pushArgument(mb, q, b); - q = pushArgument(mb, q, c); + q = pushArgument(mb, q, d); a = getArg(q, 0); pushInstruction(mb, q); q = newInstruction(mb, ASSIGNsymbol); @@ -3468,6 +3459,32 @@ dumpvariabletransformation(jc *j, Client a = getArg(q, 0); pushInstruction(mb, q); + /* and insert into kinds */ + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, mirrorRef); + 
q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, a); + b = getArg(q, 0); + pushInstruction(mb, q); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, algebraRef); + setFunctionId(q, leftjoinRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, b); + q = pushArgument(mb, q, c); + b = getArg(q, 0); + pushInstruction(mb, q); + dumpbatwritable(j, mb, 1); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, insertRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, b); + j->j1 = getArg(q, 0); + pushInstruction(mb, q); + MALCOMMENT(mb, "} dumpvariabletransformation(X_%d)", elems); return a; } @@ -5269,22 +5286,64 @@ dumptree(jc *j, Client cntxt, MalBlkPtr while (t != NULL) { switch (t->type) { case j_output_var: - q = newInstruction(mb, ASSIGNsymbol); - setModuleId(q, putName("jaql", 4)); - setFunctionId(q, putName("setVar", 6)); - q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); - q = pushStr(mb, q, t->sval); - q = pushArgument(mb, q, j->j1); - q = pushArgument(mb, q, j->j2); - q = pushArgument(mb, q, j->j3); - q = pushArgument(mb, q, j->j4); - q = pushArgument(mb, q, j->j5); - q = pushArgument(mb, q, j->j6); - q = pushArgument(mb, q, j->j7); - a = getArg(q, 0); - pushInstruction(mb, q); - break; case j_output: + if (j->startoid != 0) { + a = dumpwalkvar(mb, j->j1, j->j5, j->startoid); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, mirrorRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, a); + a = getArg(q, 0); + pushInstruction(mb, q); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, putName("json", 4)); + setFunctionId(q, putName("extract", 7)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = 
pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, j->j2); + q = pushArgument(mb, q, j->j3); + q = pushArgument(mb, q, j->j4); + q = pushArgument(mb, q, j->j5); + q = pushArgument(mb, q, j->j6); + q = pushArgument(mb, q, j->j7); + q = pushArgument(mb, q, a); + q = pushOid(mb, q, 0); + j->j1 = getArg(q, 0); + j->j2 = getArg(q, 1); + j->j3 = getArg(q, 2); + j->j4 = getArg(q, 3); + j->j5 = getArg(q, 4); + j->j6 = getArg(q, 5); + j->j7 = getArg(q, 6); + pushInstruction(mb, q); + j->startoid = 0; + } + + if (t->type == j_output_var) { + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, putName("jaql", 4)); + setFunctionId(q, putName("setVar", 6)); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushStr(mb, q, t->sval); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, j->j2); + q = pushArgument(mb, q, j->j3); + q = pushArgument(mb, q, j->j4); + q = pushArgument(mb, q, j->j5); + q = pushArgument(mb, q, j->j6); + q = pushArgument(mb, q, j->j7); + a = getArg(q, 0); + pushInstruction(mb, q); + break; + } + q = newInstruction(mb, ASSIGNsymbol); setModuleId(q, ioRef); setFunctionId(q, putName("stdout", 6)); @@ -5344,216 +5403,222 @@ dumptree(jc *j, Client cntxt, MalBlkPtr } } break; case j_filter: + MALCOMMENT(mb, "j_filter {"); a = dumpwalkvar(mb, j->j1, j->j5, j->startoid); b = dumppred(j, cntxt, mb, t->tval2, a); /* b = matching ids from dumpwalkvar (first array) */ + + /* create new array with result */ q = newInstruction(mb, ASSIGNsymbol); setModuleId(q, algebraRef); - setFunctionId(q, putName("kdifference", 11)); - q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); - q = pushArgument(mb, q, a); + setFunctionId(q, semijoinRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); q = 
pushArgument(mb, q, b); - a = getArg(q, 0); + b = getArg(q, 0); + pushInstruction(mb, q); + + j->startoid = c = dumpnextid(mb, j->j1); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, reverseRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, b); + d = getArg(q, 0); pushInstruction(mb, q); q = newInstruction(mb, ASSIGNsymbol); setModuleId(q, algebraRef); setFunctionId(q, projectRef); q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, c); + q = pushArgument(mb, q, d); + d = getArg(q, 0); + pushInstruction(mb, q); + + dumpbatwritable(j, mb, 5); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, insertRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j5); + q = pushArgument(mb, q, d); + j->j5 = getArg(q, 0); + pushInstruction(mb, q); + + dumpbatwritable(j, mb, 1); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, insertRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, c); + q = pushBte(mb, q, 'a'); + j->j1 = getArg(q, 0); + pushInstruction(mb, q); + + MALCOMMENT(mb, "} j_filter"); + break; + case j_transform: + MALCOMMENT(mb, "j_transform {"); + a = dumpwalkvar(mb, j->j1, j->j5, j->startoid); + b = dumpvariabletransformation(j, cntxt, mb, t->tval2, a); + + /* construct new array members, respecting the old + * element ids order */ + c = dumpnextid(mb, j->j1); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, algebraRef); + setFunctionId(q, projectRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); q = pushArgument(mb, q, a); - q = pushOid(mb, q, 0); - a = getArg(q, 0); + q = pushArgument(mb, q, c); + g = getArg(q, 0); pushInstruction(mb, q); q = newInstruction(mb, ASSIGNsymbol); setModuleId(q, batRef); setFunctionId(q, reverseRef); q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any)); + q = pushArgument(mb, q, g); + g = getArg(q, 0); + pushInstruction(mb, q); + + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, reverseRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, b); + d = getArg(q, 0); + pushInstruction(mb, q); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, algebraRef); + setFunctionId(q, joinRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, g); + q = pushArgument(mb, q, d); + e = getArg(q, 0); + pushInstruction(mb, q); + + /* append */ + dumpbatwritable(j, mb, 5); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, insertRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j5); + q = pushArgument(mb, q, e); + j->j5 = getArg(q, 0); + pushInstruction(mb, q); + + dumpbatwritable(j, mb, 1); + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, batRef); + setFunctionId(q, insertRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, c); + q = pushBte(mb, q, 'a'); + j->j1 = getArg(q, 0); + pushInstruction(mb, q); + + /* start of this document is now at e */ + j->startoid = c; + MALCOMMENT(mb, "} j_transform"); + break; + case j_expand: + MALCOMMENT(mb, "j_expand {"); + a = dumpwalkvar(mb, j->j1, j->j5, j->startoid); + c = dumprefvar(j, mb, t->tval2, a); + + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, algebraRef); + setFunctionId(q, semijoinRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, j->j1); + q = pushArgument(mb, q, c); + a = getArg(q, 0); + pushInstruction(mb, q); + + q = newInstruction(mb, ASSIGNsymbol); + setModuleId(q, algebraRef); + setFunctionId(q, uselectRef); + q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); + q = pushArgument(mb, q, a); + q = pushBte(mb, q, 'a'); /* only arrays match expand */ 
_______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list