Changeset: cbc9325c81d9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cbc9325c81d9
Modified Files:
        monetdb5/extras/jaql/jaqlgencode.c
Branch: Oct2012
Log Message:

jaql: optimise operations by avoiding modifications to BATs

Instead of removing elements or rewriting arrays in the original oid
ranges, just append new arrays containing the items to use.  As a
consequence, 0@0 is no longer necessarily the start of the document, so
make sure we pass the new start oid on to further pipe operations.  This
way we avoid many expensive kdiff/sdiff operations by just appending
some data to the existing BATs.
When we store into a variable or print, we clean up the whole structure
using json.extract.


diffs (truncated from 716 to 300 lines):

diff --git a/monetdb5/extras/jaql/jaqlgencode.c 
b/monetdb5/extras/jaql/jaqlgencode.c
--- a/monetdb5/extras/jaql/jaqlgencode.c
+++ b/monetdb5/extras/jaql/jaqlgencode.c
@@ -3418,15 +3418,6 @@ dumpvariabletransformation(jc *j, Client
                        q = pushArgument(mb, q, h);
                        c = getArg(q, 0);
                        pushInstruction(mb, q);
-                       dumpbatwritable(j, mb, 1);
-                       q = newInstruction(mb, ASSIGNsymbol);
-                       setModuleId(q, batRef);
-                       setFunctionId(q, insertRef);
-                       q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
-                       q = pushArgument(mb, q, j->j1);
-                       q = pushArgument(mb, q, c);
-                       j->j1 = getArg(q, 0);
-                       pushInstruction(mb, q);
 
                        /* prepare return, new ids (head) with elem ids (tail) 
in
                         * original elems order */
@@ -3450,14 +3441,14 @@ dumpvariabletransformation(jc *j, Client
                        setFunctionId(q, mirrorRef);
                        q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
                        q = pushArgument(mb, q, c);
-                       c = getArg(q, 0);
+                       d = getArg(q, 0);
                        pushInstruction(mb, q);
                        q = newInstruction(mb, ASSIGNsymbol);
                        setModuleId(q, algebraRef);
                        setFunctionId(q, leftjoinRef);
                        q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
                        q = pushArgument(mb, q, b);
-                       q = pushArgument(mb, q, c);
+                       q = pushArgument(mb, q, d);
                        a = getArg(q, 0);
                        pushInstruction(mb, q);
                        q = newInstruction(mb, ASSIGNsymbol);
@@ -3468,6 +3459,32 @@ dumpvariabletransformation(jc *j, Client
                        a = getArg(q, 0);
                        pushInstruction(mb, q);
 
+                       /* and insert into kinds */
+                       q = newInstruction(mb, ASSIGNsymbol);
+                       setModuleId(q, batRef);
+                       setFunctionId(q, mirrorRef);
+                       q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
+                       q = pushArgument(mb, q, a);
+                       b = getArg(q, 0);
+                       pushInstruction(mb, q);
+                       q = newInstruction(mb, ASSIGNsymbol);
+                       setModuleId(q, algebraRef);
+                       setFunctionId(q, leftjoinRef);
+                       q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
+                       q = pushArgument(mb, q, b);
+                       q = pushArgument(mb, q, c);
+                       b = getArg(q, 0);
+                       pushInstruction(mb, q);
+                       dumpbatwritable(j, mb, 1);
+                       q = newInstruction(mb, ASSIGNsymbol);
+                       setModuleId(q, batRef);
+                       setFunctionId(q, insertRef);
+                       q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any));
+                       q = pushArgument(mb, q, j->j1);
+                       q = pushArgument(mb, q, b);
+                       j->j1 = getArg(q, 0);
+                       pushInstruction(mb, q);
+
                        MALCOMMENT(mb, "} dumpvariabletransformation(X_%d)", 
elems);
                        return a;
                }
@@ -5269,22 +5286,64 @@ dumptree(jc *j, Client cntxt, MalBlkPtr 
        while (t != NULL) {
                switch (t->type) {
                        case j_output_var:
-                               q = newInstruction(mb, ASSIGNsymbol);
-                               setModuleId(q, putName("jaql", 4));
-                               setFunctionId(q, putName("setVar", 6));
-                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
-                               q = pushStr(mb, q, t->sval);
-                               q = pushArgument(mb, q, j->j1);
-                               q = pushArgument(mb, q, j->j2);
-                               q = pushArgument(mb, q, j->j3);
-                               q = pushArgument(mb, q, j->j4);
-                               q = pushArgument(mb, q, j->j5);
-                               q = pushArgument(mb, q, j->j6);
-                               q = pushArgument(mb, q, j->j7);
-                               a = getArg(q, 0);
-                               pushInstruction(mb, q);
-                               break;
                        case j_output:
+                               if (j->startoid != 0) {
+                                       a = dumpwalkvar(mb, j->j1, j->j5, 
j->startoid);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, batRef);
+                                       setFunctionId(q, mirrorRef);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, a);
+                                       a = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, putName("json", 4));
+                                       setFunctionId(q, putName("extract", 7));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, j->j1);
+                                       q = pushArgument(mb, q, j->j2);
+                                       q = pushArgument(mb, q, j->j3);
+                                       q = pushArgument(mb, q, j->j4);
+                                       q = pushArgument(mb, q, j->j5);
+                                       q = pushArgument(mb, q, j->j6);
+                                       q = pushArgument(mb, q, j->j7);
+                                       q = pushArgument(mb, q, a);
+                                       q = pushOid(mb, q, 0);
+                                       j->j1 = getArg(q, 0);
+                                       j->j2 = getArg(q, 1);
+                                       j->j3 = getArg(q, 2);
+                                       j->j4 = getArg(q, 3);
+                                       j->j5 = getArg(q, 4);
+                                       j->j6 = getArg(q, 5);
+                                       j->j7 = getArg(q, 6);
+                                       pushInstruction(mb, q);
+                                       j->startoid = 0;
+                               }
+
+                               if (t->type == j_output_var) {
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, putName("jaql", 4));
+                                       setFunctionId(q, putName("setVar", 6));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushStr(mb, q, t->sval);
+                                       q = pushArgument(mb, q, j->j1);
+                                       q = pushArgument(mb, q, j->j2);
+                                       q = pushArgument(mb, q, j->j3);
+                                       q = pushArgument(mb, q, j->j4);
+                                       q = pushArgument(mb, q, j->j5);
+                                       q = pushArgument(mb, q, j->j6);
+                                       q = pushArgument(mb, q, j->j7);
+                                       a = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       break;
+                               }
+
                                q = newInstruction(mb, ASSIGNsymbol);
                                setModuleId(q, ioRef);
                                setFunctionId(q, putName("stdout", 6));
@@ -5344,216 +5403,222 @@ dumptree(jc *j, Client cntxt, MalBlkPtr 
                                }
                        } break;
                        case j_filter:
+                               MALCOMMENT(mb, "j_filter {");
                                a = dumpwalkvar(mb, j->j1, j->j5, j->startoid);
                                b = dumppred(j, cntxt, mb, t->tval2, a);
                                /* b = matching ids from dumpwalkvar (first 
array) */
+
+                               /* create new array with result */
                                q = newInstruction(mb, ASSIGNsymbol);
                                setModuleId(q, algebraRef);
-                               setFunctionId(q, putName("kdifference", 11));
-                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
-                               q = pushArgument(mb, q, a);
+                               setFunctionId(q, semijoinRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j1);
                                q = pushArgument(mb, q, b);
-                               a = getArg(q, 0);
+                               b = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               j->startoid = c = dumpnextid(mb, j->j1);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, reverseRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, b);
+                               d = getArg(q, 0);
                                pushInstruction(mb, q);
                                q = newInstruction(mb, ASSIGNsymbol);
                                setModuleId(q, algebraRef);
                                setFunctionId(q, projectRef);
                                q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, c);
+                               q = pushArgument(mb, q, d);
+                               d = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               dumpbatwritable(j, mb, 5);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, insertRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j5);
+                               q = pushArgument(mb, q, d);
+                               j->j5 = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               dumpbatwritable(j, mb, 1);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, insertRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j1);
+                               q = pushArgument(mb, q, c);
+                               q = pushBte(mb, q, 'a');
+                               j->j1 = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               MALCOMMENT(mb, "} j_filter");
+                               break;
+                       case j_transform:
+                               MALCOMMENT(mb, "j_transform {");
+                               a = dumpwalkvar(mb, j->j1, j->j5, j->startoid);
+                               b = dumpvariabletransformation(j, cntxt, mb, 
t->tval2, a);
+
+                               /* construct new array members, respecting the 
old
+                                * element ids order */
+                               c = dumpnextid(mb, j->j1);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, algebraRef);
+                               setFunctionId(q, projectRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
                                q = pushArgument(mb, q, a);
-                               q = pushOid(mb, q, 0);
-                               a = getArg(q, 0);
+                               q = pushArgument(mb, q, c);
+                               g = getArg(q, 0);
                                pushInstruction(mb, q);
                                q = newInstruction(mb, ASSIGNsymbol);
                                setModuleId(q, batRef);
                                setFunctionId(q, reverseRef);
                                q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, g);
+                               g = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, reverseRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, b);
+                               d = getArg(q, 0);
+                               pushInstruction(mb, q);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, algebraRef);
+                               setFunctionId(q, joinRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, g);
+                               q = pushArgument(mb, q, d);
+                               e = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               /* append */
+                               dumpbatwritable(j, mb, 5);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, insertRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j5);
+                               q = pushArgument(mb, q, e);
+                               j->j5 = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               dumpbatwritable(j, mb, 1);
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, batRef);
+                               setFunctionId(q, insertRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j1);
+                               q = pushArgument(mb, q, c);
+                               q = pushBte(mb, q, 'a');
+                               j->j1 = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               /* start of this document is now at e */
+                               j->startoid = c;
+                               MALCOMMENT(mb, "} j_transform");
+                               break;
+                       case j_expand:
+                               MALCOMMENT(mb, "j_expand {");
+                               a = dumpwalkvar(mb, j->j1, j->j5, j->startoid);
+                               c = dumprefvar(j, mb, t->tval2, a);
+
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, algebraRef);
+                               setFunctionId(q, semijoinRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, j->j1);
+                               q = pushArgument(mb, q, c);
+                               a = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               q = newInstruction(mb, ASSIGNsymbol);
+                               setModuleId(q, algebraRef);
+                               setFunctionId(q, uselectRef);
+                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
+                               q = pushArgument(mb, q, a);
+                               q = pushBte(mb, q, 'a');  /* only arrays match 
expand */
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to