Changeset: 742ae7042a29 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=742ae7042a29 Added Files: monetdb5/extras/jaql/jaqltests/Tests/multiline.jaql monetdb5/extras/jaql/jaqltests/Tests/multiline.stable.err monetdb5/extras/jaql/jaqltests/Tests/multiline.stable.out monetdb5/optimizer/opt_pushselect.c monetdb5/optimizer/opt_pushselect.h sql/scripts/75_storagemodel.sql Modified Files: clients/Tests/exports.stable.out gdk/gdk_group.c monetdb5/extras/jaql/jaqlscenario.c monetdb5/extras/jaql/jaqltests/Tests/All monetdb5/extras/jaql/jaqltests/Tests/documents.stable.err monetdb5/extras/jaql/jaqltree.h monetdb5/extras/jaql/parser/jaql.l monetdb5/mal/mal_profiler.c monetdb5/optimizer/Makefile.ag monetdb5/optimizer/opt_pipes.c monetdb5/optimizer/opt_prelude.c monetdb5/optimizer/opt_prelude.h monetdb5/optimizer/opt_pushranges.c monetdb5/optimizer/opt_support.c monetdb5/optimizer/opt_support.h monetdb5/optimizer/opt_wrapper.c monetdb5/optimizer/optimizer.mal sql/backends/monet5/UDF/Tests/udf-fuse.stable.out sql/backends/monet5/UDF/Tests/udf-reverse.stable.out sql/backends/monet5/sql.mx sql/backends/monet5/sql_gencode.c sql/scripts/Makefile.ag sql/server/bin_optimizer.c sql/server/rel_bin.c sql/server/rel_schema.c sql/server/sql_parser.y sql/server/sql_rel2bin.c sql/server/sql_statement.c sql/server/sql_statement.h sql/storage/bat/bat_storage.c sql/storage/store.c sql/test/BugTracker-2009/Tests/POWER_vs_prod.SF-2596114.stable.out sql/test/BugTracker-2009/Tests/explain_gives_crash.SF-2741829.stable.out sql/test/BugTracker-2010/Tests/LIMIT_OFFSET_big-endian.Bug-2622.stable.out sql/test/BugTracker-2010/Tests/group-by_ordered_column.Bug-2564.stable.out sql/test/BugTracker-2011/Tests/exists-select.Bug-2933.stable.out sql/test/BugTracker-2011/Tests/func_iter_vs_bulk.Bug-2826.stable.out sql/test/BugTracker/Tests/explain.SF-1739353.stable.out sql/test/BugTracker/Tests/jdbc_no_debug.SF-1739356.stable.out sql/test/Dependencies/Tests/Dependencies.stable.out sql/test/Tests/setoptimizer.stable.out sql/test/Tests/trace.stable.out sql/test/bugs/Tests/crash_order_by.stable.out sql/test/leaks/Tests/check0.stable.out sql/test/leaks/Tests/check1.stable.out sql/test/leaks/Tests/check2.stable.out sql/test/leaks/Tests/check3.stable.out sql/test/leaks/Tests/check4.stable.out sql/test/leaks/Tests/check5.stable.out sql/test/sql_xml/Tests/xml.sql Branch: rdf Log Message:
Merge with default branch diffs (truncated from 4390 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -682,10 +682,12 @@ str ALARMusec(lng *ret); str ALGBATmaximum(ptr *result, int *bid); str ALGBATminimum(ptr *result, int *bid); str ALGantijoin(int *result, int *lid, int *rid); +str ALGantijoin2(int *l, int *r, int *lid, int *rid); str ALGantiuselect1(int *result, int *bid, ptr value); str ALGantiuselectInclusive(int *result, int *bid, ptr low, ptr high, bit *lin, bit *rin); str ALGavg(dbl *res, int *bid); str ALGbandjoin(int *result, int *lid, int *rid, ptr *minus, ptr *plus, bit *li, bit *hi); +str ALGbandjoin2(int *l, int *r, int *lid, int *rid, ptr *minus, ptr *plus, bit *li, bit *hi); str ALGbandjoin_default(int *result, int *lid, int *rid, ptr *minus, ptr *plus); str ALGcard(lng *result, int *bid); str ALGcopy(int *result, int *bid); @@ -693,6 +695,7 @@ str ALGcount_bat(wrd *result, int *bid); str ALGcount_nil(wrd *result, int *bid, bit *ignore_nils); str ALGcount_no_nil(wrd *result, int *bid); str ALGcross(int *result, int *lid, int *rid); +str ALGcrossproduct2(int *l, int *r, int *lid, int *rid); str ALGexist(bit *ret, int *bid, ptr val); str ALGexistBUN(bit *ret, int *bid, ptr val, ptr tval); str ALGfetch(ptr ret, int *bid, lng *pos); @@ -714,6 +717,7 @@ str ALGhtsort(int *result, int *lid); str ALGidentity(int *ret, int *bid); str ALGindexjoin(int *result, int *lid, int *rid); str ALGjoin(int *result, int *lid, int *rid); +str ALGjoin2(int *l, int *r, int *lid, int *rid); str ALGjoinPath(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); BAT *ALGjoinPathBody(Client cntxt, int top, BAT **joins, int flag); str ALGjoinestimate(int *result, int *lid, int *rid, lng *estimate); @@ -780,6 +784,7 @@ str ALGprojectNIL(int *ret, int *bid); str ALGprojecthead(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str ALGprojecttail(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str ALGrangejoin(int *result, int *lid, int *rlid, int *rhid, bit *li, bit *hi); +str ALGrangejoin2(int *l, int *r, int *lid, int *rlid, int *rhid, bit *li, bit *hi); str ALGreuse(int *ret, int *bid); str ALGrevert(int *result, int *bid); str ALGsample(int *result, int *bid, int *param); @@ -845,6 +850,7 @@ str ALGsunion(int *result, int *lid, int str ALGsunique(int *result, int *bid); str ALGtdiff(int *result, int *lid, int *rid); str ALGthetajoin(int *result, int *lid, int *rid, int *opc); +str ALGthetajoin2(int *l, int *r, int *lid, int *rid, int *opc); str ALGthetajoinEstimate(int *result, int *lid, int *rid, int *opc, lng *estimate); str ALGthetaselect(int *result, int *bid, ptr low, str *op); str ALGthetasubselect1(bat *result, bat *bid, const void *val, const char **op); @@ -1380,7 +1386,6 @@ str CSTtoString(Client cntxt, MalBlkPtr int CTrefine(BAT **res, BAT *b, BAT *a); int CTrefine_rev(BAT **res, BAT *b, BAT *a); str CemptySet(int *k, int *bid); -int DFLOWadmission(lng argclaim, lng hotclaim); str DICTbind(int *idx, int *val, str *nme); str DICTcompress(int *idx, str *nme, int *bid); str DICTdecompress(int *ret, str *nme); @@ -1571,6 +1576,7 @@ str LCKunset(int *res, monet_lock *l); char *M5OutOfMemory; str MACROprocessor(Client cntxt, MalBlkPtr mb, Symbol t); int MAL_MAXCLIENTS; +int MALadmission(lng argclaim, lng hotclaim); str MALassertBit(int *ret, bit *val, str *msg); str MALassertInt(int *ret, int *val, str *msg); str MALassertLng(int *ret, lng *val, str *msg); @@ -1588,6 +1594,7 @@ str MALoptimizer(Client c); str MALparser(Client c); str MALpipeline(Client c); str MALreader(Client c); +void MALresourceFairness(Client cntxt, MalBlkPtr mb, lng usec); str MALstartDataflow(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str MANUALcompletion(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str MANUALcreate0(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); @@ -1879,6 +1886,7 @@ int OPToriginImplementation(Client cntxt str OPTpeers(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); int OPTprejoinImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); int OPTpushrangesImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); +int OPTpushselectImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); int OPTrecyclerImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); int OPTreduceImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p); int OPTremapImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); @@ -2331,6 +2339,7 @@ str TKNZRgetIndex(int *r); str TKNZRgetLevel(int *r, int *level); str TKNZRlocate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str TKNZRopen(int *r, str *name); +str TKNZRrdf2str(bat *res, bat *bid, bat *map); str TKNZRtakeOid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); void TRACEtable(BAT **r); str TRADERmakeBid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); @@ -2555,6 +2564,8 @@ void delVariable(MalBlkPtr mb, int varid str deleteRef; void deleteSymbol(Module scope, Symbol prg); int deletesProp; +str deltaRef; +str delta_projectRef; str dense_rank_grpRef; int depositBox(Box box, str name, int type, ValPtr val); str depositRef; @@ -2645,7 +2656,7 @@ char **getHelp(Module m, str pat, int fl char **getHelpMatch(char *pat); MalBlkPtr getMalBlkHistory(MalBlkPtr mb, int idx); MalBlkPtr getMalBlkMarker(MalBlkPtr mb, str marker); -lng getMemoryClaim(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int i, int flag); +lng getMemoryClaim(MalBlkPtr mb, MalStkPtr stk, int pc, int i, int flag); str getName(str nme, size_t len); int getPC(MalBlkPtr mb, InstrPtr p); str getPipeCatalog(int *nme, int *def, int *stat); @@ -2747,6 +2758,7 @@ str kunionRef; str kuniqueRef; str languageRef; str lastline(Client cntxt); +str leftfetchjoinRef; str leftjoinPathRef; str leftjoinRef; str levenshtein_impl(int *result, str *s, str *t, int *insdel_cost, int *replace_cost, int *transpose_cost); @@ -2754,6 +2766,7 @@ str levenshteinbasic2_impl(int *result, str levenshteinbasic_impl(int *result, str *s, str *t); str likeRef; str likeselectRef; +str likesubselectRef; str likeuselectRef; void listFunction(stream *fd, MalBlkPtr mb, MalStkPtr stk, int flg, int first, int step); str listRef; @@ -3041,7 +3054,7 @@ int rule_tostr(str *buf, int *len, rule str runFactory(Client cntxt, MalBlkPtr mb, MalBlkPtr mbcaller, MalStkPtr stk, InstrPtr pci); str runMAL(Client c, MalBlkPtr mb, MalBlkPtr mbcaller, MalStkPtr env); str runMALDebugger(Client cntxt, Symbol s); -str runMALdataflow(Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk, MalStkPtr env, InstrPtr pcicaller); +str runMALdataflow(Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk); str runMALsequence(Client cntxt, MalBlkPtr mb, int startpc, int stoppc, MalStkPtr stk, MalStkPtr env, InstrPtr pcicaller); str runScenario(Client c); int runonceProp; @@ -3142,19 +3155,24 @@ int stringLength(Client cntxt); str stringdiff_impl(int *res, str *s1, str *s2); str subgroupRef; str subgroupdoneRef; +str subselectRef; str sumRef; str sunionRef; int tableProp; int takeBox(Box box, str name, ValPtr val, int tpe); str takeRef; +str tdifferenceRef; str thetajoinRef; str thetaselectRef; +str thetasubselectRef; str thetauselectRef; +str tidRef; int timestamp_fromstr(str buf, int *len, timestamp **ret); timestamp *timestamp_nil; int timestamp_tostr(str *buf, int *len, timestamp *val); int timestamp_tostr(str *buf, int *len, timestamp *val); int timestamp_tz_tostr(str *buf, int *len, timestamp *val, tzone *timezone); +str tintersectRef; int tlbProp; str toString(Box box, lng i); str topn_maxRef; diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -63,6 +63,12 @@ * If a hash table already exists on b, we can make use of it. * * Otherwise we build a partial hash table on the fly. + * + * A decision should be made on the order in which grouping occurs + * Let |b| has << different values as |g| then the linked lists gets + * extremely long, leading to a n^2 algorithm. + * At the MAL level, the multigroup function would perform the dynamic + * optimization. */ gdk_return BATgroup_internal(BAT **groups, BAT **extents, BAT **histo, @@ -243,11 +249,8 @@ BATgroup_internal(BAT **groups, BAT **ex if ((grps && *grps != prev) || cmp(pv, v) != 0) { ngrp++; if (ngrp == maxgrps) { - /* we need to extend extents - * and histo bats */ - maxgrps += GROUPBATINCR; - if (maxgrps > BATcount(b)) - maxgrps = BATcount(b); + /* we need to extend extents and histo bats, do it once */ + maxgrps = BATcount(b); if (extents) { BATsetcount(en, ngrp); en = BATextend(en, maxgrps); @@ -321,11 +324,8 @@ BATgroup_internal(BAT **groups, BAT **ex } /* start a new group */ if (ngrp == maxgrps) { - /* we need to extend extents and histo - * bats */ - maxgrps += GROUPBATINCR; - if (maxgrps > BATcount(b)) - maxgrps = BATcount(b); + /* we need to extend extents and histo bats, do it once */ + maxgrps = BATcount(b); if (extents) { BATsetcount(en, ngrp); en = BATextend(en, maxgrps); @@ -371,11 +371,8 @@ BATgroup_internal(BAT **groups, BAT **ex if (hb == BUN_NONE) { /* no equal found: start new group */ if (ngrp == maxgrps) { - /* we need to extend extents - * and histo bats */ - maxgrps += GROUPBATINCR; - if (maxgrps > BATcount(b)) - maxgrps = BATcount(b); + /* we need to extend extents and histo bats, do it once */ + maxgrps = BATcount(b); if (extents) { BATsetcount(en, ngrp); en = BATextend(en, maxgrps); @@ -428,30 +425,54 @@ BATgroup_internal(BAT **groups, BAT **ex GDKerror("BATgroup: cannot allocate hash table\n"); goto error; } +#define GRPhashloop(TYPE,EXP1,EXP2) {\ +v = BUNtail(bi, p);\ +prb = hash_##TYPE(hs, v) EXP1;\ +for (hb = hs->hash[prb];\ + hb != BUN_NONE;\ + hb = hs->link[hb]) {\ + if (EXP2 *(TYPE*) v == *(TYPE*) BUNtail(bi,hb) ){\ + ngrps[p - r] = ngrps[hb - r];\ + if (histo)\ + cnts[ngrps[hb - r]]++;\ + break;\ + }\ +} } + +#define GRPhashfactor(TYPE) \ + if (grps == NULL ) { GRPhashloop(TYPE,, ) }\ + else GRPhashloop(TYPE, ^ hash_oid(hs, (oid *)&grps[p-r]) ,grps[hb - r] == grps[p - r] &&) + +#define GRPhashswitch \ +switch( ATOMstorage(hs->type)){\ +case TYPE_bte: GRPhashfactor(bte); break;\ +case TYPE_sht: GRPhashfactor(sht); break;\ +case TYPE_int: GRPhashfactor(int); break;\ +case TYPE_flt: GRPhashfactor(flt); break;\ +case TYPE_lng: GRPhashfactor(lng); break;\ +default: \ + v = BUNtail(bi, p);\ + prb = hash_any(hs, v);\ + for (hb = hs->hash[prb];\ + hb != BUN_NONE;\ + hb = hs->link[hb]) {\ + if ((grps == NULL ||\ + grps[hb - r] == grps[p - r]) &&\ + cmp(v, BUNtail(bi, hb)) == 0) {\ + ngrps[p - r] = ngrps[hb - r];\ + if (histo)\ + cnts[ngrps[hb - r]]++;\ + break;\ + } } } + for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) { - v = BUNtail(bi, p); - prb = HASHprobe(hs, v); - for (hb = hs->hash[prb]; - hb != BUN_NONE; - hb = hs->link[hb]) { - if ((grps == NULL || - grps[hb - r] == grps[p - r]) && - cmp(v, BUNtail(bi, hb)) == 0) { - ngrps[p - r] = ngrps[hb - r]; - if (histo) - cnts[ngrps[hb - r]]++; - break; - } - } + GRPhashswitch; if (hb == BUN_NONE) { /* no equal found: start new group and * enter into hash table */ if (ngrp == maxgrps) { - /* we need to extend extents - * and histo bats */ - maxgrps += GROUPBATINCR; - if (maxgrps > BATcount(b)) - maxgrps = BATcount(b); + /* we need to extend extents and histo bats, do it at most once */ + maxgrps = BATcount(b); if (extents) { BATsetcount(en, ngrp); en = BATextend(en, maxgrps); diff --git a/monetdb5/extras/jaql/jaqlscenario.c b/monetdb5/extras/jaql/jaqlscenario.c --- a/monetdb5/extras/jaql/jaqlscenario.c +++ b/monetdb5/extras/jaql/jaqlscenario.c @@ -145,14 +145,19 @@ freeVariables(Client c, MalBlkPtr mb, Ma str JAQLreader(Client c) { - if (MCreadClient(c) > 0) - return MAL_SUCCEED; + /* dummy stub, the scanner reads for us + * TODO: pre-fill the buf if we have single line mode */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list