Changeset: 391b643810b9 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/391b643810b9
Modified Files:
	clients/Tests/MAL-signatures.stable.out
	clients/Tests/MAL-signatures.stable.out.int128
	clients/Tests/exports.stable.out
	gdk/gdk_group.c
	monetdb5/optimizer/opt_mergetable.c
	monetdb5/optimizer/opt_prelude.c
	monetdb5/optimizer/opt_prelude.h
	sql/backends/monet5/for.c
	sql/backends/monet5/rel_bin.c
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql_statement.c
	sql/backends/monet5/sql_statement.h
	sql/test/BugTracker-2010/Tests/LIMIT_OFFSET_big-endian.Bug-2622.test
Branch: pushcands
Log Message:
Merged with default diffs (truncated from 3728 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -7126,6 +7126,15 @@ stdout of test 'MAL-signatures` in direc [ "color", "value", "command color.value(X_0:color):flt ", "CLRvalue;", "Extracts value component from a color atom" ] [ "color", "value", "command color.value(X_0:color):int ", "CLRvalueInt;", "Extracts value component from a color atom" ] [ "color", "ycc", "command color.ycc(X_0:int, X_1:int, X_2:int):color ", "CLRycc;", "Converts an YCC triplets to a color atom" ] +[ "dict", "compress", "pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1]) ", "DICTcompress;", "dict compress a bat" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str, X_3:bit):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "convert", "pattern dict.convert(X_0:bat[:any]):bat[:any_1] ", "DICTconvert;", "convert candidate list into compressed offsets" ] +[ "dict", "decompress", "pattern dict.decompress(X_0:bat[:any], X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;", "decompress a dictionary compressed (sub)column" ] +[ "dict", "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) (X_8:bat[:oid], X_9:bat[:oid]) ", "DICTjoin;", "join 2 dictionaries" ] +[ "dict", "renumber", "pattern dict.renumber(X_0:bat[:any_1], X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;", "renumber offsets" ] +[ "dict", "select", "pattern dict.select(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, X_7:bit, X_8:bit):bat[:oid] ", "DICTselect;", "value - range select on a dictionary" ] +[ "dict", "thetaselect", 
"pattern dict.thetaselect(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ", "DICTthetaselect;", "thetaselect on a dictionary" ] [ "factories", "getArrival", "command factories.getArrival():bat[:timestamp] ", "FCTgetArrival;", "Retrieve the time stamp the last call was made." ] [ "factories", "getCaller", "command factories.getCaller():int ", "FCTgetCaller;", "Retrieve the unique identity of the factory caller." ] [ "factories", "getDeparture", "command factories.getDeparture():bat[:timestamp] ", "FCTgetDeparture;", "Retrieve the time stamp the last answer was returned." ] @@ -7138,6 +7147,8 @@ stdout of test 'MAL-signatures` in direc [ "fits", "listdir", "unsafe pattern fits.listdir(X_0:str):void ", "FITSdir;", "Attach all FITS files in the directory" ] [ "fits", "listdirpattern", "unsafe pattern fits.listdirpattern(X_0:str, X_1:str):void ", "FITSdirpat;", "Attach all FITS file in the directory, giving a pattern" ] [ "fits", "load", "unsafe pattern fits.load(X_0:str):void ", "FITSloadTable;", "Load a FITS table from an attached file" ] +[ "for", "compress", "pattern for.compress(X_0:str, X_1:str, X_2:str):void ", "FORcompress_col;", "compress a sql column"] +[ "for", "decompress", "pattern for.decompress(X_0:bat[:any], X_1:any_1):bat[:any_1] ", "FORdecompress;", "decompress a for compressed (sub)column" ] [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "Overloaded join operation" ] [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] @@ -7765,11 +7776,15 @@ stdout of test 'MAL-signatures` in direc [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;", "", "" ] [ "optimizer", "defaultfast", "pattern 
optimizer.defaultfast():str ", "OPTwrapper;", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast(X_0:str, X_1:str):str ", "OPTwrapper;", "Fast compound default optimizer pipe" ] +[ "optimizer", "dict", "pattern optimizer.dict():str ", "OPTwrapper;", "" ] +[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ", "OPTwrapper;", "Push dict decompress down" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind():str ", "OPTwrapper;", "" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate empty set expressions" ] [ "optimizer", "epilogue", "command optimizer.epilogue():void ", "optimizer_epilogue;", "release the resources held by the optimizer module" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate():str ", "OPTwrapper;", "" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate constant expressions once" ] +[ "optimizer", "for", "pattern optimizer.for():str ", "OPTwrapper;", "" ] +[ "optimizer", "for", "pattern optimizer.for(X_0:str, X_1:str):str ", "OPTwrapper;", "Push for decompress down" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector():str ", "OPTwrapper;", "" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector(X_0:str, X_1:str):str ", "OPTwrapper;", "Garbage collector optimizer" ] [ "optimizer", "generator", "pattern optimizer.generator():str ", "OPTwrapper;", "" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -10008,6 +10008,15 @@ stdout of test 'MAL-signatures` in direc [ "color", "value", "command color.value(X_0:color):flt ", "CLRvalue;", "Extracts value component from a color atom" ] [ "color", "value", "command color.value(X_0:color):int ", "CLRvalueInt;", "Extracts value component from a color 
atom" ] [ "color", "ycc", "command color.ycc(X_0:int, X_1:int, X_2:int):color ", "CLRycc;", "Converts an YCC triplets to a color atom" ] +[ "dict", "compress", "pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1]) ", "DICTcompress;", "dict compress a bat" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str, X_3:bit):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "convert", "pattern dict.convert(X_0:bat[:any]):bat[:any_1] ", "DICTconvert;", "convert candidate list into compressed offsets" ] +[ "dict", "decompress", "pattern dict.decompress(X_0:bat[:any], X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;", "decompress a dictionary compressed (sub)column" ] +[ "dict", "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) (X_8:bat[:oid], X_9:bat[:oid]) ", "DICTjoin;", "join 2 dictionaries" ] +[ "dict", "renumber", "pattern dict.renumber(X_0:bat[:any_1], X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;", "renumber offsets" ] +[ "dict", "select", "pattern dict.select(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, X_7:bit, X_8:bit):bat[:oid] ", "DICTselect;", "value - range select on a dictionary" ] +[ "dict", "thetaselect", "pattern dict.thetaselect(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ", "DICTthetaselect;", "thetaselect on a dictionary" ] [ "factories", "getArrival", "command factories.getArrival():bat[:timestamp] ", "FCTgetArrival;", "Retrieve the time stamp the last call was made." ] [ "factories", "getCaller", "command factories.getCaller():int ", "FCTgetCaller;", "Retrieve the unique identity of the factory caller." 
] [ "factories", "getDeparture", "command factories.getDeparture():bat[:timestamp] ", "FCTgetDeparture;", "Retrieve the time stamp the last answer was returned." ] @@ -10017,9 +10026,11 @@ stdout of test 'MAL-signatures` in direc [ "fits", "attach", "unsafe pattern fits.attach(X_0:str):void ", "FITSattach;", "Open a FITS file and return catalog of the table HDUs" ] [ "fits", "export", "pattern fits.export(X_0:str):void ", "FITSexportTable;", "Export a table to a FITS file" ] [ "fits", "fitstest", "command fits.fitstest(X_0:str):int ", "FITStest;", "Returns the type of first extension in the FITS file filename" ] -[ "fits", "listdir", "unsafe pattern fits.listdir(X_0:str):void ", "FITSdir;", "Attach all FITS files in the directory" ] -[ "fits", "listdirpattern", "unsafe pattern fits.listdirpattern(X_0:str, X_1:str):void ", "FITSdirpat;", "Attach all FITS file in the directory, giving a pattern" ] -[ "fits", "load", "unsafe pattern fits.load(X_0:str):void ", "FITSloadTable;", "Load a FITS table from an attached file" ] +[ "fits", "listdir", "pattern fits.listdir(X_0:str):void ", "FITSdir;", "Attach all FITS files in the directory" ] +[ "fits", "listdirpattern", "pattern fits.listdirpattern(X_0:str, X_1:str):void ", "FITSdirpat;", "Attach all FITS file in the directory, giving a pattern" ] +[ "fits", "load", "pattern fits.load(X_0:str):void ", "FITSloadTable;", "Load a FITS table from an attached file" ] +[ "for", "compress", "pattern for.compress(X_0:str, X_1:str, X_2:str):void ", "FORcompress_col;", "compress a sql column"] +[ "for", "decompress", "pattern for.decompress(X_0:bat[:any], X_1:any_1):bat[:any_1] ", "FORdecompress;", "decompress a for compressed (sub)column" ] [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "Overloaded join operation" 
] [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] @@ -10661,11 +10672,15 @@ stdout of test 'MAL-signatures` in direc [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;", "", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast():str ", "OPTwrapper;", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast(X_0:str, X_1:str):str ", "OPTwrapper;", "Fast compound default optimizer pipe" ] +[ "optimizer", "dict", "pattern optimizer.dict():str ", "OPTwrapper;", "" ] +[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ", "OPTwrapper;", "Push dict decompress down" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind():str ", "OPTwrapper;", "" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate empty set expressions" ] [ "optimizer", "epilogue", "command optimizer.epilogue():void ", "optimizer_epilogue;", "release the resources held by the optimizer module" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate():str ", "OPTwrapper;", "" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate constant expressions once" ] +[ "optimizer", "for", "pattern optimizer.for():str ", "OPTwrapper;", "" ] +[ "optimizer", "for", "pattern optimizer.for(X_0:str, X_1:str):str ", "OPTwrapper;", "Push for decompress down" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector():str ", "OPTwrapper;", "" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector(X_0:str, X_1:str):str ", "OPTwrapper;", "Garbage collector optimizer" ] [ "optimizer", "generator", "pattern optimizer.generator():str ", "OPTwrapper;", "" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -901,6 +901,7 @@ const 
char *columnRef; const char *comment_onRef; const char *commitRef; str compileString(Symbol *fcn, Client c, str s); +const char *compressRef; char *concatErrors(char *err1, const char *err2) __attribute__((__nonnull__(1, 2))) __attribute__((__returns_nonnull__)); const char *connectRef; const char *contextRef; @@ -934,6 +935,7 @@ const char *dateRef; const char *dblRef; str deblockdataflow(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); void debugFunction(stream *fd, MalBlkPtr mb, MalStkPtr stk, int flg, int first, int size); +const char *decompressRef; int defConstant(MalBlkPtr mb, int type, ValPtr cst); const char *defineRef; void delArgument(InstrPtr p, int varid); @@ -944,6 +946,7 @@ const char *deltaRef; const char *dense_rankRef; const char *deregisterRef; malType destinationType(MalBlkPtr mb, InstrPtr p); +const char *dictRef; const char *diffRef; const char *diffcandRef; const char *differenceRef; @@ -985,6 +988,7 @@ const char *first_valueRef; const char *firstnRef; Module fixModule(const char *nme); int fndConstant(MalBlkPtr mb, const ValRecord *cst, int depth); +const char *forRef; void freeException(str); void freeInstruction(InstrPtr p); void freeMalBlk(MalBlkPtr mb); @@ -1296,6 +1300,7 @@ const char *rename_columnRef; const char *rename_schemaRef; const char *rename_tableRef; const char *rename_userRef; +const char *renumberRef; const char *replaceRef; const char *replicatorRef; void resetMalBlk(MalBlkPtr mb); diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -2082,11 +2082,11 @@ BBPdir_step(bat bid, BUN size, int n, ch } } if (!found) { - TRC_WARNING(GDK, "file %s not found (expected size %zu)\n", fname, free); + TRC_WARNING(GDK, "file %s not found (expected size %" PRIu64 ")\n", fname, free); } else { assert((uint64_t) stb.st_size >= free); if ((uint64_t) stb.st_size < free) - TRC_WARNING(GDK, "file %s too small (expected %zu, actual %zu)\n", fname, free, (size_t) stb.st_size); + TRC_WARNING(GDK, "file 
%s too small (expected %" PRIu64 ", actual %zu)\n", fname, free, (size_t) stb.st_size); } GDKfree(fname); if (vfree == 0) @@ -2114,7 +2114,7 @@ BBPdir_step(bat bid, BUN size, int n, ch } assert((uint64_t) stb.st_size >= vfree); if ((uint64_t) stb.st_size < vfree) - TRC_WARNING(GDK, "file %s too small (expected %zu, actual %zu)\n", fname, vfree, (size_t) stb.st_size); + TRC_WARNING(GDK, "file %s too small (expected %" PRIu64 ", actual %zu)\n", fname, vfree, (size_t) stb.st_size); GDKfree(fname); break; } diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -598,6 +598,54 @@ ctz(oid x) /* EQUAL */ cmp(v, BUNtail(bi, hb)) == 0 \ ) +#define GRP_small_values(BG, BV, GV) \ + do { \ + uint##BG##_t sgrps[1 << BG]; \ + const uint##BV##_t *restrict w = (const uint##BV##_t *) bi.base; \ + uint##BG##_t v; \ + memset(sgrps, 0xFF, sizeof(sgrps)); \ + if (histo) \ + memset(cnts, 0, maxgrps * sizeof(lng)); \ + ngrp = 0; \ + gn->tsorted = true; \ + if (ci.tpe == cand_dense) { \ + TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { \ + oid o = canditer_next_dense(&ci); \ + p = o - b->hseqbase; \ + uint##BG##_t x = GV; \ + if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp < (1 << BG)) { \ + sgrps[x] = v = (uint##BG##_t) ngrp++; \ + maxgrppos = r; \ + if (extents) \ + exts[v] = ci.hseq + r; \ + } \ + ngrps[r] = v; \ + if (r > 0 && v < ngrps[r - 1]) \ + gn->tsorted = false; \ + if (histo) \ + cnts[v]++; \ + } \ + } else { \ + TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { \ + oid o = canditer_next(&ci); \ + p = o - b->hseqbase; \ + uint##BG##_t x = GV; \ + if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp < (1 << BG)) { \ + sgrps[x] = v = (uint##BG##_t) ngrp++; \ + maxgrppos = r; \ + if (extents) \ + exts[v] = ci.hseq + r; \ + } \ + ngrps[r] = v; \ + if (r > 0 && v < ngrps[r - 1]) \ + gn->tsorted = false; \ + if (histo) \ + cnts[v]++; \ + } \ + } \ + TIMEOUT_CHECK(timeoffset, \ + GOTO_LABEL_TIMEOUT_HANDLER(error)); \ + } while (0) gdk_return BATgroup_internal(BAT 
**groups, BAT **extents, BAT **histo, @@ -691,9 +739,8 @@ BATgroup_internal(BAT **groups, BAT **ex } assert(!BATtdense(b)); if (g) { - if (BATtdense(g)) - maxgrp = g->tseqbase + BATcount(g); - else if (BATtordered(g)) + assert(!BATtdense(g)); + if (BATtordered(g)) maxgrp = * (oid *) Tloc(g, BATcount(g) - 1); else if (BATtrevordered(g)) maxgrp = * (oid *) Tloc(g, 0); @@ -814,8 +861,11 @@ BATgroup_internal(BAT **groups, BAT **ex maxgrps = GROUPBATINCR; bi = bat_iterator(b); - if (bi.width <= 2) + if (bi.width <= 2) { maxgrps = (BUN) 1 << (8 * bi.width); + if (bi.width == 1 && maxgrp < 256) + maxgrps *= maxgrp; + } if (extents) { en = COLnew(0, TYPE_oid, maxgrps, TRANSIENT); if (en == NULL) @@ -860,7 +910,26 @@ BATgroup_internal(BAT **groups, BAT **ex } } - if (subsorted || + if (g == NULL && t == TYPE_bte) { + /* byte-sized values, use 256 entry array to keep + * track of doled out group ids; note that we can't + * possibly have more than 256 groups, so the group id + * fits in a uint8_t */ + GRP_small_values(8, 8, w[p]); + } else if (t == TYPE_bte && maxgrp < 256) { + /* subgrouping byte-sized values with a limited number + * of groups, use 65536 entry array to keep track of + * doled out group ids; note that we can't possibly have + * more than 65536 goups, so the group id fits in a + * uint16_t */ + GRP_small_values(16, 8, (uint16_t) (w[p] | (grps[p] << 8))); + } else if (g == NULL && t == TYPE_sht) { + /* short-sized values, use 65536 entry array to keep + * track of doled out group ids; note that we can't + * possibly have more than 65536 groups, so the group + * id fits in a uint16_t */ + GRP_small_values(16, 16, w[p]); + } else if (subsorted || ((BATordered(b) || BATordered_rev(b)) && (g == NULL || BATordered(g) || BATordered_rev(g)))) { /* we only need to compare each entry with the previous */ @@ -961,73 +1030,6 @@ BATgroup_internal(BAT **groups, BAT **ex } GDKfree(pgrp); - } else if (g == NULL && t == TYPE_bte) { - /* byte-sized values, use 256 entry array 
to keep - * track of doled out group ids; note that we can't - * possibly have more than 256 groups, so the group id - * fits in an uint8_t */ - uint8_t bgrps[256]; - const uint8_t *restrict w = (const uint8_t *) bi.base; - uint8_t v; - - algomsg = "byte-sized groups -- "; - memset(bgrps, 0xFF, sizeof(bgrps)); - if (histo) - memset(cnts, 0, maxgrps * sizeof(lng)); - ngrp = 0; - gn->tsorted = true; - TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { - oid o = canditer_next(&ci); - p = o - b->hseqbase; - if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) { - bgrps[w[p]] = v = (uint8_t) ngrp++; - maxgrppos = r; - if (extents) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list