Changeset: 669b4dea64ad for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/669b4dea64ad Branch: default Log Message:
Merge dict branch into default. diffs (truncated from 3484 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -8449,6 +8449,15 @@ [ "color", "value", "command color.value(X_0:color):flt ", "CLRvalue;", "Extracts value component from a color atom" ] [ "color", "value", "command color.value(X_0:color):int ", "CLRvalueInt;", "Extracts value component from a color atom" ] [ "color", "ycc", "command color.ycc(X_0:int, X_1:int, X_2:int):color ", "CLRycc;", "Converts an YCC triplets to a color atom" ] +[ "dict", "compress", "pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1]) ", "DICTcompress;", "dict compress a bat" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str, X_3:bit):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "convert", "pattern dict.convert(X_0:bat[:any]):bat[:any_1] ", "DICTconvert;", "convert candidate list into compressed offsets" ] +[ "dict", "decompress", "pattern dict.decompress(X_0:bat[:any], X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;", "decompress a dictionary compressed (sub)column" ] +[ "dict", "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) (X_8:bat[:oid], X_9:bat[:oid]) ", "DICTjoin;", "join 2 dictionaries" ] +[ "dict", "renumber", "pattern dict.renumber(X_0:bat[:any_1], X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;", "renumber offsets" ] +[ "dict", "select", "pattern dict.select(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, X_7:bit, X_8:bit):bat[:oid] ", "DICTselect;", "value - range select on a dictionary" ] +[ "dict", "thetaselect", "pattern 
dict.thetaselect(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ", "DICTthetaselect;", "thetaselect on a dictionary" ] [ "factories", "getArrival", "command factories.getArrival():bat[:timestamp] ", "FCTgetArrival;", "Retrieve the time stamp the last call was made." ] [ "factories", "getCaller", "command factories.getCaller():int ", "FCTgetCaller;", "Retrieve the unique identity of the factory caller." ] [ "factories", "getDeparture", "command factories.getDeparture():bat[:timestamp] ", "FCTgetDeparture;", "Retrieve the time stamp the last answer was returned." ] @@ -8461,6 +8470,8 @@ [ "fits", "listdir", "unsafe pattern fits.listdir(X_0:str):void ", "FITSdir;", "Attach all FITS files in the directory" ] [ "fits", "listdirpattern", "unsafe pattern fits.listdirpattern(X_0:str, X_1:str):void ", "FITSdirpat;", "Attach all FITS file in the directory, giving a pattern" ] [ "fits", "load", "unsafe pattern fits.load(X_0:str):void ", "FITSloadTable;", "Load a FITS table from an attached file" ] +[ "for", "compress", "pattern for.compress(X_0:str, X_1:str, X_2:str):void ", "FORcompress_col;", "compress a sql column"] +[ "for", "decompress", "pattern for.decompress(X_0:bat[:any], X_1:any_1):bat[:any_1] ", "FORdecompress;", "decompress a for compressed (sub)column" ] [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "Overloaded join operation" ] [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] @@ -9100,11 +9111,15 @@ [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;", "", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast():str ", "OPTwrapper;", "" ] [ "optimizer", "defaultfast", "pattern 
optimizer.defaultfast(X_0:str, X_1:str):str ", "OPTwrapper;", "Fast compound default optimizer pipe" ] +[ "optimizer", "dict", "pattern optimizer.dict():str ", "OPTwrapper;", "" ] +[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ", "OPTwrapper;", "Push dict decompress down" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind():str ", "OPTwrapper;", "" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate empty set expressions" ] [ "optimizer", "epilogue", "command optimizer.epilogue():void ", "optimizer_epilogue;", "release the resources held by the optimizer module" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate():str ", "OPTwrapper;", "" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate constant expressions once" ] +[ "optimizer", "for", "pattern optimizer.for():str ", "OPTwrapper;", "" ] +[ "optimizer", "for", "pattern optimizer.for(X_0:str, X_1:str):str ", "OPTwrapper;", "Push for decompress down" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector():str ", "OPTwrapper;", "" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector(X_0:str, X_1:str):str ", "OPTwrapper;", "Garbage collector optimizer" ] [ "optimizer", "generator", "pattern optimizer.generator():str ", "OPTwrapper;", "" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -11734,6 +11734,15 @@ [ "color", "value", "command color.value(X_0:color):flt ", "CLRvalue;", "Extracts value component from a color atom" ] [ "color", "value", "command color.value(X_0:color):int ", "CLRvalueInt;", "Extracts value component from a color atom" ] [ "color", "ycc", "command color.ycc(X_0:int, X_1:int, X_2:int):color ", "CLRycc;", "Converts an YCC triplets to a color 
atom" ] +[ "dict", "compress", "pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1]) ", "DICTcompress;", "dict compress a bat" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "compress", "pattern dict.compress(X_0:str, X_1:str, X_2:str, X_3:bit):void ", "DICTcompress_col;", "compress a sql column" ] +[ "dict", "convert", "pattern dict.convert(X_0:bat[:any]):bat[:any_1] ", "DICTconvert;", "convert candidate list into compressed offsets" ] +[ "dict", "decompress", "pattern dict.decompress(X_0:bat[:any], X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;", "decompress a dictionary compressed (sub)column" ] +[ "dict", "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) (X_8:bat[:oid], X_9:bat[:oid]) ", "DICTjoin;", "join 2 dictionaries" ] +[ "dict", "renumber", "pattern dict.renumber(X_0:bat[:any_1], X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;", "renumber offsets" ] +[ "dict", "select", "pattern dict.select(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, X_7:bit, X_8:bit):bat[:oid] ", "DICTselect;", "value - range select on a dictionary" ] +[ "dict", "thetaselect", "pattern dict.thetaselect(X_0:bat[:any], X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ", "DICTthetaselect;", "thetaselect on a dictionary" ] [ "factories", "getArrival", "command factories.getArrival():bat[:timestamp] ", "FCTgetArrival;", "Retrieve the time stamp the last call was made." ] [ "factories", "getCaller", "command factories.getCaller():int ", "FCTgetCaller;", "Retrieve the unique identity of the factory caller." ] [ "factories", "getDeparture", "command factories.getDeparture():bat[:timestamp] ", "FCTgetDeparture;", "Retrieve the time stamp the last answer was returned." 
] @@ -11746,6 +11755,8 @@ [ "fits", "listdir", "pattern fits.listdir(X_0:str):void ", "FITSdir;", "Attach all FITS files in the directory" ] [ "fits", "listdirpattern", "pattern fits.listdirpattern(X_0:str, X_1:str):void ", "FITSdirpat;", "Attach all FITS file in the directory, giving a pattern" ] [ "fits", "load", "pattern fits.load(X_0:str):void ", "FITSloadTable;", "Load a FITS table from an attached file" ] +[ "for", "compress", "pattern for.compress(X_0:str, X_1:str, X_2:str):void ", "FORcompress_col;", "compress a sql column"] +[ "for", "decompress", "pattern for.decompress(X_0:bat[:any], X_1:any_1):bat[:any_1] ", "FORdecompress;", "decompress a for compressed (sub)column" ] [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "Overloaded join operation" ] [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) (X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;", "" ] @@ -12400,11 +12411,15 @@ [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;", "", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast():str ", "OPTwrapper;", "" ] [ "optimizer", "defaultfast", "pattern optimizer.defaultfast(X_0:str, X_1:str):str ", "OPTwrapper;", "Fast compound default optimizer pipe" ] +[ "optimizer", "dict", "pattern optimizer.dict():str ", "OPTwrapper;", "" ] +[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ", "OPTwrapper;", "Push dict decompress down" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind():str ", "OPTwrapper;", "" ] [ "optimizer", "emptybind", "pattern optimizer.emptybind(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate empty set expressions" ] [ "optimizer", "epilogue", "command optimizer.epilogue():void ", "optimizer_epilogue;", "release the resources held 
by the optimizer module" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate():str ", "OPTwrapper;", "" ] [ "optimizer", "evaluate", "pattern optimizer.evaluate(X_0:str, X_1:str):str ", "OPTwrapper;", "Evaluate constant expressions once" ] +[ "optimizer", "for", "pattern optimizer.for():str ", "OPTwrapper;", "" ] +[ "optimizer", "for", "pattern optimizer.for(X_0:str, X_1:str):str ", "OPTwrapper;", "Push for decompress down" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector():str ", "OPTwrapper;", "" ] [ "optimizer", "garbageCollector", "pattern optimizer.garbageCollector(X_0:str, X_1:str):str ", "OPTwrapper;", "Garbage collector optimizer" ] [ "optimizer", "generator", "pattern optimizer.generator():str ", "OPTwrapper;", "" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -901,6 +901,7 @@ const char *columnRef; const char *comment_onRef; const char *commitRef; str compileString(Symbol *fcn, Client c, str s); +const char *compressRef; char *concatErrors(char *err1, const char *err2) __attribute__((__nonnull__(1, 2))) __attribute__((__returns_nonnull__)); const char *connectRef; const char *contextRef; @@ -934,6 +935,7 @@ const char *dateRef; const char *dblRef; str deblockdataflow(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); void debugFunction(stream *fd, MalBlkPtr mb, MalStkPtr stk, int flg, int first, int size); +const char *decompressRef; int defConstant(MalBlkPtr mb, int type, ValPtr cst); const char *defineRef; void delArgument(InstrPtr p, int varid); @@ -944,6 +946,7 @@ const char *deltaRef; const char *dense_rankRef; const char *deregisterRef; malType destinationType(MalBlkPtr mb, InstrPtr p); +const char *dictRef; const char *diffRef; const char *diffcandRef; const char *differenceRef; @@ -985,6 +988,7 @@ const char *first_valueRef; const char *firstnRef; Module fixModule(const char *nme); int 
fndConstant(MalBlkPtr mb, const ValRecord *cst, int depth); +const char *forRef; void freeException(str); void freeInstruction(InstrPtr p); void freeMalBlk(MalBlkPtr mb); @@ -1297,6 +1301,7 @@ const char *rename_columnRef; const char *rename_schemaRef; const char *rename_tableRef; const char *rename_userRef; +const char *renumberRef; const char *replaceRef; const char *replicatorRef; void resetMalBlk(MalBlkPtr mb); diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -598,6 +598,54 @@ ctz(oid x) /* EQUAL */ cmp(v, BUNtail(bi, hb)) == 0 \ ) +#define GRP_small_values(BG, BV, GV) \ + do { \ + uint##BG##_t sgrps[1 << BG]; \ + const uint##BV##_t *restrict w = (const uint##BV##_t *) bi.base; \ + uint##BG##_t v; \ + memset(sgrps, 0xFF, sizeof(sgrps)); \ + if (histo) \ + memset(cnts, 0, maxgrps * sizeof(lng)); \ + ngrp = 0; \ + gn->tsorted = true; \ + if (ci.tpe == cand_dense) { \ + TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { \ + oid o = canditer_next_dense(&ci); \ + p = o - b->hseqbase; \ + uint##BG##_t x = GV; \ + if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp < (1 << BG)) { \ + sgrps[x] = v = (uint##BG##_t) ngrp++; \ + maxgrppos = r; \ + if (extents) \ + exts[v] = o; \ + } \ + ngrps[r] = v; \ + if (r > 0 && v < ngrps[r - 1]) \ + gn->tsorted = false; \ + if (histo) \ + cnts[v]++; \ + } \ + } else { \ + TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { \ + oid o = canditer_next(&ci); \ + p = o - b->hseqbase; \ + uint##BG##_t x = GV; \ + if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp < (1 << BG)) { \ + sgrps[x] = v = (uint##BG##_t) ngrp++; \ + maxgrppos = r; \ + if (extents) \ + exts[v] = o; \ + } \ + ngrps[r] = v; \ + if (r > 0 && v < ngrps[r - 1]) \ + gn->tsorted = false; \ + if (histo) \ + cnts[v]++; \ + } \ + } \ + TIMEOUT_CHECK(timeoffset, \ + GOTO_LABEL_TIMEOUT_HANDLER(error)); \ + } while (0) gdk_return BATgroup_internal(BAT **groups, BAT **extents, BAT **histo, @@ -691,9 +739,8 @@ BATgroup_internal(BAT **groups, BAT **ex } 
assert(!BATtdense(b)); if (g) { - if (BATtdense(g)) - maxgrp = g->tseqbase + BATcount(g); - else if (BATtordered(g)) + assert(!BATtdense(g)); + if (BATtordered(g)) maxgrp = * (oid *) Tloc(g, BATcount(g) - 1); else if (BATtrevordered(g)) maxgrp = * (oid *) Tloc(g, 0); @@ -814,8 +861,11 @@ BATgroup_internal(BAT **groups, BAT **ex maxgrps = GROUPBATINCR; bi = bat_iterator(b); - if (bi.width <= 2) + if (bi.width <= 2) { maxgrps = (BUN) 1 << (8 * bi.width); + if (bi.width == 1 && maxgrp < 256) + maxgrps *= maxgrp; + } if (extents) { en = COLnew(0, TYPE_oid, maxgrps, TRANSIENT); if (en == NULL) @@ -860,7 +910,26 @@ BATgroup_internal(BAT **groups, BAT **ex } } - if (subsorted || + if (g == NULL && t == TYPE_bte) { + /* byte-sized values, use 256 entry array to keep + * track of doled out group ids; note that we can't + * possibly have more than 256 groups, so the group id + * fits in a uint8_t */ + GRP_small_values(8, 8, w[p]); + } else if (t == TYPE_bte && maxgrp < 256) { + /* subgrouping byte-sized values with a limited number + * of groups, use 65536 entry array to keep track of + * doled out group ids; note that we can't possibly have + * more than 65536 groups, so the group id fits in a + * uint16_t */ + GRP_small_values(16, 8, (uint16_t) (w[p] | (grps[p] << 8))); + } else if (g == NULL && t == TYPE_sht) { + /* short-sized values, use 65536 entry array to keep + * track of doled out group ids; note that we can't + * possibly have more than 65536 groups, so the group + * id fits in a uint16_t */ + GRP_small_values(16, 16, w[p]); + } else if (subsorted || ((BATordered(b) || BATordered_rev(b)) && (g == NULL || BATordered(g) || BATordered_rev(g)))) { /* we only need to compare each entry with the previous */ @@ -961,73 +1030,6 @@ BATgroup_internal(BAT **groups, BAT **ex } GDKfree(pgrp); - } else if (g == NULL && t == TYPE_bte) { - /* byte-sized values, use 256 entry array to keep - * track of doled out group ids; note that we can't - * possibly have more than 256 groups, 
so the group id - * fits in an uint8_t */ - uint8_t bgrps[256]; - const uint8_t *restrict w = (const uint8_t *) bi.base; - uint8_t v; - - algomsg = "byte-sized groups -- "; - memset(bgrps, 0xFF, sizeof(bgrps)); - if (histo) - memset(cnts, 0, maxgrps * sizeof(lng)); - ngrp = 0; - gn->tsorted = true; - TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { - oid o = canditer_next(&ci); - p = o - b->hseqbase; - if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) { - bgrps[w[p]] = v = (uint8_t) ngrp++; - maxgrppos = r; - if (extents) - exts[v] = o; - } - ngrps[r] = v; - if (r > 0 && v < ngrps[r - 1]) - gn->tsorted = false; - if (histo) - cnts[v]++; - } - TIMEOUT_CHECK(timeoffset, - GOTO_LABEL_TIMEOUT_HANDLER(error)); - } else if (g == NULL && t == TYPE_sht) { - /* short-sized values, use 65536 entry array to keep - * track of doled out group ids; note that we can't - * possibly have more than 65536 groups, so the group - * id fits in an uint16_t */ - uint16_t *restrict sgrps = GDKmalloc(65536 * sizeof(short)); - const uint16_t *restrict w = (const uint16_t *) bi.base; - uint16_t v; - - algomsg = "short-sized groups -- "; - if (sgrps == NULL) - goto error1; - memset(sgrps, 0xFF, 65536 * sizeof(uint16_t)); - if (histo) - memset(cnts, 0, maxgrps * sizeof(lng)); - ngrp = 0; - gn->tsorted = true; - TIMEOUT_LOOP_IDX(r, cnt, timeoffset) { - oid o = canditer_next(&ci); - p = o - b->hseqbase; - if ((v = sgrps[w[p]]) == 0xFFFF && ngrp < 65536) { - sgrps[w[p]] = v = (uint16_t) ngrp++; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list