Changeset: 67af55b88549 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67af55b88549
Modified Files:
	monetdb5/modules/mosaic/mosaic.c
	monetdb5/modules/mosaic/mosaic.h
	sql/backends/monet5/sql.mal
	sql/backends/monet5/sql_mosaic.c
	sql/scripts/76_mosaic.sql
	sql/test/mosaic/Tests/analysis.sql
	sql/test/mosaic/Tests/analysis.stable.out
Branch: mosaic
Log Message:
Add 'common_compression' a.k.a. exceptions argument to analysis. diffs (254 lines): diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c --- a/monetdb5/modules/mosaic/mosaic.c +++ b/monetdb5/modules/mosaic/mosaic.c @@ -1238,7 +1238,7 @@ MOSjoin(Client cntxt, MalBlkPtr mb, MalS */ static int -makepatterns(uint16_t *patterns, int size, str compressions, BAT* b) +makepatterns(uint16_t *patterns, int size, str compressions, sht common_mask, BAT* b) { int i,j,k, idx, bit=1, step = MOSAIC_METHODS; int lim= 8*7*6*5*4*3*2; @@ -1252,6 +1252,10 @@ makepatterns(uint16_t *patterns, int siz // Unset corresponding bit if type is not allowed. UNSET_METHOD(compression_mask, i); } + if ( METHOD_IS_SET(common_mask, i) && !MOSisTypeAllowed(i, b) ) { + // Unset corresponding bit if type is not allowed. + UNSET_METHOD(common_mask, i); + } } for( k=0, i=0; i<lim && k <size; i++){ @@ -1264,11 +1268,13 @@ makepatterns(uint16_t *patterns, int siz idx /= step; } + patterns[k] |= common_mask; // Make sure the common mask is always present in the to-be-analyzed patterns. + // weed out duplicates for( j=0; j< k; j++) if(patterns[k] == patterns[j]) break; if( j < k ) continue; - + #ifdef _MOSAIC_DEBUG_ mnstr_printf(GDKstdout,"#"); for(j=0, bit=1; j < MOSAIC_METHODS; j++){ @@ -1303,7 +1309,7 @@ struct PAT{ }; str -MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *boutput, BAT *bratio, BAT *bcompress, BAT *bdecompress, str compressions) +MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *boutput, BAT *bratio, BAT *bcompress, BAT *bdecompress, str compressions, str common_compressions) { unsigned i,j,cases, bid= b->batCacheid; uint16_t pattern[CANDIDATES]; @@ -1314,8 +1320,12 @@ MOSAnalysis(BAT *b, BAT *btech, BAT *bla struct PAT pat[CANDIDATES]; + // create mask of common compressions, e.g. exception methods. 
+ sht common_mask = 0; + _construct_compression_mask(&common_mask, common_compressions); + // create the list of all possible 2^6 compression patterns - cases = makepatterns(pattern,CANDIDATES, compressions, b); + cases = makepatterns(pattern,CANDIDATES, compressions, common_mask, b); memset(antipattern,0, sizeof(antipattern)); antipatternSize++; // the first pattern aka 0 is always an antipattern. @@ -1419,7 +1429,7 @@ MOSAnalysis(BAT *b, BAT *btech, BAT *bla } for(j=0; j < MOSAIC_METHODS; j++){ - if ( ((MosaicHdr) b->tmosaic->base)->blks[j] == 0) { + if ( ((MosaicHdr) b->tmosaic->base)->blks[j] == 0 && (!(MOSmethods[j].bit & common_mask))) { antipattern[antipatternSize++] = pattern[i]; } } diff --git a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h --- a/monetdb5/modules/mosaic/mosaic.h +++ b/monetdb5/modules/mosaic/mosaic.h @@ -286,7 +286,7 @@ mal_export str MOSthetaselect(Client cnt mal_export str MOSprojection(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); mal_export str MOSjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); mal_export str MOSlayout(BAT *b, BAT *bbsn, BAT *btech, BAT *bcount, BAT *binput, BAT *boutput, BAT *bproperties); -mal_export str MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *output, BAT *factor, BAT *compress, BAT *decompress, str compressions); +mal_export str MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *output, BAT *factor, BAT *compress, BAT *decompress, str compressions, str common_compressions); void MOSupdateHeader(MOStask* task); void MOSinitHeader(MOStask* task); diff --git a/sql/backends/monet5/sql.mal b/sql/backends/monet5/sql.mal --- a/sql/backends/monet5/sql.mal +++ b/sql/backends/monet5/sql.mal @@ -546,7 +546,7 @@ pattern mosaicanalysis(sch:str,tbl:str,c compress:bat[:lng], decompress:bat[:lng]) address sql_mosaicAnalysis; -pattern mosaicanalysis(sch:str,tbl:str,col:str)( +pattern mosaicanalysis(sch:str,tbl:str,col:str,compression:str, common_compression:str)( 
technique:bat[:str], layout:bat[:json], outputsize:bat[:lng], diff --git a/sql/backends/monet5/sql_mosaic.c b/sql/backends/monet5/sql_mosaic.c --- a/sql/backends/monet5/sql_mosaic.c +++ b/sql/backends/monet5/sql_mosaic.c @@ -160,6 +160,7 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP int *tech,*output, *factor, *compress, *decompress, *layout; BAT *bn, *btech, *boutput, *bfactor, *bcompress, *bdecompress, *blayout; str compressions = NULL; + str common_compressions = "raw"; if (msg != MAL_SUCCEED || (msg = checkSQLContext(cntxt)) != NULL) return msg; @@ -219,12 +220,14 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP layout = getArgReference_bat(stk, pci, 1); *layout = blayout->batCacheid; - sch = *getArgReference_str(stk, pci, 6); - tbl = *getArgReference_str(stk, pci, 7); - col = *getArgReference_str(stk, pci, 8); - if ( pci->argc == 10){ + sch = *getArgReference_str(stk, pci, 6); + tbl = *getArgReference_str(stk, pci, 7); + col = *getArgReference_str(stk, pci, 8); + compressions = *getArgReference_str(stk, pci, 9); + + if ( pci->argc == 11) { // use a predefined collection of compression schemes. 
- compressions = *getArgReference_str(stk,pci,9); + common_compressions = *getArgReference_str(stk, pci, 10); } #ifdef DEBUG_SQL_MOSAIC @@ -253,7 +256,7 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP continue; // perform the analysis bn = store_funcs.bind_col(m->session->tr, c, 0); - msg = MOSAnalysis(bn, btech, blayout, boutput, bfactor, bcompress, bdecompress, compressions); + msg = MOSAnalysis(bn, btech, blayout, boutput, bfactor, bcompress, bdecompress, compressions, common_compressions); BBPunfix(bn->batCacheid); (void) c; diff --git a/sql/scripts/76_mosaic.sql b/sql/scripts/76_mosaic.sql --- a/sql/scripts/76_mosaic.sql +++ b/sql/scripts/76_mosaic.sql @@ -11,10 +11,10 @@ create function mosaic.layout(sch string returns table(technique string, "count" bigint, inputsize bigint, outputsize bigint,properties json, bsn bigint) external name sql.mosaiclayout; -create function mosaic.analysis(sch string, tbl string, col string) +create function mosaic.analysis(sch string, tbl string, col string, compression string) returns table(technique string, layout json, outputsize bigint, factor float, "compress" bigint, "decompress" bigint) external name sql.mosaicanalysis; -create function mosaic.analysis(sch string, tbl string, col string, compression string) +create function mosaic.analysis(sch string, tbl string, col string, compression string, common_compression string) returns table(technique string, layout json, outputsize bigint, factor float, "compress" bigint, "decompress" bigint) external name sql.mosaicanalysis; diff --git a/sql/test/mosaic/Tests/analysis.sql b/sql/test/mosaic/Tests/analysis.sql --- a/sql/test/mosaic/Tests/analysis.sql +++ b/sql/test/mosaic/Tests/analysis.sql @@ -8,15 +8,17 @@ insert into tmp4 select * from generate_ -- tmp4 is currently uncompressed -select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; +select technique, 
factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; -- should be materialized as the graph of a cutoff function. insert into tmp4 select 10000000 from tmp4; -select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; +select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; -select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength') order by technique, factor desc; +select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', '') order by technique, factor desc; + +select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', 'raw') order by technique, factor desc; -- Make sure that mosaic.analysis also works correctly on a column with compression @@ -24,9 +26,9 @@ set optimizer='mosaic_pipe'; alter table tmp4 alter column i set storage 'runlength, linear'; -select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; +select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; -select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength') 
order by technique, factor desc; +select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', '') order by technique, factor desc; drop table tmp4; diff --git a/sql/test/mosaic/Tests/analysis.stable.out b/sql/test/mosaic/Tests/analysis.stable.out --- a/sql/test/mosaic/Tests/analysis.stable.out +++ b/sql/test/mosaic/Tests/analysis.stable.out @@ -27,8 +27,8 @@ stdout of test 'analysis` in directory ' [ 0 ] #insert into tmp4 select * from generate_series(0, 10000000); [ 10000000 ] -#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; -% .%4, .%4, ., . # table_name +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; +% .%6, .%6, ., . # table_name % technique, factor, blocks, elements # name % clob, double, json, json # type % 18, 24, 39, 41 # length @@ -68,8 +68,8 @@ stdout of test 'analysis` in directory ' [ "runlength prefix ", 2.132, "[[{\"prefix\":306}]]", "[[{\"prefix\":10000000}]]" ] #insert into tmp4 select 10000000 from tmp4; [ 10000000 ] -#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; -% .%4, .%4, ., . # table_name +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; +% .%6, .%6, ., . 
# table_name % technique, factor, blocks, elements # name % clob, double, json, json # type % 18, 24, 39, 46 # length @@ -107,18 +107,26 @@ stdout of test 'analysis` in directory ' [ "runlength frame ", 2.285, "[[{\"frame\":306}]]", "[[{\"frame\":20000000}]]" ] [ "runlength linear ", 357142.843, "[[{\"runlength\":1},{\"linear\":1}]]", "[[{\"runlength\":9999999},{\"linear\":10000001}]]" ] [ "runlength prefix ", 2.285, "[[{\"prefix\":306}]]", "[[{\"prefix\":20000000}]]" ] -#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength') order by technique, factor desc; -% .%5, .%5, ., . # table_name +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', '') order by technique, factor desc; +% .%6, .%6, ., . # table_name % technique, factor, blocks, elements # name % clob, double, json, json # type % 17, 24, 32, 45 # length [ "linear ", 350877.187, "[[{\"linear\":2}]]", "[[{\"linear\":20000000}]]" ] [ "runlength ", 0.666, "[[{\"runlength\":10000001}]]", "[[{\"runlength\":20000000}]]" ] [ "runlength linear ", 357142.843, "[[{\"runlength\":1},{\"linear\":1}]]", "[[{\"runlength\":9999999},{\"linear\":10000001}]]" ] +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', 'raw') order by technique, factor desc; +% .%6, .%6, ., . 
# table_name +% technique, factor, blocks, elements # name +% clob, double, json, json # type +% 21, 24, 32, 45 # length +[ "raw linear ", 350877.187, "[[{\"linear\":2}]]", "[[{\"linear\":20000000}]]" ] +[ "raw runlength ", 1.999, "[[{\"raw\":1},{\"runlength\":1}]]", "[[{\"raw\":10000000},{\"runlength\":10000000}]]" ] +[ "raw runlength linear ", 357142.843, "[[{\"runlength\":1},{\"linear\":1}]]", "[[{\"runlength\":9999999},{\"linear\":10000001}]]" ] #set optimizer='mosaic_pipe'; -#alter table tmp4 alter column i set storage 'dict'; -#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i') order by technique, factor desc; -% .%4, .%4, ., . # table_name +#alter table tmp4 alter column i set storage 'runlength, linear'; +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', NULL, '') order by technique, factor desc; +% .%6, .%6, ., . # table_name % technique, factor, blocks, elements # name % clob, double, json, json # type % 18, 24, 39, 46 # length @@ -156,8 +164,8 @@ stdout of test 'analysis` in directory ' [ "runlength frame ", 2.285, "[[{\"frame\":306}]]", "[[{\"frame\":20000000}]]" ] [ "runlength linear ", 357142.843, "[[{\"runlength\":1},{\"linear\":1}]]", "[[{\"runlength\":9999999},{\"linear\":10000001}]]" ] [ "runlength prefix ", 2.285, "[[{\"prefix\":306}]]", "[[{\"prefix\":20000000}]]" ] -#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength') order by technique, factor desc; -% .%5, .%5, ., . # table_name +#select technique, factor, json.filter(layout, 'blks') as blocks, json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 'i', 'linear, runlength', '') order by technique, factor desc; +% .%6, .%6, ., . 
# table_name % technique, factor, blocks, elements # name % clob, double, json, json # type % 17, 24, 32, 45 # length _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list