Changeset: 67af55b88549 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67af55b88549
Modified Files:
        monetdb5/modules/mosaic/mosaic.c
        monetdb5/modules/mosaic/mosaic.h
        sql/backends/monet5/sql.mal
        sql/backends/monet5/sql_mosaic.c
        sql/scripts/76_mosaic.sql
        sql/test/mosaic/Tests/analysis.sql
        sql/test/mosaic/Tests/analysis.stable.out
Branch: mosaic
Log Message:

Add a 'common_compression' (a.k.a. exceptions) argument to mosaic.analysis.


diffs (254 lines):

diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -1238,7 +1238,7 @@ MOSjoin(Client cntxt, MalBlkPtr mb, MalS
  */
 
 static int
-makepatterns(uint16_t *patterns, int size, str compressions, BAT* b)
+makepatterns(uint16_t *patterns, int size, str compressions, sht common_mask, 
BAT* b)
 {
        int i,j,k, idx, bit=1, step = MOSAIC_METHODS;
        int lim= 8*7*6*5*4*3*2;
@@ -1252,6 +1252,10 @@ makepatterns(uint16_t *patterns, int siz
                        // Unset corresponding bit if type is not allowed.
                        UNSET_METHOD(compression_mask, i);
                }
+               if ( METHOD_IS_SET(common_mask, i) && !MOSisTypeAllowed(i, b) ) 
{
+                       // Unset corresponding bit if type is not allowed.
+                       UNSET_METHOD(common_mask, i);
+               }
        }
 
        for( k=0, i=0; i<lim && k <size; i++){
@@ -1264,11 +1268,13 @@ makepatterns(uint16_t *patterns, int siz
                        idx /= step;
                }
 
+               patterns[k] |= common_mask; // Make sure the common mask is 
always present in the to-be-analyzed patterns.
+
                // weed out duplicates
                for( j=0; j< k; j++)
                        if(patterns[k] == patterns[j]) break;
                if( j < k ) continue;
-               
+
 #ifdef _MOSAIC_DEBUG_
                mnstr_printf(GDKstdout,"#");
                for(j=0, bit=1; j < MOSAIC_METHODS; j++){
@@ -1303,7 +1309,7 @@ struct PAT{
 };
 
 str
-MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *boutput, BAT *bratio, BAT 
*bcompress, BAT *bdecompress, str compressions)
+MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *boutput, BAT *bratio, BAT 
*bcompress, BAT *bdecompress, str compressions, str common_compressions)
 {
        unsigned i,j,cases, bid= b->batCacheid;
        uint16_t pattern[CANDIDATES];
@@ -1314,8 +1320,12 @@ MOSAnalysis(BAT *b, BAT *btech, BAT *bla
 
        struct PAT pat[CANDIDATES];
 
+       // create mask of common compressions, e.g. exception methods.
+       sht common_mask = 0;
+       _construct_compression_mask(&common_mask, common_compressions);
+
        // create the list of all possible 2^6 compression patterns 
-       cases = makepatterns(pattern,CANDIDATES, compressions, b);
+       cases = makepatterns(pattern,CANDIDATES, compressions, common_mask, b);
 
        memset(antipattern,0, sizeof(antipattern));
        antipatternSize++; // the first pattern aka 0 is always an antipattern.
@@ -1419,7 +1429,7 @@ MOSAnalysis(BAT *b, BAT *btech, BAT *bla
                }
 
                for(j=0; j < MOSAIC_METHODS; j++){
-                       if ( ((MosaicHdr)  b->tmosaic->base)->blks[j] == 0) {
+                       if ( ((MosaicHdr)  b->tmosaic->base)->blks[j] == 0 && 
(!(MOSmethods[j].bit & common_mask))) {
                                antipattern[antipatternSize++] = pattern[i];
                        }
                }
diff --git a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h
--- a/monetdb5/modules/mosaic/mosaic.h
+++ b/monetdb5/modules/mosaic/mosaic.h
@@ -286,7 +286,7 @@ mal_export str MOSthetaselect(Client cnt
 mal_export str MOSprojection(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 mal_export str MOSjoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str MOSlayout(BAT *b, BAT *bbsn, BAT *btech, BAT *bcount, BAT 
*binput, BAT *boutput, BAT *bproperties);
-mal_export str MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *output, BAT 
*factor, BAT *compress, BAT *decompress, str compressions);
+mal_export str MOSAnalysis(BAT *b, BAT *btech, BAT *blayout, BAT *output, BAT 
*factor, BAT *compress, BAT *decompress, str compressions, str 
common_compressions);
 
 void MOSupdateHeader(MOStask* task);
 void MOSinitHeader(MOStask* task);
diff --git a/sql/backends/monet5/sql.mal b/sql/backends/monet5/sql.mal
--- a/sql/backends/monet5/sql.mal
+++ b/sql/backends/monet5/sql.mal
@@ -546,7 +546,7 @@ pattern mosaicanalysis(sch:str,tbl:str,c
        compress:bat[:lng],
        decompress:bat[:lng])
 address sql_mosaicAnalysis;
-pattern mosaicanalysis(sch:str,tbl:str,col:str)(
+pattern mosaicanalysis(sch:str,tbl:str,col:str,compression:str, 
common_compression:str)(
        technique:bat[:str],
        layout:bat[:json],
        outputsize:bat[:lng],
diff --git a/sql/backends/monet5/sql_mosaic.c b/sql/backends/monet5/sql_mosaic.c
--- a/sql/backends/monet5/sql_mosaic.c
+++ b/sql/backends/monet5/sql_mosaic.c
@@ -160,6 +160,7 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP
        int *tech,*output, *factor, *compress, *decompress, *layout;
        BAT *bn, *btech, *boutput, *bfactor, *bcompress, *bdecompress, *blayout;
        str compressions = NULL;
+       str common_compressions = "raw";
 
        if (msg != MAL_SUCCEED || (msg = checkSQLContext(cntxt)) != NULL)
                return msg;
@@ -219,12 +220,14 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP
        layout = getArgReference_bat(stk, pci, 1);
        *layout = blayout->batCacheid;
 
-       sch = *getArgReference_str(stk, pci, 6);
-       tbl = *getArgReference_str(stk, pci, 7);
-       col = *getArgReference_str(stk, pci, 8);
-       if ( pci->argc == 10){
+       sch                             = *getArgReference_str(stk, pci, 6);
+       tbl                             = *getArgReference_str(stk, pci, 7);
+       col                             = *getArgReference_str(stk, pci, 8);
+       compressions    = *getArgReference_str(stk, pci, 9);
+
+       if ( pci->argc == 11) {
                // use a predefined collection of compression schemes.
-               compressions = *getArgReference_str(stk,pci,9);
+               common_compressions = *getArgReference_str(stk, pci, 10);
        }
 
 #ifdef DEBUG_SQL_MOSAIC
@@ -253,7 +256,7 @@ sql_mosaicAnalysis(Client cntxt, MalBlkP
                                                        continue;
                                                // perform the analysis
                                                bn = 
store_funcs.bind_col(m->session->tr, c, 0);
-                                               msg = MOSAnalysis(bn, btech, 
blayout, boutput, bfactor, bcompress, bdecompress, compressions);
+                                               msg = MOSAnalysis(bn, btech, 
blayout, boutput, bfactor, bcompress, bdecompress, compressions, 
common_compressions);
                                                BBPunfix(bn->batCacheid);
                                                (void) c;
 
diff --git a/sql/scripts/76_mosaic.sql b/sql/scripts/76_mosaic.sql
--- a/sql/scripts/76_mosaic.sql
+++ b/sql/scripts/76_mosaic.sql
@@ -11,10 +11,10 @@ create function mosaic.layout(sch string
 returns table(technique string, "count" bigint, inputsize bigint, outputsize 
bigint,properties json, bsn bigint)
 external name sql.mosaiclayout;
 
-create function mosaic.analysis(sch string, tbl string, col string) 
+create function mosaic.analysis(sch string, tbl string, col string, 
compression string) 
 returns table(technique string, layout json, outputsize bigint, factor float, 
"compress" bigint, "decompress" bigint)
 external name sql.mosaicanalysis;
 
-create function mosaic.analysis(sch string, tbl string, col string, 
compression string) 
+create function mosaic.analysis(sch string, tbl string, col string, 
compression string, common_compression string) 
 returns table(technique string, layout json, outputsize bigint, factor float, 
"compress" bigint, "decompress" bigint)
 external name sql.mosaicanalysis;
diff --git a/sql/test/mosaic/Tests/analysis.sql 
b/sql/test/mosaic/Tests/analysis.sql
--- a/sql/test/mosaic/Tests/analysis.sql
+++ b/sql/test/mosaic/Tests/analysis.sql
@@ -8,15 +8,17 @@ insert into tmp4 select * from generate_
 
 -- tmp4 is currently uncompressed
 
-select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
 
 -- should be materialized as the graph of a cutoff function.
 
 insert into tmp4 select 10000000 from tmp4;
 
-select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
 
-select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength') order by technique, factor desc;
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', '') order by technique, factor desc;
+
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', 'raw') order by technique, factor desc;
 
 -- Make sure that mosaic.analysis also works correctly on a column with 
compression
 
@@ -24,9 +26,9 @@ set optimizer='mosaic_pipe';
 
 alter table tmp4 alter column i set storage 'runlength, linear';
 
-select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
 
-select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements  from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength') order by technique, factor desc;
+select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements  from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', '') order by technique, factor desc;
 
 drop table tmp4;
 
diff --git a/sql/test/mosaic/Tests/analysis.stable.out 
b/sql/test/mosaic/Tests/analysis.stable.out
--- a/sql/test/mosaic/Tests/analysis.stable.out
+++ b/sql/test/mosaic/Tests/analysis.stable.out
@@ -27,8 +27,8 @@ stdout of test 'analysis` in directory '
 [ 0    ]
 #insert into tmp4 select * from generate_series(0, 10000000);
 [ 10000000     ]
-#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
-% .%4, .%4,    .,      . # table_name
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
 % technique,   factor, blocks, elements # name
 % clob,        double, json,   json # type
 % 18,  24,     39,     41 # length
@@ -68,8 +68,8 @@ stdout of test 'analysis` in directory '
 [ "runlength prefix ", 2.132,  "[[{\"prefix\":306}]]", 
"[[{\"prefix\":10000000}]]"     ]
 #insert into tmp4 select 10000000 from tmp4;
 [ 10000000     ]
-#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
-% .%4, .%4,    .,      . # table_name
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
 % technique,   factor, blocks, elements # name
 % clob,        double, json,   json # type
 % 18,  24,     39,     46 # length
@@ -107,18 +107,26 @@ stdout of test 'analysis` in directory '
 [ "runlength frame ",  2.285,  "[[{\"frame\":306}]]",  
"[[{\"frame\":20000000}]]"      ]
 [ "runlength linear ", 357142.843,     "[[{\"runlength\":1},{\"linear\":1}]]", 
"[[{\"runlength\":9999999},{\"linear\":10000001}]]"     ]
 [ "runlength prefix ", 2.285,  "[[{\"prefix\":306}]]", 
"[[{\"prefix\":20000000}]]"     ]
-#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength') order by technique, factor desc;
-% .%5, .%5,    .,      . # table_name
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', '') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
 % technique,   factor, blocks, elements # name
 % clob,        double, json,   json # type
 % 17,  24,     32,     45 # length
 [ "linear ",   350877.187,     "[[{\"linear\":2}]]",   
"[[{\"linear\":20000000}]]"     ]
 [ "runlength ",        0.666,  "[[{\"runlength\":10000001}]]", 
"[[{\"runlength\":20000000}]]"  ]
 [ "runlength linear ", 357142.843,     "[[{\"runlength\":1},{\"linear\":1}]]", 
"[[{\"runlength\":9999999},{\"linear\":10000001}]]"     ]
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', 'raw') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
+% technique,   factor, blocks, elements # name
+% clob,        double, json,   json # type
+% 21,  24,     32,     45 # length
+[ "raw linear ",       350877.187,     "[[{\"linear\":2}]]",   
"[[{\"linear\":20000000}]]"     ]
+[ "raw runlength ",    1.999,  "[[{\"raw\":1},{\"runlength\":1}]]",    
"[[{\"raw\":10000000},{\"runlength\":10000000}]]"       ]
+[ "raw runlength linear ",     357142.843,     
"[[{\"runlength\":1},{\"linear\":1}]]", 
"[[{\"runlength\":9999999},{\"linear\":10000001}]]"     ]
 #set optimizer='mosaic_pipe';
-#alter table tmp4 alter column i set storage 'dict';
-#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i') order by technique, factor desc;
-% .%4, .%4,    .,      . # table_name
+#alter table tmp4 alter column i set storage 'runlength, linear';
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements from mosaic.analysis('sys', 'tmp4', 
'i', NULL, '') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
 % technique,   factor, blocks, elements # name
 % clob,        double, json,   json # type
 % 18,  24,     39,     46 # length
@@ -156,8 +164,8 @@ stdout of test 'analysis` in directory '
 [ "runlength frame ",  2.285,  "[[{\"frame\":306}]]",  
"[[{\"frame\":20000000}]]"      ]
 [ "runlength linear ", 357142.843,     "[[{\"runlength\":1},{\"linear\":1}]]", 
"[[{\"runlength\":9999999},{\"linear\":10000001}]]"     ]
 [ "runlength prefix ", 2.285,  "[[{\"prefix\":306}]]", 
"[[{\"prefix\":20000000}]]"     ]
-#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements  from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength') order by technique, factor desc;
-% .%5, .%5,    .,      . # table_name
+#select technique, factor, json.filter(layout, 'blks') as blocks, 
json.filter(layout, 'elms') as elements  from mosaic.analysis('sys', 'tmp4', 
'i', 'linear, runlength', '') order by technique, factor desc;
+% .%6, .%6,    .,      . # table_name
 % technique,   factor, blocks, elements # name
 % clob,        double, json,   json # type
 % 17,  24,     32,     45 # length
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to