Changeset: f0bf3f7a67b7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f0bf3f7a67b7
Modified Files:
	clients/Tests/exports.stable.out
	monetdb5/modules/mosaic/mosaic.c
	monetdb5/modules/mosaic/mosaic.h
	monetdb5/modules/mosaic/mosaic.mal
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql.mal
	sql/backends/monet5/sql_mosaic.c
	sql/scripts/76_mosaic.sql
	sql/test/mosaic/Tests/compressionRLE2.stable.out
Branch: mosaic
Log Message:
Improve speed of mosaic analysis.
Compression techniques that, in isolation, do not shrink the data are ignored when searching for the workable combination(s).

diffs (truncated from 681 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -395,6 +395,7 @@ int geomversion_get(void); void geomversion_set(void); bat getBBPsize(void); int getBitVector(BitVector vector, BUN i, int bits); +lng getBitVectorSize(const BUN cnt, const int width); char *get_bin_path(void); int gettimeofday(struct timeval *tv, int *ignore_zone); int gprof_pthread_create(pthread_t *__restrict, __const pthread_attr_t *__restrict, void *( *fcn)(void *), void *__restrict); @@ -476,7 +477,7 @@ int ptrToStr(str *dst, int *len, const p const ptr ptr_nil; struct dirent *readdir(DIR *dir); void rewinddir(DIR *dir); -void setBitVector(BitVector vector, const BUN i, const int bits, const int value); +void setBitVector(BitVector vector, const BUN i, const int bits, const unsigned int value); int shtFromStr(const char *src, int *len, sht **dst); int shtToStr(str *dst, int *len, const sht *src); const sht sht_nil; @@ -1416,7 +1417,7 @@ void MOSanalyseReport(Client cntxt, BAT void MOSblk(MosaicBlk blk); BUN MOSblocklimit; str MOScompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); -str MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int debug); +str MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug); void MOScompress_delta(Client cntxt, MOStask task); void MOScompress_dictionary(Client cntxt, MOStask task); void MOScompress_frame(Client cntxt, MOStask task); @@ -1427,7 +1428,7 @@ void MOScompress_runlength(Client cntxt, void MOScreatedictionary(Client cntxt, MOStask task); void MOScreateframeDictionary(Client cntxt, MOStask task); str MOSdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); -str MOSdecompressInternal(Client cntxt, 
bat *ret, bat *bid); +str MOSdecompressInternal(Client cntxt, bat *bid); str MOSdecompressStorage(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); void MOSdecompress_delta(Client cntxt, MOStask task); void MOSdecompress_dictionary(Client cntxt, MOStask task); diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c --- a/monetdb5/modules/mosaic/mosaic.c +++ b/monetdb5/modules/mosaic/mosaic.c @@ -37,7 +37,7 @@ char *MOSfiltername[]={"literal","runlength","dictionary","delta","linear","frame","prefix","EOL"}; BUN MOSblocklimit = 100000; -str MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int debug); +str MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug); static void MOSinit(MOStask task, BAT *b){ @@ -333,8 +333,9 @@ MOSoptimizerCost(Client cntxt, MOStask t return cand; } +/* the source is extended with a BAT mosaic mirror */ str -MOScompressInternal(Client cntxt, bat *ret, bat *bid, MOStask task, int debug) +MOScompressInternal(Client cntxt, bat *bid, MOStask task, int debug) { BAT *o = NULL, *bsrc; // the BAT to be augmented with a compressed heap str msg = MAL_SUCCEED; @@ -342,8 +343,6 @@ MOScompressInternal(Client cntxt, bat *r int tpe, typewidth; lng t0,t1; - *ret = 0; - if ((bsrc = BATdescriptor(*bid)) == NULL) throw(MAL, "mosaic.compress", INTERNAL_BAT_ACCESS); @@ -363,32 +362,33 @@ MOScompressInternal(Client cntxt, bat *r typewidth = ATOMsize(tpe) * 8; // size in bits break; default: - // don't compress them - BBPkeepref(*ret = bsrc->batCacheid); - return msg; + // don't compress it + BBPunfix(bsrc->batCacheid); + return MAL_SUCCEED; } if (BATcheckmosaic(bsrc)){ /* already compressed */ - BBPkeepref(*ret = bsrc->batCacheid); + BBPunfix(bsrc->batCacheid); return msg; } assert(bsrc->tmosaic == NULL); + /* views are never compressed */ if (VIEWtparent(bsrc)) { bat p = VIEWtparent(bsrc); o = bsrc; bsrc = BATdescriptor(p); if (BATcheckmosaic(bsrc)) { - BBPunfix(bsrc->batCacheid); + 
BBPunfix(o->batCacheid); return MAL_SUCCEED; } - assert(bsrc->timprints == NULL); + assert(bsrc->tmosaic == NULL); } if ( BATcount(bsrc) < MOSAIC_THRESHOLD ){ /* no need to compress */ - BBPkeepref(*ret = bsrc->batCacheid); + BBPunfix(bsrc->batCacheid); return msg; } @@ -404,7 +404,8 @@ MOScompressInternal(Client cntxt, bat *r // Then we total size may go beyond the original size and we should terminate the process. // This should be detected before we compress a block, in the estimate functions // or when we extend the non-compressed collector block - throw(MAL,"mosaic.compress", "heap construction failes"); + BBPunfix(bsrc->batCacheid); + throw(MAL,"mosaic.compress", "heap construction failes"); } // initialize the non-compressed read pointer @@ -533,8 +534,7 @@ MOScompressInternal(Client cntxt, bat *r task->ratio = task->hdr->ratio = (flt)task->bsrc->theap.free/ task->bsrc->tmosaic->free; finalize: MCexitMaintenance(cntxt); - *ret= bsrc->batCacheid; - BBPkeepref(bsrc->batCacheid); + BBPunfix(bsrc->batCacheid); #ifdef _DEBUG_MOSAIC_ MOSdumpInternal(cntxt,bsrc); @@ -552,7 +552,7 @@ finalize: str MOScompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - str prop = NULL; + str msg = MAL_SUCCEED; int i; MOStask task; @@ -567,22 +567,22 @@ MOScompress(Client cntxt, MalBlkPtr mb, throw(MAL, "mosaic.compress", MAL_MALLOC_FAIL); if( pci->argc == 3) - prop = *getArgReference_str(stk,pci,2); - if( prop && !strstr(prop,"mosaic")) + msg = *getArgReference_str(stk,pci,2); + if( msg && !strstr(msg,"mosaic")) for( i = 0; i< MOSAIC_METHODS; i++) - task->filter[i]= strstr(prop,MOSfiltername[i]) != 0; + task->filter[i]= strstr(msg,MOSfiltername[i]) != 0; else for( i = 0; i< MOSAIC_METHODS; i++) task->filter[i]= 1; - prop= MOScompressInternal(cntxt, getArgReference_bat(stk,pci,0), getArgReference_bat(stk,pci,1), task, flg); + msg= MOScompressInternal(cntxt, getArgReference_bat(stk,pci,1), task, flg); GDKfree(task); - return prop; + return msg; } // recreate the 
uncompressed heap from its mosaic version str -MOSdecompressInternal(Client cntxt, bat *ret, bat *bid) +MOSdecompressInternal(Client cntxt, bat *bid) { BAT *bsrc; MOStask task; @@ -598,11 +598,10 @@ MOSdecompressInternal(Client cntxt, bat if (BATcheckmosaic(bsrc) == 0 ){ BBPunfix(bsrc->batCacheid); - BBPkeepref(*ret = bsrc->batCacheid); return MAL_SUCCEED; } if (!bsrc->tmosaic) { - BBPkeepref(*ret = bsrc->batCacheid); + BBPunfix(bsrc->batCacheid); return MAL_SUCCEED; } @@ -664,6 +663,7 @@ MOSdecompressInternal(Client cntxt, bat } } + task->ratio = (flt)task->bsrc->theap.free/ task->bsrc->tmosaic->free; error = 0; switch( ATOMbasetype(task->type)){ @@ -687,11 +687,13 @@ MOSdecompressInternal(Client cntxt, bat break; case TYPE_str: break; +#ifdef _DEBUG_MOSAIC_ default: mnstr_printf(cntxt->fdout,"#unknown compression compatibility\n"); +#endif } - if(error) - mnstr_printf(cntxt->fdout,"#incompatible compression\n"); + if(error) + mnstr_printf(cntxt->fdout,"#incompatible compression for type %d ratio %f\n", ATOMbasetype(task->type),task->ratio); task->timer = GDKusec() - task->timer; @@ -701,7 +703,7 @@ MOSdecompressInternal(Client cntxt, bat bsrc->batDirty = 1; MOSdestroy(bsrc); BATsettrivprop(bsrc); - BBPkeepref( *ret = bsrc->batCacheid); + BBPunfix(bsrc->batCacheid); MCexitMaintenance(cntxt); return MAL_SUCCEED; @@ -711,14 +713,14 @@ str MOSdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { (void) mb; - return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,0), getArgReference_bat(stk,pci,1)); + return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,1)); } str MOSdecompressStorage(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { (void) mb; - return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,0), getArgReference_bat(stk,pci,1)); + return MOSdecompressInternal(cntxt, getArgReference_bat(stk,pci,1)); } // The remainders is cloned from the generator code base @@ -1341,7 +1343,6 @@ int MOSanalyseInternal(Client 
cntxt, int threshold, MOStask task, bat bid) { BAT *b; - int ret = 0; str type; b = BATdescriptor(bid); @@ -1384,18 +1385,14 @@ MOSanalyseInternal(Client cntxt, int thr #endif case TYPE_str: mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t", bid, BBP_physical(bid), type, BATcount(b)); - MOScompressInternal(cntxt, &ret, &bid, task,TRUE); + MOScompressInternal(cntxt, &bid, task,TRUE); MOSdestroy(BBPdescriptor(bid)); - if( ret != b->batCacheid) - BBPdecref(ret, TRUE); break; default: if( b->ttype == TYPE_timestamp || b->ttype == TYPE_date || b->ttype == TYPE_daytime){ mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t", bid, BBP_physical(bid), type, BATcount(b)); - MOScompressInternal(cntxt, &ret, &bid, task,TRUE); + MOScompressInternal(cntxt, &bid, task,TRUE); MOSdestroy(BBPdescriptor(bid)); - if( ret != b->batCacheid) - BBPdecref(ret, TRUE); } else mnstr_printf(cntxt->fdout,"#%d\t%-8s\t%s\t"BUNFMT"\t illegal compression type %s\n", bid, BBP_logical(bid), type, BATcount(b), getTypeName(b->ttype)); ; @@ -1405,47 +1402,63 @@ MOSanalyseInternal(Client cntxt, int thr return 1; } +/* + * An analysis of all possible compressors + * Drop techniques if they are not able to reduce the size below a factor 1.0 + */ #define CANDIDATES 256 /* all three combinations */ + void -MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio, BAT *brun, str compressions) +MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio, BAT *bcompress, BAT *bdecompress, str compressions) { - int i,j,k,cases, bit=1, ret, bid= b->batCacheid; - BUN cnt= BATcount(b), xsize; - lng input; + int i,j,k,cases, bit=1, bid= b->batCacheid; + BUN xsize; MOStask task; int pattern[CANDIDATES]; char technique[CANDIDATES]={0}, *t = technique; dbl xf[CANDIDATES], ratio; - lng clk; + lng clk,clk1; - cases = makepatterns(pattern,CANDIDATES, compressions); task = (MOStask) GDKzalloc(sizeof(*task)); if( task == NULL) return; + // create the list of all possible 2^6 
compression patterns + cases = makepatterns(pattern,CANDIDATES, compressions); + for( i = 0; i < CANDIDATES; i++) xf[i]= -1; - input = cnt * ATOMsize(b->ttype); for( i = 1; i< cases; i++){ - // filter in-effective sub-patterns - for( j=0; j < i; j++)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list