Changeset: ea4dfcacc9ca for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ea4dfcacc9ca Modified Files: monetdb5/modules/mal/mosaic.c monetdb5/modules/mal/mosaic_delta.c monetdb5/modules/mal/mosaic_dict.c monetdb5/modules/mal/mosaic_hdr.c monetdb5/modules/mal/mosaic_linear.c monetdb5/modules/mal/mosaic_rle.c monetdb5/modules/mal/mosaic_zone.c Branch: mosaic Log Message:
Prepare for database analysis. The complete dbfarm directory is compressed tentatively for extracting the performance diffs (truncated from 321 to 300 lines): diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c --- a/monetdb5/modules/mal/mosaic.c +++ b/monetdb5/modules/mal/mosaic.c @@ -49,23 +49,10 @@ static void MOSdumpTask(Client cntxt,MOStask task) { int i; - lng cnt = 0; - mnstr_printf(cntxt->fdout,"#blk type %s todo "BUNFMT"\n", filtername[task->type], task->elm); - mnstr_printf(cntxt->fdout,"#wins "); - for(i=0; i< MOSAIC_METHODS; i++) - mnstr_printf(cntxt->fdout,LLFMT " ",task->wins[i]); - mnstr_printf(cntxt->fdout,"\n#elms "); - for(i=0; i< MOSAIC_METHODS; i++){ - mnstr_printf(cntxt->fdout,LLFMT " ",task->elms[i]); - cnt += task->elms[i]; + for ( i=0; i < MOSAIC_METHODS; i++){ + mnstr_printf(cntxt->fdout, "#%s wins "LLFMT " elms "LLFMT " time " LLFMT "\n", + filtername[i], task->wins[i], task->elms[i],task->time[i]); } - mnstr_printf(cntxt->fdout,"\n#time "); - for(i=0; i< MOSAIC_METHODS; i++) - mnstr_printf(cntxt->fdout, LLFMT" ",task->time[i]); - mnstr_printf(cntxt->fdout,"\n#perc "); - for(i=0; i< MOSAIC_METHODS; i++) - mnstr_printf(cntxt->fdout, "%d ",(int)((100.0 *task->elms[i])/cnt)); - mnstr_printf(cntxt->fdout,"\n"); } // dump a compressed BAT @@ -98,8 +85,6 @@ MOSdumpInternal(Client cntxt, BAT *b){ case MOSAIC_ZONE: MOSdump_zone(cntxt,task); MOSskip_zone(task); - break; - default: assert(0); } } } @@ -224,7 +209,7 @@ MOScompressInternal(Client cntxt, int *r // It should always take less space then the orginal column. // But be prepared that a last block header may be stored // use a size overshoot. 
Also be aware of possible dictionary headers - bn = BATnew( TYPE_void, b->ttype, 2*cnt+MosaicBlkSize, TRANSIENT); + bn = BATnew( TYPE_void, b->ttype, 2 * cnt * MosaicBlkSize, TRANSIENT); if (bn == NULL) { BBPreleaseref(b->batCacheid); throw(MAL,"mosaic.compress", MAL_MALLOC_FAIL); @@ -251,42 +236,43 @@ MOScompressInternal(Client cntxt, int *r while(task->elm > 0){ // default is to extend the non-compressed block + //mnstr_printf(cntxt->fdout,"#elements "BUNFMT"\n",task->elm); cand = MOSAIC_NONE; perc = 100; percentage = 100; // select candidate amongst those - if (filter[MOSAIC_RLE]){ + if ( filter[MOSAIC_RLE]){ perc = MOSestimate_rle(cntxt,task); if ( perc < percentage){ cand = MOSAIC_RLE; percentage = perc; } } - if (filter[MOSAIC_DICT]){ + if ( filter[MOSAIC_DICT]){ perc = MOSestimate_dict(cntxt,task); - if ( perc <= percentage){ + if (perc >= 0 && perc <= percentage){ cand = MOSAIC_DICT; percentage = perc; } } - if (filter[MOSAIC_ZONE]){ + if ( filter[MOSAIC_ZONE]){ perc = MOSestimate_zone(cntxt,task); - if ( perc < percentage){ + if (perc >= 0 && perc < percentage){ cand = MOSAIC_ZONE; percentage = perc; } } - if (filter[MOSAIC_DELTA]){ + if ( filter[MOSAIC_DELTA]){ perc = MOSestimate_delta(cntxt,task); - if ( perc < percentage){ + if ( perc >=0 && perc < percentage){ cand = MOSAIC_DELTA; percentage = perc; } } - if (filter[MOSAIC_LINEAR]){ + if ( filter[MOSAIC_LINEAR]){ perc = MOSestimate_linear(cntxt,task); - if ( perc < percentage){ + if ( perc >=0 && perc < percentage){ cand = MOSAIC_LINEAR; percentage = perc; } @@ -396,10 +382,10 @@ MOScompressInternal(Client cntxt, int *r bn->tkey = b->tkey; BBPkeepref(*ret = bn->batCacheid); BBPreleaseref(b->batCacheid); - GDKfree(task); #ifdef _DEBUG_MOSAIC_ MOSdumpInternal(cntxt,bn); #endif + GDKfree(task); return msg; } @@ -1036,3 +1022,49 @@ MOSjoin(Client cntxt, MalBlkPtr mb, MalS } return msg; } + +// The analyse routine runs through the BAT dictionary and assess +// all possible compression options. 
+ +static void +MOSanalyseInternal(Client cntxt, int bid) +{ + BAT *b; + int ret; + + b = BATdescriptor(bid); + if( b == NULL) + return; + mnstr_printf(cntxt->fdout,"#\n#mosaic BAT %d %s "BUNFMT"\n", bid, BBP_logical(bid), BATcount(b)); + switch( b->ttype){ + case TYPE_bit: + case TYPE_bte: + case TYPE_sht: + case TYPE_int: + case TYPE_lng: + case TYPE_flt: + case TYPE_dbl: + MOScompressInternal(cntxt, &ret, &bid, 0); + break; + default: + if( b->ttype == TYPE_timestamp) + MOScompressInternal(cntxt, &ret, &bid, 0); + else + mnstr_printf(cntxt->fdout,"#nonsupported type %d\n",b->ttype); + } + BBPreleaseref(bid); +} + +str +MOSanalyse(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + int i; + (void) mb; + (void) stk; + (void) pci; + + for (i = 1; i < getBBPsize(); i++) + if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i)) ) + MOSanalyseInternal(cntxt, i); + return MAL_SUCCEED; +} diff --git a/monetdb5/modules/mal/mosaic_delta.c b/monetdb5/modules/mal/mosaic_delta.c --- a/monetdb5/modules/mal/mosaic_delta.c +++ b/monetdb5/modules/mal/mosaic_delta.c @@ -70,7 +70,7 @@ MOSskip_delta(MOStask task) int MOSestimate_delta(Client cntxt, MOStask task) { BUN i = -1; - int percentage = 0; + int percentage = -1; (void) cntxt; switch(ATOMstorage(task->type)){ @@ -87,7 +87,9 @@ MOSestimate_delta(Client cntxt, MOStask percentage = 100 * (sizeof(int)+(int)i-1) / ((int)i * sizeof(int)); } } +#ifdef _DEBUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#estimate delta %d elm %d perc\n",(int)i,percentage); +#endif return percentage; } diff --git a/monetdb5/modules/mal/mosaic_dict.c b/monetdb5/modules/mal/mosaic_dict.c --- a/monetdb5/modules/mal/mosaic_dict.c +++ b/monetdb5/modules/mal/mosaic_dict.c @@ -104,7 +104,7 @@ MOSestimate_dict(Client cntxt, MOStask t { BUN i = -1; int cnt =0,j; lng *size; - int percentage= 99; + int percentage= -1; (void) cntxt; // use the dst to avoid overwriting noneblocked @@ -130,7 +130,9 @@ MOSestimate_dict(Client cntxt, MOStask t if(i) percentage = 100 * 
sizeof(int) * dictsize / ((int)i * sizeof(int)); } } +#ifdef _DEBUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#estimate dict %d elm %d perc\n",(int)i,percentage); +#endif return percentage; } @@ -202,8 +204,6 @@ MOScompress_dict(Client cntxt, MOStask t task->src = (char*) val; } break; - default: - assert(0); } #ifdef _DEBUG_MOSAIC_ MOSdump_dict(cntxt, task); diff --git a/monetdb5/modules/mal/mosaic_hdr.c b/monetdb5/modules/mal/mosaic_hdr.c --- a/monetdb5/modules/mal/mosaic_hdr.c +++ b/monetdb5/modules/mal/mosaic_hdr.c @@ -32,10 +32,16 @@ MOSdumpHeader(Client cntxt, MOStask task MosaicHdr hdr = (MosaicHdr) task->hdr; int i; +#ifdef _DEBGUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#header block "PTRFMT" version %d\n", PTRFMTCAST hdr, hdr->version); mnstr_printf(cntxt->fdout,"#index top %d\n", hdr->top); for(i= 0; i< hdr->top; i++) mnstr_printf(cntxt->fdout,"#[%d] "OIDFMT" " BUNFMT "\n",i, hdr->index[i], hdr->offset[i]); +#else + (void) cntxt; + (void) i; + (void) hdr; +#endif } // add the chunk to the index to facilitate 'fast' OID-based access @@ -68,7 +74,9 @@ MOSupdateHeader(Client cntxt, MOStask ta minsize = hdr->offset[i] - hdr->offset[i-1]; j = i; } +#ifdef _DEBUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#ditch entry %d\n",j); +#endif // simply remove on element for( i = j; i < hdr->top; i++){ hdr->index[i] = hdr->index[i+1]; diff --git a/monetdb5/modules/mal/mosaic_linear.c b/monetdb5/modules/mal/mosaic_linear.c --- a/monetdb5/modules/mal/mosaic_linear.c +++ b/monetdb5/modules/mal/mosaic_linear.c @@ -96,7 +96,7 @@ MOSskip_linear(MOStask task) int MOSestimate_linear(Client cntxt, MOStask task) { BUN i = -1; - int percentage = 0; + int percentage = -1; (void) cntxt; switch(ATOMstorage(task->type)){ @@ -115,7 +115,9 @@ MOSestimate_linear(Client cntxt, MOStask percentage = 100 * 2 * sizeof(int)/ ( (int) i * sizeof(int)); } } +#ifdef _DEBUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#estimate linear %d elm %d perc\n",(int)i,percentage); +#endif return percentage; } @@ -168,7 +170,7 @@ 
MOScompress_linear(Client cntxt, MOStask #ifdef _DEBUG_MOSAIC_ MOSdump_linear(cntxt, task); #endif - task->time[MOSAIC_LINEAR] = GDKusec() - task->time[MOSAIC_RLE]; + task->time[MOSAIC_LINEAR] = GDKusec() - task->time[MOSAIC_LINEAR]; } // the inverse operator, extend the src diff --git a/monetdb5/modules/mal/mosaic_rle.c b/monetdb5/modules/mal/mosaic_rle.c --- a/monetdb5/modules/mal/mosaic_rle.c +++ b/monetdb5/modules/mal/mosaic_rle.c @@ -93,7 +93,7 @@ MOSskip_rle(MOStask task) int MOSestimate_rle(Client cntxt, MOStask task) { BUN i = -1; - int percentage = 0; + int percentage = -1; (void) cntxt; switch(ATOMstorage(task->type)){ @@ -111,7 +111,9 @@ MOSestimate_rle(Client cntxt, MOStask ta percentage = 100 * sizeof(int)/ ( (int) i * sizeof(int)); } } +#ifdef _DEBUG_MOSAIC_ mnstr_printf(cntxt->fdout,"#estimate rle %d elm %d perc\n",(int)i,percentage); +#endif return percentage; } diff --git a/monetdb5/modules/mal/mosaic_zone.c b/monetdb5/modules/mal/mosaic_zone.c _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list