Changeset: c6f8c2213bad for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c6f8c2213bad
Modified Files:
	monetdb5/modules/mal/mosaic.c
	monetdb5/modules/mal/mosaic.h
	monetdb5/modules/mal/mosaic_dictionary.c
	monetdb5/modules/mal/mosaic_dictionary.h
	monetdb5/modules/mal/mosaic_hdr.c
Branch: mosaic
Log Message:

Dump more header information in the layout structure diffs (truncated from 376 to 300 lines): diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c --- a/monetdb5/modules/mal/mosaic.c +++ b/monetdb5/modules/mal/mosaic.c @@ -79,6 +79,8 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte MOStask task=0; int i,ret,bid; BAT *bn= NULL; + char buf[BUFSIZ]; + lng zero=0; task= (MOStask) GDKzalloc(sizeof(*task)); if( task == NULL) @@ -100,9 +102,35 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte MOSinit(task,b); MOSinitializeScan(cntxt,task,0,task->hdr->top); + // safe the general properties + BUNappend(btech, "ratio", FALSE); + BUNappend(bcount, &zero, FALSE); + BUNappend(binput, &zero, FALSE); + BUNappend(boutput, &zero , FALSE); + snprintf(buf,BUFSIZ,"%g", task->hdr->ratio); + BUNappend(bproperties, buf, FALSE); + for(i=0; i < MOSAIC_METHODS-1; i++){ + lng zero = 0; + snprintf(buf,BUFSIZ,"%s_blks", MOSfiltername[i]); + BUNappend(btech, buf, FALSE); + BUNappend(bcount, &zero, FALSE); + BUNappend(binput, &task->hdr->blks[i], FALSE); + BUNappend(boutput, &zero , FALSE); + BUNappend(bproperties, "", FALSE); + + snprintf(buf,BUFSIZ,"%s_elms", MOSfiltername[i]); + BUNappend(btech, buf, FALSE); + BUNappend(bcount, &zero, FALSE); + BUNappend(binput, &task->hdr->elms[i], FALSE); + BUNappend(boutput, &zero , FALSE); + BUNappend(bproperties, "", FALSE); + + } if( task->hdr->blks[MOSAIC_FRAME]) MOSlayout_frame_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties); + if( task->hdr->blks[MOSAIC_DICT]) + MOSlayout_dictionary_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties); while(task->start< task->stop){ switch(MOSgetTag(task->blk)){ @@ -254,7 +282,7 @@ MOScompressInternal(Client cntxt, bat *r BUN cutoff =0; str msg = MAL_SUCCEED; int cand; - float factor= 1.0, fac= 1.0; + float ratio= 1.0, fac= 1.0; *ret = 0; @@ -367,7 +395,7 @@ MOScompressInternal(Client cntxt, bat *r // default is to extend the non-compressed block cand = MOSAIC_NONE; fac = 1.0; - factor = 1.0; + 
ratio = 1.0; // cutoff the filters, especially dictionary tests are expensive if( cutoff && cutoff < task->start){ @@ -381,46 +409,46 @@ MOScompressInternal(Client cntxt, bat *r // select candidate amongst those if ( task->filter[MOSAIC_RLE]){ fac = MOSestimate_runlength(cntxt,task); - if (fac > factor){ + if (fac > ratio){ cand = MOSAIC_RLE; - factor = fac; + ratio = fac; } } if ( task->filter[MOSAIC_DICT]){ fac = MOSestimate_dictionary(cntxt,task); - if (fac > factor){ + if (fac > ratio){ cand = MOSAIC_DICT; - factor = fac; + ratio = fac; } } if ( task->filter[MOSAIC_FRAME]){ fac = MOSestimate_frame(cntxt,task); - if (fac > factor){ + if (fac > ratio){ cand = MOSAIC_FRAME; - factor = fac; + ratio = fac; } } if ( task->filter[MOSAIC_DELTA]){ fac = MOSestimate_delta(cntxt,task); - if ( fac > factor ){ + if ( fac > ratio ){ cand = MOSAIC_DELTA; - factor = fac; + ratio = fac; } } if ( task->filter[MOSAIC_PREFIX]){ fac = MOSestimate_prefix(cntxt,task); - if ( fac > factor ){ + if ( fac > ratio ){ cand = MOSAIC_PREFIX; - factor = fac; + ratio = fac; } if ( fac < 0.0) task->filter[MOSAIC_PREFIX] = 0; } if ( task->filter[MOSAIC_LINEAR]){ fac = MOSestimate_linear(cntxt,task); - if ( fac >factor){ + if ( fac >ratio){ cand = MOSAIC_LINEAR; - factor = fac; + ratio = fac; } } @@ -530,7 +558,7 @@ MOScompressInternal(Client cntxt, bat *r BBPkeepref(*ret = bsrc->batCacheid); BBPunfix(bcompress->batCacheid); } - task->factor = task->hdr->factor = (task->xsize ==0 ? 0:(flt)task->size/task->xsize); + task->ratio = task->hdr->ratio = (task->xsize ==0 ? 
0:(flt)task->size/task->xsize); #ifdef _DEBUG_MOSAIC_ MOSdumpInternal(cntxt,bcompress); #endif @@ -1451,7 +1479,7 @@ MOSanalyseInternal(Client cntxt, int thr #define CANDIDATES 256 /* all three combinations */ void -MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bfactor, str compressions) +MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio, str compressions) { int i,j,k,cases, bit=1, ret, bid= b->batCacheid; BUN cnt= BATcount(b); @@ -1459,7 +1487,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B MOStask task; int pattern[CANDIDATES]; char technique[CANDIDATES]={0}, *t = technique; - dbl xf[CANDIDATES], factor; + dbl xf[CANDIDATES], ratio; cases = makepatterns(pattern,CANDIDATES, compressions); task = (MOStask) GDKzalloc(sizeof(*task)); @@ -1491,7 +1519,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B if( j<i) continue; - xf[i]= task->hdr? task->factor: 0; + xf[i]= task->hdr? task->ratio: 0; if( xf[i] == 0) continue; BUNappend(boutput,&task->xsize,FALSE); @@ -1504,8 +1532,8 @@ MOSanalyseReport(Client cntxt, BAT *b, B } BUNappend(btech,technique,FALSE); if( task->xsize) - factor = (input + 0.0)/task->xsize; - BUNappend(bfactor,&factor,FALSE); + ratio = (input + 0.0)/task->xsize; + BUNappend(bratio,&ratio,FALSE); // get rid of temporary compressed BAT if( ret != bid) @@ -1634,7 +1662,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M for( k = 0; k< MOSAIC_METHODS; k++) task->filter[k]= 1; x+= MOSanalyseInternal(cntxt, threshold, task, bid); - xf[j]= task->hdr? task->factor: 0; + xf[j]= task->hdr? task->ratio: 0; if(xf[mx] < xf[j]) mx =j; } if(x >1){ @@ -1655,7 +1683,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M for( k = 0; k< MOSAIC_METHODS; k++) task->filter[k]= 1; x+= MOSanalyseInternal(cntxt, threshold, task, i); - xf[j]= task->hdr? task->factor: 0; + xf[j]= task->hdr? 
task->ratio: 0; } if( x >1){ mnstr_printf(cntxt->fdout,"#all %d ",i); @@ -1722,13 +1750,13 @@ MOSoptimize(Client cntxt, MalBlkPtr mb, bit *=2; } for( j=0; j < i; j++) - if (pattern[j] == k && task->factor == xf[j]) + if (pattern[j] == k && task->ratio == xf[j]) break; if( j<i) continue; - xf[i] = task->factor; + xf[i] = task->ratio; if( ret != bid) BBPdecref(ret, TRUE); } diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h --- a/monetdb5/modules/mal/mosaic.h +++ b/monetdb5/modules/mal/mosaic.h @@ -73,18 +73,15 @@ typedef struct MOSAICHEADER{ flt sumflt; dbl sumdbl; } checksum, checksum2; - // collect compression statistics for the particular task - lng blks[MOSAIC_METHODS]; - lng elms[MOSAIC_METHODS]; - flt factor; int top; + // skip index for OID access oid oidbase[MOSAICINDEX]; // to speedup localization BUN offset[MOSAICINDEX]; bte mask, bits, framebits; // global compression type properties + // both dictionary and framebased compression require a global dictionary of frequent values + // Their size is purposely topped int dictsize; // used by dictionary compression int framesize; // used by frame compression - // both dictionary and framebased compression require a global dictionary of frequent values - // Their size is purposely topped #ifdef HAVE_HGE hge dict[256]; hge frame[256]; @@ -92,6 +89,12 @@ typedef struct MOSAICHEADER{ lng dict[256]; lng frame[256]; #endif + // collect compression statistics for the particular task + flt ratio; //compresion ratio + lng blks[MOSAIC_METHODS]; + lng elms[MOSAIC_METHODS]; + lng dictfreq[256];// keep track on their use + lng framefreq[256]; } * MosaicHdr; // bit stuffed header block, currently 4 bytes wide and chunks should be 4-byte aligned @@ -125,7 +128,7 @@ typedef struct MOSTASK{ MosaicBlk blk; // current block header in scan oid start; // oid of first element in current blk oid stop; // last oid of range to be scanned - flt factor; + flt ratio; // compression ratio encountered char *dst; // 
write pointer into current compressed blocks diff --git a/monetdb5/modules/mal/mosaic_dictionary.c b/monetdb5/modules/mal/mosaic_dictionary.c --- a/monetdb5/modules/mal/mosaic_dictionary.c +++ b/monetdb5/modules/mal/mosaic_dictionary.c @@ -46,46 +46,66 @@ MOSadvance_dictionary(Client cntxt, MOSt } /* Beware, the dump routines use the compressed part of the task */ +static void +MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i) +{ + void *val = (void*)task->hdr->dict; + + switch(ATOMstorage(task->type)){ + case TYPE_sht: + snprintf(buf,len,"%hd", ((sht*) val)[i]); break; + case TYPE_int: + snprintf(buf,len,"%d", ((int*) val)[i]); break; + case TYPE_oid: + snprintf(buf,len,OIDFMT, ((oid*) val)[i]); break; + case TYPE_lng: + snprintf(buf,len,LLFMT, ((lng*) val)[i]); break; +#ifdef HAVE_HGE + case TYPE_hge: + snprintf(buf,len,"%.40g", (dbl) ((hge*) val)[i]); break; +#endif + case TYPE_wrd: + snprintf(buf,len,SZFMT, ((wrd*) val)[i]); break; + case TYPE_flt: + snprintf(buf,len,"%f", ((flt*) val)[i]); break; + case TYPE_dbl: + snprintf(buf,len,"%g", ((dbl*) val)[i]); break; + } +} + void MOSdump_dictionary(Client cntxt, MOStask task) { - MosaicHdr hdr= task->hdr; int i; - void *val = (void*)hdr->dict; + char buf[BUFSIZ]; - mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits); - switch(ATOMstorage(task->type)){ - case TYPE_sht: - for(i=0; i< hdr->dictsize; i++) - mnstr_printf(cntxt->fdout,"sht [%d] %hd ",i, ((sht*) val)[i]); break; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list