Changeset: c6f8c2213bad for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c6f8c2213bad
Modified Files:
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic.h
        monetdb5/modules/mal/mosaic_dictionary.c
        monetdb5/modules/mal/mosaic_dictionary.h
        monetdb5/modules/mal/mosaic_hdr.c
Branch: mosaic
Log Message:

Dump more header information in the layout structure


diffs (truncated from 376 to 300 lines):

diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -79,6 +79,8 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
        MOStask task=0;
        int i,ret,bid;
        BAT *bn= NULL;
+       char buf[BUFSIZ];
+       lng zero=0;
 
        task= (MOStask) GDKzalloc(sizeof(*task));
        if( task == NULL)
@@ -100,9 +102,35 @@ MOSlayout(Client cntxt, BAT *b, BAT *bte
 
        MOSinit(task,b);
        MOSinitializeScan(cntxt,task,0,task->hdr->top);
+       // save the general properties
 
+               BUNappend(btech, "ratio", FALSE);
+               BUNappend(bcount, &zero, FALSE);
+               BUNappend(binput, &zero, FALSE);
+               BUNappend(boutput, &zero , FALSE);
+               snprintf(buf,BUFSIZ,"%g", task->hdr->ratio);
+               BUNappend(bproperties, buf, FALSE);
+       for(i=0; i < MOSAIC_METHODS-1; i++){
+               lng zero = 0;
+               snprintf(buf,BUFSIZ,"%s_blks", MOSfiltername[i]);
+               BUNappend(btech, buf, FALSE);
+               BUNappend(bcount, &zero, FALSE);
+               BUNappend(binput, &task->hdr->blks[i], FALSE);
+               BUNappend(boutput, &zero , FALSE);
+               BUNappend(bproperties, "", FALSE);
+
+               snprintf(buf,BUFSIZ,"%s_elms", MOSfiltername[i]);
+               BUNappend(btech, buf, FALSE);
+               BUNappend(bcount, &zero, FALSE);
+               BUNappend(binput, &task->hdr->elms[i], FALSE);
+               BUNappend(boutput, &zero , FALSE);
+               BUNappend(bproperties, "", FALSE);
+
+       }
        if( task->hdr->blks[MOSAIC_FRAME])
                
MOSlayout_frame_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
+       if( task->hdr->blks[MOSAIC_DICT])
+               
MOSlayout_dictionary_hdr(cntxt,task,btech,bcount,binput,boutput,bproperties);
 
        while(task->start< task->stop){
                switch(MOSgetTag(task->blk)){
@@ -254,7 +282,7 @@ MOScompressInternal(Client cntxt, bat *r
        BUN cutoff =0;
        str msg = MAL_SUCCEED;
        int cand;
-       float factor= 1.0, fac= 1.0;
+       float ratio= 1.0, fac= 1.0;
        
        *ret = 0;
 
@@ -367,7 +395,7 @@ MOScompressInternal(Client cntxt, bat *r
                // default is to extend the non-compressed block
                cand = MOSAIC_NONE;
                fac = 1.0;
-               factor = 1.0;
+               ratio = 1.0;
 
                // cutoff the filters, especially dictionary tests are expensive
                if( cutoff && cutoff < task->start){
@@ -381,46 +409,46 @@ MOScompressInternal(Client cntxt, bat *r
                // select candidate amongst those
                if ( task->filter[MOSAIC_RLE]){
                        fac = MOSestimate_runlength(cntxt,task);
-                       if (fac > factor){
+                       if (fac > ratio){
                                cand = MOSAIC_RLE;
-                               factor = fac;
+                               ratio = fac;
                        }
                }
                if ( task->filter[MOSAIC_DICT]){
                        fac = MOSestimate_dictionary(cntxt,task);
-                       if (fac > factor){
+                       if (fac > ratio){
                                cand = MOSAIC_DICT;
-                               factor = fac;
+                               ratio = fac;
                        }
                }
                if ( task->filter[MOSAIC_FRAME]){
                        fac = MOSestimate_frame(cntxt,task);
-                       if (fac > factor){
+                       if (fac > ratio){
                                cand = MOSAIC_FRAME;
-                               factor = fac;
+                               ratio = fac;
                        }
                }
                if ( task->filter[MOSAIC_DELTA]){
                        fac = MOSestimate_delta(cntxt,task);
-                       if ( fac > factor ){
+                       if ( fac > ratio ){
                                cand = MOSAIC_DELTA;
-                               factor = fac;
+                               ratio = fac;
                        }
                }
                if ( task->filter[MOSAIC_PREFIX]){
                        fac = MOSestimate_prefix(cntxt,task);
-                       if ( fac > factor ){
+                       if ( fac > ratio ){
                                cand = MOSAIC_PREFIX;
-                               factor = fac;
+                               ratio = fac;
                        }
                        if ( fac  < 0.0)
                                        task->filter[MOSAIC_PREFIX] = 0;
                }
                if ( task->filter[MOSAIC_LINEAR]){
                        fac = MOSestimate_linear(cntxt,task);
-                       if ( fac >factor){
+                       if ( fac >ratio){
                                cand = MOSAIC_LINEAR;
-                               factor = fac;
+                               ratio = fac;
                        }
                }
 
@@ -530,7 +558,7 @@ MOScompressInternal(Client cntxt, bat *r
                BBPkeepref(*ret = bsrc->batCacheid);
                BBPunfix(bcompress->batCacheid);
        }
-       task->factor = task->hdr->factor = (task->xsize ==0 ? 
0:(flt)task->size/task->xsize);
+       task->ratio = task->hdr->ratio = (task->xsize ==0 ? 
0:(flt)task->size/task->xsize);
 #ifdef _DEBUG_MOSAIC_
        MOSdumpInternal(cntxt,bcompress);
 #endif
@@ -1451,7 +1479,7 @@ MOSanalyseInternal(Client cntxt, int thr
 
 #define CANDIDATES 256  /* all three combinations */
 void
-MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bfactor, 
str compressions)
+MOSanalyseReport(Client cntxt, BAT *b, BAT *btech, BAT *boutput, BAT *bratio, 
str compressions)
 {
        int i,j,k,cases, bit=1, ret, bid= b->batCacheid;
        BUN cnt=  BATcount(b);
@@ -1459,7 +1487,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B
        MOStask task;
        int pattern[CANDIDATES];
        char technique[CANDIDATES]={0}, *t =  technique;
-       dbl xf[CANDIDATES], factor;
+       dbl xf[CANDIDATES], ratio;
 
        cases = makepatterns(pattern,CANDIDATES, compressions);
        task = (MOStask) GDKzalloc(sizeof(*task));
@@ -1491,7 +1519,7 @@ MOSanalyseReport(Client cntxt, BAT *b, B
                if( j<i)
                        continue;
 
-               xf[i]= task->hdr? task->factor: 0;
+               xf[i]= task->hdr? task->ratio: 0;
                if( xf[i] == 0)
                        continue;
                BUNappend(boutput,&task->xsize,FALSE);
@@ -1504,8 +1532,8 @@ MOSanalyseReport(Client cntxt, BAT *b, B
                }
                BUNappend(btech,technique,FALSE);
                if( task->xsize)
-                       factor = (input + 0.0)/task->xsize;
-               BUNappend(bfactor,&factor,FALSE);
+                       ratio = (input + 0.0)/task->xsize;
+               BUNappend(bratio,&ratio,FALSE);
 
                // get rid of temporary compressed BAT
                if( ret != bid)
@@ -1634,7 +1662,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M
                                for( k = 0; k< MOSAIC_METHODS; k++)
                                        task->filter[k]= 1;
                        x+= MOSanalyseInternal(cntxt, threshold, task, bid);
-                       xf[j]= task->hdr? task->factor: 0;
+                       xf[j]= task->hdr? task->ratio: 0;
                        if(xf[mx] < xf[j]) mx =j;
                }
                if(x >1){
@@ -1655,7 +1683,7 @@ MOSanalyse(Client cntxt, MalBlkPtr mb, M
                                        for( k = 0; k< MOSAIC_METHODS; k++)
                                                task->filter[k]= 1;
                                x+= MOSanalyseInternal(cntxt, threshold, task, 
i);
-                       xf[j]= task->hdr? task->factor: 0;
+                       xf[j]= task->hdr? task->ratio: 0;
                }
                if( x >1){
                        mnstr_printf(cntxt->fdout,"#all %d ",i);
@@ -1722,13 +1750,13 @@ MOSoptimize(Client cntxt, MalBlkPtr mb, 
                        bit *=2;
                }
                for( j=0; j < i; j++)
-                       if (pattern[j] == k && task->factor == xf[j])
+                       if (pattern[j] == k && task->ratio == xf[j])
                                break;
                if( j<i)
                        continue;
 
 
-               xf[i] = task->factor;
+               xf[i] = task->ratio;
                if( ret != bid)
                        BBPdecref(ret, TRUE);
        }
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -73,18 +73,15 @@ typedef struct MOSAICHEADER{
                flt sumflt;
                dbl sumdbl;
        } checksum, checksum2;
-       // collect compression statistics for the particular task
-       lng blks[MOSAIC_METHODS];       
-       lng elms[MOSAIC_METHODS];       
-       flt factor;
        int top;
+       // skip index for OID access
        oid oidbase[MOSAICINDEX];       // to speedup localization
        BUN offset[MOSAICINDEX];
        bte mask, bits, framebits;      // global compression type properties
+       // both dictionary and framebased compression require a global 
dictionary of frequent values
+       // Their size is purposely topped 
        int dictsize;           // used by dictionary compression
        int framesize;          // used by frame compression
-       // both dictionary and framebased compression require a global 
dictionary of frequent values
-       // Their size is purposely topped 
 #ifdef HAVE_HGE
        hge dict[256];
        hge frame[256];
@@ -92,6 +89,12 @@ typedef struct MOSAICHEADER{
        lng dict[256];
        lng frame[256];
 #endif
+       // collect compression statistics for the particular task
+       flt ratio;      //compression ratio
+       lng blks[MOSAIC_METHODS];       
+       lng elms[MOSAIC_METHODS];       
+       lng dictfreq[256];// keep track of their use
+       lng framefreq[256];
 } * MosaicHdr;
 
 // bit stuffed header block, currently 4 bytes wide and chunks should be 
4-byte aligned
@@ -125,7 +128,7 @@ typedef struct MOSTASK{
        MosaicBlk blk;  // current block header in scan
        oid start;              // oid of first element in current blk
        oid stop;               // last oid of range to be scanned
-       flt factor;
+       flt ratio;              // compression ratio encountered
 
        char *dst;              // write pointer into current compressed blocks
 
diff --git a/monetdb5/modules/mal/mosaic_dictionary.c 
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -46,46 +46,66 @@ MOSadvance_dictionary(Client cntxt, MOSt
 }
 
 /* Beware, the dump routines use the compressed part of the task */
+static void
+MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i)
+{
+       void *val = (void*)task->hdr->dict;
+
+       switch(ATOMstorage(task->type)){
+       case TYPE_sht:
+               snprintf(buf,len,"%hd", ((sht*) val)[i]); break;
+       case TYPE_int:
+               snprintf(buf,len,"%d", ((int*) val)[i]); break;
+       case  TYPE_oid:
+               snprintf(buf,len,OIDFMT,  ((oid*) val)[i]); break;
+       case  TYPE_lng:
+               snprintf(buf,len,LLFMT,  ((lng*) val)[i]); break;
+#ifdef HAVE_HGE
+       case  TYPE_hge:
+               snprintf(buf,len,"%.40g",  (dbl) ((hge*) val)[i]); break;
+#endif
+       case  TYPE_wrd:
+               snprintf(buf,len,SZFMT,  ((wrd*) val)[i]); break;
+       case TYPE_flt:
+               snprintf(buf,len,"%f", ((flt*) val)[i]); break;
+       case TYPE_dbl:
+               snprintf(buf,len,"%g", ((dbl*) val)[i]); break;
+       }
+}
+
 void
 MOSdump_dictionary(Client cntxt, MOStask task)
 {
-       MosaicHdr hdr= task->hdr;
        int i;
-       void *val = (void*)hdr->dict;
+       char buf[BUFSIZ];
 
-       mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits);
-       switch(ATOMstorage(task->type)){
-       case TYPE_sht:
-               for(i=0; i< hdr->dictsize; i++)
-               mnstr_printf(cntxt->fdout,"sht [%d] %hd ",i, ((sht*) val)[i]); 
break;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to