Changeset: 03cd42acce72 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03cd42acce72
Modified Files:
        monetdb5/modules/mosaic/mosaic.c
        monetdb5/modules/mosaic/mosaic.h
        monetdb5/modules/mosaic/mosaic_dictionary.c
        monetdb5/modules/mosaic/mosaic_frame.c
Branch: mosaic
Log Message:

Clean up the code
Easier to use global dictionary and delta frame
Use the bitvector code in gdk


diffs (truncated from 1209 to 300 lines):

diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c
--- a/monetdb5/modules/mosaic/mosaic.c
+++ b/monetdb5/modules/mosaic/mosaic.c
@@ -653,6 +653,7 @@ MOSdecompressInternal(Client cntxt, bat 
 
        // continue with all work
        bsrc->batDirty = 1;
+       BATsettrivprop(bsrc);
 
        MCexitMaintenance(cntxt);
        BBPkeepref( *ret = bsrc->batCacheid);
@@ -684,9 +685,9 @@ MOSdecompressInternal(Client cntxt, bat 
        }
        if(error)
                mnstr_printf(cntxt->fdout,"#incompatible compression\n");
-       GDKfree(task);
 
        task->timer = GDKusec() - task->timer;
+       GDKfree(task);
        return MAL_SUCCEED;
 }
 
@@ -874,9 +875,7 @@ MOSsubselect(Client cntxt, MalBlkPtr mb,
        BATsetcount(bn,cnt);
        bn->tnil = 0;
        bn->tnonil = 1;
-       bn->tsorted = 1;
-       bn->trevsorted = BATcount(bn) <= 1;
-       bn->tkey = 1;
+       bn->tsorted = bn->trevsorted = cnt <=1;
        *getArgReference_bat(stk, pci, 0) = bn->batCacheid;
        GDKfree(task);
        BBPkeepref(bn->batCacheid);
@@ -1005,9 +1004,7 @@ str MOSthetasubselect(Client cntxt, MalB
                BATsetcount(bn,cnt);
                bn->tnil = 0;
                bn->tnonil = 1;
-               bn->tsorted = 1;
-               bn->trevsorted = BATcount(bn) <= 1;
-               bn->tkey = 1;
+               bn->tsorted = bn->trevsorted = cnt <= 1;
                BBPkeepref(*getArgReference_bat(stk,pci,0)= bn->batCacheid);
        }
        GDKfree(task);
@@ -1135,9 +1132,7 @@ str MOSprojection(Client cntxt, MalBlkPt
        BATsetcount(bn,task->cnt);
        bn->tnil = 0;
        bn->tnonil = 1;
-       bn->tsorted = 1;
-       bn->trevsorted = BATcount(bn) <= 1;
-       bn->tkey = 1;
+       bn->tsorted = bn->trevsorted = cnt <= 1;
        BBPkeepref(*ret = bn->batCacheid);
        GDKfree(task);
        return msg;
@@ -1262,11 +1257,8 @@ MOSsubjoin(Client cntxt, MalBlkPtr mb, M
                        assert(0);
                }
 
-    bln->tsorted = cnt <= 1;
-    bln->trevsorted = cnt <= 1;
-
-    brn->tsorted = cnt<= 1;
-    brn->trevsorted = cnt <= 1;
+       BATsettrivprop(bln);
+       BATsettrivprop(brn);
     if( swapped){
         BBPkeepref(*ret= brn->batCacheid);
         BBPkeepref(*ret2= bln->batCacheid);
diff --git a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h
--- a/monetdb5/modules/mosaic/mosaic.h
+++ b/monetdb5/modules/mosaic/mosaic.h
@@ -81,18 +81,33 @@ typedef struct MOSAICHEADER{
        bte mask, bits, framebits;      // global compression type properties
        int dictsize;           // used by dictionary compression, it is a small table
        int framesize;          // used by frame compression, it is a small table
+       union{
+               sht valsht[256];
+               int valint[256];
+               lng vallng[256];
+               oid valoid[256];
+               flt valflt[256];
+               dbl valdbl[256];
 #ifdef HAVE_HGE
-       hge dict[256];
-       hge frame[256];
-#else
-       lng dict[256];
-       lng frame[256];
+               hge valhge[256];
 #endif
+       }dict;
+       lng dictfreq[256];// keep track on their use
+       union{
+               sht valsht[256];
+               int valint[256];
+               lng vallng[256];
+               oid valoid[256];
+               flt valflt[256];
+               dbl valdbl[256];
+#ifdef HAVE_HGE
+               hge valhge[256];
+#endif
+       }frame;
        // collect compression statistics for the particular task
        flt ratio;      //compresion ratio
        lng blks[MOSAIC_METHODS];       
        lng elms[MOSAIC_METHODS];       
-       lng dictfreq[256];// keep track on their use
        lng framefreq[256];
 } * MosaicHdr;
 
@@ -110,7 +125,7 @@ typedef struct MOSAICBLK{
 #define MOSincCnt(Blk,I) (assert((Blk)->cnt +I < MOSAICMAXCNT), (Blk)->cnt+= (unsigned int)(I))
 
 /* The start of the encoding withing a Mosaic block */
-#define MOScodevector(Task) (((char*) Task->blk)+ MosaicBlkSize)
+#define MOScodevector(Task) (((char*) (Task)->blk)+ MosaicBlkSize)
 
 /* Memory word alignement is type and platform dependent.
  * We use an encoding that fits the column type requirements
diff --git a/monetdb5/modules/mosaic/mosaic_dictionary.c b/monetdb5/modules/mosaic/mosaic_dictionary.c
--- a/monetdb5/modules/mosaic/mosaic_dictionary.c
+++ b/monetdb5/modules/mosaic/mosaic_dictionary.c
@@ -35,7 +35,7 @@
 void
 MOSadvance_dictionary(Client cntxt, MOStask task)
 {
-       int *dst = (int*)  (((char*) task->blk) + MosaicBlkSize);
+       int *dst = (int*)  MOScodevector(task);
        BUN cnt = MOSgetCnt(task->blk);
        long bytes;
        (void) cntxt;
@@ -51,40 +51,58 @@ MOSadvance_dictionary(Client cntxt, MOSt
 static void
 MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i)
 {
-       void *val = (void*)task->hdr->dict;
 
        switch(ATOMbasetype(task->type)){
        case TYPE_sht:
-               snprintf(buf,len,"%hd", ((sht*) val)[i]); break;
+               snprintf(buf,len,"%hd", task->hdr->dict.valsht[i]); break;
        case TYPE_int:
-               snprintf(buf,len,"%d", ((int*) val)[i]); break;
+               snprintf(buf,len,"%d", task->hdr->dict.valint[i]); break;
        case  TYPE_oid:
-               snprintf(buf,len,OIDFMT,  ((oid*) val)[i]); break;
+               snprintf(buf,len,OIDFMT,  task->hdr->dict.valoid[i]); break;
        case  TYPE_lng:
-               snprintf(buf,len,LLFMT,  ((lng*) val)[i]); break;
+               snprintf(buf,len,LLFMT,  task->hdr->dict.vallng[i]); break;
 #ifdef HAVE_HGE
        case  TYPE_hge:
-               snprintf(buf,len,"%.40g",  (dbl) ((hge*) val)[i]); break;
+               snprintf(buf,len,"%.40g",  (dbl) task->hdr->dict.valhge[i]); break;
 #endif
        case TYPE_flt:
-               snprintf(buf,len,"%f", ((flt*) val)[i]); break;
+               snprintf(buf,len,"%f", task->hdr->dict.valflt[i]); break;
        case TYPE_dbl:
-               snprintf(buf,len,"%g", ((dbl*) val)[i]); break;
+               snprintf(buf,len,"%g", task->hdr->dict.valdbl[i]); break;
        }
 }
 
 void
 MOSdump_dictionary(Client cntxt, MOStask task)
 {
-       int i;
+       int i,len= BUFSIZ;
        char buf[BUFSIZ];
 
-       mnstr_printf(cntxt->fdout,"#bits %d",task->hdr->bits);
+       mnstr_printf(cntxt->fdout,"#dictionary bits %d dictsize %d",task->hdr->bits, task->hdr->dictsize);
        for(i=0; i< task->hdr->dictsize; i++){
                MOSdump_dictionaryInternal(buf, BUFSIZ, task,i);
                mnstr_printf(cntxt->fdout,"[%d] %s ",i,buf);
        }
        mnstr_printf(cntxt->fdout,"\n");
+       switch(ATOMbasetype(task->type)){
+       case TYPE_sht:
+               snprintf(buf,len,"%hd %hd", task->hdr->checksum.sumsht,task->hdr->checksum2.sumsht); break;
+       case TYPE_int:
+               snprintf(buf,len,"%d %d", task->hdr->checksum.sumint,task->hdr->checksum2.sumint); break;
+       case  TYPE_oid:
+               snprintf(buf,len,OIDFMT " " OIDFMT, task->hdr->checksum.sumoid,task->hdr->checksum2.sumoid); break;
+       case  TYPE_lng:
+               snprintf(buf,len,LLFMT " " LLFMT, task->hdr->checksum.sumlng,task->hdr->checksum2.sumlng); break;
+#ifdef HAVE_HGE
+       case  TYPE_hge:
+               snprintf(buf,len,"%.40g %.40g", (dbl)task->hdr->checksum.sumhge,(dbl)task->hdr->checksum2.sumhge); break;
+#endif
+       case TYPE_flt:
+               snprintf(buf,len,"%f %f", task->hdr->checksum.sumflt,task->hdr->checksum2.sumflt); break;
+       case TYPE_dbl:
+               snprintf(buf,len,"%g %g", task->hdr->checksum.sumdbl,task->hdr->checksum2.sumdbl); break;
+       }
+       mnstr_printf(cntxt->fdout,"#checksums %s\n",buf);
 }
 
 void
@@ -130,19 +148,18 @@ MOSskip_dictionary(Client cntxt, MOStask
                task->blk = 0; // ENDOFLIST
 }
 
-#define MOSfind(X,VAL,F,L)\
+#define MOSfind(Res,DICT,VAL,F,L)\
 { int m,f= F, l=L; \
    while( l-f > 0 ) { \
        m = f + (l-f)/2;\
-       if ( VAL < dict[m] ) l=m-1; else f= m;\
-       if ( VAL > dict[m] ) f=m+1; else l= m;\
+       if ( VAL < DICT[m] ) l=m-1; else f= m;\
+       if ( VAL > DICT[m] ) f=m+1; else l= m;\
    }\
-   X= f;\
+   Res= f;\
 }
 
 #define estimateDict(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start;\
-       TPE *dict= (TPE*)hdr->dict;\
        BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start;\
        if( task->range[MOSAIC_DICT] > task->start){\
                i = task->range[MOSAIC_DICT] - task->start;\
@@ -155,8 +172,8 @@ MOSskip_dictionary(Client cntxt, MOStask
                return factor;\
        }\
        for(i =0; i<limit; i++, val++){\
-               MOSfind(j,*val,0,hdr->dictsize);\
-               if( j == hdr->dictsize || dict[j] != *val )\
+               MOSfind(j,hdr->dict.val##TPE,*val,0,hdr->dictsize);\
+               if( j == hdr->dictsize || hdr->dict.val##TPE[j] != *val )\
                        break;\
        }\
        if( i * sizeof(TPE) <= wordaligned( MosaicBlkSize + i,TPE))\
@@ -167,12 +184,11 @@ MOSskip_dictionary(Client cntxt, MOStask
 // store it in the compressed heap header directly
 // filter out the most frequent ones
 #define makeDict(TPE)\
-{      TPE *val = ((TPE*)task->src) + task->start;\
-       TPE *dict = (TPE*)hdr->dict,v;\
+{      TPE v,*val = ((TPE*)task->src) + task->start;\
        BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start;\
        for(i = 0; i< limit; i++, val++){\
                for(j= 0; j< hdr->dictsize; j++)\
-                       if( dict[j] == *val) break;\
+                       if( task->hdr->dict.val##TPE[j] == *val) break;\
                if ( j == hdr->dictsize){\
                        if ( hdr->dictsize == 256){\
                                int min = 0;\
@@ -182,7 +198,7 @@ MOSskip_dictionary(Client cntxt, MOStask
                                cnt[j]=0;\
                                break;\
                        }\
-                       dict[j] = *val;\
+                       task->hdr->dict.val##TPE[j] = *val;\
                        cnt[j]++;\
                        hdr->dictsize++;\
                } else\
@@ -190,10 +206,10 @@ MOSskip_dictionary(Client cntxt, MOStask
        }\
        for(k=0; k< hdr->dictsize; k++)\
                for(j=k+1; j< hdr->dictsize; j++)\
-                       if(dict[k] >dict[j]){\
-                               v= dict[k];\
-                               dict[k] = dict[j];\
-                               dict[j] = v;\
+                       if(task->hdr->dict.val##TPE[k] >task->hdr->dict.val##TPE[j]){\
+                               v = task->hdr->dict.val##TPE[k];\
+                               task->hdr->dict.val##TPE[k] = task->hdr->dict.val##TPE[j];\
+                               task->hdr->dict.val##TPE[j] = v;\
                        }\
        hdr->bits = 1;\
        hdr->mask =1;\
@@ -256,7 +272,6 @@ MOSestimate_dictionary(Client cntxt, MOS
 #endif
        case TYPE_lng:
                {       lng *val = ((lng*)task->src) + task->start;
-                       lng *dict = (lng*)hdr->dict;
                        // assume uniform compression statistics
                        if( task->range[MOSAIC_DICT] > task->start){
                                i = task->range[MOSAIC_DICT] - task->start;
@@ -270,8 +285,8 @@ MOSestimate_dictionary(Client cntxt, MOS
                        }
 
                        for(i =task->start; i<task->stop; i++, val++){
-                               MOSfind(j,*val,0,hdr->dictsize);
-                               if( j == hdr->dictsize || dict[j] != *val)
+                               MOSfind(j,task->hdr->dict.vallng,*val,0,hdr->dictsize);
+                               if( j == hdr->dictsize || task->hdr->dict.vallng[j] != *val)
                                        break;
                        }
                        i -= task->start;
@@ -297,17 +312,17 @@ MOSestimate_dictionary(Client cntxt, MOS
 
 #define DICTcompress(TPE)\
 {      TPE *val = ((TPE*)task->src) + task->start;\
-       TPE *dict = (TPE*)hdr->dict;\
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to