Changeset: 03142818494b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03142818494b
Modified Files:
        monetdb5/modules/mal/mosaic_dictionary.c
Branch: mosaic
Log Message:

Moving dictionary indexing to bit bits
There is still an issue with the bit stuffing
as shown by mosaic_mal test.


diffs (truncated from 839 to 300 lines):

diff --git a/monetdb5/modules/mal/mosaic_dictionary.c 
b/monetdb5/modules/mal/mosaic_dictionary.c
--- a/monetdb5/modules/mal/mosaic_dictionary.c
+++ b/monetdb5/modules/mal/mosaic_dictionary.c
@@ -33,22 +33,15 @@
 void
 MOSadvance_dictionary(Client cntxt, MOStask task)
 {
+       int *dst = (int*)  (((char*) task->blk) + MosaicBlkSize);
+       long cnt = MOSgetCnt(task->blk);
+       long bytes;
        (void) cntxt;
 
-       task->start += MOSgetCnt(task->blk);
-       switch(ATOMstorage(task->type)){
-       //case TYPE_bte: CASE_bit: no compression achievable
-       case TYPE_sht: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),sht)); break;
-       case TYPE_int: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),int)); break;
-       case TYPE_lng: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),lng)); break;
-       case TYPE_oid: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),oid)); break;
-       case TYPE_wrd: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),wrd)); break;
-       case TYPE_flt: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),flt)); break;
-       case TYPE_dbl: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),dbl)); break;
-#ifdef HAVE_HGE
-       case TYPE_hge: task->blk = (MosaicBlk)( ((char*)task->blk) + 
wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),hge)); break;
-#endif
-       }
+       assert(cnt > 0);
+       task->start += (oid) cnt;
+       bytes =  (cnt * task->hdr->bits)/8 + (((cnt * task->hdr->bits) %8) != 
0);
+       task->blk = (MosaicBlk) (((char*) dst)  + wordaligned(bytes, lng)); 
 }
 
 /* Beware, the dump routines use the compressed part of the task */
@@ -59,7 +52,7 @@ MOSdump_dictionary(Client cntxt, MOStask
        int i;
        void *val = (void*)hdr->dict;
 
-       mnstr_printf(cntxt->fdout,"#");
+       mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits);
        switch(ATOMstorage(task->type)){
        case TYPE_sht:
                for(i=0; i< hdr->dictsize; i++)
@@ -99,11 +92,23 @@ MOSskip_dictionary(Client cntxt, MOStask
                task->blk = 0; // ENDOFLIST
 }
 
+#define MOSfind(X,VAL,F,L)\
+{ int m,f= F, l=L; \
+   while( l-f > 0 ) { \
+       m = f + (l-f)/2;\
+       if ( VAL < dict[m] ) l=m-1; else f= m;\
+       if ( VAL > dict[m] ) f=m+1; else l= m;\
+   }\
+   X= f;\
+}
+
 #define estimateDict(TPE)\
 {      TPE *val = (TPE*)task->src;\
+       TPE *dict= (TPE*)hdr->dict;\
        for(i =0; i<task->elm; i++, val++){\
-               for(j= 0; j< hdr->dictsize; j++)\
-                       if( hdr->dict[j] == *val) break;\
+               MOSfind(j,*val,0,hdr->dictsize);\
+               if( j == hdr->dictsize || dict[j] != *val )\
+                       break;\
        }\
        if ( i > MOSlimit() ) i = MOSlimit();\
        if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned( 
MosaicBlkSize + i,TPE);\
@@ -139,8 +144,15 @@ MOSskip_dictionary(Client cntxt, MOStask
                                dict[i] = dict[j];\
                                dict[j] = v;\
                        }\
+       hdr->bits = 1;\
+       hdr->mask =1;\
+       for( i=1 ; i < hdr->dictsize-1; i *=2){\
+               hdr->bits++;\
+               hdr->mask = (hdr->mask <<1) | 1;\
+       }\
 }
 
+
 void
 MOScreatedictionary(Client cntxt, MOStask task)
 {  int i, j;
@@ -153,7 +165,7 @@ MOScreatedictionary(Client cntxt, MOStas
        hdr->dictsize = 0;
        switch(ATOMstorage(task->type)){
        case TYPE_sht: makeDict(sht); break;
-       case TYPE_lng: makeDict(lng); break;
+       case TYPE_int: makeDict(int); break;
        case TYPE_oid: makeDict(oid); break;
        case TYPE_wrd: makeDict(wrd); break;
        case TYPE_flt: makeDict(flt); break;
@@ -161,9 +173,9 @@ MOScreatedictionary(Client cntxt, MOStas
 #ifdef HAVE_HGE
        case TYPE_hge: makeDict(hge); break;
 #endif
-       case TYPE_int:
-               {       int *val = (int*)task->src;
-                       int *dict = (int*)hdr->dict,v;
+       case TYPE_lng:
+               {       lng *val = (lng*)task->src;
+                       lng *dict = (lng*)hdr->dict,v;
 
                        for(i =0; i< (int) task->elm; i++, val++){
                                for(j= 0; j< hdr->dictsize; j++)
@@ -192,10 +204,16 @@ MOScreatedictionary(Client cntxt, MOStas
                                                dict[i] = dict[j];
                                                dict[j] = v;
                                        }
+                       hdr->bits = 1;
+                       hdr->mask =1;
+                       for( i=1 ; i < hdr->dictsize-1; i *=2){
+                               hdr->bits++;
+                               hdr->mask = (hdr->mask <<1) | 1;
+                       }
                }
        }
+       MOSdump_dictionary(cntxt, task);
 #ifdef _DEBUG_MOSAIC_
-       MOSdump_dictionary(cntxt, task);
 #endif
 }
 // calculate the expected reduction using DICT in terms of elements compressed
@@ -210,7 +228,7 @@ MOSestimate_dictionary(Client cntxt, MOS
        switch(ATOMstorage(task->type)){
        //case TYPE_bte: CASE_bit: no compression achievable
        case TYPE_sht: estimateDict(sht); break;
-       case TYPE_int: estimateDict(int); break;
+       case TYPE_lng: estimateDict(lng); break;
        case TYPE_oid: estimateDict(oid); break;
        case TYPE_wrd: estimateDict(wrd); break;
        case TYPE_flt: estimateDict(flt); break;
@@ -218,17 +236,16 @@ MOSestimate_dictionary(Client cntxt, MOS
 #ifdef HAVE_HGE
        case TYPE_hge: estimateDict(hge); break;
 #endif
-       case TYPE_lng:
-               {       lng *val = (lng*)task->src;
+       case TYPE_int:
+               {       int *val = (int*)task->src;
+                       int *dict = (int*)hdr->dict;
                        for(i =0; i<task->elm; i++, val++){
-                               for(j= 0; j< hdr->dictsize; j++)
-                                       if( hdr->dict[j] == *val) 
-                                               break;
-                               if ( j == hdr->dictsize)
+                               MOSfind(j,*val,0,hdr->dictsize);
+                               if( j == hdr->dictsize || dict[j] != *val)
                                        break;
                        }
                        if ( i > MOSlimit() ) i = MOSlimit();
-                       if(i) factor = (flt) ((int)i * sizeof(lng)) / 
wordaligned( MosaicBlkSize + i,lng);
+                       if(i) factor = (flt) ((int)i * sizeof(int)) / 
wordaligned( MosaicBlkSize + i,lng);
                }
        }
 #ifdef _DEBUG_MOSAIC_
@@ -238,24 +255,38 @@ MOSestimate_dictionary(Client cntxt, MOS
 }
 
 // insert a series of values into the compressor block using dictionary
+#define dictcompress()\
+cid = (i * hdr->bits)/64;\
+lshift= 64 -((i * hdr->bits) % 64) ;\
+if ( lshift > hdr->bits){\
+       base[cid]= base[cid] | (((unsigned long)j) << (lshift-hdr->bits));\
+}else{ \
+       rshift= 64 -  ((i+1) * hdr->bits) % 64;\
+       base[cid]= base[cid] | (((unsigned long)j) >> (hdr->bits-lshift));\
+       base[cid+1]= 0 | (((unsigned long)j)  << rshift);\
+}
+
 #define DICTcompress(TPE)\
 {      TPE *val = (TPE*)task->src;\
        TPE *dict = (TPE*)hdr->dict;\
        BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
        task->dst = ((char*) task->blk)+ MosaicBlkSize;\
+       base  = (unsigned long*) task->dst; \
+       base[0]=0;\
        for(i =0; i<limit; i++, val++){\
-               for(j= 0; j< hdr->dictsize; j++)\
-                       if( dict[j] == *val) {\
-                               MOSincCnt(blk,1);\
-                               *task->dst++ = (char) j;\
-                               break;\
-                       }\
-               if ( j == hdr->dictsize) \
+               MOSfind(j,*val,0,hdr->dictsize);\
+               if(j == hdr->dictsize || dict[j] != *val) \
                        break;\
+               else {\
+                       MOSincCnt(blk,1);\
+                       dictcompress();\
+               }\
        }\
+       assert(i);\
        task->src = (char*) val;\
 }
 
+
 void
 MOScompress_dictionary(Client cntxt, MOStask task)
 {
@@ -263,6 +294,8 @@ MOScompress_dictionary(Client cntxt, MOS
        int j;
        MosaicBlk blk = task->blk;
        MosaicHdr hdr = task->hdr;
+       int cid, lshift, rshift;
+       unsigned long *base;
 
        (void) cntxt;
        MOSsetTag(blk,MOSAIC_DICT);
@@ -282,29 +315,58 @@ MOScompress_dictionary(Client cntxt, MOS
                {       int *val = (int*)task->src;
                        int *dict = (int*)hdr->dict;
                        BUN limit = task->elm > MOSlimit()? MOSlimit(): 
task->elm;
+
                        task->dst = ((char*) task->blk)+ MosaicBlkSize;
+                       base  = (unsigned long*) task->dst; // start of bit 
vector
+                       base[0]=0;
                        for(i =0; i<limit; i++, val++){
-                               for(j= 0; j< hdr->dictsize; j++)
-                                       if( dict[j] == *val) {
-                                               MOSincCnt(blk,1);
-                                               *task->dst++ = (char) j;
-                                               break;
+                               MOSfind(j,*val,0,hdr->dictsize);
+                               //mnstr_printf(cntxt->fdout,"compress %d %d 
bits %d\n",*val,j,hdr->bits);
+                               if( j == hdr->dictsize || dict[j] != *val )
+                                       break;
+                               else {
+                                       MOSincCnt(blk,1);
+                                       cid = (i * hdr->bits)/64;
+                                       lshift= 64 -((i * hdr->bits) % 64) ;
+                                       if ( lshift > hdr->bits){
+                                               base[cid]= base[cid] | 
(((unsigned long)j) << (lshift-hdr->bits));
+                                               
//mnstr_printf(cntxt->fdout,"[%d] shift %d rbits %d \n",cid, lshift, hdr->bits);
+                                       }else{ 
+                                               rshift= 64 -  ((i+1) * 
hdr->bits) % 64;
+                                               base[cid]= base[cid] | 
(((unsigned long)j) >> (hdr->bits-lshift));
+                                               base[cid+1]= 0 | (((unsigned 
long)j)  << rshift);
+                                               
//mnstr_printf(cntxt->fdout,"[%d] shift %d %d val %o %o\n", cid, lshift, rshift,
+                                                       //(j >> 
(hdr->bits-lshift)),  (j <<rshift));
                                        }
-                               if ( j == hdr->dictsize) 
-                                       break;
+                               } 
                        }
+                       assert(i);
                        task->src = (char*) val;
                }
        }
 }
 
 // the inverse operator, extend the src
+#define dictdecompress(I)\
+cid = (I * hdr->bits)/64;\
+lshift= 64 -((I * hdr->bits) % 64) ;\
+if ( lshift >hdr->bits){\
+       j = (base[cid]>> (lshift-hdr->bits)) & ((unsigned long)hdr->mask);\
+  }else{ \
+       rshift= 64 -  ((I+1) * hdr->bits) % 64;\
+       m1 = (base[cid] & ( ((unsigned long)hdr->mask) >> (hdr->bits-lshift)));\
+       m2 = base[cid+1] >>rshift;\
+       j= 0  | (m1 <<(hdr->bits-lshift)) | m2;\
+  }
+
 #define DICTdecompress(TPE)\
 {      TPE *dict =(TPE*)((char*)hdr->dict);\
-       bte *idx = (bte*)(((char*)blk) +  MosaicBlkSize);\
        BUN lim = MOSgetCnt(blk);\
-       for(i = 0; i < lim; i++,idx++)\
-               ((TPE*)task->src)[i] = dict[ (bte)*idx];\
+       base = (unsigned long*)(((char*)blk) +  MosaicBlkSize);\
+       for(i = 0; i < lim; i++){\
+               dictdecompress(i);\
+               ((TPE*)task->src)[i] = dict[j];\
+       }\
        task->src += i * sizeof(TPE);\
 }
 
@@ -314,6 +376,9 @@ MOSdecompress_dictionary(Client cntxt, M
        MosaicBlk blk = task->blk;
        MosaicHdr hdr = task->hdr;
        BUN i;
+       bte j,m1,m2;
+       int cid, lshift, rshift;
+       unsigned long *base;
        (void) cntxt;
 
        switch(ATOMstorage(task->type)){
@@ -329,10 +394,24 @@ MOSdecompress_dictionary(Client cntxt, M
 #endif
        case TYPE_int:
                {       int *dict =(int*)((char*)hdr->dict);
-                       bte *idx = (bte*)(((char*)blk) + MosaicBlkSize);
                        BUN lim = MOSgetCnt(blk);
-                       for(i = 0; i < lim; i++,idx++)
-                               ((int*)task->src)[i] = dict[ (bte)*idx];
+                       base  = (unsigned long*)(((char*)blk) + MosaicBlkSize);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to