Changeset: 1b3bee1700ce for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1b3bee1700ce
Modified Files:
        monetdb5/modules/mosaic/mosaic_prefix.c
Branch: mosaic
Log Message:

Cleaning up prefix compression
For the (de)compress actions we simply need the bits


diffs (truncated from 783 to 300 lines):

diff --git a/monetdb5/modules/mosaic/mosaic_prefix.c b/monetdb5/modules/mosaic/mosaic_prefix.c
--- a/monetdb5/modules/mosaic/mosaic_prefix.c
+++ b/monetdb5/modules/mosaic/mosaic_prefix.c
@@ -22,7 +22,8 @@
  * Bit_prefix compression
  * Factor out the leading bits from a series of values.
  * The prefix size is determined by the first two non-identical values.
- * Prefix compression does not require type knowledge
+ * To use the bitvector, we limit the extracted tail to at most 32bits
+ * Prefix (de-)compression does not require type knowledge
  */
 
 #include "monetdb_config.h"
@@ -86,42 +87,35 @@ MOSlayout_prefix(Client cntxt, MOStask t
        input = cnt * ATOMsize(task->type);
        switch(size){
        case 1:
-               {       bte *dst = (bte*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       bte mask = *dst++;
-                       bte val = *dst++;
+               {       unsigned char *dst = (unsigned char*)  
MOScodevector(task);
+                       unsigned char mask = *dst++;
+                       unsigned char val = *dst++;
                        bits = val & (~mask);
-                       // be aware that we use longs as bit vectors
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       output =  wordaligned(bytes,bte); 
                }
                break;
        case 2:
-               {       sht *dst = (sht*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       sht mask = *dst++;
-                       sht val = *dst++;
+               {       unsigned short *dst = (unsigned short*)  
MOScodevector(task);
+                       unsigned short mask = *dst++;
+                       unsigned short val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       output = wordaligned(bytes,sht); 
                }
                break;
        case 4:
-               {       int *dst = (int*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       int mask = *dst++;
-                       int val = *dst++;
+               {       unsigned int *dst = (unsigned int*)  
MOScodevector(task);
+                       unsigned int mask = *dst++;
+                       unsigned int val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       output = wordaligned(bytes, int); 
                }
                break;
        case 8:
-               {       lng *dst = (lng*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       lng mask = *dst++;
-                       lng val = *dst++;
+               {       ulng *dst = (ulng*)  MOScodevector(task);
+                       ulng mask = *dst++;
+                       ulng val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       output = wordaligned(bytes, lng); 
                }
        }
+       bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + 
(((MOSgetCnt(task->blk) * bits) %32) != 0));
+       output = wordaligned(bytes, int); 
        BUNappend(binput, &input, FALSE);
        BUNappend(boutput, &output, FALSE);
        BUNappend(bproperties, "", FALSE);
@@ -141,46 +135,43 @@ MOSadvance_prefix(Client cntxt, MOStask 
        task->stop = task->elm;
        switch(size){
        case 1:
-               {       bte *dst = (bte*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       bte mask = *dst++;
-                       bte val = *dst++;
+               {       unsigned char *dst = (unsigned char*)  
MOScodevector(task);
+                       unsigned char mask = *dst++;
+                       unsigned char val = *dst++;
                        bits = val & (~mask);
                        // be aware that we use longs as bit vectors
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes,lng)); 
-                       //mnstr_printf(cntxt->fdout,"advance mask width %d 
bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int));
+                       bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 
+ (((MOSgetCnt(task->blk) * bits) %32) != 0));
+                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, int)); 
                }
                break;
        case 2:
-               {       sht *dst = (sht*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       sht mask = *dst++;
-                       sht val = *dst++;
+               {       unsigned short *dst = (unsigned short*)  
MOScodevector(task);
+                       unsigned short mask = *dst++;
+                       unsigned short val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes,lng)); 
-                       //mnstr_printf(cntxt->fdout,"advance mask width %d 
bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int));
+                       bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 
+ (((MOSgetCnt(task->blk) * bits) %32) != 0));
+                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, int)); 
                }
                break;
        case 4:
-               {       int *dst = (int*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       int mask = *dst++;
-                       int val = *dst++;
+               {       unsigned int *dst = (unsigned int*)  
MOScodevector(task);
+                       unsigned int mask = *dst++;
+                       unsigned int val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, lng)); 
-                       //mnstr_printf(cntxt->fdout,"advance mask width %d 
bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int));
+                       bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 
+ (((MOSgetCnt(task->blk) * bits) %32) != 0));
+                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, int)); 
                }
                break;
        case 8:
-               {       lng *dst = (lng*)  (((char*) task->blk) + 
MosaicBlkSize);
-                       lng mask = *dst++;
-                       lng val = *dst++;
+               {       ulng *dst = (ulng*)  MOScodevector(task);
+                       ulng mask = *dst++;
+                       ulng val = *dst++;
                        bits = val & (~mask);
-                       bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * 
bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0));
-                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, lng)); 
-                       //mnstr_printf(cntxt->fdout,"advance mask width %d 
bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int));
+                       bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 
+ (((MOSgetCnt(task->blk) * bits) %32) != 0));
+                       task->blk = (MosaicBlk) (((char*) dst)  + 
wordaligned(bytes, int)); 
                }
        }
+       mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int));
 }
 
 void
@@ -208,7 +199,7 @@ flt
 MOSestimate_prefix(Client cntxt, MOStask task)
 {      BUN i = 0;
        flt factor = 0.0;
-       int prefix = 0,bits, size;
+       int prefixbits = 0,bits, size;
        lng store;
        BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): 
task->stop - task->start;
        (void) cntxt;
@@ -219,7 +210,7 @@ MOSestimate_prefix(Client cntxt, MOStask
        if( task->elm >= 2)
        switch(size){
        case 1:
-               {       bte *v = ((bte*) task->src) + task->start, *w= v+1, 
val= *v,val2= *w, mask;
+               {       unsigned char *v = ((unsigned char*) task->src) + 
task->start, *w= v+1, val= *v,val2= *w, mask;
                        // search first non-identical value
                        for(i = 0;i < limit-1; i++, w++)
                        if( *v != *w ){
@@ -229,11 +220,11 @@ MOSestimate_prefix(Client cntxt, MOStask
                        if ( i == limit -1)
                                break;
                        Prefix(bits, mask, val, val2, 8);
-                       if( prefix == 0)
+                       if( prefixbits == 0)
                                break;
 
                        if( task->range[MOSAIC_PREFIX] > task->start +1 /* need 
at least two*/){
-                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (8-prefix);
+                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (8-prefixbits);
                                store = bits/8 + ((bits % 8) >0);
                                store = wordaligned( MosaicBlkSize + 2 * 
sizeof(bte) +  store,bte);
                                if( store >= (flt)i * sizeof(bte))
@@ -246,7 +237,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                                if ( val != (*w & mask) )
                                        break;
                        }
-                       bits = i * (8-prefix);
+                       bits = i * (8-prefixbits);
                        store = bits/8 + ((bits % 8) >0);
                        store = wordaligned( MosaicBlkSize + 2 * sizeof(bte) +  
store,bte);
                        if( store >= (flt)i * sizeof(bte))
@@ -257,7 +248,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                }
                break;
        case 2:
-               {       sht *v = ((sht*) task->src) + task->start, *w= v+1, 
val= *v,val2= *w, mask;
+               {       unsigned short *v = ((unsigned short*) task->src) + 
task->start, *w= v+1, val= *v,val2= *w, mask;
                        // search first non-identical value
                        for(i = 0;i < limit-1;i++, w++)
                        if( *v != *w ){
@@ -266,12 +257,12 @@ MOSestimate_prefix(Client cntxt, MOStask
                        }
                        if ( i == limit-1)
                                break;
-                       Prefix(prefix, mask, val, val2, 16);
-                       if( prefix == 0)
+                       Prefix(prefixbits, mask, val, val2, 16);
+                       if( prefixbits == 0)
                                break;
 
                        if( task->range[MOSAIC_PREFIX] > task->start + 1){
-                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (16-prefix);
+                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (16-prefixbits);
                                store = bits/8 + ((bits % 8) >0);
                                store = wordaligned( MosaicBlkSize + 2 * 
sizeof(sht) +  store,sht);
                                if( store >= (flt)i * sizeof(sht))
@@ -284,7 +275,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                                if ( val != (*w & mask) )
                                        break;
                        }
-                       bits = i * (16-prefix);
+                       bits = i * (16-prefixbits);
                        store = bits/8 + ((bits % 8) >0);
                        store = wordaligned( MosaicBlkSize + 2 * sizeof(sht) +  
store,lng);
                        if( store >= (flt)i * sizeof(sht))
@@ -295,7 +286,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                }
                break;
        case 4:
-               {       int *v = ((int*) task->src) + task->start, *w= v+1, 
val= *v,val2= *w, mask;
+               {       unsigned int *v = ((unsigned int*) task->src) + 
task->start, *w= v+1, val= *v,val2= *w, mask;
                        // search first non-identical value
                        for(i = 0;i < limit-1 ;i++, w++)
                        if( *v != *w ){
@@ -305,11 +296,11 @@ MOSestimate_prefix(Client cntxt, MOStask
                        if ( i == limit-1)
                                break;
                        Prefix(bits, mask, val, val2, 32);
-                       if( prefix == 0)
+                       if( prefixbits == 0)
                                break;
 
                        if( task->range[MOSAIC_PREFIX] > task->start + 1){
-                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (32-prefix);
+                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (32-prefixbits);
                                store = bits/8 + ((bits % 8) >0);
                                store = wordaligned( MosaicBlkSize + 2 * 
sizeof(int) +  store,lng);
                                if( store > (flt)i * sizeof(int))
@@ -322,7 +313,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                                if ( val != (*w & mask) )
                                        break;
                        }
-                       bits = i * (32-prefix);
+                       bits = i * (32-prefixbits);
                        store = bits/8 + ((bits % 8) >0);
                        store = wordaligned( MosaicBlkSize + 2 * sizeof(int) +  
store,lng);
                        if( store >= (flt)i * sizeof(int))
@@ -333,7 +324,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                }
                break;
        case 8:
-               {       lng *v = ((lng*) task->src) + task->start, *w= v+1, 
val= *v,val2= *w, mask;
+               {       ulng *v = ((ulng*) task->src) + task->start, *w= v+1, 
val= *v,val2= *w, mask;
                        // search first non-identical value
                        for(i = 0;i < limit-1 ;i++, w++)
                        if( *v != *w ){
@@ -342,12 +333,12 @@ MOSestimate_prefix(Client cntxt, MOStask
                        }
                        if ( i == limit-1 )
                                break;
-                       Prefix(prefix, mask, val, val2, 32); // at most 32bits 
for bitvector
-                       if( prefix == 0)
+                       Prefix(prefixbits, mask, val, val2, 32); // at most 
32bits for bitvector mask
+                       if( prefixbits == 0)
                                break;
 
                        if( task->range[MOSAIC_PREFIX] > task->start + 1){
-                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (32-prefix);
+                               bits = (task->range[MOSAIC_PREFIX] - 
task->start) * (32-prefixbits);
                                store = bits/8 + ((bits % 8) >0);
                                store = wordaligned( MosaicBlkSize + 2 * 
sizeof(lng) +  store,lng);
                                if( store >= (flt)i * sizeof(lng))
@@ -360,7 +351,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                                if ( val != (*w & mask) )
                                        break;
                        }
-                       bits = i * (32-prefix);
+                       bits = i * (32-prefixbits);
                        store = bits/8 + ((bits % 8) >0);
                        store = wordaligned(MosaicBlkSize + 2 * sizeof(lng) + 
store,lng);
                        if( store >= (flt)i * sizeof(lng))
@@ -371,7 +362,7 @@ MOSestimate_prefix(Client cntxt, MOStask
                }
        }
 #ifdef _DEBUG_MOSAIC_
-       mnstr_printf(cntxt->fdout,"#estimate prefix %d "BUNFMT" elm %4.3f factor\n",prefix,i,factor);
+       mnstr_printf(cntxt->fdout,"#estimate prefixbits %d "BUNFMT" elm %4.3f factor\n",prefixbits,i,factor);
 #endif
        task->factor[MOSAIC_PREFIX] = factor;
        task->range[MOSAIC_PREFIX] = task->start + i;
@@ -383,8 +374,10 @@ MOSestimate_prefix(Client cntxt, MOStask
 void
 MOScompress_prefix(Client cntxt, MOStask task)
 {
-       BUN i, j =0 ;
+       BUN limit, i, j =0 ;
        int size;
+       int prefixbits,residu; 
+       BitVector base;
        MosaicHdr hdr = task->hdr;
        MosaicBlk blk = task->blk;
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to