Changeset: 1b3bee1700ce for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1b3bee1700ce
Modified Files:
	monetdb5/modules/mosaic/mosaic_prefix.c
Branch: mosaic
Log Message:
Cleaning up prefix compression For the (de)compress actions we simply need the bits diffs (truncated from 783 to 300 lines): diff --git a/monetdb5/modules/mosaic/mosaic_prefix.c b/monetdb5/modules/mosaic/mosaic_prefix.c --- a/monetdb5/modules/mosaic/mosaic_prefix.c +++ b/monetdb5/modules/mosaic/mosaic_prefix.c @@ -22,7 +22,8 @@ * Bit_prefix compression * Factor out the leading bits from a series of values. * The prefix size is determined by the first two non-identical values. - * Prefix compression does not require type knowledge + * To use the bitvector, we limit the extracted tail to at most 32bits + * Prefix (de-)compression does not require type knowledge */ #include "monetdb_config.h" @@ -86,42 +87,35 @@ MOSlayout_prefix(Client cntxt, MOStask t input = cnt * ATOMsize(task->type); switch(size){ case 1: - { bte *dst = (bte*) (((char*) task->blk) + MosaicBlkSize); - bte mask = *dst++; - bte val = *dst++; + { unsigned char *dst = (unsigned char*) MOScodevector(task); + unsigned char mask = *dst++; + unsigned char val = *dst++; bits = val & (~mask); - // be aware that we use longs as bit vectors - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - output = wordaligned(bytes,bte); } break; case 2: - { sht *dst = (sht*) (((char*) task->blk) + MosaicBlkSize); - sht mask = *dst++; - sht val = *dst++; + { unsigned short *dst = (unsigned short*) MOScodevector(task); + unsigned short mask = *dst++; + unsigned short val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - output = wordaligned(bytes,sht); } break; case 4: - { int *dst = (int*) (((char*) task->blk) + MosaicBlkSize); - int mask = *dst++; - int val = *dst++; + { unsigned int *dst = (unsigned int*) MOScodevector(task); + unsigned int mask = *dst++; + unsigned int val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + 
(((MOSgetCnt(task->blk) * bits) %64) != 0)); - output = wordaligned(bytes, int); } break; case 8: - { lng *dst = (lng*) (((char*) task->blk) + MosaicBlkSize); - lng mask = *dst++; - lng val = *dst++; + { ulng *dst = (ulng*) MOScodevector(task); + ulng mask = *dst++; + ulng val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - output = wordaligned(bytes, lng); } } + bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + (((MOSgetCnt(task->blk) * bits) %32) != 0)); + output = wordaligned(bytes, int); BUNappend(binput, &input, FALSE); BUNappend(boutput, &output, FALSE); BUNappend(bproperties, "", FALSE); @@ -141,46 +135,43 @@ MOSadvance_prefix(Client cntxt, MOStask task->stop = task->elm; switch(size){ case 1: - { bte *dst = (bte*) (((char*) task->blk) + MosaicBlkSize); - bte mask = *dst++; - bte val = *dst++; + { unsigned char *dst = (unsigned char*) MOScodevector(task); + unsigned char mask = *dst++; + unsigned char val = *dst++; bits = val & (~mask); // be aware that we use longs as bit vectors - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes,lng)); - //mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int)); + bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + (((MOSgetCnt(task->blk) * bits) %32) != 0)); + task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, int)); } break; case 2: - { sht *dst = (sht*) (((char*) task->blk) + MosaicBlkSize); - sht mask = *dst++; - sht val = *dst++; + { unsigned short *dst = (unsigned short*) MOScodevector(task); + unsigned short mask = *dst++; + unsigned short val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - task->blk = (MosaicBlk) (((char*) dst) + 
wordaligned(bytes,lng)); - //mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int)); + bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + (((MOSgetCnt(task->blk) * bits) %32) != 0)); + task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, int)); } break; case 4: - { int *dst = (int*) (((char*) task->blk) + MosaicBlkSize); - int mask = *dst++; - int val = *dst++; + { unsigned int *dst = (unsigned int*) MOScodevector(task); + unsigned int mask = *dst++; + unsigned int val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, lng)); - //mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int)); + bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + (((MOSgetCnt(task->blk) * bits) %32) != 0)); + task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, int)); } break; case 8: - { lng *dst = (lng*) (((char*) task->blk) + MosaicBlkSize); - lng mask = *dst++; - lng val = *dst++; + { ulng *dst = (ulng*) MOScodevector(task); + ulng mask = *dst++; + ulng val = *dst++; bits = val & (~mask); - bytes = sizeof(ulng) * ((MOSgetCnt(task->blk) * bits)/64 + (((MOSgetCnt(task->blk) * bits) %64) != 0)); - task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, lng)); - //mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int)); + bytes = sizeof(int) * ((MOSgetCnt(task->blk) * bits)/32 + (((MOSgetCnt(task->blk) * bits) %32) != 0)); + task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, int)); } } + mnstr_printf(cntxt->fdout,"advance mask width %d bytes %d %d \n",bits,bytes,(int)wordaligned(bytes,int)); } void @@ -208,7 +199,7 @@ flt MOSestimate_prefix(Client cntxt, MOStask task) { BUN i = 0; flt factor = 0.0; - int prefix = 0,bits, size; + int prefixbits = 0,bits, size; 
lng store; BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start; (void) cntxt; @@ -219,7 +210,7 @@ MOSestimate_prefix(Client cntxt, MOStask if( task->elm >= 2) switch(size){ case 1: - { bte *v = ((bte*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; + { unsigned char *v = ((unsigned char*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; // search first non-identical value for(i = 0;i < limit-1; i++, w++) if( *v != *w ){ @@ -229,11 +220,11 @@ MOSestimate_prefix(Client cntxt, MOStask if ( i == limit -1) break; Prefix(bits, mask, val, val2, 8); - if( prefix == 0) + if( prefixbits == 0) break; if( task->range[MOSAIC_PREFIX] > task->start +1 /* need at least two*/){ - bits = (task->range[MOSAIC_PREFIX] - task->start) * (8-prefix); + bits = (task->range[MOSAIC_PREFIX] - task->start) * (8-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(bte) + store,bte); if( store >= (flt)i * sizeof(bte)) @@ -246,7 +237,7 @@ MOSestimate_prefix(Client cntxt, MOStask if ( val != (*w & mask) ) break; } - bits = i * (8-prefix); + bits = i * (8-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(bte) + store,bte); if( store >= (flt)i * sizeof(bte)) @@ -257,7 +248,7 @@ MOSestimate_prefix(Client cntxt, MOStask } break; case 2: - { sht *v = ((sht*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; + { unsigned short *v = ((unsigned short*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; // search first non-identical value for(i = 0;i < limit-1;i++, w++) if( *v != *w ){ @@ -266,12 +257,12 @@ MOSestimate_prefix(Client cntxt, MOStask } if ( i == limit-1) break; - Prefix(prefix, mask, val, val2, 16); - if( prefix == 0) + Prefix(prefixbits, mask, val, val2, 16); + if( prefixbits == 0) break; if( task->range[MOSAIC_PREFIX] > task->start + 1){ - bits = (task->range[MOSAIC_PREFIX] - task->start) * (16-prefix); + bits = 
(task->range[MOSAIC_PREFIX] - task->start) * (16-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(sht) + store,sht); if( store >= (flt)i * sizeof(sht)) @@ -284,7 +275,7 @@ MOSestimate_prefix(Client cntxt, MOStask if ( val != (*w & mask) ) break; } - bits = i * (16-prefix); + bits = i * (16-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(sht) + store,lng); if( store >= (flt)i * sizeof(sht)) @@ -295,7 +286,7 @@ MOSestimate_prefix(Client cntxt, MOStask } break; case 4: - { int *v = ((int*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; + { unsigned int *v = ((unsigned int*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; // search first non-identical value for(i = 0;i < limit-1 ;i++, w++) if( *v != *w ){ @@ -305,11 +296,11 @@ MOSestimate_prefix(Client cntxt, MOStask if ( i == limit-1) break; Prefix(bits, mask, val, val2, 32); - if( prefix == 0) + if( prefixbits == 0) break; if( task->range[MOSAIC_PREFIX] > task->start + 1){ - bits = (task->range[MOSAIC_PREFIX] - task->start) * (32-prefix); + bits = (task->range[MOSAIC_PREFIX] - task->start) * (32-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(int) + store,lng); if( store > (flt)i * sizeof(int)) @@ -322,7 +313,7 @@ MOSestimate_prefix(Client cntxt, MOStask if ( val != (*w & mask) ) break; } - bits = i * (32-prefix); + bits = i * (32-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(int) + store,lng); if( store >= (flt)i * sizeof(int)) @@ -333,7 +324,7 @@ MOSestimate_prefix(Client cntxt, MOStask } break; case 8: - { lng *v = ((lng*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; + { ulng *v = ((ulng*) task->src) + task->start, *w= v+1, val= *v,val2= *w, mask; // search first non-identical value for(i = 0;i < limit-1 ;i++, w++) if( *v != *w ){ @@ -342,12 +333,12 @@ MOSestimate_prefix(Client cntxt, MOStask 
} if ( i == limit-1 ) break; - Prefix(prefix, mask, val, val2, 32); // at most 32bits for bitvector - if( prefix == 0) + Prefix(prefixbits, mask, val, val2, 32); // at most 32bits for bitvector mask + if( prefixbits == 0) break; if( task->range[MOSAIC_PREFIX] > task->start + 1){ - bits = (task->range[MOSAIC_PREFIX] - task->start) * (32-prefix); + bits = (task->range[MOSAIC_PREFIX] - task->start) * (32-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned( MosaicBlkSize + 2 * sizeof(lng) + store,lng); if( store >= (flt)i * sizeof(lng)) @@ -360,7 +351,7 @@ MOSestimate_prefix(Client cntxt, MOStask if ( val != (*w & mask) ) break; } - bits = i * (32-prefix); + bits = i * (32-prefixbits); store = bits/8 + ((bits % 8) >0); store = wordaligned(MosaicBlkSize + 2 * sizeof(lng) + store,lng); if( store >= (flt)i * sizeof(lng)) @@ -371,7 +362,7 @@ MOSestimate_prefix(Client cntxt, MOStask } } #ifdef _DEBUG_MOSAIC_ - mnstr_printf(cntxt->fdout,"#estimate prefix %d "BUNFMT" elm %4.3f factor\n",prefix,i,factor); + mnstr_printf(cntxt->fdout,"#estimate prefixbits %d "BUNFMT" elm %4.3f factor\n",prefixbits,i,factor); #endif task->factor[MOSAIC_PREFIX] = factor; task->range[MOSAIC_PREFIX] = task->start + i; @@ -383,8 +374,10 @@ MOSestimate_prefix(Client cntxt, MOStask void MOScompress_prefix(Client cntxt, MOStask task) { - BUN i, j =0 ; + BUN limit, i, j =0 ; int size; + int prefixbits,residu; + BitVector base; MosaicHdr hdr = task->hdr; MosaicBlk blk = task->blk; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list