Changeset: 03142818494b for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03142818494b Modified Files: monetdb5/modules/mal/mosaic_dictionary.c Branch: mosaic Log Message:
Moving dictionary indexing to bit bits There is still an issue with the bit stuffing as shown by mosaic_mal test. diffs (truncated from 839 to 300 lines): diff --git a/monetdb5/modules/mal/mosaic_dictionary.c b/monetdb5/modules/mal/mosaic_dictionary.c --- a/monetdb5/modules/mal/mosaic_dictionary.c +++ b/monetdb5/modules/mal/mosaic_dictionary.c @@ -33,22 +33,15 @@ void MOSadvance_dictionary(Client cntxt, MOStask task) { + int *dst = (int*) (((char*) task->blk) + MosaicBlkSize); + long cnt = MOSgetCnt(task->blk); + long bytes; (void) cntxt; - task->start += MOSgetCnt(task->blk); - switch(ATOMstorage(task->type)){ - //case TYPE_bte: CASE_bit: no compression achievable - case TYPE_sht: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),sht)); break; - case TYPE_int: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),int)); break; - case TYPE_lng: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),lng)); break; - case TYPE_oid: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),oid)); break; - case TYPE_wrd: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),wrd)); break; - case TYPE_flt: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),flt)); break; - case TYPE_dbl: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),dbl)); break; -#ifdef HAVE_HGE - case TYPE_hge: task->blk = (MosaicBlk)( ((char*)task->blk) + wordaligned(MosaicBlkSize + sizeof(bte) * MOSgetCnt(task->blk),hge)); break; -#endif - } + assert(cnt > 0); + task->start += (oid) cnt; + bytes = (cnt * task->hdr->bits)/8 + (((cnt * task->hdr->bits) %8) != 0); + task->blk = (MosaicBlk) (((char*) dst) + wordaligned(bytes, lng)); } /* Beware, the dump routines use the compressed part of the task */ @@ -59,7 +52,7 @@ MOSdump_dictionary(Client cntxt, MOStask int i; void *val = (void*)hdr->dict; - mnstr_printf(cntxt->fdout,"#"); + mnstr_printf(cntxt->fdout,"# bits %d",hdr->bits); switch(ATOMstorage(task->type)){ case TYPE_sht: for(i=0; i< hdr->dictsize; i++) @@ -99,11 +92,23 @@ MOSskip_dictionary(Client cntxt, MOStask task->blk = 0; // ENDOFLIST } +#define MOSfind(X,VAL,F,L)\ +{ int m,f= F, l=L; \ + while( l-f > 0 ) { \ + m = f + (l-f)/2;\ + if ( VAL < dict[m] ) l=m-1; else f= m;\ + if ( VAL > dict[m] ) f=m+1; else l= m;\ + }\ + X= f;\ +} + #define estimateDict(TPE)\ { TPE *val = (TPE*)task->src;\ + TPE *dict= (TPE*)hdr->dict;\ for(i =0; i<task->elm; i++, val++){\ - for(j= 0; j< hdr->dictsize; j++)\ - if( hdr->dict[j] == *val) break;\ + MOSfind(j,*val,0,hdr->dictsize);\ + if( j == hdr->dictsize || dict[j] != *val )\ + break;\ }\ if ( i > MOSlimit() ) i = MOSlimit();\ if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned( MosaicBlkSize + i,TPE);\ @@ -139,8 +144,15 @@ MOSskip_dictionary(Client cntxt, MOStask dict[i] = dict[j];\ dict[j] = v;\ }\ + hdr->bits = 1;\ + hdr->mask =1;\ + for( i=1 ; i < hdr->dictsize-1; i *=2){\ + hdr->bits++;\ + hdr->mask = (hdr->mask <<1) | 1;\ + }\ } + void MOScreatedictionary(Client cntxt, MOStask task) { int i, j; @@ -153,7 +165,7 @@ MOScreatedictionary(Client cntxt, MOStas hdr->dictsize = 0; switch(ATOMstorage(task->type)){ case TYPE_sht: makeDict(sht); break; - case TYPE_lng: makeDict(lng); break; + case TYPE_int: makeDict(int); break; case TYPE_oid: makeDict(oid); break; case TYPE_wrd: makeDict(wrd); break; case TYPE_flt: makeDict(flt); break; @@ -161,9 +173,9 @@ MOScreatedictionary(Client cntxt, MOStas #ifdef HAVE_HGE case TYPE_hge: makeDict(hge); break; #endif - case TYPE_int: - { int *val = (int*)task->src; - int *dict = (int*)hdr->dict,v; + case TYPE_lng: + { lng *val = (lng*)task->src; + lng *dict = (lng*)hdr->dict,v; for(i =0; i< (int) task->elm; i++, val++){ for(j= 0; j< hdr->dictsize; j++) @@ -192,10 +204,16 @@ MOScreatedictionary(Client cntxt, MOStas dict[i] = dict[j]; dict[j] = v; } + hdr->bits = 1; + hdr->mask =1; + for( i=1 ; i < hdr->dictsize-1; i *=2){ + hdr->bits++; + hdr->mask = (hdr->mask <<1) | 1; + } } } + MOSdump_dictionary(cntxt, task); #ifdef _DEBUG_MOSAIC_ - MOSdump_dictionary(cntxt, task); #endif } // calculate the expected reduction using DICT in terms of elements compressed @@ -210,7 +228,7 @@ MOSestimate_dictionary(Client cntxt, MOS switch(ATOMstorage(task->type)){ //case TYPE_bte: CASE_bit: no compression achievable case TYPE_sht: estimateDict(sht); break; - case TYPE_int: estimateDict(int); break; + case TYPE_lng: estimateDict(lng); break; case TYPE_oid: estimateDict(oid); break; case TYPE_wrd: estimateDict(wrd); break; case TYPE_flt: estimateDict(flt); break; @@ -218,17 +236,16 @@ MOSestimate_dictionary(Client cntxt, MOS #ifdef HAVE_HGE case TYPE_hge: estimateDict(hge); break; #endif - case TYPE_lng: - { lng *val = (lng*)task->src; + case TYPE_int: + { int *val = (int*)task->src; + int *dict = (int*)hdr->dict; for(i =0; i<task->elm; i++, val++){ - for(j= 0; j< hdr->dictsize; j++) - if( hdr->dict[j] == *val) - break; - if ( j == hdr->dictsize) + MOSfind(j,*val,0,hdr->dictsize); + if( j == hdr->dictsize || dict[j] != *val) break; } if ( i > MOSlimit() ) i = MOSlimit(); - if(i) factor = (flt) ((int)i * sizeof(lng)) / wordaligned( MosaicBlkSize + i,lng); + if(i) factor = (flt) ((int)i * sizeof(int)) / wordaligned( MosaicBlkSize + i,lng); } } #ifdef _DEBUG_MOSAIC_ @@ -238,24 +255,38 @@ MOSestimate_dictionary(Client cntxt, MOS } // insert a series of values into the compressor block using dictionary +#define dictcompress()\ +cid = (i * hdr->bits)/64;\ +lshift= 64 -((i * hdr->bits) % 64) ;\ +if ( lshift > hdr->bits){\ + base[cid]= base[cid] | (((unsigned long)j) << (lshift-hdr->bits));\ +}else{ \ + rshift= 64 - ((i+1) * hdr->bits) % 64;\ + base[cid]= base[cid] | (((unsigned long)j) >> (hdr->bits-lshift));\ + base[cid+1]= 0 | (((unsigned long)j) << rshift);\ +} + #define DICTcompress(TPE)\ { TPE *val = (TPE*)task->src;\ TPE *dict = (TPE*)hdr->dict;\ BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\ task->dst = ((char*) task->blk)+ MosaicBlkSize;\ + base = (unsigned long*) task->dst; \ + base[0]=0;\ for(i =0; i<limit; i++, val++){\ - for(j= 0; j< hdr->dictsize; j++)\ - if( dict[j] == *val) {\ - MOSincCnt(blk,1);\ - *task->dst++ = (char) j;\ - break;\ - }\ - if ( j == hdr->dictsize) \ + MOSfind(j,*val,0,hdr->dictsize);\ + if(j == hdr->dictsize || dict[j] != *val) \ break;\ + else {\ + MOSincCnt(blk,1);\ + dictcompress();\ + }\ }\ + assert(i);\ task->src = (char*) val;\ } + void MOScompress_dictionary(Client cntxt, MOStask task) { @@ -263,6 +294,8 @@ MOScompress_dictionary(Client cntxt, MOS int j; MosaicBlk blk = task->blk; MosaicHdr hdr = task->hdr; + int cid, lshift, rshift; + unsigned long *base; (void) cntxt; MOSsetTag(blk,MOSAIC_DICT); @@ -282,29 +315,58 @@ MOScompress_dictionary(Client cntxt, MOS { int *val = (int*)task->src; int *dict = (int*)hdr->dict; BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm; + task->dst = ((char*) task->blk)+ MosaicBlkSize; + base = (unsigned long*) task->dst; // start of bit vector + base[0]=0; for(i =0; i<limit; i++, val++){ - for(j= 0; j< hdr->dictsize; j++) - if( dict[j] == *val) { - MOSincCnt(blk,1); - *task->dst++ = (char) j; - break; + MOSfind(j,*val,0,hdr->dictsize); + //mnstr_printf(cntxt->fdout,"compress %d %d bits %d\n",*val,j,hdr->bits); + if( j == hdr->dictsize || dict[j] != *val ) + break; + else { + MOSincCnt(blk,1); + cid = (i * hdr->bits)/64; + lshift= 64 -((i * hdr->bits) % 64) ; + if ( lshift > hdr->bits){ + base[cid]= base[cid] | (((unsigned long)j) << (lshift-hdr->bits)); + //mnstr_printf(cntxt->fdout,"[%d] shift %d rbits %d \n",cid, lshift, hdr->bits); + }else{ + rshift= 64 - ((i+1) * hdr->bits) % 64; + base[cid]= base[cid] | (((unsigned long)j) >> (hdr->bits-lshift)); + base[cid+1]= 0 | (((unsigned long)j) << rshift); + //mnstr_printf(cntxt->fdout,"[%d] shift %d %d val %o %o\n", cid, lshift, rshift, + //(j >> (hdr->bits-lshift)), (j <<rshift)); } - if ( j == hdr->dictsize) - break; + } } + assert(i); task->src = (char*) val; } } } // the inverse operator, extend the src +#define dictdecompress(I)\ +cid = (I * hdr->bits)/64;\ +lshift= 64 -((I * hdr->bits) % 64) ;\ +if ( lshift >hdr->bits){\ + j = (base[cid]>> (lshift-hdr->bits)) & ((unsigned long)hdr->mask);\ + }else{ \ + rshift= 64 - ((I+1) * hdr->bits) % 64;\ + m1 = (base[cid] & ( ((unsigned long)hdr->mask) >> (hdr->bits-lshift)));\ + m2 = base[cid+1] >>rshift;\ + j= 0 | (m1 <<(hdr->bits-lshift)) | m2;\ + } + #define DICTdecompress(TPE)\ { TPE *dict =(TPE*)((char*)hdr->dict);\ - bte *idx = (bte*)(((char*)blk) + MosaicBlkSize);\ BUN lim = MOSgetCnt(blk);\ - for(i = 0; i < lim; i++,idx++)\ - ((TPE*)task->src)[i] = dict[ (bte)*idx];\ + base = (unsigned long*)(((char*)blk) + MosaicBlkSize);\ + for(i = 0; i < lim; i++){\ + dictdecompress(i);\ + ((TPE*)task->src)[i] = dict[j];\ + }\ task->src += i * sizeof(TPE);\ } @@ -314,6 +376,9 @@ MOSdecompress_dictionary(Client cntxt, M MosaicBlk blk = task->blk; MosaicHdr hdr = task->hdr; BUN i; + bte j,m1,m2; + int cid, lshift, rshift; + unsigned long *base; (void) cntxt; switch(ATOMstorage(task->type)){ @@ -329,10 +394,24 @@ MOSdecompress_dictionary(Client cntxt, M #endif case TYPE_int: { int *dict =(int*)((char*)hdr->dict); - bte *idx = (bte*)(((char*)blk) + MosaicBlkSize); BUN lim = MOSgetCnt(blk); - for(i = 0; i < lim; i++,idx++) - ((int*)task->src)[i] = dict[ (bte)*idx]; + base = (unsigned long*)(((char*)blk) + MosaicBlkSize); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list