Changeset: 03cd42acce72 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=03cd42acce72
Modified Files:
    monetdb5/modules/mosaic/mosaic.c
    monetdb5/modules/mosaic/mosaic.h
    monetdb5/modules/mosaic/mosaic_dictionary.c
    monetdb5/modules/mosaic/mosaic_frame.c
Branch: mosaic
Log Message:
Clean up the code.
Easier to use global dictionary and delta frame.
Use the bitvector code in gdk.

diffs (truncated from 1209 to 300 lines):

diff --git a/monetdb5/modules/mosaic/mosaic.c b/monetdb5/modules/mosaic/mosaic.c --- a/monetdb5/modules/mosaic/mosaic.c +++ b/monetdb5/modules/mosaic/mosaic.c @@ -653,6 +653,7 @@ MOSdecompressInternal(Client cntxt, bat // continue with all work bsrc->batDirty = 1; + BATsettrivprop(bsrc); MCexitMaintenance(cntxt); BBPkeepref( *ret = bsrc->batCacheid); @@ -684,9 +685,9 @@ MOSdecompressInternal(Client cntxt, bat } if(error) mnstr_printf(cntxt->fdout,"#incompatible compression\n"); - GDKfree(task); task->timer = GDKusec() - task->timer; + GDKfree(task); return MAL_SUCCEED; } @@ -874,9 +875,7 @@ MOSsubselect(Client cntxt, MalBlkPtr mb, BATsetcount(bn,cnt); bn->tnil = 0; bn->tnonil = 1; - bn->tsorted = 1; - bn->trevsorted = BATcount(bn) <= 1; - bn->tkey = 1; + bn->tsorted = bn->trevsorted = cnt <=1; *getArgReference_bat(stk, pci, 0) = bn->batCacheid; GDKfree(task); BBPkeepref(bn->batCacheid); @@ -1005,9 +1004,7 @@ str MOSthetasubselect(Client cntxt, MalB BATsetcount(bn,cnt); bn->tnil = 0; bn->tnonil = 1; - bn->tsorted = 1; - bn->trevsorted = BATcount(bn) <= 1; - bn->tkey = 1; + bn->tsorted = bn->trevsorted = cnt <= 1; BBPkeepref(*getArgReference_bat(stk,pci,0)= bn->batCacheid); } GDKfree(task); @@ -1135,9 +1132,7 @@ str MOSprojection(Client cntxt, MalBlkPt BATsetcount(bn,task->cnt); bn->tnil = 0; bn->tnonil = 1; - bn->tsorted = 1; - bn->trevsorted = BATcount(bn) <= 1; - bn->tkey = 1; + bn->tsorted = bn->trevsorted = cnt <= 1; BBPkeepref(*ret = bn->batCacheid); GDKfree(task); return msg; @@ -1262,11 +1257,8 @@ MOSsubjoin(Client cntxt, MalBlkPtr mb, M assert(0); } - bln->tsorted = cnt <= 1; - bln->trevsorted = cnt <= 1; - - brn->tsorted = cnt<= 1; - brn->trevsorted = cnt <= 1; + BATsettrivprop(bln); + BATsettrivprop(brn); if( swapped){ BBPkeepref(*ret= brn->batCacheid); BBPkeepref(*ret2= bln->batCacheid); diff --git 
a/monetdb5/modules/mosaic/mosaic.h b/monetdb5/modules/mosaic/mosaic.h --- a/monetdb5/modules/mosaic/mosaic.h +++ b/monetdb5/modules/mosaic/mosaic.h @@ -81,18 +81,33 @@ typedef struct MOSAICHEADER{ bte mask, bits, framebits; // global compression type properties int dictsize; // used by dictionary compression, it is a small table int framesize; // used by frame compression, it is a small table + union{ + sht valsht[256]; + int valint[256]; + lng vallng[256]; + oid valoid[256]; + flt valflt[256]; + dbl valdbl[256]; #ifdef HAVE_HGE - hge dict[256]; - hge frame[256]; -#else - lng dict[256]; - lng frame[256]; + hge valhge[256]; #endif + }dict; + lng dictfreq[256];// keep track on their use + union{ + sht valsht[256]; + int valint[256]; + lng vallng[256]; + oid valoid[256]; + flt valflt[256]; + dbl valdbl[256]; +#ifdef HAVE_HGE + hge valhge[256]; +#endif + }frame; // collect compression statistics for the particular task flt ratio; //compresion ratio lng blks[MOSAIC_METHODS]; lng elms[MOSAIC_METHODS]; - lng dictfreq[256];// keep track on their use lng framefreq[256]; } * MosaicHdr; @@ -110,7 +125,7 @@ typedef struct MOSAICBLK{ #define MOSincCnt(Blk,I) (assert((Blk)->cnt +I < MOSAICMAXCNT), (Blk)->cnt+= (unsigned int)(I)) /* The start of the encoding withing a Mosaic block */ -#define MOScodevector(Task) (((char*) Task->blk)+ MosaicBlkSize) +#define MOScodevector(Task) (((char*) (Task)->blk)+ MosaicBlkSize) /* Memory word alignement is type and platform dependent. 
* We use an encoding that fits the column type requirements diff --git a/monetdb5/modules/mosaic/mosaic_dictionary.c b/monetdb5/modules/mosaic/mosaic_dictionary.c --- a/monetdb5/modules/mosaic/mosaic_dictionary.c +++ b/monetdb5/modules/mosaic/mosaic_dictionary.c @@ -35,7 +35,7 @@ void MOSadvance_dictionary(Client cntxt, MOStask task) { - int *dst = (int*) (((char*) task->blk) + MosaicBlkSize); + int *dst = (int*) MOScodevector(task); BUN cnt = MOSgetCnt(task->blk); long bytes; (void) cntxt; @@ -51,40 +51,58 @@ MOSadvance_dictionary(Client cntxt, MOSt static void MOSdump_dictionaryInternal(char *buf, size_t len, MOStask task, int i) { - void *val = (void*)task->hdr->dict; switch(ATOMbasetype(task->type)){ case TYPE_sht: - snprintf(buf,len,"%hd", ((sht*) val)[i]); break; + snprintf(buf,len,"%hd", task->hdr->dict.valsht[i]); break; case TYPE_int: - snprintf(buf,len,"%d", ((int*) val)[i]); break; + snprintf(buf,len,"%d", task->hdr->dict.valint[i]); break; case TYPE_oid: - snprintf(buf,len,OIDFMT, ((oid*) val)[i]); break; + snprintf(buf,len,OIDFMT, task->hdr->dict.valoid[i]); break; case TYPE_lng: - snprintf(buf,len,LLFMT, ((lng*) val)[i]); break; + snprintf(buf,len,LLFMT, task->hdr->dict.vallng[i]); break; #ifdef HAVE_HGE case TYPE_hge: - snprintf(buf,len,"%.40g", (dbl) ((hge*) val)[i]); break; + snprintf(buf,len,"%.40g", (dbl) task->hdr->dict.valhge[i]); break; #endif case TYPE_flt: - snprintf(buf,len,"%f", ((flt*) val)[i]); break; + snprintf(buf,len,"%f", task->hdr->dict.valflt[i]); break; case TYPE_dbl: - snprintf(buf,len,"%g", ((dbl*) val)[i]); break; + snprintf(buf,len,"%g", task->hdr->dict.valdbl[i]); break; } } void MOSdump_dictionary(Client cntxt, MOStask task) { - int i; + int i,len= BUFSIZ; char buf[BUFSIZ]; - mnstr_printf(cntxt->fdout,"#bits %d",task->hdr->bits); + mnstr_printf(cntxt->fdout,"#dictionary bits %d dictsize %d",task->hdr->bits, task->hdr->dictsize); for(i=0; i< task->hdr->dictsize; i++){ MOSdump_dictionaryInternal(buf, BUFSIZ, task,i); 
mnstr_printf(cntxt->fdout,"[%d] %s ",i,buf); } mnstr_printf(cntxt->fdout,"\n"); + switch(ATOMbasetype(task->type)){ + case TYPE_sht: + snprintf(buf,len,"%hd %hd", task->hdr->checksum.sumsht,task->hdr->checksum2.sumsht); break; + case TYPE_int: + snprintf(buf,len,"%d %d", task->hdr->checksum.sumint,task->hdr->checksum2.sumint); break; + case TYPE_oid: + snprintf(buf,len,OIDFMT " " OIDFMT, task->hdr->checksum.sumoid,task->hdr->checksum2.sumoid); break; + case TYPE_lng: + snprintf(buf,len,LLFMT " " LLFMT, task->hdr->checksum.sumlng,task->hdr->checksum2.sumlng); break; +#ifdef HAVE_HGE + case TYPE_hge: + snprintf(buf,len,"%.40g %.40g", (dbl)task->hdr->checksum.sumhge,(dbl)task->hdr->checksum2.sumhge); break; +#endif + case TYPE_flt: + snprintf(buf,len,"%f %f", task->hdr->checksum.sumflt,task->hdr->checksum2.sumflt); break; + case TYPE_dbl: + snprintf(buf,len,"%g %g", task->hdr->checksum.sumdbl,task->hdr->checksum2.sumdbl); break; + } + mnstr_printf(cntxt->fdout,"#checksums %s\n",buf); } void @@ -130,19 +148,18 @@ MOSskip_dictionary(Client cntxt, MOStask task->blk = 0; // ENDOFLIST } -#define MOSfind(X,VAL,F,L)\ +#define MOSfind(Res,DICT,VAL,F,L)\ { int m,f= F, l=L; \ while( l-f > 0 ) { \ m = f + (l-f)/2;\ - if ( VAL < dict[m] ) l=m-1; else f= m;\ - if ( VAL > dict[m] ) f=m+1; else l= m;\ + if ( VAL < DICT[m] ) l=m-1; else f= m;\ + if ( VAL > DICT[m] ) f=m+1; else l= m;\ }\ - X= f;\ + Res= f;\ } #define estimateDict(TPE)\ { TPE *val = ((TPE*)task->src) + task->start;\ - TPE *dict= (TPE*)hdr->dict;\ BUN limit = task->stop - task->start > MOSlimit()? 
MOSlimit(): task->stop - task->start;\ if( task->range[MOSAIC_DICT] > task->start){\ i = task->range[MOSAIC_DICT] - task->start;\ @@ -155,8 +172,8 @@ MOSskip_dictionary(Client cntxt, MOStask return factor;\ }\ for(i =0; i<limit; i++, val++){\ - MOSfind(j,*val,0,hdr->dictsize);\ - if( j == hdr->dictsize || dict[j] != *val )\ + MOSfind(j,hdr->dict.val##TPE,*val,0,hdr->dictsize);\ + if( j == hdr->dictsize || hdr->dict.val##TPE[j] != *val )\ break;\ }\ if( i * sizeof(TPE) <= wordaligned( MosaicBlkSize + i,TPE))\ @@ -167,12 +184,11 @@ MOSskip_dictionary(Client cntxt, MOStask // store it in the compressed heap header directly // filter out the most frequent ones #define makeDict(TPE)\ -{ TPE *val = ((TPE*)task->src) + task->start;\ - TPE *dict = (TPE*)hdr->dict,v;\ +{ TPE v,*val = ((TPE*)task->src) + task->start;\ BUN limit = task->stop - task->start > MOSlimit()? MOSlimit(): task->stop - task->start;\ for(i = 0; i< limit; i++, val++){\ for(j= 0; j< hdr->dictsize; j++)\ - if( dict[j] == *val) break;\ + if( task->hdr->dict.val##TPE[j] == *val) break;\ if ( j == hdr->dictsize){\ if ( hdr->dictsize == 256){\ int min = 0;\ @@ -182,7 +198,7 @@ MOSskip_dictionary(Client cntxt, MOStask cnt[j]=0;\ break;\ }\ - dict[j] = *val;\ + task->hdr->dict.val##TPE[j] = *val;\ cnt[j]++;\ hdr->dictsize++;\ } else\ @@ -190,10 +206,10 @@ MOSskip_dictionary(Client cntxt, MOStask }\ for(k=0; k< hdr->dictsize; k++)\ for(j=k+1; j< hdr->dictsize; j++)\ - if(dict[k] >dict[j]){\ - v= dict[k];\ - dict[k] = dict[j];\ - dict[j] = v;\ + if(task->hdr->dict.val##TPE[k] >task->hdr->dict.val##TPE[j]){\ + v = task->hdr->dict.val##TPE[k];\ + task->hdr->dict.val##TPE[k] = task->hdr->dict.val##TPE[j];\ + task->hdr->dict.val##TPE[j] = v;\ }\ hdr->bits = 1;\ hdr->mask =1;\ @@ -256,7 +272,6 @@ MOSestimate_dictionary(Client cntxt, MOS #endif case TYPE_lng: { lng *val = ((lng*)task->src) + task->start; - lng *dict = (lng*)hdr->dict; // assume uniform compression statistics if( task->range[MOSAIC_DICT] > task->start){ 
i = task->range[MOSAIC_DICT] - task->start; @@ -270,8 +285,8 @@ MOSestimate_dictionary(Client cntxt, MOS } for(i =task->start; i<task->stop; i++, val++){ - MOSfind(j,*val,0,hdr->dictsize); - if( j == hdr->dictsize || dict[j] != *val) + MOSfind(j,task->hdr->dict.vallng,*val,0,hdr->dictsize); + if( j == hdr->dictsize || task->hdr->dict.vallng[j] != *val) break; } i -= task->start; @@ -297,17 +312,17 @@ MOSestimate_dictionary(Client cntxt, MOS #define DICTcompress(TPE)\ { TPE *val = ((TPE*)task->src) + task->start;\ - TPE *dict = (TPE*)hdr->dict;\ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list