Changeset: 54adcef69551 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/54adcef69551
Modified Files:
	sql/backends/monet5/dict.c
	sql/storage/bat/bat_storage.c
	sql/storage/sql_storage.h
Branch: default
Log Message:
add support for dicts using int offsets diffs (truncated from 630 to 300 lines): diff --git a/sql/backends/monet5/dict.c b/sql/backends/monet5/dict.c --- a/sql/backends/monet5/dict.c +++ b/sql/backends/monet5/dict.c @@ -33,7 +33,6 @@ BATmaxminpos_bte(BAT *o, bte m) bte minval = m<0?GDK_bte_min:0; /* Later once nils use a bitmask we can include -128 in the range */ bte maxval = m<0?GDK_bte_max:m; - assert(o->ttype == TYPE_bte); o->tnil = m<0?true:false; o->tnonil = m<=0?false:true; bte *op = (bte*)Tloc(o, 0); @@ -80,6 +79,33 @@ BATmaxminpos_sht(BAT *o, sht m) o->tmaxpos = maxpos; } +static void +BATmaxminpos_int(BAT *o, int m) +{ + BUN minpos = BUN_NONE, maxpos = BUN_NONE, p, q; + int minval = m<0?GDK_int_min:0; /* Later once nils use a bitmask we can include -32768 in the range */ + int maxval = m<0?GDK_int_max:m; + + assert(o->ttype == TYPE_int); + o->tnil = m<0?true:false; + o->tnonil = m<=0?false:true; + int *op = (int*)Tloc(o, 0); + BATloop(o, p, q) { + if (op[p] == minval) { + minpos = p; + break; + } + } + BATloop(o, p, q) { + if (op[p] == maxval) { + maxpos = p; + break; + } + } + o->tminpos = minpos; + o->tmaxpos = maxpos; +} + static str DICTcompress_intern(BAT **O, BAT **U, BAT *b, bool ordered, bool persists, bool smallest_type) { @@ -91,12 +117,12 @@ DICTcompress_intern(BAT **O, BAT **U, BA BUN cnt = BATcount(u); /* create hash on u */ - int tt = (cnt<256)?TYPE_bte:TYPE_sht; + int tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int; if (!smallest_type) { BUN cnt = BATcount(b); - tt = (cnt<256)?TYPE_bte:TYPE_sht; + tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int; } - if (cnt >= 64*1024) { + if (cnt >= INT_MAX) { bat_destroy(u); throw(SQL, "dict.compress", SQLSTATE(3F000) "dict compress: too many values"); } @@ -153,7 +179,7 @@ DICTcompress_intern(BAT **O, BAT **U, BA if (BATcount(u) > 0) BATmaxminpos_bte(o, (bte) (BATcount(u)-1)); - } else { + } else if (tt == TYPE_sht) { sht *op = (sht*)Tloc(o, 0); bool havenil = false; BATloop(b, p, q) { @@ 
-172,6 +198,25 @@ DICTcompress_intern(BAT **O, BAT **U, BA if (BATcount(u) > 0) BATmaxminpos_sht(o, (sht) (BATcount(u)-1)); + } else { + int *op = (int*)Tloc(o, 0); + bool havenil = false; + BATloop(b, p, q) { + BUN up = 0; + HASHloop(ui, ui.b->thash, up, BUNtail(bi, p)) { + op[p] = (int)up; + havenil |= is_int_nil(op[p]); + } + } + BATsetcount(o, BATcount(b)); + o->tsorted = (u->tsorted && bi.sorted); + o->trevsorted = false; + o->tnil = havenil; + o->tnonil = !havenil; + o->tkey = bi.key; + + if (BATcount(u) > 0) + BATmaxminpos_int(o, (int) (BATcount(u)-1)); } bat_iterator_end(&bi); *O = o; @@ -339,8 +384,7 @@ DICTdecompress_(BAT *o, BAT *u, role_t r } } } - } else { - assert(o->ttype == TYPE_sht); + } else if (o->ttype == TYPE_sht) { unsigned short *op = Tloc(o, 0); switch (ATOMbasetype(u->ttype)) { @@ -365,6 +409,35 @@ DICTdecompress_(BAT *o, BAT *u, role_t r } } } + } else if (o->ttype == TYPE_int) { + unsigned int *op = Tloc(o, 0); + + switch (ATOMbasetype(u->ttype)) { + case TYPE_int: + decompress_loop(int); + break; + case TYPE_lng: + decompress_loop(lng); + break; +#ifdef HAVE_HGE + case TYPE_hge: + decompress_loop(hge); + break; +#endif + default: + BATloop(o, p, q) { + BUN up = op[p]; + if (BUNappend(b, BUNtail(ui, up), false) != GDK_SUCCEED) { + bat_iterator_end(&oi); + bat_destroy(b); + return NULL; + } + } + } + } else { + bat_iterator_end(&oi); + bat_destroy(b); + return NULL; } bat_iterator_end(&oi); return b; @@ -440,6 +513,22 @@ convert_oid( BAT *o, int rt) nil |= ((short)rp[p] == sht_nil); } } + } else if (rt == TYPE_int) { + unsigned short *rp = Tloc(b, 0); + if (oi.type == TYPE_void) { + BATloop(o, p, q) { + rp[p] = (unsigned short) (p+o->tseqbase); + brokenrange |= ((short)rp[p] < 0); + nil |= ((short)rp[p] == int_nil); + } + } else { + oid *op = Tloc(o, 0); + BATloop(o, p, q) { + rp[p] = (unsigned short) op[p]; + brokenrange |= ((short)rp[p] < 0); + nil |= ((short)rp[p] == int_nil); + } + } } else { assert(0); } @@ -583,6 +672,17 @@ 
DICTrenumber_intern( BAT *o, BAT *lc, BA BATsetcount(no, cnt); BATnegateprops(no); no->tkey = oi.key; + } else if (oi.type == TYPE_int) { + int *op = Tloc(no, 0); + oid *c = Tloc(rc, 0); + unsigned short *ip = (unsigned short *) oi.base; + + for(BUN i = 0; i<cnt; i++) { + op[i] = (int) ((BUN)ip[i]==offcnt?offcnt:c[ip[i]]); + } + BATsetcount(no, cnt); + BATnegateprops(no); + no->tkey = oi.key; } else { assert(0); } @@ -741,6 +841,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr } else if (loi.type == TYPE_sht) { sht val = (sht)p; bn = BATthetaselect(lo, lc, &val, op); + } else if (loi.type == TYPE_int) { + int val = (int)p; + bn = BATthetaselect(lo, lc, &val, op); } else assert(0); if (bn && (op[0] == '<' || op[0] == '>' || op[0] == '!') && (!lvi.nonil || lvi.nil)) { /* filter the NULL value out */ @@ -753,6 +856,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr } else if (loi.type == TYPE_sht) { sht val = (sht)p; nbn = BATthetaselect(lo, bn, &val, "<>"); + } else if (loi.type == TYPE_int) { + int val = (int)p; + nbn = BATthetaselect(lo, bn, &val, "<>"); } else assert(0); BBPreclaim(bn); @@ -773,6 +879,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr } else if (loi.type == TYPE_sht) { sht val = (sht)p; bn = BATthetaselect(lo, lc, &val, op); + } else if (loi.type == TYPE_int) { + int val = (int)p; + bn = BATthetaselect(lo, lc, &val, op); } else assert(0); } else if (lc) { /* all rows pass, use input candidate list */ @@ -891,6 +1000,10 @@ DICTselect(Client cntxt, MalBlkPtr mb, M sht lpos = (sht)p; sht hpos = (sht)q; bn = BATselect(lo, lc, &lpos, &hpos, true, hi, anti, false); + } else if (loi.type == TYPE_int) { + int lpos = (int)p; + int hpos = (int)q; + bn = BATselect(lo, lc, &lpos, &hpos, true, hi, anti, false); } else assert(0); } else { @@ -925,16 +1038,43 @@ DICTselect(Client cntxt, MalBlkPtr mb, M BAT * -DICTenlarge(BAT *offsets, BUN cnt, BUN sz, role_t role) +DICTenlarge(BAT *offsets, BUN cnt, BUN sz, int type, role_t role) { - BAT *n = COLnew(offsets->hseqbase, TYPE_sht, 
sz, role); + BAT *n = NULL; + if (type == TYPE_sht) { + if (offsets->ttype != TYPE_bte) + return NULL; + n = COLnew(offsets->hseqbase, TYPE_sht, sz, role); - if (!n) + if (!n) + return NULL; + unsigned char *o = Tloc(offsets, 0); + unsigned short *no = Tloc(n, 0); + for(BUN i = 0; i<cnt; i++) { + no[i] = o[i]; + } + } else if (type == TYPE_int) { + if (offsets->ttype != TYPE_bte && offsets->ttype != TYPE_sht) + return NULL; + n = COLnew(offsets->hseqbase, TYPE_int, sz, role); + if (!n) + return NULL; + if (offsets->ttype == TYPE_sht) { + unsigned char *o = Tloc(offsets, 0); + unsigned int *no = Tloc(n, 0); + for(BUN i = 0; i<cnt; i++) { + no[i] = o[i]; + } + } else { + unsigned short *o = Tloc(offsets, 0); + unsigned int *no = Tloc(n, 0); + for(BUN i = 0; i<cnt; i++) { + no[i] = o[i]; + } + } + + } else { return NULL; - unsigned char *o = Tloc(offsets, 0); - unsigned short *no = Tloc(n, 0); - for(BUN i = 0; i<cnt; i++) { - no[i] = o[i]; } BATnegateprops(n); n->tnil = offsets->tnil; @@ -970,7 +1110,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb, bat_destroy(m); throw(SQL, "dict.renumber", SQLSTATE(HY013) MAL_MALLOC_FAIL); } - assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht); + assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht || o->ttype == TYPE_int); bool havenil = false; if (o->ttype == TYPE_bte) { unsigned char *np = Tloc(n, 0); @@ -980,7 +1120,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb, np[i] = mp[op[i]]; havenil |= np[i] == 128; } - } else { + } else if (o->ttype == TYPE_sht) { unsigned short *np = Tloc(n, 0); unsigned short *op = Tloc(o, 0); unsigned short *mp = Tloc(m, 0); @@ -988,6 +1128,14 @@ DICTrenumber(Client cntxt, MalBlkPtr mb, np[i] = mp[op[i]]; havenil |= np[i] == 32768; } + } else { /* int case */ + unsigned int *np = Tloc(n, 0); + unsigned int *op = Tloc(o, 0); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org