Changeset: 7552aa991192 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/7552aa991192 Modified Files: sql/backends/monet5/dict.c sql/storage/bat/bat_storage.c sql/storage/sql_storage.h Branch: dict Log Message:
v1 of appends to dict tables diffs (truncated from 363 to 300 lines): diff --git a/sql/backends/monet5/dict.c b/sql/backends/monet5/dict.c --- a/sql/backends/monet5/dict.c +++ b/sql/backends/monet5/dict.c @@ -69,8 +69,8 @@ DICTcompress(Client cntxt, MalBlkPtr mb, BUN cnt = BATcount(u); /* create hash on u */ - int tt = (cnt<256)?TYPE_bte:(cnt<(64*1024))?TYPE_sht:TYPE_int; - if (cnt > (BUN)2*1024*1024*1024) { + int tt = (cnt<256)?TYPE_bte:TYPE_sht; + if (cnt >= 64*1024) { bat_destroy(u); bat_destroy(b); throw(SQL, "dict.compress", SQLSTATE(3F000) "dict compress: too many values"); @@ -151,7 +151,7 @@ DICTcompress(Client cntxt, MalBlkPtr mb, } o->tminpos = minpos; o->tmaxpos = maxpos; - } else if (tt == TYPE_sht) { + } else { sht *op = (sht*)Tloc(o, 0); BATloop(b, p, q) { BUN up = 0; @@ -192,8 +192,6 @@ DICTcompress(Client cntxt, MalBlkPtr mb, } o->tminpos = minpos; o->tmaxpos = maxpos; - } else { - printf("implement int cases \n"); } bat_iterator_end(&bi); bat_destroy(b); @@ -202,34 +200,14 @@ DICTcompress(Client cntxt, MalBlkPtr mb, return MAL_SUCCEED; } - -/* improve decompress of int,lng,hge types */ -str -DICTdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +/* improve decompress of hge types */ +BAT * +DICTdecompress_(BAT *o, BAT *u) { - /* b = project(o:bat[:bte], u) */ - /* b = project(o:bat[:sht], u) */ - /* b = project(o:bat[:int], u) */ - (void)cntxt; - (void)mb; - bat *r = getArgReference_bat(stk, pci, 0); - bat O = *getArgReference_bat(stk, pci, 1); - bat U = *getArgReference_bat(stk, pci, 2); + BAT *b = COLnew(o->hseqbase, u->ttype, BATcount(o), TRANSIENT); - BAT *o = BATdescriptor(O); - BAT *u = BATdescriptor(U); - if (!o || !u) { - bat_destroy(o); - bat_destroy(u); - throw(SQL, "dict.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL); - } - - BAT *b = COLnew(o->hseqbase, u->ttype, BATcount(o), TRANSIENT); - if (!b) { - bat_destroy(o); - bat_destroy(u); - throw(SQL, "dict.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL); - } + if (!b) + return NULL; BUN p, q; BATiter oi = bat_iterator(o); BATiter ui = bat_iterator_nolock(u); @@ -260,13 +238,12 @@ DICTdecompress(Client cntxt, MalBlkPtr m if (BUNappend(b, BUNtail(ui, up), false) != GDK_SUCCEED) { bat_iterator_end(&oi); bat_destroy(b); - bat_destroy(o); - bat_destroy(u); - throw(SQL, "dict.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL); + return NULL; } } } - } else if (o->ttype == TYPE_sht) { + } else { + assert(o->ttype == TYPE_sht); unsigned short *op = Tloc(o, 0); if (ATOMstorage(u->ttype) == TYPE_int) { @@ -293,25 +270,37 @@ DICTdecompress(Client cntxt, MalBlkPtr m if (BUNappend(b, BUNtail(ui, up), false) != GDK_SUCCEED) { bat_iterator_end(&oi); bat_destroy(b); - bat_destroy(o); - bat_destroy(u); - throw(SQL, "dict.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL); + return NULL; } } } - } else if (o->ttype == TYPE_int) { - assert(0); - } else { - bat_iterator_end(&oi); - bat_destroy(b); + } + bat_iterator_end(&oi); + return b; +} + +str +DICTdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void)cntxt; + (void)mb; + bat *r = getArgReference_bat(stk, pci, 0); + bat O = *getArgReference_bat(stk, pci, 1); + bat U = *getArgReference_bat(stk, pci, 2); + + BAT *o = BATdescriptor(O); + BAT *u = BATdescriptor(U); + if (!o || !u) { bat_destroy(o); bat_destroy(u); - throw(SQL, "dict.decompress", SQLSTATE(HY013) "unknown offset type"); + throw(SQL, "dict.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL); } - bat_iterator_end(&oi); - BBPkeepref(*r = b->batCacheid); + BAT *b = DICTdecompress_(o, u); bat_destroy(o); bat_destroy(u); + if (!b) + throw(SQL, "dict.decompress", SQLSTATE(HY013) "unknown offset type"); + BBPkeepref(*r = b->batCacheid); return MAL_SUCCEED; } @@ -644,3 +633,118 @@ DICTselect(Client cntxt, MalBlkPtr mb, M BBPkeepref(*R0 = bn->batCacheid); return MAL_SUCCEED; } + +BAT * +DICTenlarge(BAT *offsets, BUN cnt, BUN sz) +{ + BAT *n = COLnew(offsets->hseqbase, TYPE_sht, sz, TRANSIENT); + + if (!n) + return NULL; + unsigned char *o = Tloc(offsets, 0); + unsigned short *no = Tloc(n, 0); + for(BUN i = 0; i<cnt; i++) { + no[i] = o[i]; + } + n->tkey = offsets->tkey; + BATnegateprops(n); + n->tsorted = offsets->tsorted; + return n; +} + +/* for each val in vals compute its offset in dict (return via noffsets), + * any missing value in dict will be added to the dict. + * Possible side-effects: + * dict is nolonger sorted + * increase of the dict could mean the offset type overflows, then the output is + * an offset bat with a larger type, unless the larger type is int then abort. + * + * Returns < 0 on error. + */ +int +DICTprepare4append(BAT **noffsets, BAT *vals, BAT *dict) +{ + int tt = BATcount(dict)>=256?TYPE_sht:TYPE_bte; + BUN sz = BATcount(vals), nf = 0; + BAT *n = COLnew(0, tt, sz, TRANSIENT); + + if (!n || BAThash(dict) != GDK_SUCCEED) { + bat_destroy(n); + return -1; + } + + BATiter bi = bat_iterator(vals); + BATiter ui = bat_iterator_nolock(dict); + + if (tt == TYPE_bte) { + bte *op = (bte*)Tloc(n, 0); + for(BUN i = 0; i<sz; i++) { + BUN up = 0; + int f = 0; + HASHloop(ui, ui.b->thash, up, BUNtail(bi, i)) { + op[i] = (bte)up; + f = 1; + } + if (!f) { + if (BATcount(dict) >= 255) { + BAT *nn = DICTenlarge(n, i, sz); + bat_destroy(n); + if (!nn) { + bat_iterator_end(&bi); + return -1; + } + n = nn; + nf = i; + tt = TYPE_sht; + break; + } else { + if (BUNappend(dict, BUNtail(bi, i), true) != GDK_SUCCEED || + (!dict->thash && BAThash(dict) != GDK_SUCCEED)) { + assert(0); + bat_destroy(n); + bat_iterator_end(&bi); + return -1; + } + /* reinitialize */ + ui = bat_iterator_nolock(dict); + op[i] = BATcount(dict)-1; + } + } + } + } + if (tt == TYPE_sht) { + sht *op = (sht*)Tloc(n, 0); + for(BUN i = nf; i<sz; i++) { + BUN up = 0; + int f = 0; + HASHloop(ui, ui.b->thash, up, BUNtail(bi, i)) { + op[i] = (sht)up; + f = 1; + } + if (!f) { + if (BATcount(dict) >= (64*1024)-1) { + assert(0); + bat_destroy(n); + bat_iterator_end(&bi); + return -2; + } else { + if (BUNappend(dict, BUNtail(bi, i), true) != GDK_SUCCEED || + (!dict->thash && BAThash(dict) != GDK_SUCCEED)) { + assert(0); + bat_destroy(n); + bat_iterator_end(&bi); + return -1; + } + /* reinitialize */ + ui = bat_iterator_nolock(dict); + op[i] = BATcount(dict)-1; + } + } + } + } + bat_iterator_end(&bi); + BATsetcount(n, sz); + BATnegateprops(n); + *noffsets = n; + return 0; +} diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c --- a/sql/storage/bat/bat_storage.c +++ b/sql/storage/bat/bat_storage.c @@ -1617,6 +1617,63 @@ delta_append_bat(sql_trans *tr, sql_delt return LOG_ERR; lock_column(tr->store, id); + if (bat->cs.st == ST_DICT) { + BAT *newoffsets = NULL; + BAT *u = temp_descriptor(bat->cs.ebid); + + if (!u) { + unlock_column(tr->store, id); + return LOG_ERR; + } + BUN max_cnt = (BATcount(u) < 256)?256:64*1024; + if (DICTprepare4append(&newoffsets, i, u) < 0) { + assert(0); + } else { + /* returns new offset bat (ie to be appended), possibly with larger type ! */ + if (BATcount(u) >= max_cnt) { + if (max_cnt == 64*1024) { /* decompress */ + BAT *b = temp_descriptor(bat->cs.bid); + BAT *n = b?DICTdecompress_(b , u):NULL; + bat_destroy(b); + if (!n) { + bat_destroy(u); + bat_destroy(n); + unlock_column(tr->store, id); + return LOG_ERR; + } + /* TODO change storage type */ + if (bat->cs.bid) + temp_destroy(bat->cs.bid); + bat->cs.bid = temp_create(n); + bat_destroy(n); + if (bat->cs.ebid) + temp_destroy(bat->cs.ebid); + bat->cs.ebid = 0; + bat->cs.st = ST_DEFAULT; + bat->cs.cleared = true; + } else { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list