Changeset: 54adcef69551 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/54adcef69551
Modified Files:
        sql/backends/monet5/dict.c
        sql/storage/bat/bat_storage.c
        sql/storage/sql_storage.h
Branch: default
Log Message:

add support for dicts using int offsets


diffs (truncated from 630 to 300 lines):

diff --git a/sql/backends/monet5/dict.c b/sql/backends/monet5/dict.c
--- a/sql/backends/monet5/dict.c
+++ b/sql/backends/monet5/dict.c
@@ -33,7 +33,6 @@ BATmaxminpos_bte(BAT *o, bte m)
        bte minval = m<0?GDK_bte_min:0; /* Later once nils use a bitmask we can 
include -128 in the range */
        bte maxval = m<0?GDK_bte_max:m;
 
-       assert(o->ttype == TYPE_bte);
        o->tnil = m<0?true:false;
        o->tnonil = m<=0?false:true;
        bte *op = (bte*)Tloc(o, 0);
@@ -80,6 +79,33 @@ BATmaxminpos_sht(BAT *o, sht m)
        o->tmaxpos = maxpos;
 }
 
+static void /* int-offset sibling of BATmaxminpos_bte/BATmaxminpos_sht: sets nil flags and min/max positions on o */
+BATmaxminpos_int(BAT *o, int m) /* m >= 0: look for the values 0 and m; m < 0: look for GDK_int_min and GDK_int_max */
+{
+       BUN minpos = BUN_NONE, maxpos = BUN_NONE, p, q;
+       int minval = m<0?GDK_int_min:0; /* Later once nils use a bitmask we can include -2147483648 (INT_MIN, presumably the int nil) in the range */
+       int maxval = m<0?GDK_int_max:m;
+
+       assert(o->ttype == TYPE_int);
+       o->tnil = m<0?true:false; /* a negative m flags the BAT as holding nil */
+       o->tnonil = m<=0?false:true; /* nonil is only claimed when m > 0 */
+       int *op = (int*)Tloc(o, 0);
+       BATloop(o, p, q) { /* first position whose value equals minval */
+               if (op[p] == minval) {
+                       minpos = p;
+                       break;
+               }
+       }
+       BATloop(o, p, q) { /* first position whose value equals maxval */
+               if (op[p] == maxval) {
+                       maxpos = p;
+                       break;
+               }
+       }
+       o->tminpos = minpos; /* remains BUN_NONE when minval does not occur in o */
+       o->tmaxpos = maxpos; /* remains BUN_NONE when maxval does not occur in o */
+}
+
 static str
 DICTcompress_intern(BAT **O, BAT **U, BAT *b, bool ordered, bool persists, 
bool smallest_type)
 {
@@ -91,12 +117,12 @@ DICTcompress_intern(BAT **O, BAT **U, BA
 
        BUN cnt = BATcount(u);
        /* create hash on u */
-       int tt = (cnt<256)?TYPE_bte:TYPE_sht;
+       int tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int;
        if (!smallest_type) {
                BUN cnt = BATcount(b);
-               tt = (cnt<256)?TYPE_bte:TYPE_sht;
+               tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int;
        }
-       if (cnt >= 64*1024) {
+       if (cnt >= INT_MAX) {
                bat_destroy(u);
                throw(SQL, "dict.compress", SQLSTATE(3F000) "dict compress: too 
many values");
        }
@@ -153,7 +179,7 @@ DICTcompress_intern(BAT **O, BAT **U, BA
 
                if (BATcount(u) > 0)
                        BATmaxminpos_bte(o, (bte) (BATcount(u)-1));
-       } else {
+       } else if (tt == TYPE_sht) {
                sht *op = (sht*)Tloc(o, 0);
                bool havenil = false;
                BATloop(b, p, q) {
@@ -172,6 +198,25 @@ DICTcompress_intern(BAT **O, BAT **U, BA
 
                if (BATcount(u) > 0)
                        BATmaxminpos_sht(o, (sht) (BATcount(u)-1));
+       } else {
+               int *op = (int*)Tloc(o, 0);
+               bool havenil = false;
+               BATloop(b, p, q) {
+                       BUN up = 0;
+                       HASHloop(ui, ui.b->thash, up, BUNtail(bi, p)) {
+                               op[p] = (int)up;
+                               havenil |= is_int_nil(op[p]);
+                       }
+               }
+               BATsetcount(o, BATcount(b));
+               o->tsorted = (u->tsorted && bi.sorted);
+               o->trevsorted = false;
+               o->tnil = havenil;
+               o->tnonil = !havenil;
+               o->tkey = bi.key;
+
+               if (BATcount(u) > 0)
+                       BATmaxminpos_int(o, (int) (BATcount(u)-1));
        }
        bat_iterator_end(&bi);
        *O = o;
@@ -339,8 +384,7 @@ DICTdecompress_(BAT *o, BAT *u, role_t r
                                }
                        }
                }
-       } else {
-               assert(o->ttype == TYPE_sht);
+       } else if (o->ttype == TYPE_sht) {
                unsigned short *op = Tloc(o, 0);
 
                switch (ATOMbasetype(u->ttype)) {
@@ -365,6 +409,35 @@ DICTdecompress_(BAT *o, BAT *u, role_t r
                                }
                        }
                }
+       } else if (o->ttype == TYPE_int) {
+               unsigned int *op = Tloc(o, 0);
+
+               switch (ATOMbasetype(u->ttype)) {
+               case TYPE_int:
+                       decompress_loop(int);
+                       break;
+               case TYPE_lng:
+                       decompress_loop(lng);
+                       break;
+#ifdef HAVE_HGE
+               case TYPE_hge:
+                       decompress_loop(hge);
+                       break;
+#endif
+               default:
+                       BATloop(o, p, q) {
+                               BUN up = op[p];
+                               if (BUNappend(b, BUNtail(ui, up), false) != 
GDK_SUCCEED) {
+                                       bat_iterator_end(&oi);
+                                       bat_destroy(b);
+                                       return NULL;
+                               }
+                       }
+               }
+       } else {
+               bat_iterator_end(&oi);
+               bat_destroy(b);
+               return NULL;
        }
        bat_iterator_end(&oi);
        return b;
@@ -440,6 +513,22 @@ convert_oid( BAT *o, int rt)
                                nil |= ((short)rp[p] == sht_nil);
                        }
                }
+       } else if (rt == TYPE_int) {
+               unsigned int *rp = Tloc(b, 0); /* rt is TYPE_int: write 32-bit offsets (assumes b was created with type rt - confirm) */
+               if (oi.type == TYPE_void) {
+                       BATloop(o, p, q) {
+                               rp[p] = (unsigned int) (p+o->tseqbase); /* dense input: oid is position + seqbase */
+                               brokenrange |= ((int)rp[p] < 0); /* value does not fit a non-negative int */
+                               nil |= ((int)rp[p] == int_nil); /* compare at int width so the nil pattern can match */
+                       }
+               } else {
+                       oid *op = Tloc(o, 0);
+                       BATloop(o, p, q) {
+                               rp[p] = (unsigned int) op[p];
+                               brokenrange |= ((int)rp[p] < 0);
+                               nil |= ((int)rp[p] == int_nil);
+                       }
+               }
        } else {
                assert(0);
        }
@@ -583,6 +672,17 @@ DICTrenumber_intern( BAT *o, BAT *lc, BA
                BATsetcount(no, cnt);
                BATnegateprops(no);
                no->tkey = oi.key;
+       } else if (oi.type == TYPE_int) {
+               int *op = Tloc(no, 0); /* renumbered int offsets are written here */
+               oid *c = Tloc(rc, 0); /* lookup table indexed by the old offset */
+               unsigned int *ip = (unsigned int *) oi.base; /* oi.type is TYPE_int: read the old offsets at their full 32-bit width */
+
+               for(BUN i = 0; i<cnt; i++) {
+                       op[i] = (int) ((BUN)ip[i]==offcnt?offcnt:c[ip[i]]); /* an offset equal to offcnt is kept, all others go through c */
+               }
+               BATsetcount(no, cnt);
+               BATnegateprops(no);
+               no->tkey = oi.key;
        } else {
                assert(0);
        }
@@ -741,6 +841,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr 
                                } else if (loi.type == TYPE_sht) {
                                        sht val = (sht)p;
                                        bn =  BATthetaselect(lo, lc, &val, op);
+                               } else if (loi.type == TYPE_int) {
+                                       int val = (int)p;
+                                       bn =  BATthetaselect(lo, lc, &val, op);
                                } else
                                        assert(0);
                                if (bn && (op[0] == '<' || op[0] == '>' || 
op[0] == '!') && (!lvi.nonil || lvi.nil)) { /* filter the NULL value out */
@@ -753,6 +856,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr 
                                                } else if (loi.type == 
TYPE_sht) {
                                                        sht val = (sht)p;
                                                        nbn =  
BATthetaselect(lo, bn, &val, "<>");
+                                               } else if (loi.type == 
TYPE_int) {
+                                                       int val = (int)p;
+                                                       nbn =  
BATthetaselect(lo, bn, &val, "<>");
                                                } else
                                                        assert(0);
                                                BBPreclaim(bn);
@@ -773,6 +879,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr 
                                        } else if (loi.type == TYPE_sht) {
                                                sht val = (sht)p;
                                                bn =  BATthetaselect(lo, lc, 
&val, op);
+                                       } else if (loi.type == TYPE_int) {
+                                               int val = (int)p;
+                                               bn =  BATthetaselect(lo, lc, 
&val, op);
                                        } else
                                                assert(0);
                                } else if (lc) { /* all rows pass, use input 
candidate list */
@@ -891,6 +1000,10 @@ DICTselect(Client cntxt, MalBlkPtr mb, M
                                sht lpos = (sht)p;
                                sht hpos = (sht)q;
                                bn =  BATselect(lo, lc, &lpos, &hpos, true, hi, 
anti, false);
+                       } else if (loi.type == TYPE_int) {
+                               int lpos = (int)p;
+                               int hpos = (int)q;
+                               bn =  BATselect(lo, lc, &lpos, &hpos, true, hi, 
anti, false);
                        } else
                                assert(0);
                } else {
@@ -925,16 +1038,43 @@ DICTselect(Client cntxt, MalBlkPtr mb, M
 
 
 BAT *
-DICTenlarge(BAT *offsets, BUN cnt, BUN sz, role_t role)
+DICTenlarge(BAT *offsets, BUN cnt, BUN sz, int type, role_t role)
 {
-       BAT *n = COLnew(offsets->hseqbase, TYPE_sht, sz, role);
+       BAT *n = NULL;
+       if (type == TYPE_sht) {
+               if (offsets->ttype != TYPE_bte)
+                       return NULL;
+               n = COLnew(offsets->hseqbase, TYPE_sht, sz, role);
 
-       if (!n)
+               if (!n)
+                       return NULL;
+               unsigned char *o = Tloc(offsets, 0);
+               unsigned short *no = Tloc(n, 0);
+               for(BUN i = 0; i<cnt; i++) {
+                       no[i] = o[i];
+               }
+       } else if (type == TYPE_int) {
+               if (offsets->ttype != TYPE_bte && offsets->ttype != TYPE_sht) /* only bte or sht offsets are widened to int */
+                       return NULL;
+               n = COLnew(offsets->hseqbase, TYPE_int, sz, role);
+               if (!n)
+                       return NULL;
+               if (offsets->ttype == TYPE_sht) { /* sht source: read 2-byte unsigned offsets */
+                       unsigned short *o = Tloc(offsets, 0);
+                       unsigned int *no = Tloc(n, 0);
+                       for(BUN i = 0; i<cnt; i++) {
+                               no[i] = o[i]; /* zero-extend to 32 bits */
+                       }
+               } else { /* bte source: read 1-byte unsigned offsets */
+                       unsigned char *o = Tloc(offsets, 0);
+                       unsigned int *no = Tloc(n, 0);
+                       for(BUN i = 0; i<cnt; i++) {
+                               no[i] = o[i]; /* zero-extend to 32 bits */
+                       }
+               }
+
+       } else {
                return NULL;
-       unsigned char *o = Tloc(offsets, 0);
-       unsigned short *no = Tloc(n, 0);
-       for(BUN i = 0; i<cnt; i++) {
-               no[i] = o[i];
        }
        BATnegateprops(n);
        n->tnil = offsets->tnil;
@@ -970,7 +1110,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
                bat_destroy(m);
                throw(SQL, "dict.renumber", SQLSTATE(HY013) MAL_MALLOC_FAIL);
        }
-       assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht);
+       assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht || o->ttype == 
TYPE_int);
        bool havenil = false;
        if (o->ttype == TYPE_bte) {
                unsigned char *np = Tloc(n, 0);
@@ -980,7 +1120,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
                        np[i] = mp[op[i]];
                        havenil |= np[i] == 128;
                }
-       } else {
+       } else if (o->ttype == TYPE_sht) {
                unsigned short *np = Tloc(n, 0);
                unsigned short *op = Tloc(o, 0);
                unsigned short *mp = Tloc(m, 0);
@@ -988,6 +1128,14 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
                        np[i] = mp[op[i]];
                        havenil |= np[i] == 32768;
                }
+       } else { /* int case */
+               unsigned int *np = Tloc(n, 0);
+               unsigned int *op = Tloc(o, 0);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to