Changeset: d195532c7b67 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d195532c7b67
Modified Files:
        gdk/gdk_select.c
Branch: leftmart
Log Message:

making ordered_idx to work with non-dense cand lists


diffs (201 lines):

diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -150,45 +150,69 @@ doubleslice(BAT *b, BUN l1, BUN h1, BUN 
 }
 
 static BAT *
-sort_cand(BAT *b, BUN cnt)
+sort_cand(BAT *bn, BAT *s, BUN cnt)
 {
-       BAT *bn;
-       const oid *restrict o;
-       oid *restrict p;
+       oid *restrict o;
        unsigned char *bitvector;
-       BUN i,sz_b;
+       BUN i, cnt_b, new_cnt;
 
 #define remainder8(X)   ((X) &  7)
 #define quotient8(X)    ((X) >> 3)
 
        /* input must be a potential candidate list or NULL */
-       assert(b == NULL ||
-              (((b->ttype == TYPE_void && b->tseqbase != oid_nil) ||
-                b->ttype == TYPE_oid) &&
-                b->tkey));
+       assert(bn == NULL ||
+              (((bn->ttype == TYPE_void && bn->tseqbase != oid_nil) ||
+                bn->ttype == TYPE_oid) &&
+                bn->tkey));
 
-       sz_b = BATcount(b);
+       /* s is a candidate list to be merged with bn */
+       assert(s == NULL || s->ttype == TYPE_oid || s->ttype == TYPE_void);
+       if (s && !BATtordered(s)) {
+               GDKerror("sort_cand: invalid argument: "
+                        "s must be sorted.\n");
+               return NULL;
+       }
+
+       cnt_b = BATcount(bn);
        bitvector = (unsigned char *) GDKzalloc(cnt/8+1);
        if (bitvector == NULL) {
                GDKerror("#sort_cand: memory allocation error");
                return NULL;
        }
 
-       o = (oid *) Tloc(b,0);
-       for (i=0; i < sz_b; i++) {
+       o = (oid *) Tloc(bn,0);
+       for (i=0; i < cnt_b; i++) {
                bitvector[quotient8(o[i])] |= (1<<remainder8(o[i]));
        }
 
-       bn = COLnew(0, TYPE_oid, sz_b, TRANSIENT);
-       p = (oid *) Tloc(bn, 0);
+       if (s) {
+               unsigned char *sbitvector = (unsigned char *) 
GDKzalloc(cnt/8+1);
+               const oid *restrict so = (oid *) Tloc(s,0);
 
-       for (i=0; i < cnt; i++) {
+               if (sbitvector == NULL) {
+                       GDKerror("#sort_cand: memory allocation error");
+                       return NULL;
+               }
+               for (i=0; i < BATcount(s); i++) {
+                       sbitvector[quotient8(so[i])] |= (1<<remainder8(so[i]));
+               }
+               for (i=0; i < cnt/8+1; i++) {
+                       bitvector[i] &= sbitvector[i];
+               }
+               GDKfree(sbitvector);
+       }
+
+       for (new_cnt=0, i=0; i < cnt; i++) {
                if (bitvector[quotient8(i)] & (1<<remainder8(i))) {
-                       *p = (oid) i;
-                       p++;
+                       *o = (oid) i;
+                       o++;
+                       new_cnt++;
                }
        }
+       GDKfree(bitvector);
 
+       assert((!s && (new_cnt == cnt_b)) || (s && new_cnt <= cnt_b));
+       BATsetcount(bn, new_cnt);
        bn->tsorted = 1;
        bn->trevsorted = bn->batCount <= 1;
        bn->tkey = 1;
@@ -1482,11 +1506,12 @@ BATselect(BAT *b, BAT *s, const void *tl
        /* If there is an order index or it is a view and the parent has an 
ordered
         * index, and the bat is not tsorted or trevstorted then use the order
         * index.
-        * And there is no cand list or if there is one, it is dense.
+        * //And there is no cand list or if there is one, it is dense.
+        * trying to make it work for all cand list cases now.
         * TODO: we do not support anti-select with order index */
        if (!anti &&
            !(b->tsorted || b->trevsorted) &&
-           (!s || (s && BATtdense(s)))    &&
+           //(!s || (s && BATtdense(s)))    &&
            (BATcheckorderidx(b) ||
             (VIEWtparent(b) && BATcheckorderidx(BBPquickdesc(VIEWtparent(b), 
0)))))
        {
@@ -1497,9 +1522,10 @@ BATselect(BAT *b, BAT *s, const void *tl
                }
                /* Is query selective enough to use the ordered index ? */
                /* TODO: Test if this heuristic works in practice */
+               /* if there is a orderidx, use it! */
                /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < 
b->batCount/1000 ? (BUN)1000: b->batCount/1000))*/
-               if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < b->batCount/3)
-               {
+               //if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < b->batCount/3)
+               //{
                        use_orderidx = 1;
                        if (view) {
                                vwl = view->hseqbase;
@@ -1508,11 +1534,11 @@ BATselect(BAT *b, BAT *s, const void *tl
                                vwl = b->hseqbase;
                                vwh = vwl + b->batCount;
                        }
-               } else {
+               /*} else {
                        if (view) {
                                b = view;
                        }
-               }
+               }*/
        }
 
        if (BATordered(b) || BATordered_rev(b) || use_orderidx) {
@@ -1605,7 +1631,7 @@ BATselect(BAT *b, BAT *s, const void *tl
                        }
                } else {
                        assert(use_orderidx);
-                       ALGODEBUG fprintf(stderr, "#BATsubselect(b=%s#" BUNFMT
+                       ALGODEBUG fprintf(stderr, "#BATselect(b=%s#" BUNFMT
                                          ",s=%s%s,anti=%d): orderidx\n",
                                          BATgetId(b), BATcount(b),
                                          s ? BATgetId(s) : "NULL",
@@ -1696,7 +1722,7 @@ BATselect(BAT *b, BAT *s, const void *tl
 
                                rbn = (oid *) Tloc((bn), 0);
 
-                               if (s && !BATtdense(s)) {
+                               /*if (s && !BATtdense(s)) {
                                        const oid *rcand = (const oid *) 
Tloc((s), 0);
                                        assert("should not use orderidx with 
non dense cand list, too expensive");
 
@@ -1711,13 +1737,22 @@ BATselect(BAT *b, BAT *s, const void *tl
                                        }
                                        BATsetcount(bn, cnt);
 
-                               } else {
+                               } else {*/
                                        if (s) {
-                                               assert(BATtdense(s));
-                                               if (vwl < s->tseqbase)
-                                                       vwl = s->tseqbase;
-                                               if (s->tseqbase + BATcount(s) < 
vwh)
-                                                       vwh = s->tseqbase + 
BATcount(s);
+                                               if (BATtdense(s)) {
+                                                       if (vwl < s->tseqbase)
+                                                               vwl = 
s->tseqbase;
+                                                       if (s->tseqbase + 
BATcount(s) < vwh)
+                                                               vwh = 
s->tseqbase + BATcount(s);
+                                                       /* we don't need s 
anymore */
+                                                       s = NULL;
+                                               } else {
+                                                       assert(s && 
!BATtdense(s));
+                                                       if (vwl < *((const oid 
*)Tloc(s,0)))
+                                                               vwl = *((const 
oid *)Tloc(s,0));
+                                                       if (*((const oid 
*)Tloc(s,BATcount(s))) < vwh)
+                                                               vwh = *((const 
oid *)Tloc(s,BATcount(s)));
+                                               }
                                        }
                                        for (i = low; i < high; i++) {
                                                if (vwl <= ((*rs)&BUN_UNMSK) && 
((*rs)&BUN_UNMSK) < vwh) {
@@ -1726,17 +1761,18 @@ BATselect(BAT *b, BAT *s, const void *tl
                                                }
                                                rs++;
                                        }
-                               }
+                               //}
                                BATsetcount(bn, cnt);
                                bn->tkey = 1;
                                bn->tnil = 0;
                                bn->tnonil = 1;
                                /* output must be sorted */
+                               /*
                                GDKqsort((oid *) Tloc(bn, 0), NULL, NULL, 
(size_t) bn->batCount, sizeof(oid), 0, TYPE_oid);
                                bn->tsorted = 1;
                                bn->trevsorted = bn->batCount <= 1;
-                               bn->tseqbase = (bn->tdense = bn->batCount <= 1) 
!= 0 ? 0 : oid_nil;
-                               //sort_cand(bn,BATcount(b));
+                               bn->tseqbase = (bn->tdense = bn->batCount <= 1) 
!= 0 ? 0 : oid_nil;*/
+                               return sort_cand(bn, s, BATcount(b));
                        } else {
                                /* match: [low..high) */
                                if (s) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to