Changeset: d195532c7b67 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d195532c7b67 Modified Files: gdk/gdk_select.c Branch: leftmart Log Message:
making ordered_idx to work with non-dense cand lists diffs (201 lines): diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -150,45 +150,69 @@ doubleslice(BAT *b, BUN l1, BUN h1, BUN } static BAT * -sort_cand(BAT *b, BUN cnt) +sort_cand(BAT *bn, BAT *s, BUN cnt) { - BAT *bn; - const oid *restrict o; - oid *restrict p; + oid *restrict o; unsigned char *bitvector; - BUN i,sz_b; + BUN i, cnt_b, new_cnt; #define remainder8(X) ((X) & 7) #define quotient8(X) ((X) >> 3) /* input must be a potential candidate list or NULL */ - assert(b == NULL || - (((b->ttype == TYPE_void && b->tseqbase != oid_nil) || - b->ttype == TYPE_oid) && - b->tkey)); + assert(bn == NULL || + (((bn->ttype == TYPE_void && bn->tseqbase != oid_nil) || + bn->ttype == TYPE_oid) && + bn->tkey)); - sz_b = BATcount(b); + /* s is a candidate list to be merged with bn */ + assert(s == NULL || s->ttype == TYPE_oid || s->ttype == TYPE_void); + if (s && !BATtordered(s)) { + GDKerror("sort_cand: invalid argument: " + "s must be sorted.\n"); + return NULL; + } + + cnt_b = BATcount(bn); bitvector = (unsigned char *) GDKzalloc(cnt/8+1); if (bitvector == NULL) { GDKerror("#sort_cand: memory allocation error"); return NULL; } - o = (oid *) Tloc(b,0); - for (i=0; i < sz_b; i++) { + o = (oid *) Tloc(bn,0); + for (i=0; i < cnt_b; i++) { bitvector[quotient8(o[i])] |= (1<<remainder8(o[i])); } - bn = COLnew(0, TYPE_oid, sz_b, TRANSIENT); - p = (oid *) Tloc(bn, 0); + if (s) { + unsigned char *sbitvector = (unsigned char *) GDKzalloc(cnt/8+1); + const oid *restrict so = (oid *) Tloc(s,0); - for (i=0; i < cnt; i++) { + if (sbitvector == NULL) { + GDKerror("#sort_cand: memory allocation error"); + return NULL; + } + for (i=0; i < BATcount(s); i++) { + sbitvector[quotient8(so[i])] |= (1<<remainder8(so[i])); + } + for (i=0; i < cnt/8+1; i++) { + bitvector[i] &= sbitvector[i]; + } + GDKfree(sbitvector); + } + + for (new_cnt=0, i=0; i < cnt; i++) { if (bitvector[quotient8(i)] & (1<<remainder8(i))) { - *p = (oid) i; - p++; + *o = (oid) i; + o++; + new_cnt++; } } + GDKfree(bitvector); + assert((!s && (new_cnt == cnt_b)) || (s && new_cnt <= cnt_b)); + BATsetcount(bn, new_cnt); bn->tsorted = 1; bn->trevsorted = bn->batCount <= 1; bn->tkey = 1; @@ -1482,11 +1506,12 @@ BATselect(BAT *b, BAT *s, const void *tl /* If there is an order index or it is a view and the parent has an ordered * index, and the bat is not tsorted or trevstorted then use the order * index. - * And there is no cand list or if there is one, it is dense. + * //And there is no cand list or if there is one, it is dense. + * trying to make it work for all cand list cases now. * TODO: we do not support anti-select with order index */ if (!anti && !(b->tsorted || b->trevsorted) && - (!s || (s && BATtdense(s))) && + //(!s || (s && BATtdense(s))) && (BATcheckorderidx(b) || (VIEWtparent(b) && BATcheckorderidx(BBPquickdesc(VIEWtparent(b), 0))))) { @@ -1497,9 +1522,10 @@ BATselect(BAT *b, BAT *s, const void *tl } /* Is query selective enough to use the ordered index ? */ /* TODO: Test if this heuristic works in practice */ + /* if there is a orderidx, use it! */ /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < b->batCount/1000 ? (BUN)1000: b->batCount/1000))*/ - if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < b->batCount/3) - { + //if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < b->batCount/3) + //{ use_orderidx = 1; if (view) { vwl = view->hseqbase; @@ -1508,11 +1534,11 @@ BATselect(BAT *b, BAT *s, const void *tl vwl = b->hseqbase; vwh = vwl + b->batCount; } - } else { + /*} else { if (view) { b = view; } - } + }*/ } if (BATordered(b) || BATordered_rev(b) || use_orderidx) { @@ -1605,7 +1631,7 @@ BATselect(BAT *b, BAT *s, const void *tl } } else { assert(use_orderidx); - ALGODEBUG fprintf(stderr, "#BATsubselect(b=%s#" BUNFMT + ALGODEBUG fprintf(stderr, "#BATselect(b=%s#" BUNFMT ",s=%s%s,anti=%d): orderidx\n", BATgetId(b), BATcount(b), s ? BATgetId(s) : "NULL", @@ -1696,7 +1722,7 @@ BATselect(BAT *b, BAT *s, const void *tl rbn = (oid *) Tloc((bn), 0); - if (s && !BATtdense(s)) { + /*if (s && !BATtdense(s)) { const oid *rcand = (const oid *) Tloc((s), 0); assert("should not use orderidx with non dense cand list, too expensive"); @@ -1711,13 +1737,22 @@ BATselect(BAT *b, BAT *s, const void *tl } BATsetcount(bn, cnt); - } else { + } else {*/ if (s) { - assert(BATtdense(s)); - if (vwl < s->tseqbase) - vwl = s->tseqbase; - if (s->tseqbase + BATcount(s) < vwh) - vwh = s->tseqbase + BATcount(s); + if (BATtdense(s)) { + if (vwl < s->tseqbase) + vwl = s->tseqbase; + if (s->tseqbase + BATcount(s) < vwh) + vwh = s->tseqbase + BATcount(s); + /* we don't need s anymore */ + s = NULL; + } else { + assert(s && !BATtdense(s)); + if (vwl < *((const oid *)Tloc(s,0))) + vwl = *((const oid *)Tloc(s,0)); + if (*((const oid *)Tloc(s,BATcount(s))) < vwh) + vwh = *((const oid *)Tloc(s,BATcount(s))); + } } for (i = low; i < high; i++) { if (vwl <= ((*rs)&BUN_UNMSK) && ((*rs)&BUN_UNMSK) < vwh) { @@ -1726,17 +1761,18 @@ BATselect(BAT *b, BAT *s, const void *tl } rs++; } - } + //} BATsetcount(bn, cnt); bn->tkey = 1; bn->tnil = 0; bn->tnonil = 1; /* output must be sorted */ + /* GDKqsort((oid *) Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid); bn->tsorted = 1; bn->trevsorted = bn->batCount <= 1; - bn->tseqbase = (bn->tdense = bn->batCount <= 1) != 0 ? 0 : oid_nil; - //sort_cand(bn,BATcount(b)); + bn->tseqbase = (bn->tdense = bn->batCount <= 1) != 0 ? 0 : oid_nil;*/ + return sort_cand(bn, s, BATcount(b)); } else { /* match: [low..high) */ if (s) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list