Changeset: 014804abc3bd for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=014804abc3bd Modified Files: gdk/gdk.h gdk/gdk_orderidx.c gdk/gdk_search.c gdk/gdk_select.c Branch: leftmart Log Message:
Use the most significant bit of oid's to mask the change of values in ordered idx. diffs (truncated from 328 to 300 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -615,6 +615,8 @@ typedef size_t BUN; #else #define BUN_NONE ((BUN) LLONG_MAX) #endif +#define BUN_MSK (~BUN_NONE) +#define BUN_UNMSK BUN_NONE #define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */ #define BUN2 2 diff --git a/gdk/gdk_orderidx.c b/gdk/gdk_orderidx.c --- a/gdk/gdk_orderidx.c +++ b/gdk/gdk_orderidx.c @@ -210,22 +210,58 @@ BATorderidx(BAT *b, int stable) return GDK_SUCCEED; } -#define BINARY_MERGE(TYPE) \ +#define UNARY_MERGE(TYPE) \ do { \ TYPE *v = (TYPE *) Tloc(b, BUNfirst(b)); \ - while (p0 < q0 && p1 < q1) { \ - if (v[*p0 - b->hseqbase] <= v[*p1 - b->hseqbase]) { \ - *mv++ = *p0++; \ - } else { \ - *mv++ = *p1++; \ + while (p < q) { \ + *mv = *p++; \ + if (p < q && v[*mv - b->hseqbase] != v[*p - b->hseqbase]) { \ + *mv |= BUN_MSK; \ } \ + mv++; \ } \ - while (p0 < q0) { \ - *mv++ = *p0++; \ - } \ - while (p1 < q1) { \ - *mv++ = *p1++; \ - } \ + *(mv-1) |= BUN_MSK; \ + } while (0) + +#define BINARY_MERGE(TYPE) \ + do { \ + TYPE *v = (TYPE *) Tloc(b, BUNfirst(b)); \ + while (p0 < q0 && p1 < q1) { \ + if (v[*p0 - b->hseqbase] <= v[*p1 - b->hseqbase]) { \ + *mv = *p0++; \ + if (p0 < q0 && v[*mv - b->hseqbase] != v[*p0 - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + } else { \ + *mv++ = *p1++; \ + if (p1 < q1 && v[*mv - b->hseqbase] != v[*p1 - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + } \ + } \ + if (p0 < q0 && v[*(mv-1) - b->hseqbase] != v[*p0 - b->hseqbase]) { \ + *(mv-1) |= BUN_MSK; \ + } \ + while (p0 < q0) { \ + *mv = *p0++; \ + if (p0 < q0 && v[*mv - b->hseqbase] != v[*p0 - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + } \ + if (p1 < q1 && v[*(mv-1) - b->hseqbase] != v[*p1 - b->hseqbase]) { \ + *(mv-1) |= BUN_MSK; \ + } \ + while (p1 < q1) { \ + *mv = *p1++; \ + if (p1 < q1 && v[*mv - b->hseqbase] != v[*p1 - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + } \ + *(mv-1) |= BUN_MSK; \ } while(0) #define swap(X,Y,TMP) (TMP)=(X);(X)=(Y);(Y)=(TMP) @@ -258,37 +294,50 @@ BATorderidx(BAT *b, int stable) } while (cur != min); \ } while (0) -#define NWAY_MERGE(TYPE) \ - do { \ - TYPE *minhp, t; \ - TYPE *v = (TYPE *) Tloc(b, BUNfirst(b)); \ - if ((minhp = (TYPE *) GDKmalloc(sizeof(TYPE)*n_ar)) == NULL) { \ - goto bailout; \ - } \ - /* init min heap */ \ - for (i = 0; i < n_ar; i++) { \ - minhp[i] = v[*p[i] - b->hseqbase]; \ - } \ - for (i = n_ar/2; i >=0 ; i--) { \ - HEAPIFY(i); \ - } \ - /* merge */ \ - while (n_ar > 1) { \ - *mv++ = *(p[0])++; \ - if (p[0] < q[0]) { \ - minhp[0] = v[*p[0] - b->hseqbase]; \ - } else { \ - swap(minhp[0], minhp[n_ar-1], t); \ - swap(p[0], p[n_ar-1], t_oid); \ - swap(q[0], q[n_ar-1], t_oid); \ - n_ar--; \ - } \ - HEAPIFY(0); \ - } \ - while (p[0] < q[0]) { \ - *mv++ = *(p[0])++; \ - } \ - GDKfree(minhp); \ +#define NWAY_MERGE(TYPE) \ + do { \ + TYPE *minhp, t; \ + TYPE *v = (TYPE *) Tloc(b, BUNfirst(b)); \ + if ((minhp = (TYPE *) GDKmalloc(sizeof(TYPE)*n_ar)) == NULL) { \ + goto bailout; \ + } \ + /* init min heap */ \ + for (i = 0; i < n_ar; i++) { \ + minhp[i] = v[*p[i] - b->hseqbase]; \ + } \ + for (i = n_ar/2; i >=0 ; i--) { \ + HEAPIFY(i); \ + } \ + /* merge */ \ + while (n_ar > 1) { \ + *mv = *(p[0])++; \ + if (p[0] < q[0] && v[*mv - b->hseqbase] != v[*p[0] - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + if (p[0] < q[0]) { \ + minhp[0] = v[*p[0] - b->hseqbase]; \ + HEAPIFY(0); \ + } else { \ + swap(minhp[0], minhp[n_ar-1], t); \ + swap(p[0], p[n_ar-1], t_oid); \ + swap(q[0], q[n_ar-1], t_oid); \ + n_ar--; \ + HEAPIFY(0); \ + if (v[*(mv-1) - b->hseqbase] != v[*p[0] - b->hseqbase]) { \ + *(mv-1) |= BUN_MSK; \ + } \ + } \ + } \ + while (p[0] < q[0]) { \ + *mv = *(p[0])++; \ + if (p[0] < q[0] && v[*mv - b->hseqbase] != v[*p[0] - b->hseqbase]) { \ + *mv |= BUN_MSK; \ + } \ + mv++; \ + } \ + *(mv-1) |= BUN_MSK; \ + GDKfree(minhp); \ } while (0) gdk_return @@ -328,10 +377,31 @@ GDKmergeidx(BAT *b, BAT**a, int n_ar) *mv++ = (oid) BATcount(b); if (n_ar == 1) { + const oid *restrict p, *q; /* One oid order bat, nothing to merge */ assert(BATcount(a[0]) == BATcount(b)); assert(VIEWtparent(a[0]) == -b->batCacheid && a[0]->torderidx); - memcpy(mv, (oid *) a[0]->torderidx->base + ORDERIDXOFF, BATcount(b) * SIZEOF_OID); + p = (const oid *) a[0]->torderidx->base + ORDERIDXOFF; + q = p + BATcount(a[0]); + switch (ATOMstorage(b->ttype)) { + case TYPE_bte: UNARY_MERGE(bte); break; + case TYPE_sht: UNARY_MERGE(sht); break; + case TYPE_int: UNARY_MERGE(int); break; + case TYPE_lng: UNARY_MERGE(lng); break; +#ifdef HAVE_HGE + case TYPE_hge: UNARY_MERGE(hge); break; +#endif + case TYPE_flt: UNARY_MERGE(flt); break; + case TYPE_dbl: UNARY_MERGE(dbl); break; + case TYPE_str: + default: + /* TODO: support strings, date, timestamps etc. */ + assert(0); + HEAPfree(m, 1); + GDKfree(m); + MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); + return GDK_FAIL; + } } else if (n_ar == 2) { /* sort merge with 1 comparison per BUN */ const oid *restrict p0, *restrict p1, *q0, *q1; diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c --- a/gdk/gdk_search.c +++ b/gdk/gdk_search.c @@ -141,7 +141,7 @@ SORTfndwhich(BAT *b, const void *v, enum end = lo; if (lo >= hi || (use_orderidx ? - (atom_GE(BUNtail(bi, o[lo] - b->hseqbase + BUNfirst(b)), v, b->ttype)) : + (atom_GE(BUNtail(bi, (o[lo]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)), v, b->ttype)) : (b->tsorted ? atom_GE(BUNtail(bi, lo), v, b->ttype) : atom_LE(BUNtail(bi, lo), v, b->ttype)))) { /* shortcut: if BAT is empty or first (and * hence all) tail value is >= v (if sorted) @@ -153,7 +153,7 @@ SORTfndwhich(BAT *b, const void *v, enum end = hi; if (lo >= hi || (use_orderidx ? - (atom_LE(BUNtail(bi, o[hi - 1] - b->hseqbase + BUNfirst(b)), v, b->ttype)) : + (atom_LE(BUNtail(bi, (o[hi - 1]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)), v, b->ttype)) : (b->tsorted ? atom_LE(BUNtail(bi, hi - 1), v, b->ttype) : atom_GE(BUNtail(bi, hi - 1), v, b->ttype)))) { /* shortcut: if BAT is empty or last (and * hence all) tail value is <= v (if sorted) @@ -238,27 +238,27 @@ SORTfndwhich(BAT *b, const void *v, enum assert(use_orderidx); switch (tp) { case TYPE_bte: - SORTfndloop(bte, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(bte, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; case TYPE_sht: - SORTfndloop(sht, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(sht, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; case TYPE_int: - SORTfndloop(int, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(int, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; case TYPE_lng: - SORTfndloop(lng, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(lng, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; #ifdef HAVE_HGE case TYPE_hge: - SORTfndloop(hge, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(hge, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; #endif case TYPE_flt: - SORTfndloop(flt, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(flt, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; case TYPE_dbl: - SORTfndloop(dbl, simple_CMP, BUNtloc, o[cur] - b->hseqbase + BUNfirst(b)); + SORTfndloop(dbl, simple_CMP, BUNtloc, (o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b)); break; default: assert(0); @@ -270,20 +270,34 @@ SORTfndwhich(BAT *b, const void *v, enum case FIND_FIRST: if (cmp == 0 && b->tkey == 0) { /* shift over multiple equals */ - for (diff = cur - end; diff; diff >>= 1) { - while (cur >= end + diff && - atom_EQ(BUNtail(bi, use_orderidx ? o[cur - diff] - b->hseqbase + BUNfirst(b) : cur - diff), v, b->ttype)) - cur -= diff; + if (use_orderidx) { + while (cur >= end && !(o[cur]&BUN_MSK)) { + cur--; + } + cur++; + } else { + for (diff = cur - end; diff; diff >>= 1) { + while (cur >= end + diff && + atom_EQ(BUNtail(bi, cur - diff), v, b->ttype)) + cur -= diff; + } } } break; case FIND_LAST: if (cmp == 0 && b->tkey == 0) { /* shift over multiple equals */ - for (diff = (end - cur) >> 1; diff; diff >>= 1) { - while (cur + diff < end && - atom_EQ(BUNtail(bi, use_orderidx ? o[cur + diff] - b->hseqbase + BUNfirst(b) : cur + diff), v, b->ttype)) - cur += diff; + if (use_orderidx) { + while (cur < end && !(o[cur]&BUN_MSK)) { + cur++; + } + } else { + for (diff = (end - cur) >> 1; diff; diff >>= 1) { + while (cur + diff < end && + atom_EQ(BUNtail(bi, cur + diff), v, b->ttype)) { + cur += diff; + } + } } } cur += (cmp == 0); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list