Changeset: d159bf94ef97 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d159bf94ef97 Modified Files: gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_firstn.c gdk/gdk_group.c gdk/gdk_join.c gdk/gdk_select.c gdk/gdk_unique.c sql/backends/monet5/sql.c Branch: default Log Message:
Merge with Jul2015 branch. diffs (truncated from 551 to 300 lines): diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -1320,7 +1320,7 @@ BUNfnd(BAT *b, const void *v) if (BATtvoid(b)) return BUNfndVOID(b, v); if (!BATcheckhash(b)) { - if (BATtordered(b) || BATtrevordered(b)) + if (BATordered(b) || BATordered_rev(b)) return SORTfnd(b, v); } bi = bat_iterator(b); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -838,21 +838,97 @@ BATslice(BAT *b, BUN l, BUN h) return NULL; } -/* Return whether the BAT is ordered or not. */ +/* Return whether the BAT is ordered or not. If we don't know, invest +* in a scan and record the results in the bat descriptor. */ int BATordered(BAT *b) { - if (!b->tsorted) - BATderiveTailProps(b, 0); + lng t0 = GDKusec(); + + if (b->ttype == TYPE_void) + return 1; + /* In order that multiple threads don't scan the same BAT at + * the same time (happens a lot with mitosis/mergetable), we + * use a lock. We reuse the hash lock for this, not because + * this scanning interferes with hashes, but because it's + * there, and not so likely to be used at the same time. */ + MT_lock_set(&GDKhashLock(abs(b->batCacheid))); + if (!b->tsorted && b->T->nosorted == 0) { + BATiter bi = bat_iterator(b); + int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype); + BUN p, q; + b->batDirtydesc = 1; + switch (ATOMbasetype(b->ttype)) { + case TYPE_int: { + const int *iptr = (const int *) Tloc(b, 0); + for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) { + if (iptr[p - 1] > iptr[p]) { + b->T->nosorted = p; + ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0); + goto doreturn; + } + } + break; + } + case TYPE_lng: { + const lng *lptr = (const lng *) Tloc(b, 0); + for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) { + if (lptr[p - 1] > lptr[p]) { + b->T->nosorted = p; + ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0); + goto doreturn; + } + } + break; + } + default: + for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) { + if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) > 0) { + b->T->nosorted = p; + ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0); + goto doreturn; + } + } + break; + } + b->tsorted = 1; + ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0); + } + doreturn: + MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); return b->tsorted; } -/* Return whether the BAT is reverse ordered or not. */ +/* Return whether the BAT is reverse ordered or not. If we don't + * know, invest in a scan and record the results in the bat + * descriptor. */ int BATordered_rev(BAT *b) { - if (!b->trevsorted) - BATderiveTailProps(b, 0); + lng t0 = GDKusec(); + + if (b == NULL) + return 0; + if (b->ttype == TYPE_void) + return b->tseqbase == oid_nil; + MT_lock_set(&GDKhashLock(abs(b->batCacheid))); + if (!b->trevsorted && b->T->norevsorted == 0) { + BATiter bi = bat_iterator(b); + int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype); + BUN p, q; + b->batDirtydesc = 1; + for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) { + if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) { + b->T->norevsorted = p; + ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0); + goto doreturn; + } + } + b->trevsorted = 1; + ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for %s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0); + } + doreturn: + MT_lock_unset(&GDKhashLock(abs(b->batCacheid))); return b->trevsorted; } diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c --- a/gdk/gdk_firstn.c +++ b/gdk/gdk_firstn.c @@ -154,7 +154,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, BATseqbase(BATmirror(bn), start + b->hseqbase); return bn; } - if (b->tsorted || b->trevsorted) { + /* note, we want to do bot calls */ + if (BATordered(b) | BATordered_rev(b)) { /* trivial: b is sorted so we just need to return the * initial or final part of it (or of the candidate * list) */ diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -444,7 +444,7 @@ BATgroup_internal(BAT **groups, BAT **ex } return GDK_SUCCEED; } - if (b->tsorted && b->trevsorted) { + if (BATordered(b) && BATordered_rev(b)) { /* all values are equal */ if (g == NULL) { /* there's only a single group: 0 */ @@ -581,8 +581,8 @@ BATgroup_internal(BAT **groups, BAT **ex } } - if (((b->tsorted || b->trevsorted) && - (g == NULL || g->tsorted || g->trevsorted)) || + if (((BATordered(b) || BATordered_rev(b)) && + (g == NULL || BATordered(g) || BATordered_rev(g))) || subsorted) { /* we only need to compare each entry with the previous */ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "," @@ -634,7 +634,7 @@ BATgroup_internal(BAT **groups, BAT **ex gn->tsorted = 1; *groups = gn; - } else if (b->tsorted || b->trevsorted) { + } else if (BATordered(b) || BATordered_rev(b)) { BUN i, j; BUN *pgrp; diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -458,9 +458,33 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r { BUN cnt; + r1->tkey = 1; + r1->T->nokey[0] = r1->T->nokey[1] = 0; + r1->tsorted = 1; + r1->T->nosorted = 0; + r1->tdense = 0; + r1->T->nodense = 0; + r1->T->nil = 0; + r1->T->nonil = 1; + if (r2) { + r2->tkey = 1; + r2->T->nokey[0] = r2->T->nokey[1] = 0; + r2->tsorted = 1; + r2->T->nosorted = 0; + r2->tdense = 0; + r2->T->nodense = 0; + r2->T->nil = 0; + r2->T->nonil = 1; + } if (lstart == lend || !(nil_on_miss | only_misses)) { virtualize(r1); - virtualize(r2); + r1->trevsorted = 1; + r1->T->norevsorted = 0; + if (r2) { + virtualize(r2); + r2->trevsorted = 1; + r2->T->norevsorted = 0; + } return GDK_SUCCEED; } if (lcand) { @@ -469,12 +493,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r goto bailout; memcpy(Tloc(r1, BUNfirst(r1)), lcand, (lcandend - lcand) * sizeof(oid)); BATsetcount(r1, cnt); - r1->tkey = 1; - r1->tsorted = 1; - r1->trevsorted = BATcount(r1) <= 1; - r1->tdense = 0; - r1->T->nil = 0; - r1->T->nonil = 1; } else { cnt = lend - lstart; HEAPfree(&r1->T->heap, 1); @@ -482,12 +500,12 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r r1->tvarsized = 1; r1->T->width = 0; r1->T->shift = 0; - r1->tdense = 0; if (BATextend(r1, cnt) != GDK_SUCCEED) goto bailout; BATsetcount(r1, cnt); BATseqbase(BATmirror(r1), lstart + l->hseqbase); } + r1->T->norevsorted = !(r1->trevsorted = BATcount(r1) <= 1); BATseqbase(r1, 0); if (r2) { HEAPfree(&r2->T->heap, 1); @@ -495,7 +513,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r r2->tvarsized = 1; r2->T->width = 0; r2->T->shift = 0; - r2->tdense = 0; if (BATextend(r2, cnt) != GDK_SUCCEED) goto bailout; BATsetcount(r2, cnt); @@ -1109,10 +1126,12 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT lordering = 1; rordering = r->tsorted ? 1 : -1; /* if l not sorted, we only know for sure that r2 is - * key if l is, and that r1 is key if r is */ + * key if l is, and that r1 is key if r is; r1 is also + * key in the case of a semi-join or anti-semi-join + * (only_misses) */ if (r2) r2->tkey = l->tkey != 0; - r1->tkey = r->tkey != 0; + r1->tkey = (r->tkey != 0) | semi | only_misses; } /* determine opportunistic scan window for r; if l is not * sorted this is only used to find range of equal values */ @@ -3082,7 +3101,7 @@ subleftjoin(BAT **r1p, BAT **r2p, BAT *l /* use special implementation for dense right-hand side */ return mergejoin_void(r1, r2, l, r, sl, sr, nil_on_miss, only_misses, t0); - } else if ((r->tsorted || r->trevsorted) && + } else if ((BATordered(r) || BATordered_rev(r)) && (BATtdense(r) || lcount < 1024 || BATcount(r) * (Tsize(r) + (r->T->vheap ? r->T->vheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) @@ -3267,7 +3286,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA } else if (BATtdense(l) && (sl == NULL || BATtdense(sl))) { /* use special implementation for dense right-hand side */ return mergejoin_void(r2, r1, r, l, sr, sl, 0, 0, t0); - } else if ((l->tsorted || l->trevsorted) && (r->tsorted || r->trevsorted)) { + } else if ((BATordered(l) || BATordered_rev(l)) && + (BATordered(r) || BATordered_rev(r))) { /* both sorted, smallest on left */ if (BATcount(l) <= BATcount(r)) return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0, maxsize, t0, 0); @@ -3285,14 +3305,14 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA /* only right has hash, don't swap */ swap = 0; reason = "right has hash"; - } else if ((l->tsorted || l->trevsorted) && + } else if ((BATordered(l) || BATordered_rev(l)) && (l->ttype == TYPE_void || rcount < 1024 || MIN(lsize, rsize) > mem_size)) { /* only left is sorted, swap; but only if right is * "large" and the smaller of the two isn't too large * (i.e. prefer hash over binary search, but only if * the hash table doesn't cause thrashing) */ return mergejoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0, maxsize, t0, 1); - } else if ((r->tsorted || r->trevsorted) && + } else if ((BATordered(r) || BATordered_rev(r)) && (r->ttype == TYPE_void || lcount < 1024 || MIN(lsize, rsize) > mem_size)) { /* only right is sorted, don't swap; but only if left * is "large" and the smaller of the two isn't too @@ -3387,12 +3407,12 @@ BATrangejoin(BAT **r1p, BAT **r2p, BAT * #define project_loop(TYPE) \ static gdk_return \ -project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck, int sortcheck) \ +project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck) \ { \ oid lo, hi; \ const TYPE *restrict rt; \ TYPE *restrict bt; \ - TYPE v, prev = 0; \ + TYPE v; \ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list