Changeset: d159bf94ef97 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d159bf94ef97
Modified Files:
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_firstn.c
        gdk/gdk_group.c
        gdk/gdk_join.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
        sql/backends/monet5/sql.c
Branch: default
Log Message:

Merge with Jul2015 branch.


diffs (truncated from 551 to 300 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1320,7 +1320,7 @@ BUNfnd(BAT *b, const void *v)
        if (BATtvoid(b))
                return BUNfndVOID(b, v);
        if (!BATcheckhash(b)) {
-               if (BATtordered(b) || BATtrevordered(b))
+               if (BATordered(b) || BATordered_rev(b))
                        return SORTfnd(b, v);
        }
        bi = bat_iterator(b);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -838,21 +838,97 @@ BATslice(BAT *b, BUN l, BUN h)
        return NULL;
 }
 
-/* Return whether the BAT is ordered or not.  */
+/* Return whether the BAT is ordered or not.  If we don't know, invest
+* in a scan and record the results in the bat descriptor.  */
 int
 BATordered(BAT *b)
 {
-       if (!b->tsorted)
-               BATderiveTailProps(b, 0);
+       lng t0 = GDKusec();
+
+       if (b->ttype == TYPE_void)
+               return 1;
+       /* In order that multiple threads don't scan the same BAT at
+        * the same time (happens a lot with mitosis/mergetable), we
+        * use a lock.  We reuse the hash lock for this, not because
+        * this scanning interferes with hashes, but because it's
+        * there, and not so likely to be used at the same time. */
+       MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
+       if (!b->tsorted && b->T->nosorted == 0) {
+               BATiter bi = bat_iterator(b);
+               int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
+               BUN p, q;
+               b->batDirtydesc = 1;
+               switch (ATOMbasetype(b->ttype)) {
+               case TYPE_int: {
+                       const int *iptr = (const int *) Tloc(b, 0);
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (iptr[p - 1] > iptr[p]) {
+                                       b->T->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               case TYPE_lng: {
+                       const lng *lptr = (const lng *) Tloc(b, 0);
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (lptr[p - 1] > lptr[p]) {
+                                       b->T->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               default:
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) > 
0) {
+                                       b->T->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               b->tsorted = 1;
+               ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#" 
BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+       }
+  doreturn:
+       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
        return b->tsorted;
 }
 
-/* Return whether the BAT is reverse ordered or not.  */
+/* Return whether the BAT is reverse ordered or not.  If we don't
+ * know, invest in a scan and record the results in the bat
+ * descriptor.  */
 int
 BATordered_rev(BAT *b)
 {
-       if (!b->trevsorted)
-               BATderiveTailProps(b, 0);
+       lng t0 = GDKusec();
+
+       if (b == NULL)
+               return 0;
+       if (b->ttype == TYPE_void)
+               return b->tseqbase == oid_nil;
+       MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
+       if (!b->trevsorted && b->T->norevsorted == 0) {
+               BATiter bi = bat_iterator(b);
+               int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
+               BUN p, q;
+               b->batDirtydesc = 1;
+               for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                       if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
+                               b->T->norevsorted = p;
+                               ALGODEBUG fprintf(stderr, "#BATordered_rev: 
fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                               goto doreturn;
+                       }
+               }
+               b->trevsorted = 1;
+               ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for 
%s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+       }
+  doreturn:
+       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
        return b->trevsorted;
 }
 
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -154,7 +154,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                BATseqbase(BATmirror(bn), start + b->hseqbase);
                return bn;
        }
-       if (b->tsorted || b->trevsorted) {
+       /* note, we want to do bot calls */
+       if (BATordered(b) | BATordered_rev(b)) {
                /* trivial: b is sorted so we just need to return the
                 * initial or final part of it (or of the candidate
                 * list) */
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -444,7 +444,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
                return GDK_SUCCEED;
        }
-       if (b->tsorted && b->trevsorted) {
+       if (BATordered(b) && BATordered_rev(b)) {
                /* all values are equal */
                if (g == NULL) {
                        /* there's only a single group: 0 */
@@ -581,8 +581,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
        }
 
-       if (((b->tsorted || b->trevsorted) &&
-            (g == NULL || g->tsorted || g->trevsorted)) ||
+       if (((BATordered(b) || BATordered_rev(b)) &&
+            (g == NULL || BATordered(g) || BATordered_rev(g))) ||
            subsorted) {
                /* we only need to compare each entry with the previous */
                ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
@@ -634,7 +634,7 @@ BATgroup_internal(BAT **groups, BAT **ex
 
                gn->tsorted = 1;
                *groups = gn;
-       } else if (b->tsorted || b->trevsorted) {
+       } else if (BATordered(b) || BATordered_rev(b)) {
                BUN i, j;
                BUN *pgrp;
 
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -458,9 +458,33 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
 {
        BUN cnt;
 
+       r1->tkey = 1;
+       r1->T->nokey[0] = r1->T->nokey[1] = 0;
+       r1->tsorted = 1;
+       r1->T->nosorted = 0;
+       r1->tdense = 0;
+       r1->T->nodense = 0;
+       r1->T->nil = 0;
+       r1->T->nonil = 1;
+       if (r2) {
+               r2->tkey = 1;
+               r2->T->nokey[0] = r2->T->nokey[1] = 0;
+               r2->tsorted = 1;
+               r2->T->nosorted = 0;
+               r2->tdense = 0;
+               r2->T->nodense = 0;
+               r2->T->nil = 0;
+               r2->T->nonil = 1;
+       }
        if (lstart == lend || !(nil_on_miss | only_misses)) {
                virtualize(r1);
-               virtualize(r2);
+               r1->trevsorted = 1;
+               r1->T->norevsorted = 0;
+               if (r2) {
+                       virtualize(r2);
+                       r2->trevsorted = 1;
+                       r2->T->norevsorted = 0;
+               }
                return GDK_SUCCEED;
        }
        if (lcand) {
@@ -469,12 +493,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                        goto bailout;
                memcpy(Tloc(r1, BUNfirst(r1)), lcand, (lcandend - lcand) * 
sizeof(oid));
                BATsetcount(r1, cnt);
-               r1->tkey = 1;
-               r1->tsorted = 1;
-               r1->trevsorted = BATcount(r1) <= 1;
-               r1->tdense = 0;
-               r1->T->nil = 0;
-               r1->T->nonil = 1;
        } else {
                cnt = lend - lstart;
                HEAPfree(&r1->T->heap, 1);
@@ -482,12 +500,12 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                r1->tvarsized = 1;
                r1->T->width = 0;
                r1->T->shift = 0;
-               r1->tdense = 0;
                if (BATextend(r1, cnt) != GDK_SUCCEED)
                        goto bailout;
                BATsetcount(r1, cnt);
                BATseqbase(BATmirror(r1), lstart + l->hseqbase);
        }
+       r1->T->norevsorted = !(r1->trevsorted = BATcount(r1) <= 1);
        BATseqbase(r1, 0);
        if (r2) {
                HEAPfree(&r2->T->heap, 1);
@@ -495,7 +513,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                r2->tvarsized = 1;
                r2->T->width = 0;
                r2->T->shift = 0;
-               r2->tdense = 0;
                if (BATextend(r2, cnt) != GDK_SUCCEED)
                        goto bailout;
                BATsetcount(r2, cnt);
@@ -1109,10 +1126,12 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT 
                lordering = 1;
                rordering = r->tsorted ? 1 : -1;
                /* if l not sorted, we only know for sure that r2 is
-                * key if l is, and that r1 is key if r is */
+                * key if l is, and that r1 is key if r is; r1 is also
+                * key in the case of a semi-join or anti-semi-join
+                * (only_misses) */
                if (r2)
                        r2->tkey = l->tkey != 0;
-               r1->tkey = r->tkey != 0;
+               r1->tkey = (r->tkey != 0) | semi | only_misses;
        }
        /* determine opportunistic scan window for r; if l is not
         * sorted this is only used to find range of equal values */
@@ -3082,7 +3101,7 @@ subleftjoin(BAT **r1p, BAT **r2p, BAT *l
                /* use special implementation for dense right-hand side */
                return mergejoin_void(r1, r2, l, r, sl, sr,
                                      nil_on_miss, only_misses, t0);
-       } else if ((r->tsorted || r->trevsorted) &&
+       } else if ((BATordered(r) || BATordered_rev(r)) &&
                   (BATtdense(r) ||
                    lcount < 1024 ||
                    BATcount(r) * (Tsize(r) + (r->T->vheap ? r->T->vheap->size 
: 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 
1)))
@@ -3267,7 +3286,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
        } else if (BATtdense(l) && (sl == NULL || BATtdense(sl))) {
                /* use special implementation for dense right-hand side */
                return mergejoin_void(r2, r1, r, l, sr, sl, 0, 0, t0);
-       } else if ((l->tsorted || l->trevsorted) && (r->tsorted || 
r->trevsorted)) {
+       } else if ((BATordered(l) || BATordered_rev(l)) &&
+                  (BATordered(r) || BATordered_rev(r))) {
                /* both sorted, smallest on left */
                if (BATcount(l) <= BATcount(r))
                        return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 
0, 0, maxsize, t0, 0);
@@ -3285,14 +3305,14 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
                /* only right has hash, don't swap */
                swap = 0;
                reason = "right has hash";
-       } else if ((l->tsorted || l->trevsorted) &&
+       } else if ((BATordered(l) || BATordered_rev(l)) &&
                   (l->ttype == TYPE_void || rcount < 1024 || MIN(lsize, rsize) 
> mem_size)) {
                /* only left is sorted, swap; but only if right is
                 * "large" and the smaller of the two isn't too large
                 * (i.e. prefer hash over binary search, but only if
                 * the hash table doesn't cause thrashing) */
                return mergejoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0, 
maxsize, t0, 1);
-       } else if ((r->tsorted || r->trevsorted) &&
+       } else if ((BATordered(r) || BATordered_rev(r)) &&
                   (r->ttype == TYPE_void || lcount < 1024 || MIN(lsize, rsize) 
> mem_size)) {
                /* only right is sorted, don't swap; but only if left
                 * is "large" and the smaller of the two isn't too
@@ -3387,12 +3407,12 @@ BATrangejoin(BAT **r1p, BAT **r2p, BAT *
 
 #define project_loop(TYPE)                                             \
 static gdk_return                                                      \
-project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck, int sortcheck)   \
+project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck)                  \
 {                                                                      \
        oid lo, hi;                                                     \
        const TYPE *restrict rt;                                        \
        TYPE *restrict bt;                                              \
-       TYPE v, prev = 0;                                               \
+       TYPE v;                                                         \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to