Changeset: 64a17db65c74 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=64a17db65c74
Added Files:
        sql/test/BugTracker-2016/Tests/All
        
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.sql
        
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.err
        
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.out
        
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.sql
        
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.err
        
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.out
Modified Files:
        gdk/gdk_join.c
        monetdb5/optimizer/Tests/All
        monetdb5/optimizer/Tests/projectionchain.malC
        monetdb5/optimizer/Tests/projectionchain.stable.err
        monetdb5/optimizer/Tests/projectionchain.stable.out
        testing/Mfilter.py
        testing/Mtest.py.in
Branch: pythonudf
Log Message:

Merge with default.


diffs (truncated from 1246 to 300 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -195,16 +195,14 @@ joininitresults(BAT **r1p, BAT **r2p, BU
 #define BINSEARCHFUNC(TYPE)                                            \
 static inline BUN                                                      \
 binsearch_##TYPE(const oid *rcand, oid offset, const TYPE *rvals,      \
-                BUN lo, BUN hi, const void *vp, int ordering, int last) \
+                BUN lo, BUN hi, TYPE v, int ordering, int last)        \
 {                                                                      \
        BUN mid;                                                        \
-       TYPE v, x;                                                      \
+       TYPE x;                                                         \
                                                                        \
        assert(ordering == 1 || ordering == -1);                        \
        assert(lo <= hi);                                               \
                                                                        \
-       v = *(const TYPE *) vp;         /* value we're searching for */ \
-                                                                       \
        if (ordering > 0) {                                             \
                if (rcand) {                                            \
                        if (last) {                                     \
@@ -355,10 +353,10 @@ BINSEARCHFUNC(hge)
 BINSEARCHFUNC(flt)
 BINSEARCHFUNC(dbl)
 #if SIZEOF_OID == SIZEOF_INT
-#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last) 
binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (const int *) (vp), 
ordering, last)
+#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last) 
binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (int) (v), ordering, 
last)
 #endif
 #if SIZEOF_OID == SIZEOF_LNG
-#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last) 
binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (const lng *) (vp), 
ordering, last)
+#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last) 
binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (lng) (v), ordering, 
last)
 #endif
 
 /* Do a binary search for the first/last occurrence of v between lo and hi
@@ -385,10 +383,10 @@ binsearch(const oid *rcand, oid offset,
        switch (type) {
        case TYPE_bte:
                return binsearch_bte(rcand, offset, (const bte *) rvals,
-                                    lo, hi, (const bte *) v, ordering, last);
+                                    lo, hi, *(const bte *) v, ordering, last);
        case TYPE_sht:
                return binsearch_sht(rcand, offset, (const sht *) rvals,
-                                    lo, hi, (const sht *) v, ordering, last);
+                                    lo, hi, *(const sht *) v, ordering, last);
        case TYPE_int:
 #if SIZEOF_WRD == SIZEOF_INT
        case TYPE_wrd:
@@ -397,7 +395,7 @@ binsearch(const oid *rcand, oid offset,
        case TYPE_oid:
 #endif
                return binsearch_int(rcand, offset, (const int *) rvals,
-                                    lo, hi, (const int *) v, ordering, last);
+                                    lo, hi, *(const int *) v, ordering, last);
        case TYPE_lng:
 #if SIZEOF_WRD == SIZEOF_LNG
        case TYPE_wrd:
@@ -406,18 +404,18 @@ binsearch(const oid *rcand, oid offset,
        case TYPE_oid:
 #endif
                return binsearch_lng(rcand, offset, (const lng *) rvals,
-                                    lo, hi, (const lng *) v, ordering, last);
+                                    lo, hi, *(const lng *) v, ordering, last);
 #ifdef HAVE_HGE
        case TYPE_hge:
                return binsearch_hge(rcand, offset, (const hge *) rvals,
-                                    lo, hi, (const hge *) v, ordering, last);
+                                    lo, hi, *(const hge *) v, ordering, last);
 #endif
        case TYPE_flt:
                return binsearch_flt(rcand, offset, (const flt *) rvals,
-                                    lo, hi, (const flt *) v, ordering, last);
+                                    lo, hi, *(const flt *) v, ordering, last);
        case TYPE_dbl:
                return binsearch_dbl(rcand, offset, (const dbl *) rvals,
-                                    lo, hi, (const dbl *) v, ordering, last);
+                                    lo, hi, *(const dbl *) v, ordering, last);
        }
 
        if ((c = ordering * cmp(VALUE(r, rcand ? rcand[lo] - offset : lo), v)) 
> 0 ||
@@ -746,7 +744,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                if (only_misses) {
                        for (i = 0; i < cnt && lvals[i] < lo; i++)
                                APPEND(r1, lvals[i]);
-                       i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, &hi, 1, 
0);
+                       i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, hi, 1, 0);
                        for (; i < cnt; i++)
                                APPEND(r1, lvals[i]);
                } else {
@@ -761,7 +759,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                                        r2->tkey = i > 1;
                                }
                        } else {
-                               i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, 
&lo, 1, 0);
+                               i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, 
lo, 1, 0);
                        }
                        for (; i < cnt && lvals[i] < hi; i++) {
                                APPEND(r1, lvals[i]);
@@ -812,11 +810,11 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                        /* first restrict candidate list (lcand and
                         * cnt) to section that refers to l */
                        o = l->hseqbase;
-                       i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0);
+                       i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
                        lcand += i;
                        cnt -= i;
                        o = l->hseqbase + BATcount(l);
-                       i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0);
+                       i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
                        cnt -= i;
 
                        if (BATextend(r1, cnt) != GDK_SUCCEED)
@@ -1003,6 +1001,608 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
  * r; otherwise all matches are returned.
  */
 static gdk_return
+mergejoin_int(BAT *r1, BAT *r2, BAT *l, BAT *r,
+             int nil_matches, BUN maxsize, lng t0, int swapped)
+{
+       BUN lstart, lend;
+       BUN rstart, rend;
+       BUN lscan, rscan;       /* opportunistic scan window */
+       const int *lvals, *rvals;
+       int v;
+       BUN nl, nr;
+       oid lv;
+       BUN i;
+
+       ALGODEBUG fprintf(stderr, "#mergejoin_int(l=%s#" BUNFMT "[%s]%s%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s%s)%s\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         l->tkey & 1 ? "-key" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         r->tkey & 1 ? "-key" : "",
+                         swapped ? " swapped" : "");
+
+       assert(BAThdense(l));
+       assert(BAThdense(r));
+       assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+       assert(r->tsorted || r->trevsorted);
+
+       lstart = rstart = 0;
+       lend = BATcount(l);
+       rend = BATcount(r);
+       lvals = (const int *) Tloc(l, BUNfirst(l));
+       rvals = (const int *) Tloc(r, BUNfirst(r));
+       assert(!r->tvarsized || !r->ttype);
+
+       /* basic properties will be adjusted if necessary later on,
+        * they were initially set by joininitresults() */
+
+       if (lend == 0 || rend == 0) {
+               /* there are no matches */
+               return nomatch(r1, r2, l, r, lstart, lend, NULL, NULL,
+                              0, 0, "mergejoin_int", t0);
+       }
+
+       /* determine opportunistic scan window for l and r */
+       for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
+               nl >>= 1;
+       for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
+               nr >>= 1;
+
+       if (!nil_matches) {
+               /* skip over nils at the start of the columns */
+               if (lscan < lend - lstart && lvals[lstart + lscan] == int_nil) {
+                       lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
+                                              lend - 1, int_nil, 1, 1);
+               } else {
+                       while (lvals[lstart] == int_nil)
+                               lstart++;
+               }
+               if (rscan < rend - rstart && rvals[rstart + rscan] == int_nil) {
+                       rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
+                                              rend - 1, int_nil, 1, 1);
+               } else {
+                       while (rvals[rstart] == int_nil)
+                               rstart++;
+               }
+       }
+       /* from here on we don't have to worry about nil values */
+
+       while (lstart < lend && rstart < rend) {
+               v = rvals[rstart];
+
+               if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
+                       lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
+                                              lend - 1, v, 1, 0);
+               } else {
+                       /* scan l for v */
+                       while (lstart < lend && lvals[lstart] < v)
+                               lstart++;
+               }
+               if (lstart >= lend) {
+                       /* nothing found */
+                       break;
+               }
+
+               /* Here we determine the next value in l that we are
+                * going to try to match in r.  We will also count the
+                * number of occurrences in l of that value.
+                * Afterwards, v points to the value and nl is the
+                * number of times it occurs.  Also, lstart will
+                * point to the next value to be considered (ready for
+                * the next iteration).
+                * If there are many equal values in l (more than
+                * lscan), we will use binary search to find the end
+                * of the sequence.  Obviously, we can do this only if
+                * l is actually sorted (lscan > 0). */
+               nl = 1;         /* we'll match (at least) one in l */
+               nr = 0;         /* maybe we won't match anything in r */
+               v = lvals[lstart];
+               if (l->tkey) {
+                       /* if l is key, there is a single value */
+                       lstart++;
+               } else if (lscan < lend - lstart &&
+                          v == lvals[lstart + lscan]) {
+                       /* lots of equal values: use binary search to
+                        * find end */
+                       nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
+                                          lend - 1, v, 1, 1);
+                       nl -= lstart;
+                       lstart += nl;
+               } else {
+                       /* just scan */
+                       while (++lstart < lend && v == lvals[lstart])
+                               nl++;
+               }
+               /* lstart points one beyond the value we're
+                * going to match: ready for the next iteration. */
+
+               /* First we find the first value in r that is at
+                * least as large as v, then we find the first
+                * value in r that is larger than v.  The difference
+                * is the number of values equal to v and is stored in
+                * nr.
+                * We will use binary search on r to find both ends of
+                * the sequence of values that are equal to v in case
+                * the position is "too far" (more than rscan
+                * away). */
+
+               /* first find the location of the first value in r
+                * that is >= v, then find the location of the first
+                * value in r that is > v; the difference is the
+                * number of values equal to v */
+
+               /* look ahead a little (rscan) in r to see whether
+                * we're better off doing a binary search */
+               if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
+                       /* value too far away in r: use binary
+                        * search */
+                       rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
+                                              rend - 1, v, 1, 0);
+               } else {
+                       /* scan r for v */
+                       while (rstart < rend && rvals[rstart] < v)
+                               rstart++;
+               }
+               if (rstart == rend) {
+                       /* nothing found */
+                       break;
+               }
+
+               /* now find the end of the sequence of equal values v */
+
+               /* if r is key, there is zero or one match, otherwise
+                * look ahead a little (rscan) in r to see whether
+                * we're better off doing a binary search */
+               if (r->tkey) {
+                       if (rstart < rend && v == rvals[rstart]) {
+                               nr = 1;
+                               rstart++;
+                       }
+               } else if (rscan < rend - rstart &&
+                          v == rvals[rstart + rscan]) {
+                       /* range too large: use binary search */
+                       nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
+                                          rend - 1, v, 1, 1);
+                       nr -= rstart;
+                       rstart += nr;
+               } else {
+                       /* scan r for end of range */
+                       while (rstart < rend && v == rvals[rstart]) {
+                               nr++;
+                               rstart++;
+                       }
+               }
+               /* rstart points to first value > v or end of
+                * r, and nr is the number of values in r that
+                * are equal to v */
+               if (nr == 0) {
+                       /* no entries in r found */
+                       continue;
+               }
+               /* make space: nl values in l match nr values in r, so
+                * we need to add nl * nr values in the results */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to