Changeset: 64a17db65c74 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=64a17db65c74 Added Files: sql/test/BugTracker-2016/Tests/All sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.sql sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.err sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.out sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.sql sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.err sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.out Modified Files: gdk/gdk_join.c monetdb5/optimizer/Tests/All monetdb5/optimizer/Tests/projectionchain.malC monetdb5/optimizer/Tests/projectionchain.stable.err monetdb5/optimizer/Tests/projectionchain.stable.out testing/Mfilter.py testing/Mtest.py.in Branch: pythonudf Log Message:
Merge with default. diffs (truncated from 1246 to 300 lines): diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -195,16 +195,14 @@ joininitresults(BAT **r1p, BAT **r2p, BU #define BINSEARCHFUNC(TYPE) \ static inline BUN \ binsearch_##TYPE(const oid *rcand, oid offset, const TYPE *rvals, \ - BUN lo, BUN hi, const void *vp, int ordering, int last) \ + BUN lo, BUN hi, TYPE v, int ordering, int last) \ { \ BUN mid; \ - TYPE v, x; \ + TYPE x; \ \ assert(ordering == 1 || ordering == -1); \ assert(lo <= hi); \ \ - v = *(const TYPE *) vp; /* value we're searching for */ \ - \ if (ordering > 0) { \ if (rcand) { \ if (last) { \ @@ -355,10 +353,10 @@ BINSEARCHFUNC(hge) BINSEARCHFUNC(flt) BINSEARCHFUNC(dbl) #if SIZEOF_OID == SIZEOF_INT -#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last) binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (const int *) (vp), ordering, last) +#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last) binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (int) (v), ordering, last) #endif #if SIZEOF_OID == SIZEOF_LNG -#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last) binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (const lng *) (vp), ordering, last) +#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last) binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (lng) (v), ordering, last) #endif /* Do a binary search for the first/last occurrence of v between lo and hi @@ -385,10 +383,10 @@ binsearch(const oid *rcand, oid offset, switch (type) { case TYPE_bte: return binsearch_bte(rcand, offset, (const bte *) rvals, - lo, hi, (const bte *) v, ordering, last); + lo, hi, *(const bte *) v, ordering, last); case TYPE_sht: return binsearch_sht(rcand, offset, (const sht *) rvals, - lo, hi, (const sht *) v, ordering, last); + lo, hi, *(const sht *) v, ordering, last); case TYPE_int: #if SIZEOF_WRD == SIZEOF_INT case TYPE_wrd: @@ -397,7 +395,7 @@ binsearch(const oid *rcand, oid offset, case TYPE_oid: #endif return binsearch_int(rcand, offset, (const int *) rvals, - lo, hi, (const int *) v, ordering, last); + lo, hi, *(const int *) v, ordering, last); case TYPE_lng: #if SIZEOF_WRD == SIZEOF_LNG case TYPE_wrd: @@ -406,18 +404,18 @@ binsearch(const oid *rcand, oid offset, case TYPE_oid: #endif return binsearch_lng(rcand, offset, (const lng *) rvals, - lo, hi, (const lng *) v, ordering, last); + lo, hi, *(const lng *) v, ordering, last); #ifdef HAVE_HGE case TYPE_hge: return binsearch_hge(rcand, offset, (const hge *) rvals, - lo, hi, (const hge *) v, ordering, last); + lo, hi, *(const hge *) v, ordering, last); #endif case TYPE_flt: return binsearch_flt(rcand, offset, (const flt *) rvals, - lo, hi, (const flt *) v, ordering, last); + lo, hi, *(const flt *) v, ordering, last); case TYPE_dbl: return binsearch_dbl(rcand, offset, (const dbl *) rvals, - lo, hi, (const dbl *) v, ordering, last); + lo, hi, *(const dbl *) v, ordering, last); } if ((c = ordering * cmp(VALUE(r, rcand ? rcand[lo] - offset : lo), v)) > 0 || @@ -746,7 +744,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l, if (only_misses) { for (i = 0; i < cnt && lvals[i] < lo; i++) APPEND(r1, lvals[i]); - i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, &hi, 1, 0); + i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, hi, 1, 0); for (; i < cnt; i++) APPEND(r1, lvals[i]); } else { @@ -761,7 +759,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l, r2->tkey = i > 1; } } else { - i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, &lo, 1, 0); + i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, lo, 1, 0); } for (; i < cnt && lvals[i] < hi; i++) { APPEND(r1, lvals[i]); @@ -812,11 +810,11 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l, /* first restrict candidate list (lcand and * cnt) to section that refers to l */ o = l->hseqbase; - i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0); + i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0); lcand += i; cnt -= i; o = l->hseqbase + BATcount(l); - i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0); + i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0); cnt -= i; if (BATextend(r1, cnt) != GDK_SUCCEED) @@ -1003,6 +1001,608 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l, * r; otherwise all matches are returned. */ static gdk_return +mergejoin_int(BAT *r1, BAT *r2, BAT *l, BAT *r, + int nil_matches, BUN maxsize, lng t0, int swapped) +{ + BUN lstart, lend; + BUN rstart, rend; + BUN lscan, rscan; /* opportunistic scan window */ + const int *lvals, *rvals; + int v; + BUN nl, nr; + oid lv; + BUN i; + + ALGODEBUG fprintf(stderr, "#mergejoin_int(l=%s#" BUNFMT "[%s]%s%s%s," + "r=%s#" BUNFMT "[%s]%s%s%s)%s\n", + BATgetId(l), BATcount(l), ATOMname(l->ttype), + l->tsorted ? "-sorted" : "", + l->trevsorted ? "-revsorted" : "", + l->tkey & 1 ? "-key" : "", + BATgetId(r), BATcount(r), ATOMname(r->ttype), + r->tsorted ? "-sorted" : "", + r->trevsorted ? "-revsorted" : "", + r->tkey & 1 ? "-key" : "", + swapped ? " swapped" : ""); + + assert(BAThdense(l)); + assert(BAThdense(r)); + assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); + assert(r->tsorted || r->trevsorted); + + lstart = rstart = 0; + lend = BATcount(l); + rend = BATcount(r); + lvals = (const int *) Tloc(l, BUNfirst(l)); + rvals = (const int *) Tloc(r, BUNfirst(r)); + assert(!r->tvarsized || !r->ttype); + + /* basic properties will be adjusted if necessary later on, + * they were initially set by joininitresults() */ + + if (lend == 0 || rend == 0) { + /* there are no matches */ + return nomatch(r1, r2, l, r, lstart, lend, NULL, NULL, + 0, 0, "mergejoin_int", t0); + } + + /* determine opportunistic scan window for l and r */ + for (nl = lend - lstart, lscan = 4; nl > 0; lscan++) + nl >>= 1; + for (nr = rend - rstart, rscan = 4; nr > 0; rscan++) + nr >>= 1; + + if (!nil_matches) { + /* skip over nils at the start of the columns */ + if (lscan < lend - lstart && lvals[lstart + lscan] == int_nil) { + lstart = binsearch_int(NULL, 0, lvals, lstart + lscan, + lend - 1, int_nil, 1, 1); + } else { + while (lvals[lstart] == int_nil) + lstart++; + } + if (rscan < rend - rstart && rvals[rstart + rscan] == int_nil) { + rstart = binsearch_int(NULL, 0, rvals, rstart + rscan, + rend - 1, int_nil, 1, 1); + } else { + while (rvals[rstart] == int_nil) + rstart++; + } + } + /* from here on we don't have to worry about nil values */ + + while (lstart < lend && rstart < rend) { + v = rvals[rstart]; + + if (lscan < lend - lstart && lvals[lstart + lscan] < v) { + lstart = binsearch_int(NULL, 0, lvals, lstart + lscan, + lend - 1, v, 1, 0); + } else { + /* scan l for v */ + while (lstart < lend && lvals[lstart] < v) + lstart++; + } + if (lstart >= lend) { + /* nothing found */ + break; + } + + /* Here we determine the next value in l that we are + * going to try to match in r. We will also count the + * number of occurrences in l of that value. + * Afterwards, v points to the value and nl is the + * number of times it occurs. Also, lstart will + * point to the next value to be considered (ready for + * the next iteration). + * If there are many equal values in l (more than + * lscan), we will use binary search to find the end + * of the sequence. Obviously, we can do this only if + * l is actually sorted (lscan > 0). */ + nl = 1; /* we'll match (at least) one in l */ + nr = 0; /* maybe we won't match anything in r */ + v = lvals[lstart]; + if (l->tkey) { + /* if l is key, there is a single value */ + lstart++; + } else if (lscan < lend - lstart && + v == lvals[lstart + lscan]) { + /* lots of equal values: use binary search to + * find end */ + nl = binsearch_int(NULL, 0, lvals, lstart + lscan, + lend - 1, v, 1, 1); + nl -= lstart; + lstart += nl; + } else { + /* just scan */ + while (++lstart < lend && v == lvals[lstart]) + nl++; + } + /* lstart points one beyond the value we're + * going to match: ready for the next iteration. */ + + /* First we find the first value in r that is at + * least as large as v, then we find the first + * value in r that is larger than v. The difference + * is the number of values equal to v and is stored in + * nr. + * We will use binary search on r to find both ends of + * the sequence of values that are equal to v in case + * the position is "too far" (more than rscan + * away). */ + + /* first find the location of the first value in r + * that is >= v, then find the location of the first + * value in r that is > v; the difference is the + * number of values equal to v */ + + /* look ahead a little (rscan) in r to see whether + * we're better off doing a binary search */ + if (rscan < rend - rstart && rvals[rstart + rscan] < v) { + /* value too far away in r: use binary + * search */ + rstart = binsearch_int(NULL, 0, rvals, rstart + rscan, + rend - 1, v, 1, 0); + } else { + /* scan r for v */ + while (rstart < rend && rvals[rstart] < v) + rstart++; + } + if (rstart == rend) { + /* nothing found */ + break; + } + + /* now find the end of the sequence of equal values v */ + + /* if r is key, there is zero or one match, otherwise + * look ahead a little (rscan) in r to see whether + * we're better off doing a binary search */ + if (r->tkey) { + if (rstart < rend && v == rvals[rstart]) { + nr = 1; + rstart++; + } + } else if (rscan < rend - rstart && + v == rvals[rstart + rscan]) { + /* range too large: use binary search */ + nr = binsearch_int(NULL, 0, rvals, rstart + rscan, + rend - 1, v, 1, 1); + nr -= rstart; + rstart += nr; + } else { + /* scan r for end of range */ + while (rstart < rend && v == rvals[rstart]) { + nr++; + rstart++; + } + } + /* rstart points to first value > v or end of + * r, and nr is the number of values in r that + * are equal to v */ + if (nr == 0) { + /* no entries in r found */ + continue; + } + /* make space: nl values in l match nr values in r, so + * we need to add nl * nr values in the results */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list