Changeset: c5f3e77f9a41 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c5f3e77f9a41 Modified Files: .editorconfig cmake/monetdb-defines.cmake gdk/gdk_align.c gdk/gdk_batop.c gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_utils.c monetdb5/modules/atoms/blob.c monetdb_config.h.in sql/include/sql_catalog.h sql/server/rel_optimizer.c sql/server/sql_atom.c sql/server/sql_mvc.c sql/server/sql_mvc.h sql/storage/sql_storage.h sql/storage/store.c Branch: default Log Message:
Merge with oscar branch. diffs (truncated from 511 to 300 lines): diff --git a/.editorconfig b/.editorconfig --- a/.editorconfig +++ b/.editorconfig @@ -19,4 +19,4 @@ charset = utf-8 [gdk/*.{c,h}] tab_width = 8 -max_line_length = 80 +max_line_length = 72 diff --git a/cmake/monetdb-defines.cmake b/cmake/monetdb-defines.cmake --- a/cmake/monetdb-defines.cmake +++ b/cmake/monetdb-defines.cmake @@ -290,6 +290,7 @@ macro(monetdb_configure_sizes) check_type_size(short SIZEOF_SHORT LANGUAGE C) check_type_size(int SIZEOF_INT LANGUAGE C) check_type_size(long SIZEOF_LONG LANGUAGE C) + check_type_size(double SIZEOF_DOUBLE LANGUAGE C) check_type_size(wchar_t SIZEOF_WCHAR_T LANGUAGE C) check_type_size(socklen_t HAVE_SOCKLEN_T LANGUAGE C) check_type_size(pid_t SIZEOF_PID_T LANGUAGE C) diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c --- a/gdk/gdk_align.c +++ b/gdk/gdk_align.c @@ -303,7 +303,7 @@ VIEWreset(BAT *b) goto bailout; th->farmid = BBPselectfarm(b->batRole, b->ttype, varheap); strconcat_len(th->filename, sizeof(th->filename), - nme, ".tail", NULL); + nme, ".theap", NULL); if (ATOMheap(b->ttype, th, cnt) != GDK_SUCCEED) goto bailout; } @@ -366,7 +366,7 @@ VIEWreset(BAT *b) b->batCapacity = cnt; /* insert all of v in b, and quit */ - if (BATappend(b, v, NULL, false) != GDK_SUCCEED) + if (BATappend2(b, v, NULL, false, false) != GDK_SUCCEED) goto bailout; BBPreclaim(v); } diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -53,7 +53,7 @@ unshare_string_heap(BAT *b) #endif static gdk_return -insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force) +insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force, bool mayshare) { BATiter ni; /* iterator */ size_t toff = ~(size_t) 0; /* tail offset */ @@ -100,7 +100,8 @@ insert_string_bat(BAT *b, BAT *n, struct * wholesale copying of n's offset heap, but * we may still be able to share the string * heap */ - if (oldcnt == 0 && + if (mayshare && + oldcnt == 0 && b->tvheap != n->tvheap && ci->tpe == cand_dense) { if (b->tvheap->parentid != bid) { @@ -393,7 +394,7 @@ insert_string_bat(BAT *b, BAT *n, struct } static gdk_return -append_varsized_bat(BAT *b, BAT *n, struct canditer *ci) +append_varsized_bat(BAT *b, BAT *n, struct canditer *ci, bool mayshare) { BATiter ni; BUN cnt = ci->ncand, r; @@ -406,7 +407,8 @@ append_varsized_bat(BAT *b, BAT *n, stru assert(b->twidth == SIZEOF_VAR_T); if (cnt == 0) return GDK_SUCCEED; - if (BATcount(b) == 0 && + if (mayshare && + BATcount(b) == 0 && b->batRole == TRANSIENT && n->batRestricted == BAT_READ && b->tvheap != n->tvheap) { @@ -489,7 +491,7 @@ append_varsized_bat(BAT *b, BAT *n, stru * list s) to BAT b. If b is empty, b will get the seqbase of s if it * was passed in, and else the seqbase of n. */ gdk_return -BATappend(BAT *b, BAT *n, BAT *s, bool force) +BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare) { struct canditer ci; BUN cnt; @@ -675,11 +677,11 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f b->tnil |= n->tnil && cnt == BATcount(n); } if (b->ttype == TYPE_str) { - if (insert_string_bat(b, n, &ci, force) != GDK_SUCCEED) { + if (insert_string_bat(b, n, &ci, force, mayshare) != GDK_SUCCEED) { return GDK_FAIL; } } else if (ATOMvarsized(b->ttype)) { - if (append_varsized_bat(b, n, &ci) != GDK_SUCCEED) { + if (append_varsized_bat(b, n, &ci, mayshare) != GDK_SUCCEED) { return GDK_FAIL; } } else { @@ -725,6 +727,12 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f } gdk_return +BATappend(BAT *b, BAT *n, BAT *s, bool force) +{ + return BATappend2(b, n, s, force, true); +} + +gdk_return BATdel(BAT *b, BAT *d) { gdk_return (*unfix) (const void *) = BATatoms[b->ttype].atomUnfix; diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3056,8 +3056,101 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, return GDK_SUCCEED; } +static BAT * +bitmaskjoin(BAT *l, BAT *r, + struct canditer *restrict lci, struct canditer *restrict rci, + bool only_misses, + const char *reason, lng t0) +{ + BAT *r1; + const oid *rp = BATtdense(r) ? NULL : Tloc(r, 0); + size_t nmsk = (lci->ncand + 31) / 32; + uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t)); + BUN cnt = 0; -/* Make the implementation choices for various left joins. */ + if (mask == NULL) + return NULL; + + for (BUN n = 0; n < rci->ncand; n++) { + oid o = canditer_next(rci) - r->hseqbase; + if (rp) { + o = rp[o]; + if (is_oid_nil(o)) + continue; + } else { + o = o - r->hseqbase + r->tseqbase; + } + o += l->hseqbase; + if (o < lci->seq + l->tseqbase) + continue; + o -= lci->seq + l->tseqbase; + if (o >= lci->ncand) + continue; + if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) { + cnt++; + mask[o >> 5] |= 1U << (o & 0x1F); + } + } + if (only_misses) + cnt = lci->ncand - cnt; + if (cnt == 0 || cnt == lci->ncand) { + GDKfree(mask); + if (cnt == 0) + return BATdense(0, 0, 0); + return BATdense(0, lci->seq, lci->ncand); + } + r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT); + if (r1 != NULL) { + oid *r1p = Tloc(r1, 0); + + r1->tkey = true; + r1->tnil = false; + r1->tnonil = true; + r1->tsorted = true; + r1->trevsorted = cnt <= 1; + if (only_misses) { + /* set the bits for unused values at the + * end so that we don't need special + * code in the loop */ + if (lci->ncand & 0x1F) + mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F); + for (size_t i = 0; i < nmsk; i++) + if (mask[i] != ~0U) + for (uint32_t j = 0; j < 32; j++) + if ((mask[i] & (1U << j)) == 0) + *r1p++ = i * 32 + j + lci->seq; + } else { + for (size_t i = 0; i < nmsk; i++) + if (mask[i] != 0U) + for (uint32_t j = 0; j < 32; j++) + if ((mask[i] & (1U << j)) != 0) + *r1p++ = i * 32 + j + lci->seq; + } + BATsetcount(r1, cnt); + assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1)); + + TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," + "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," + "sr=" ALGOOPTBATFMT ",only_misses=%s; %s " + "-> " ALGOBATFMT " (" LLFMT "usec)\n", + ALGOBATPAR(l), ALGOBATPAR(r), + ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s), + only_misses ? "true" : "false", + reason, + ALGOBATPAR(r1), + GDKusec() - t0); + } + GDKfree(mask); + return r1; +} + +/* Make the implementation choices for various left joins. + * nil_matches: nil is an ordinary value that can match; + * nil_on_miss: outer join: fill in a nil value in case of no match; + * semi: semi join: return one of potentially more than one matches; + * only_misses: difference: list rows without match on the right; + * not_in: for implementing NOT IN: if nil on right then there are no matches; + * max_one: error if there is more than one match. */ static gdk_return leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, @@ -3135,6 +3228,15 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B && (BATordered(r) || BATordered_rev(r))) { assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */ return fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0); + } else if (BATtdense(l) + && lci.tpe == cand_dense + && r2p == NULL + && (semi || only_misses) + && !nil_on_miss + && !not_in + && !max_one) { + *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0); + return *r1p == NULL ? GDK_FAIL : GDK_SUCCEED; } else if ((BATordered(r) || BATordered_rev(r)) && (BATordered(l) || BATordered_rev(l) diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -46,6 +46,8 @@ str ATOMunknown_name(int a) __attribute__((__visibility__("hidden"))); void ATOMunknown_clean(void) __attribute__((__visibility__("hidden"))); +gdk_return BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare) + __attribute__((__visibility__("hidden"))); bool BATcheckhash(BAT *b) __attribute__((__visibility__("hidden"))); bool BATcheckimprints(BAT *b) diff --git a/gdk/gdk_utils.c b/gdk/gdk_utils.c --- a/gdk/gdk_utils.c +++ b/gdk/gdk_utils.c @@ -767,6 +767,8 @@ GDKinit(opt *set, int setlen, int embedd static_assert(sizeof(hge) == SIZEOF_HGE, "error in configure: bad value for SIZEOF_HGE"); #endif + static_assert(sizeof(dbl) == SIZEOF_DOUBLE, + "error in configure: bad value for SIZEOF_DOUBLE"); static_assert(sizeof(oid) == SIZEOF_OID, "error in configure: bad value for SIZEOF_OID"); static_assert(sizeof(void *) == SIZEOF_VOID_P, diff --git a/monetdb5/modules/atoms/blob.c b/monetdb5/modules/atoms/blob.c --- a/monetdb5/modules/atoms/blob.c +++ b/monetdb5/modules/atoms/blob.c @@ -248,7 +248,7 @@ BLOBtostr(str *tostr, size_t *l, const v if (is_blob_nil(p)) expectedlen = external ? 4 : 2; else - expectedlen = 24 + (p->nitems * 3); + expectedlen = p->nitems * 2 + 1; if (*l < expectedlen || *tostr == NULL) { GDKfree(*tostr); *tostr = GDKmalloc(expectedlen); diff --git a/monetdb_config.h.in b/monetdb_config.h.in --- a/monetdb_config.h.in +++ b/monetdb_config.h.in @@ -280,6 +280,7 @@ #cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@ #cmakedefine SIZEOF_INT @SIZEOF_INT@ #cmakedefine SIZEOF_LONG @SIZEOF_LONG@ +#cmakedefine SIZEOF_DOUBLE @SIZEOF_DOUBLE@ #cmakedefine SIZEOF_WCHAR_T @SIZEOF_WCHAR_T@ #cmakedefine LENP_OR_POINTER_T @LENP_OR_POINTER_T@ diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h --- a/sql/include/sql_catalog.h +++ b/sql/include/sql_catalog.h @@ -178,10 +178,13 @@ typedef enum comp_type { #define CMP_BETWEEN 16 #define is_theta_exp(e) ((e) == cmp_gt || (e) == cmp_gte || (e) == cmp_lte ||\ - (e) == cmp_lt || (e) == cmp_equal || (e) == cmp_notequal) + (e) == cmp_lt || (e) == cmp_equal || (e) == cmp_notequal) #define is_complex_exp(et) ((et) == cmp_or || (et) == cmp_in || (et) == cmp_notin || (et) == cmp_filter) +#define is_equality_or_inequality_exp(et) ((et) == cmp_equal || (et) == cmp_notequal || (et) == cmp_in || \ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list