Changeset: c5f3e77f9a41 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c5f3e77f9a41
Modified Files:
        .editorconfig
        cmake/monetdb-defines.cmake
        gdk/gdk_align.c
        gdk/gdk_batop.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_utils.c
        monetdb5/modules/atoms/blob.c
        monetdb_config.h.in
        sql/include/sql_catalog.h
        sql/server/rel_optimizer.c
        sql/server/sql_atom.c
        sql/server/sql_mvc.c
        sql/server/sql_mvc.h
        sql/storage/sql_storage.h
        sql/storage/store.c
Branch: default
Log Message:

Merge with oscar branch.


diffs (truncated from 511 to 300 lines):

diff --git a/.editorconfig b/.editorconfig
--- a/.editorconfig
+++ b/.editorconfig
@@ -19,4 +19,4 @@ charset = utf-8
 
 [gdk/*.{c,h}]
 tab_width = 8
-max_line_length = 80
+max_line_length = 72
diff --git a/cmake/monetdb-defines.cmake b/cmake/monetdb-defines.cmake
--- a/cmake/monetdb-defines.cmake
+++ b/cmake/monetdb-defines.cmake
@@ -290,6 +290,7 @@ macro(monetdb_configure_sizes)
   check_type_size(short SIZEOF_SHORT LANGUAGE C)
   check_type_size(int SIZEOF_INT LANGUAGE C)
   check_type_size(long SIZEOF_LONG LANGUAGE C)
+  check_type_size(double SIZEOF_DOUBLE LANGUAGE C)
   check_type_size(wchar_t SIZEOF_WCHAR_T LANGUAGE C)
   check_type_size(socklen_t HAVE_SOCKLEN_T LANGUAGE C)
   check_type_size(pid_t SIZEOF_PID_T LANGUAGE C)
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -303,7 +303,7 @@ VIEWreset(BAT *b)
                                goto bailout;
                        th->farmid = BBPselectfarm(b->batRole, b->ttype, varheap);
                        strconcat_len(th->filename, sizeof(th->filename),
-                                     nme, ".tail", NULL);
+                                     nme, ".theap", NULL);
                        if (ATOMheap(b->ttype, th, cnt) != GDK_SUCCEED)
                                goto bailout;
                }
@@ -366,7 +366,7 @@ VIEWreset(BAT *b)
                b->batCapacity = cnt;
 
                /* insert all of v in b, and quit */
-               if (BATappend(b, v, NULL, false) != GDK_SUCCEED)
+               if (BATappend2(b, v, NULL, false, false) != GDK_SUCCEED)
                        goto bailout;
                BBPreclaim(v);
        }
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -53,7 +53,7 @@ unshare_string_heap(BAT *b)
 #endif
 
 static gdk_return
-insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force)
+insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force, bool mayshare)
 {
        BATiter ni;             /* iterator */
        size_t toff = ~(size_t) 0;      /* tail offset */
@@ -100,7 +100,8 @@ insert_string_bat(BAT *b, BAT *n, struct
                         * wholesale copying of n's offset heap, but
                         * we may still be able to share the string
                         * heap */
-                       if (oldcnt == 0 &&
+                       if (mayshare &&
+                           oldcnt == 0 &&
                            b->tvheap != n->tvheap &&
                            ci->tpe == cand_dense) {
                                if (b->tvheap->parentid != bid) {
@@ -393,7 +394,7 @@ insert_string_bat(BAT *b, BAT *n, struct
 }
 
 static gdk_return
-append_varsized_bat(BAT *b, BAT *n, struct canditer *ci)
+append_varsized_bat(BAT *b, BAT *n, struct canditer *ci, bool mayshare)
 {
        BATiter ni;
        BUN cnt = ci->ncand, r;
@@ -406,7 +407,8 @@ append_varsized_bat(BAT *b, BAT *n, stru
        assert(b->twidth == SIZEOF_VAR_T);
        if (cnt == 0)
                return GDK_SUCCEED;
-       if (BATcount(b) == 0 &&
+       if (mayshare &&
+           BATcount(b) == 0 &&
            b->batRole == TRANSIENT &&
            n->batRestricted == BAT_READ &&
            b->tvheap != n->tvheap) {
@@ -489,7 +491,7 @@ append_varsized_bat(BAT *b, BAT *n, stru
  * list s) to BAT b.  If b is empty, b will get the seqbase of s if it
  * was passed in, and else the seqbase of n. */
 gdk_return
-BATappend(BAT *b, BAT *n, BAT *s, bool force)
+BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
 {
        struct canditer ci;
        BUN cnt;
@@ -675,11 +677,11 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                b->tnil |= n->tnil && cnt == BATcount(n);
        }
        if (b->ttype == TYPE_str) {
-               if (insert_string_bat(b, n, &ci, force) != GDK_SUCCEED) {
+               if (insert_string_bat(b, n, &ci, force, mayshare) != GDK_SUCCEED) {
                        return GDK_FAIL;
                }
        } else if (ATOMvarsized(b->ttype)) {
-               if (append_varsized_bat(b, n, &ci) != GDK_SUCCEED) {
+               if (append_varsized_bat(b, n, &ci, mayshare) != GDK_SUCCEED) {
                        return GDK_FAIL;
                }
        } else {
@@ -725,6 +727,12 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
 }
 
 gdk_return
+BATappend(BAT *b, BAT *n, BAT *s, bool force)
+{
+       return BATappend2(b, n, s, force, true);
+}
+
+gdk_return
 BATdel(BAT *b, BAT *d)
 {
        gdk_return (*unfix) (const void *) = BATatoms[b->ttype].atomUnfix;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3056,8 +3056,101 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, 
        return GDK_SUCCEED;
 }
 
+static BAT *
+bitmaskjoin(BAT *l, BAT *r,
+           struct canditer *restrict lci, struct canditer *restrict rci,
+           bool only_misses,
+           const char *reason, lng t0)
+{
+       BAT *r1;
+       const oid *rp = BATtdense(r) ? NULL : Tloc(r, 0);
+       size_t nmsk = (lci->ncand + 31) / 32;
+       uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t));
+       BUN cnt = 0;
 
-/* Make the implementation choices for various left joins. */
+       if (mask == NULL)
+               return NULL;
+
+       for (BUN n = 0; n < rci->ncand; n++) {
+               oid o = canditer_next(rci) - r->hseqbase;
+               if (rp) {
+                       o = rp[o];
+                       if (is_oid_nil(o))
+                               continue;
+               } else {
+                       o = o - r->hseqbase + r->tseqbase;
+               }
+               o += l->hseqbase;
+               if (o < lci->seq + l->tseqbase)
+                       continue;
+               o -= lci->seq + l->tseqbase;
+               if (o >= lci->ncand)
+                       continue;
+               if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) {
+                       cnt++;
+                       mask[o >> 5] |= 1U << (o & 0x1F);
+               }
+       }
+       if (only_misses)
+               cnt = lci->ncand - cnt;
+       if (cnt == 0 || cnt == lci->ncand) {
+               GDKfree(mask);
+               if (cnt == 0)
+                       return BATdense(0, 0, 0);
+               return BATdense(0, lci->seq, lci->ncand);
+       }
+       r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
+       if (r1 != NULL) {
+               oid *r1p = Tloc(r1, 0);
+
+               r1->tkey = true;
+               r1->tnil = false;
+               r1->tnonil = true;
+               r1->tsorted = true;
+               r1->trevsorted = cnt <= 1;
+               if (only_misses) {
+                       /* set the bits for unused values at the
+                        * end so that we don't need special
+                        * code in the loop */
+                       if (lci->ncand & 0x1F)
+                               mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F);
+                       for (size_t i = 0; i < nmsk; i++)
+                               if (mask[i] != ~0U)
+                                       for (uint32_t j = 0; j < 32; j++)
+                                               if ((mask[i] & (1U << j)) == 0)
+                                                       *r1p++ = i * 32 + j + lci->seq;
+               } else {
+                       for (size_t i = 0; i < nmsk; i++)
+                               if (mask[i] != 0U)
+                                       for (uint32_t j = 0; j < 32; j++)
+                                               if ((mask[i] & (1U << j)) != 0)
+                                                       *r1p++ = i * 32 + j + lci->seq;
+               }
+               BATsetcount(r1, cnt);
+               assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1));
+
+               TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
+                         "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
+                         "sr=" ALGOOPTBATFMT ",only_misses=%s; %s "
+                         "-> " ALGOBATFMT " (" LLFMT "usec)\n",
+                         ALGOBATPAR(l), ALGOBATPAR(r),
+                         ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
+                         only_misses ? "true" : "false",
+                         reason,
+                         ALGOBATPAR(r1),
+                         GDKusec() - t0);
+       }
+       GDKfree(mask);
+       return r1;
+}
+
+/* Make the implementation choices for various left joins.
+ * nil_matches: nil is an ordinary value that can match;
+ * nil_on_miss: outer join: fill in a nil value in case of no match;
+ * semi: semi join: return one of potentially more than one matches;
+ * only_misses: difference: list rows without match on the right;
+ * not_in: for implementing NOT IN: if nil on right then there are no matches;
+ * max_one: error if there is more than one match. */
 static gdk_return
 leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
@@ -3135,6 +3228,15 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                   && (BATordered(r) || BATordered_rev(r))) {
                assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
                return fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0);
+       } else if (BATtdense(l)
+                  && lci.tpe == cand_dense
+                  && r2p == NULL
+                  && (semi || only_misses)
+                  && !nil_on_miss
+                  && !not_in
+                  && !max_one) {
+               *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
+               return *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
        } else if ((BATordered(r) || BATordered_rev(r))
                   && (BATordered(l)
                       || BATordered_rev(l)
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -46,6 +46,8 @@ str ATOMunknown_name(int a)
        __attribute__((__visibility__("hidden")));
 void ATOMunknown_clean(void)
        __attribute__((__visibility__("hidden")));
+gdk_return BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
+       __attribute__((__visibility__("hidden")));
 bool BATcheckhash(BAT *b)
        __attribute__((__visibility__("hidden")));
 bool BATcheckimprints(BAT *b)
diff --git a/gdk/gdk_utils.c b/gdk/gdk_utils.c
--- a/gdk/gdk_utils.c
+++ b/gdk/gdk_utils.c
@@ -767,6 +767,8 @@ GDKinit(opt *set, int setlen, int embedd
        static_assert(sizeof(hge) == SIZEOF_HGE,
                      "error in configure: bad value for SIZEOF_HGE");
 #endif
+       static_assert(sizeof(dbl) == SIZEOF_DOUBLE,
+                     "error in configure: bad value for SIZEOF_DOUBLE");
        static_assert(sizeof(oid) == SIZEOF_OID,
                      "error in configure: bad value for SIZEOF_OID");
        static_assert(sizeof(void *) == SIZEOF_VOID_P,
diff --git a/monetdb5/modules/atoms/blob.c b/monetdb5/modules/atoms/blob.c
--- a/monetdb5/modules/atoms/blob.c
+++ b/monetdb5/modules/atoms/blob.c
@@ -248,7 +248,7 @@ BLOBtostr(str *tostr, size_t *l, const v
        if (is_blob_nil(p))
                expectedlen = external ? 4 : 2;
        else
-               expectedlen = 24 + (p->nitems * 3);
+               expectedlen = p->nitems * 2 + 1;
        if (*l < expectedlen || *tostr == NULL) {
                GDKfree(*tostr);
                *tostr = GDKmalloc(expectedlen);
diff --git a/monetdb_config.h.in b/monetdb_config.h.in
--- a/monetdb_config.h.in
+++ b/monetdb_config.h.in
@@ -280,6 +280,7 @@
 #cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@
 #cmakedefine SIZEOF_INT @SIZEOF_INT@
 #cmakedefine SIZEOF_LONG @SIZEOF_LONG@
+#cmakedefine SIZEOF_DOUBLE @SIZEOF_DOUBLE@
 #cmakedefine SIZEOF_WCHAR_T @SIZEOF_WCHAR_T@
 
 #cmakedefine LENP_OR_POINTER_T @LENP_OR_POINTER_T@
diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
--- a/sql/include/sql_catalog.h
+++ b/sql/include/sql_catalog.h
@@ -178,10 +178,13 @@ typedef enum comp_type {
 #define CMP_BETWEEN 16
 
 #define is_theta_exp(e) ((e) == cmp_gt || (e) == cmp_gte || (e) == cmp_lte ||\
-                        (e) == cmp_lt || (e) == cmp_equal || (e) == cmp_notequal)
+                                                (e) == cmp_lt || (e) == cmp_equal || (e) == cmp_notequal)
 
 #define is_complex_exp(et) ((et) == cmp_or || (et) == cmp_in || (et) == cmp_notin || (et) == cmp_filter)
 
+#define is_equality_or_inequality_exp(et) ((et) == cmp_equal || (et) == cmp_notequal || (et) == cmp_in || \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to