Changeset: e7f3248d0c6b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e7f3248d0c6b
Modified Files:
        gdk/gdk_batop.c
        sql/backends/monet5/UDF/pyapi/connection.c
        sql/backends/monet5/UDF/pyapi/emit.c
        sql/backends/monet5/UDF/pyapi/formatinput.c
        sql/backends/monet5/UDF/pyapi/pyloader.c
        sql/backends/monet5/UDF/pyapi/pytypes.c
        sql/backends/monet5/UDF/pyapi/type_conversion.c
Branch: default
Log Message:

Merge with Dec2016 branch.


diffs (219 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -58,10 +58,8 @@ static gdk_return
 insert_string_bat(BAT *b, BAT *n, int force)
 {
        BATiter ni;             /* iterator */
-       int tt;                 /* tail type */
        size_t toff = ~(size_t) 0;      /* tail offset */
        BUN p, q;               /* loop variables */
-       oid o = 0;              /* in case we're appending */
        const void *tp;         /* tail value pointer */
        unsigned char tbv;      /* tail value-as-bte */
        unsigned short tsv;     /* tail value-as-sht */
@@ -71,18 +69,20 @@ insert_string_bat(BAT *b, BAT *n, int fo
        var_t v;                /* value */
        size_t off;             /* offset within n's string heap */
 
+       assert(b->ttype == TYPE_str);
+       /* only transient bats can use some other bat's string heap */
+       assert(b->batRole == TRANSIENT ||
+              b->tvheap->parentid == abs(b->batCacheid));
        if (n->batCount == 0)
                return GDK_SUCCEED;
        ni = bat_iterator(n);
        tp = NULL;
-       tt = b->ttype;
-       if (tt == TYPE_str &&
-           (!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) &&
+       if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) &&
            !GDK_ELIMDOUBLES(n->tvheap) &&
            b->tvheap->hashash == n->tvheap->hashash &&
            /* if needs to be kept unique, take slow path */
            (b->tkey & BOUND2BTRUE) == 0) {
-               if (b->batRole == TRANSIENT) {
+               if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) {
                        /* If b is in the transient farm (i.e. b will
                         * never become persistent), we try some
                         * clever tricks to avoid copying:
@@ -90,14 +90,16 @@ insert_string_bat(BAT *b, BAT *n, int fo
                          *   string heap with n;
                         * - otherwise, if b's string heap and n's
                          *   string heap are the same (i.e. shared),
-                         *   we leave it that way;
+                         *   we leave it that way (this includes the
+                         *   case that b is persistent and n shares
+                         *   its string heap with b);
                         * - otherwise, if b shares its string heap
                          *   with some other bat, we materialize it
                          *   and we will have to copy strings.
                         */
                        bat bid = b->batCacheid;
 
-                       if (b->batCount == 0) {
+                       if (b->batCount == 0 && b->tvheap != n->tvheap) {
                                if (b->tvheap->parentid != bid) {
                                        BBPunshare(b->tvheap->parentid);
                                } else {
@@ -151,8 +153,8 @@ insert_string_bat(BAT *b, BAT *n, int fo
                                /* make sure we get alignment right */
                                toff = (toff + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
                                assert(((toff >> GDK_VARSHIFT) << GDK_VARSHIFT) == toff);
-                               /* if in "force" mode, the heap may be shared when
-                                * memory mapped */
+                               /* if in "force" mode, the heap may be
+                                * shared when memory mapped */
                                if (HEAPextend(b->tvheap, toff + n->tvheap->size, force) != GDK_SUCCEED) {
                                        toff = ~(size_t) 0;
                                        goto bunins_failed;
@@ -182,25 +184,25 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        }
                        switch (b->twidth) {
                        case 1:
-                               tt = TYPE_bte;
+                               b->ttype = TYPE_bte;
                                tp = &tbv;
                                break;
                        case 2:
-                               tt = TYPE_sht;
+                               b->ttype = TYPE_sht;
                                tp = &tsv;
                                break;
 #if SIZEOF_VAR_T == 8
                        case 4:
-                               tt = TYPE_int;
+                               b->ttype = TYPE_int;
                                tp = &tiv;
                                break;
                        case 8:
-                               tt = TYPE_lng;
+                               b->ttype = TYPE_lng;
                                tp = &v;
                                break;
 #else
                        case 4:
-                               tt = TYPE_int;
+                               b->ttype = TYPE_int;
                                tp = &v;
                                break;
 #endif
@@ -208,13 +210,11 @@ insert_string_bat(BAT *b, BAT *n, int fo
                                assert(0);
                        }
                        b->tvarsized = 0;
-                       b->ttype = tt;
                }
        }
        if (toff == 0 && n->twidth == b->twidth) {
                /* we don't need to do any translation of offset
-                * values, nor do we need to do any calculations for
-                * the head column, so we can use fast memcpy */
+                * values, so we can use fast memcpy */
                memcpy(Tloc(b, BUNlast(b)), Tloc(n, 0),
                       BATcount(n) * n->twidth);
                BATsetcount(b, BATcount(b) + BATcount(n));
@@ -275,7 +275,26 @@ insert_string_bat(BAT *b, BAT *n, int fo
                                break;
                        }
                        bunfastapp(b, tp);
-                       o++;
+               }
+       } else if (b->tkey & BOUND2BTRUE) {
+               BUN i = BUNlast(b);
+               /* if no duplicate values allowed, insert one-by-one */
+               BATloop(n, p, q) {
+                       tp = BUNtvar(ni, p);
+                       if (BUNfnd(b, tp) == BUN_NONE) {
+                               bunfastapp(b, tp);
+                               if (b->thash) {
+                                       HASHins(b, i, tp);
+                               }
+                               i++;
+                       }
+               }
+       } else if (b->tvheap->free < n->tvheap->free / 2) {
+               /* if b's string heap is much smaller than n's string
+                * heap, don't bother checking whether n's string
+                * values occur in b's string heap */
+               BATloop(n, p, q) {
+                       bunfastapp(b, BUNtvar(ni, p));
                }
        } else {
                /* Insert values from n individually into b; however,
@@ -332,19 +351,14 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        } else {
                                bunfastapp(b, tp);
                        }
-                       o++;
                }
        }
-       if (toff != ~(size_t) 0) {
-               b->tvarsized = 1;
-               b->ttype = TYPE_str;
-       }
+       b->tvarsized = 1;
+       b->ttype = TYPE_str;
        return GDK_SUCCEED;
       bunins_failed:
-       if (toff != ~(size_t) 0) {
-               b->tvarsized = 1;
-               b->ttype = TYPE_str;
-       }
+       b->tvarsized = 1;
+       b->ttype = TYPE_str;
        return GDK_FAIL;
 }
 
@@ -464,10 +478,7 @@ BATappend(BAT *b, BAT *n, bit force)
                                b->tnodense = r;
                        }
                }
-               if (b->ttype == TYPE_str &&
-                   (b->batCount == 0 || !GDK_ELIMDOUBLES(b->tvheap)) &&
-                   !GDK_ELIMDOUBLES(n->tvheap) &&
-                   b->tvheap->hashash == n->tvheap->hashash) {
+               if (b->ttype == TYPE_str) {
                        if (insert_string_bat(b, n, force) != GDK_SUCCEED)
                                return GDK_FAIL;
                } else {
diff --git a/sql/backends/monet5/UDF/pyapi/connection.c b/sql/backends/monet5/UDF/pyapi/connection.c
--- a/sql/backends/monet5/UDF/pyapi/connection.c
+++ b/sql/backends/monet5/UDF/pyapi/connection.c
@@ -166,7 +166,7 @@ PyTypeObject Py_ConnectionType = {
 #ifdef IS_PY3K
     }
 #endif
-    ,"monetdb._connection",
+    , "monetdb._connection",
     sizeof(Py_ConnectionObject),
     0,
     0,                                          /* tp_dealloc */
diff --git a/sql/backends/monet5/UDF/pyapi/emit.c b/sql/backends/monet5/UDF/pyapi/emit.c
--- a/sql/backends/monet5/UDF/pyapi/emit.c
+++ b/sql/backends/monet5/UDF/pyapi/emit.c
@@ -313,7 +313,7 @@ PyTypeObject PyEmitType = {
 #ifdef IS_PY3K
     }
 #endif
-    ,"monetdb._emit",
+    , "monetdb._emit",
     sizeof(PyEmitObject),
     0,
     0,                                          /* tp_dealloc */
diff --git a/sql/backends/monet5/UDF/pyapi/type_conversion.c b/sql/backends/monet5/UDF/pyapi/type_conversion.c
--- a/sql/backends/monet5/UDF/pyapi/type_conversion.c
+++ b/sql/backends/monet5/UDF/pyapi/type_conversion.c
@@ -11,6 +11,12 @@
 
 #include <longintrepr.h>
 
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyInt_Check PyLong_Check
+#define PyString_CheckExact PyUnicode_CheckExact
+#endif
+
 bool string_copy(char * source, char* dest, size_t max_size, bool allow_unicode)
 {
     size_t i;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to