Changeset: 6230882d2425 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6230882d2425
Modified Files:
        gdk/gdk_batop.c
Branch: Jun2016
Log Message:

Always call insert_string_bat to append string bats.
In the function, deal with all variations of sharing or not sharing
string heaps, and also with special cases as BOUND2BTRUE.
This hopefully fixes bug 6118.


diffs (86 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -82,16 +82,20 @@ insert_string_bat(BAT *b, BAT *n, int fo
 
        assert(b->htype == TYPE_void);
        assert(b->ttype == TYPE_str);
+       /* only transient bats can use some other bat's string heap */
+       assert(b->batRole == TRANSIENT ||
+              b->T->vheap->parentid == abs(b->batCacheid));
        if (n->batCount == 0)
                return GDK_SUCCEED;
        ni = bat_iterator(n);
        tp = NULL;
-       if ((!GDK_ELIMDOUBLES(b->T->vheap) || b->batCount == 0) &&
-           !GDK_ELIMDOUBLES(n->T->vheap) &&
-           b->T->vheap->hashash == n->T->vheap->hashash &&
+       if ((b->T->vheap == n->T->vheap ||
+            ((!GDK_ELIMDOUBLES(b->T->vheap) || b->batCount == 0) &&
+             !GDK_ELIMDOUBLES(n->T->vheap) &&
+             b->T->vheap->hashash == n->T->vheap->hashash)) &&
            /* if needs to be kept unique, take slow path */
            (b->tkey & BOUND2BTRUE) == 0) {
-               if (b->S->role == TRANSIENT) {
+               if (b->S->role == TRANSIENT || b->T->vheap == n->T->vheap) {
                        /* If b is in the transient farm (i.e. b will
                         * never become persistent), we try some
                         * clever tricks to avoid copying:
@@ -99,14 +103,16 @@ insert_string_bat(BAT *b, BAT *n, int fo
                          *   string heap with n;
                         * - otherwise, if b's string heap and n's
                          *   string heap are the same (i.e. shared),
-                         *   we leave it that way;
+                         *   we leave it that way (this includes the
+                         *   case that b is persistent and n shares
+                         *   its string heap with b);
                         * - otherwise, if b shares its string heap
                          *   with some other bat, we materialize it
                          *   and we will have to copy strings.
                         */
                        bat bid = abs(b->batCacheid);
 
-                       if (b->batCount == 0) {
+                       if (b->batCount == 0 && b->T->vheap != n->T->vheap) {
                                if (b->T->vheap->parentid != bid) {
                                        BBPunshare(b->T->vheap->parentid);
                                } else {
@@ -276,6 +282,26 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        }
                        bunfastapp(b, tp);
                }
+       } else if (b->tkey & BOUND2BTRUE) {
+               BUN i = BUNlast(b);
+               /* if no duplicate values allowed, insert one-by-one */
+               BATloop(n, p, q) {
+                       tp = BUNtvar(ni, p);
+                       if (BUNfnd(b, tp) == BUN_NONE) {
+                               bunfastapp(b, tp);
+                               if (b->T->hash) {
+                                       HASHins(b, i, tp);
+                               }
+                               i++;
+                       }
+               }
+       } else if (b->T->vheap->free < n->T->vheap->free / 2) {
+               /* if b's string heap is much smaller than n's string
+                * heap, don't bother checking whether n's string
+                * values occur in b's string heap */
+               BATloop(n, p, q) {
+                       bunfastapp(b, BUNtvar(ni, p));
+               }
        } else {
                /* Insert values from n individually into b; however,
                 * we check whether there is a string in b's string
@@ -461,10 +487,7 @@ BATappend(BAT *b, BAT *n, bit force)
                                b->T->nodense = r;
                        }
                }
-               if (b->ttype == TYPE_str &&
-                   (b->batCount == 0 || !GDK_ELIMDOUBLES(b->T->vheap)) &&
-                   !GDK_ELIMDOUBLES(n->T->vheap) &&
-                   b->T->vheap->hashash == n->T->vheap->hashash) {
+               if (b->ttype == TYPE_str) {
                        if (insert_string_bat(b, n, force) != GDK_SUCCEED)
                                return GDK_FAIL;
                } else {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to