Changeset: 5d781336bfd2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d781336bfd2
Modified Files:
	gdk/gdk_batop.c
Branch: Dec2016
Log Message:
Merge with Jun2016 branch. diffs (179 lines): diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -58,10 +58,8 @@ static gdk_return insert_string_bat(BAT *b, BAT *n, int force) { BATiter ni; /* iterator */ - int tt; /* tail type */ size_t toff = ~(size_t) 0; /* tail offset */ BUN p, q; /* loop variables */ - oid o = 0; /* in case we're appending */ const void *tp; /* tail value pointer */ unsigned char tbv; /* tail value-as-bte */ unsigned short tsv; /* tail value-as-sht */ @@ -71,18 +69,20 @@ insert_string_bat(BAT *b, BAT *n, int fo var_t v; /* value */ size_t off; /* offset within n's string heap */ + assert(b->ttype == TYPE_str); + /* only transient bats can use some other bat's string heap */ + assert(b->batRole == TRANSIENT || + b->tvheap->parentid == abs(b->batCacheid)); if (n->batCount == 0) return GDK_SUCCEED; ni = bat_iterator(n); tp = NULL; - tt = b->ttype; - if (tt == TYPE_str && - (!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) && + if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) && !GDK_ELIMDOUBLES(n->tvheap) && b->tvheap->hashash == n->tvheap->hashash && /* if needs to be kept unique, take slow path */ (b->tkey & BOUND2BTRUE) == 0) { - if (b->batRole == TRANSIENT) { + if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) { /* If b is in the transient farm (i.e. b will * never become persistent), we try some * clever tricks to avoid copying: @@ -90,14 +90,16 @@ insert_string_bat(BAT *b, BAT *n, int fo * string heap with n; * - otherwise, if b's string heap and n's * string heap are the same (i.e. shared), - * we leave it that way; + * we leave it that way (this includes the + * case that b is persistent and n shares + * its string heap with b); * - otherwise, if b shares its string heap * with some other bat, we materialize it * and we will have to copy strings. 
*/ bat bid = b->batCacheid; - if (b->batCount == 0) { + if (b->batCount == 0 && b->tvheap != n->tvheap) { if (b->tvheap->parentid != bid) { BBPunshare(b->tvheap->parentid); } else { @@ -151,8 +153,8 @@ insert_string_bat(BAT *b, BAT *n, int fo /* make sure we get alignment right */ toff = (toff + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1); assert(((toff >> GDK_VARSHIFT) << GDK_VARSHIFT) == toff); - /* if in "force" mode, the heap may be shared when - * memory mapped */ + /* if in "force" mode, the heap may be + * shared when memory mapped */ if (HEAPextend(b->tvheap, toff + n->tvheap->size, force) != GDK_SUCCEED) { toff = ~(size_t) 0; goto bunins_failed; @@ -182,25 +184,25 @@ insert_string_bat(BAT *b, BAT *n, int fo } switch (b->twidth) { case 1: - tt = TYPE_bte; + b->ttype = TYPE_bte; tp = &tbv; break; case 2: - tt = TYPE_sht; + b->ttype = TYPE_sht; tp = &tsv; break; #if SIZEOF_VAR_T == 8 case 4: - tt = TYPE_int; + b->ttype = TYPE_int; tp = &tiv; break; case 8: - tt = TYPE_lng; + b->ttype = TYPE_lng; tp = &v; break; #else case 4: - tt = TYPE_int; + b->ttype = TYPE_int; tp = &v; break; #endif @@ -208,13 +210,11 @@ insert_string_bat(BAT *b, BAT *n, int fo assert(0); } b->tvarsized = 0; - b->ttype = tt; } } if (toff == 0 && n->twidth == b->twidth) { /* we don't need to do any translation of offset - * values, nor do we need to do any calculations for - * the head column, so we can use fast memcpy */ + * values, so we can use fast memcpy */ memcpy(Tloc(b, BUNlast(b)), Tloc(n, 0), BATcount(n) * n->twidth); BATsetcount(b, BATcount(b) + BATcount(n)); @@ -275,7 +275,26 @@ insert_string_bat(BAT *b, BAT *n, int fo break; } bunfastapp(b, tp); - o++; + } + } else if (b->tkey & BOUND2BTRUE) { + BUN i = BUNlast(b); + /* if no duplicate values allowed, insert one-by-one */ + BATloop(n, p, q) { + tp = BUNtvar(ni, p); + if (BUNfnd(b, tp) == BUN_NONE) { + bunfastapp(b, tp); + if (b->thash) { + HASHins(b, i, tp); + } + i++; + } + } + } else if (b->tvheap->free < n->tvheap->free / 2) { + 
/* if b's string heap is much smaller than n's string + * heap, don't bother checking whether n's string + * values occur in b's string heap */ + BATloop(n, p, q) { + bunfastapp(b, BUNtvar(ni, p)); } } else { /* Insert values from n individually into b; however, @@ -332,19 +351,14 @@ insert_string_bat(BAT *b, BAT *n, int fo } else { bunfastapp(b, tp); } - o++; } } - if (toff != ~(size_t) 0) { - b->tvarsized = 1; - b->ttype = TYPE_str; - } + b->tvarsized = 1; + b->ttype = TYPE_str; return GDK_SUCCEED; bunins_failed: - if (toff != ~(size_t) 0) { - b->tvarsized = 1; - b->ttype = TYPE_str; - } + b->tvarsized = 1; + b->ttype = TYPE_str; return GDK_FAIL; } @@ -464,10 +478,7 @@ BATappend(BAT *b, BAT *n, bit force) b->tnodense = r; } } - if (b->ttype == TYPE_str && - (b->batCount == 0 || !GDK_ELIMDOUBLES(b->tvheap)) && - !GDK_ELIMDOUBLES(n->tvheap) && - b->tvheap->hashash == n->tvheap->hashash) { + if (b->ttype == TYPE_str) { if (insert_string_bat(b, n, force) != GDK_SUCCEED) return GDK_FAIL; } else { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list