Changeset: f913719e8320 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f913719e8320
Modified Files:
	gdk/gdk_batop.c
	monetdb5/modules/mal/remote.c
	sql/backends/monet5/sql.c
	sql/server/rel_optimizer.c
	sql/server/rel_schema.c
Branch: default
Log Message:

Merge with Jul2021 branch. diffs (truncated from 440 to 300 lines): diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -61,13 +61,13 @@ insert_string_bat(BAT *b, BAT *n, struct BATiter ni; /* iterator */ size_t toff = ~(size_t) 0; /* tail offset */ BUN p, r; /* loop variables */ - const void *tp; /* tail value pointer */ + const void *tp = NULL; /* tail value pointer */ unsigned char tbv; /* tail value-as-bte */ unsigned short tsv; /* tail value-as-sht */ #if SIZEOF_VAR_T == 8 unsigned int tiv; /* tail value-as-int */ #endif - var_t v = GDK_VAROFFSET; /* value */ + var_t v; /* value */ size_t off; /* offset within n's string heap */ BUN cnt = ci->ncand; BUN oldcnt = BATcount(b); @@ -80,143 +80,88 @@ insert_string_bat(BAT *b, BAT *n, struct if (cnt == 0) return GDK_SUCCEED; ni = bat_iterator(n); - tp = NULL; - if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) && - !GDK_ELIMDOUBLES(ni.vh))) { - if (b->batRole == TRANSIENT || b->tvheap == ni.vh) { - /* If b is in the transient farm (i.e. b will - * never become persistent), we try some - * clever tricks to avoid copying: - * - if b is empty, we just let it share the - * string heap with n; - * - otherwise, if b's string heap and n's - * string heap are the same (i.e. shared), - * we leave it that way (this includes the - * case that b is persistent and n shares - * its string heap with b); - * - otherwise, if b shares its string heap - * with some other bat, we materialize it - * and we will have to copy strings. 
- */ - bat bid = b->batCacheid; - /* if candidates are not dense, there is no - * wholesale copying of n's offset heap, but - * we may still be able to share the string - * heap */ - if (mayshare && - oldcnt == 0 && - b->tvheap != ni.vh && - ci->tpe == cand_dense) { - /* make sure locking happens in a - * predictable order: lowest id - * first */ - MT_thread_setalgorithm("share vheap, copy heap"); - MT_lock_set(&b->theaplock); - if (b->tvheap->parentid != bid) - BBPunshare(b->tvheap->parentid); - HEAPdecref(b->tvheap, true); - HEAPincref(ni.vh); - b->tvheap = ni.vh; - BBPshare(ni.vh->parentid); - b->batDirtydesc = true; - MT_lock_unset(&b->theaplock); - toff = 0; - v = ni.width == 1 ? GDK_VAROFFSET + 1 : - ni.width == 2 ? GDK_VAROFFSET + (1 << 9) : -#if SIZEOF_VAR_T == 8 - ni.width != 4 ? (var_t) 1 << 33 : -#endif - (var_t) 1 << 17; - } else if (b->tvheap->parentid == ni.vh->parentid && - ci->tpe == cand_dense) { - MT_thread_setalgorithm("copy heap"); - toff = 0; - } else if (b->tvheap->parentid != bid && - unshare_varsized_heap(b) != GDK_SUCCEED) { - bat_iterator_end(&ni); - return GDK_FAIL; - } - } else if (oldcnt == 0) { - v = ni.width == 1 ? GDK_VAROFFSET + 1 : - ni.width == 2 ? GDK_VAROFFSET + (1 << 9) : -#if SIZEOF_VAR_T == 8 - ni.width != 4 ? 
(var_t) 1 << 33 : -#endif - (var_t) 1 << 17; - MT_thread_setalgorithm("copy vheap, copy heap"); - if (b->tvheap->size < ni.vh->free) { - if (HEAPgrow(&b->theaplock, &b->tvheap, ni.vh->free, force) != GDK_SUCCEED) { - bat_iterator_end(&ni); - return GDK_FAIL; - } - } - memcpy(b->tvheap->base, ni.vh->base, ni.vh->free); - b->tvheap->free = ni.vh->free; - toff = 0; + if (b->tvheap == ni.vh) { + /* vheaps are already shared, continue doing so: we just + * need to append the offsets */ + toff = 0; + MT_thread_setalgorithm("shared vheap"); + } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) { + /* we can share the vheaps, so we then only need to + * append the offsets */ + MT_lock_set(&b->theaplock); + if (b->tvheap->parentid != b->batCacheid) + BBPunshare(b->tvheap->parentid); + HEAPdecref(b->tvheap, b->tvheap->parentid == b->batCacheid); + HEAPincref(ni.vh); + b->tvheap = ni.vh; + BBPshare(ni.vh->parentid); + b->batDirtydesc = true; + MT_lock_unset(&b->theaplock); + toff = 0; + MT_thread_setalgorithm("share vheap"); + } else { + /* no heap sharing, so also make sure the heap isn't + * shared currently (we're not allowed to write in + * another bat's heap) */ + if (b->tvheap->parentid != b->batCacheid && + unshare_varsized_heap(b) != GDK_SUCCEED) { + bat_iterator_end(&ni); + return GDK_FAIL; } - if (toff == ~(size_t) 0 && cnt > 1024 && b->tvheap->free >= ni.vh->free) { - /* If b and n aren't sharing their string - * heaps, we try to determine whether to copy - * n's whole string heap to the end of b's, or - * whether we will insert each string from n - * individually. We do this by testing a - * sample of n's strings and extrapolating - * from that sample whether n uses a - * significant part of its string heap for its - * strings (i.e. whether there are many unused - * strings in n's string heap). If n doesn't - * have many strings in the first place, we - * skip this and just insert them all - * individually. 
We also check whether a - * significant number of n's strings happen to - * have the same offset in b. In the latter - * case we also want to insert strings - * individually, but reusing the string in b's - * string heap. */ - int match = 0, i; + if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) && + !GDK_ELIMDOUBLES(ni.vh))) { + /* we'll consider copying the string heap completely + * + * we first estimate how much space the string heap + * should occupy, given the number of rows we need to + * insert, then, if that is way smaller than the actual + * space occupied, we will skip the copy and just insert + * one by one */ size_t len = 0; - for (i = 0; i < 1024; i++) { + for (int i = 0; i < 1024; i++) { p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1)); p = canditer_idx(ci, p) - n->hseqbase; - off = BUNtvaroff(ni, p); - if (off < b->tvheap->free && - strcmp(b->tvheap->base + off, ni.vh->base + off) == 0) - match++; - len += (strlen(ni.vh->base + off) + 8) & ~7; + len += strlen(BUNtvar(ni, p)) + 1; } - if (match < 768 && (size_t) (ni.count * (double) len / 1024) >= ni.vh->free / 2) { - /* append string heaps */ - toff = oldcnt == 0 ? 
0 : b->tvheap->free; - /* make sure we get alignment right */ - toff = (toff + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1); - /* if in "force" mode, the heap may be - * shared when memory mapped */ + len = (len + 512) / 1024; /* rounded average length */ + r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12); + /* r is estimate of number of strings in + * double-eliminated area */ + if (r < ci->ncand) + len = GDK_ELIMLIMIT + (ci->ncand - r) * len; + else + len = GDK_STRHASHSIZE + ci->ncand * (len + 12); + /* len is total estimated expected size of vheap */ + + if (len > ni.vh->free / 2) { + /* we copy the string heap, perhaps appending */ + if (oldcnt == 0) { + toff = 0; + MT_thread_setalgorithm("copy vheap"); + } else { + toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1); + MT_thread_setalgorithm("append vheap"); + } + if (HEAPgrow(&b->theaplock, &b->tvheap, toff + ni.vh->size, force) != GDK_SUCCEED) { bat_iterator_end(&ni); return GDK_FAIL; } - MT_thread_setalgorithm("append vheap"); memcpy(b->tvheap->base + toff, ni.vh->base, ni.vh->free); b->tvheap->free = toff + ni.vh->free; - if (toff > 0) { - /* flush double-elimination - * hash table */ - memset(b->tvheap->base, 0, - GDK_STRHASHSIZE); - } - /* make sure b is wide enough */ - v = b->tvheap->free; } } - } else if (b->tvheap != ni.vh && - unshare_varsized_heap(b) != GDK_SUCCEED) { - bat_iterator_end(&ni); - return GDK_FAIL; } + /* if toff has the initial value of ~0, we insert strings + * individually, otherwise we only copy (insert) offsets */ + if (toff == ~(size_t) 0) + v = GDK_VAROFFSET; + else + v = b->tvheap->free - 1; /* make sure there is (vertical) space in the offset heap, we - * may also widen if v was set to some limit above */ + * may also widen thanks to v, set above */ if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? 
b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) { bat_iterator_end(&ni); return GDK_FAIL; @@ -225,6 +170,7 @@ insert_string_bat(BAT *b, BAT *n, struct if (toff == 0 && ni.width == b->twidth && ci->tpe == cand_dense) { /* we don't need to do any translation of offset * values, so we can use fast memcpy */ + MT_thread_setalgorithm("memcpy offsets"); memcpy(Tloc(b, BUNlast(b)), (const char *) ni.base + ((ci->seq - n->hseqbase) << ni.shift), cnt << ni.shift); } else if (toff != ~(size_t) 0) { /* we don't need to insert any actual strings since we diff --git a/monetdb5/modules/mal/remote.c b/monetdb5/modules/mal/remote.c --- a/monetdb5/modules/mal/remote.c +++ b/monetdb5/modules/mal/remote.c @@ -1517,10 +1517,16 @@ static str RMTbincopyto(Client cntxt, Ma sendtheap = b->ttype != TYPE_void && b->tvarsized; if (isVIEW(b) && sendtheap && VIEWvtparent(b) && BATcount(b) < BATcount(BBP_cache(VIEWvtparent(b)))) { - if ((b = BATdescriptor(bid)) == NULL) + if ((b = BATdescriptor(bid)) == NULL) { + BBPunfix(bid); throw(MAL, "remote.bincopyto", RUNTIME_OBJECT_MISSING); + } v = COLcopy(b, b->ttype, true, TRANSIENT); BBPunfix(b->batCacheid); + if (v == NULL) { + BBPunfix(bid); + throw(MAL, "remote.bincopyto", GDK_EXCEPTION); + } } mnstr_printf(cntxt->fdout, /*JSON*/"{" diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -2601,7 +2601,7 @@ mvc_export_table_wrap( Client cntxt, Mal } if ((ok = mvc_export_result(cntxt->sqlcontext, s, res, tostdout, mb->starttime, mb->optimize)) < 0) { msg = createException(SQL, "sql.resultSet", SQLSTATE(45000) "Result set construction failed: %s", mvc_export_error(cntxt->sqlcontext, s, ok)); - if (!tostdout) + if (!onclient && !tostdout) close_stream(s); goto wrapup_result_set1; } @@ -2832,7 +2832,7 @@ mvc_export_row_wrap( Client cntxt, MalBl } if ((ok = mvc_export_result(cntxt->sqlcontext, s, res, strcmp(filename, "stdout") == 0, mb->starttime, mb->optimize)) 
< 0) { msg = createException(SQL, "sql.resultSet", SQLSTATE(45000) "Result set construction failed: %s", mvc_export_error(cntxt->sqlcontext, s, ok)); - if (!tostdout) + if (!onclient && !tostdout) close_stream(s); goto wrapup_result_set; } diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -9532,7 +9532,8 @@ rel_basecount(visitor *v, sql_rel *rel) if (is_basetable(bt->op) && !e->l) { /* count(*) */ /* change into select cnt('schema','table') */; sql_table *t = bt->l; - if (!isTable(t)) + /* I need to get the declared table's frame number to make this work correctly for those */ + if (!isTable(t) || isDeclaredTable(t)) return rel; sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC); list *exps = sa_list(v->sql->sa); diff --git a/sql/server/rel_schema.c b/sql/server/rel_schema.c --- a/sql/server/rel_schema.c +++ b/sql/server/rel_schema.c @@ -1472,6 +1472,8 @@ sql_drop_view(sql_query *query, dlist *q } return NULL; } + if (!isView(t)) + return sql_error(sql, 02, SQLSTATE(42000) "DROP VIEW: unable to drop view '%s': is a table", tname); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list