Changeset: 655d3eb72e16 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=655d3eb72e16 Modified Files: clients/Tests/MAL-signatures.stable.out clients/Tests/MAL-signatures.stable.out.int128 clients/Tests/exports.stable.out clients/mapiclient/mhelp.c gdk/gdk_atoms.h gdk/gdk_cand.c gdk/gdk_hash.c gdk/gdk_hash.h gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_system.c gdk/gdk_tracer.h gdk/gdk_unique.c monetdb5/mal/mal_profiler.c monetdb5/modules/atoms/str.c monetdb5/modules/kernel/alarm.c monetdb5/modules/kernel/alarm.mal monetdb5/modules/mal/sysmon.c sql/ChangeLog sql/backends/monet5/sql_upgrades.c sql/scripts/25_debug.sql sql/server/sql_parser.y sql/test/emptydb/Tests/check.stable.out sql/test/emptydb/Tests/check.stable.out.32bit sql/test/emptydb/Tests/check.stable.out.int128 sql/test/remote/Tests/creds.SQL.py sql/test/remote/Tests/different_user.SQL.py sql/test/remote/Tests/invalid_creds.SQL.py sql/test/remote/Tests/ssbm.SQL.py sql/test/sys-schema/Tests/systemfunctions.stable.out sql/test/sys-schema/Tests/systemfunctions.stable.out.int128 Branch: statistics-analytics Log Message:
Merged with default diffs (truncated from 2450 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -565,7 +565,9 @@ stdout of test 'MAL-signatures` in direc [ "aggr", "variancep", "command aggr.variancep(b:bat[:any_2]):dbl ", "ALGvariancep;", "Gives the variance of all tail values" ] [ "alarm", "ctime", "unsafe command alarm.ctime():str ", "ALARMctime;", "Return the current time as a C-time string." ] [ "alarm", "epoch", "unsafe command alarm.epoch():int ", "ALARMepoch;", "Return time since Jan 1, 1970 in seconds." ] -[ "alarm", "sleep", "unsafe command alarm.sleep(secs:int):void ", "ALARMsleep;", "Sleep a few seconds" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:bat[:int]):bat[:int] ", "ALARMsleep;", "Sleep a few milliseconds and return the slept value" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):int ", "ALARMsleep;", "Sleep a few milliseconds and return the slept value" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):void ", "ALARMsleep;", "Sleep a few milliseconds" ] [ "alarm", "time", "unsafe command alarm.time():int ", "ALARMtime;", "Return time since program start in milliseconds." ] [ "alarm", "usec", "unsafe command alarm.usec():lng ", "ALARMusec;", "Return time since Jan 1, 1970 in microseconds." ] [ "algebra", "antijoin", "function algebra.antijoin(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]);", "", "" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -674,7 +674,9 @@ stdout of test 'MAL-signatures` in direc [ "aggr", "variancep", "command aggr.variancep(b:bat[:any_2]):dbl ", "ALGvariancep;", "Gives the variance of all tail values" ] [ "alarm", "ctime", "unsafe command alarm.ctime():str ", "ALARMctime;", "Return the current time as a C-time string." ] [ "alarm", "epoch", "unsafe command alarm.epoch():int ", "ALARMepoch;", "Return time since Jan 1, 1970 in seconds." ] -[ "alarm", "sleep", "unsafe command alarm.sleep(secs:int):void ", "ALARMsleep;", "Sleep a few seconds" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:bat[:int]):bat[:int] ", "ALARMsleep;", "Sleep a few milliseconds and return the slept value" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):int ", "ALARMsleep;", "Sleep a few milliseconds and return the slept value" ] +[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):void ", "ALARMsleep;", "Sleep a few milliseconds" ] [ "alarm", "time", "unsafe command alarm.time():int ", "ALARMtime;", "Return time since program start in milliseconds." ] [ "alarm", "usec", "unsafe command alarm.usec():lng ", "ALARMusec;", "Return time since Jan 1, 1970 in microseconds." ] [ "algebra", "antijoin", "function algebra.antijoin(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]);", "", "" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -745,7 +745,7 @@ str AGGRvariance3_dbl(bat *retval, const str AGGRvariancep3_dbl(bat *retval, const bat *bid, const bat *gid, const bat *eid); str ALARMctime(str *res); str ALARMepoch(int *res); -str ALARMsleep(void *res, int *secs); +str ALARMsleep(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str ALARMtime(int *res); str ALARMusec(lng *ret); str ALGbandjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const void *low, const void *high, const bit *li, const bit *hi, const lng *estimate); diff --git a/clients/mapiclient/mhelp.c b/clients/mapiclient/mhelp.c --- a/clients/mapiclient/mhelp.c +++ b/clients/mapiclient/mhelp.c @@ -508,7 +508,7 @@ SQLhelp sqlhelp1[] = { NULL}, {"START TRANSACTION", "Change transaction mode from auto-commit to user controlled commit/rollback", - "START TRANSACTION transactionmode", + "{ START | BEGIN } TRANSACTION transactionmode", "transactionmode,isolevel", "See also https://www.monetdb.org/Documentation/Manuals/SQLreference/Transactions"}, {"TABLE JOINS", diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h --- a/gdk/gdk_atoms.h +++ b/gdk/gdk_atoms.h @@ -165,8 +165,8 @@ gdk_export const ptr ptr_nil; * In all algorithms across GDK, you will find switches on the types * (bte, sht, int, flt, dbl, lng, hge, str). They respectively * represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a - * 64-bit double, a 64-bit int, and a pointer-sized location of a - * char-buffer (ended by a zero char). + * 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location + * of a char-buffer (ended by a zero char). * * In contrast, the types (bit, ptr, bat, oid) are derived types. They * do not occur in the switches. The ATOMstorage macro maps them diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c --- a/gdk/gdk_cand.c +++ b/gdk/gdk_cand.c @@ -384,7 +384,6 @@ canditer_init(struct canditer *ci, BAT * } else { /* why the vheap? */ ci->tpe = cand_dense; - ci->oids = NULL; } } else { ci->tpe = cand_dense; @@ -404,23 +403,6 @@ canditer_init(struct canditer *ci, BAT * ci->tpe = cand_dense; } switch (ci->tpe) { - case cand_dense: - case_cand_dense: - if (b != NULL) { - if (ci->seq + cnt <= b->hseqbase || - ci->seq >= b->hseqbase + BATcount(b)) { - ci->ncand = 0; - return 0; - } - if (b->hseqbase > ci->seq) { - cnt -= b->hseqbase - ci->seq; - ci->offset += b->hseqbase - ci->seq; - ci->seq = b->hseqbase; - } - if (ci->seq + cnt > b->hseqbase + BATcount(b)) - cnt = b->hseqbase + BATcount(b) - ci->seq; - } - break; case cand_materialized: if (b != NULL) { if (ci->oids[ci->noids - 1] < b->hseqbase) { @@ -488,14 +470,9 @@ canditer_init(struct canditer *ci, BAT * while (ci->noids > 0 && ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) ci->noids--; - /* WARNING: don't reset ci->oids to NULL when setting - * ci->tpe to cand_dense below: BATprojectchain will - * fail */ - if (ci->noids == 0) { - ci->tpe = cand_dense; - goto case_cand_dense; - } - if (b != NULL) { + if (ci->noids > 0) { + if (b == NULL) + break; BUN p; p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase); if (p == ci->noids) { @@ -505,6 +482,7 @@ canditer_init(struct canditer *ci, BAT * ci->seq = b->hseqbase; ci->noids = 0; ci->tpe = cand_dense; + ci->oids = NULL; break; } assert(b->hseqbase > ci->seq || p == 0); @@ -532,10 +510,26 @@ canditer_init(struct canditer *ci, BAT * while (ci->noids > 0 && ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) ci->noids--; - if (ci->noids == 0) { - ci->tpe = cand_dense; - goto case_cand_dense; + if (ci->noids > 0) + break; + } + ci->tpe = cand_dense; + ci->oids = NULL; + /* fall through */ + case cand_dense: + if (b != NULL) { + if (ci->seq + cnt <= b->hseqbase || + ci->seq >= b->hseqbase + BATcount(b)) { + ci->ncand = 0; + return 0; } + if (b->hseqbase > ci->seq) { + cnt -= b->hseqbase - ci->seq; + ci->offset += b->hseqbase - ci->seq; + ci->seq = b->hseqbase; + } + if (ci->seq + cnt > b->hseqbase + BATcount(b)) + cnt = b->hseqbase + BATcount(b) - ci->seq; } break; } diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -649,13 +649,13 @@ BAThashsync(void *arg) } \ HASHputlink(h, p, hget); \ HASHput(h, c, p); \ - o = canditer_next(&ci); \ + o = canditer_next(ci); \ } \ } while (0) #define finishhash(TYPE) \ do { \ const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \ - for (; p < cnt; p++) { \ + for (; p < ci->ncand; p++) { \ c = hash_##TYPE(h, v + o - b->hseqbase); \ c = hash_##TYPE(h, v + o - b->hseqbase); \ hget = HASHget(h, c); \ @@ -669,22 +669,21 @@ BAThashsync(void *arg) h->nunique += hb == hnil; \ HASHputlink(h, p, hget); \ HASHput(h, c, p); \ - o = canditer_next(&ci); \ + o = canditer_next(ci); \ } \ } while (0) -/* - * The prime routine for the BAT layer is to create a new hash index. - * Its argument is the element type and the maximum number of BUNs be - * stored under the hash function. - */ +/* Internal function to create a hash table for the given BAT b. + * If a candidate list s is also given, the hash table is specific for + * the combination of the two: only values from b that are referred to + * by s are included in the hash table, so if a result is found when + * searching the hash table, the result is a candidate. */ Hash * -BAThash_impl(BAT *b, BAT *s, const char *ext) +BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext) { lng t0 = 0; unsigned int tpe = ATOMbasetype(b->ttype); - BUN cnt, cnt1; - struct canditer ci; + BUN cnt1; BUN mask, maxmask = 0; BUN p, c; oid o; @@ -693,6 +692,9 @@ BAThash_impl(BAT *b, BAT *s, const char const char *nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid); BATiter bi = bat_iterator(b); PROPrec *prop; + bool hascand = ci->tpe != cand_dense || ci->ncand != BATcount(b); + + assert(strcmp(ext, "thash") != 0 || !hascand); TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); TRC_DEBUG(ACCELERATOR, @@ -710,8 +712,6 @@ BAThash_impl(BAT *b, BAT *s, const char tpe = TYPE_void; } - cnt = canditer_init(&ci, b, s); - if ((h = GDKzalloc(sizeof(*h))) == NULL || (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0 || (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0) { @@ -725,12 +725,12 @@ BAThash_impl(BAT *b, BAT *s, const char nme, ".", ext, "l", NULL); strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename), nme, ".", ext, "b", NULL); - if (HEAPalloc(&h->heaplink, s ? cnt : BATcapacity(b), + if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b), h->width) != GDK_SUCCEED) { GDKfree(h); return NULL; } - h->heaplink.free = cnt * h->width; + h->heaplink.free = ci->ncand * h->width; h->Link = h->heaplink.base; #ifndef NDEBUG /* clear unused part of Link array */ @@ -747,35 +747,35 @@ BAThash_impl(BAT *b, BAT *s, const char } else if (ATOMsize(tpe) == 2) { /* perfect hash for two-byte sized atoms */ mask = (1 << 16); - } else if (b->tkey || cnt <= 4096) { + } else if (b->tkey || ci->ncand <= 4096) { /* if key, or if small, don't bother dynamically * adjusting the hash mask */ - mask = HASHmask(cnt); - } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) { + mask = HASHmask(ci->ncand); + } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != NULL) { assert(prop->v.vtype == TYPE_oid); mask = prop->v.val.oval * 8 / 7; - } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) != NULL) { + } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) != NULL) { assert(prop->v.vtype == TYPE_oid); mask = prop->v.val.oval; - maxmask = HASHmask(cnt); + maxmask = HASHmask(ci->ncand); if (mask > maxmask) mask = maxmask; } else { - /* dynamic hash: we start with HASHmask(cnt)/64, or, - * if cnt large enough, HASHmask(cnt)/256; if there - * are too many collisions we try HASHmask(cnt)/64, - * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally - * HASHmask(cnt), but we might skip some of these if + /* dynamic hash: we start with HASHmask(ci->ncand)/64, or, + * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there + * are too many collisions we try HASHmask(ci->ncand)/64, + * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally + * HASHmask(ci->ncand), but we might skip some of these if * there are many distinct values. */ - maxmask = HASHmask(cnt); + maxmask = HASHmask(ci->ncand); mask = maxmask >> 6; while (mask > 4096) mask >>= 2; /* try out on first 25% of b */ - cnt1 = cnt >> 2; + cnt1 = ci->ncand >> 2; } - o = canditer_next(&ci); /* always one ahead */ + o = canditer_next(ci); /* always one ahead */ for (;;) { lng t1 = 0; TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec(); @@ -787,7 +787,7 @@ BAThash_impl(BAT *b, BAT *s, const char HEAPfree(&h->heapbckt, true); /* create the hash structures */ if (HASHnew(h, ATOMtype(b->ttype), BATcapacity(b), - mask, cnt, true) != GDK_SUCCEED) { + mask, ci->ncand, true) != GDK_SUCCEED) { HEAPfree(&h->heaplink, true); GDKfree(h); return NULL; @@ -842,7 +842,7 @@ BAThash_impl(BAT *b, BAT *s, const char _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list