Changeset: 655d3eb72e16 for MonetDB
Modified Files:
Branch: statistics-analytics
Log Message:

Merged with default

diffs (truncated from 2450 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -565,7 +565,9 @@ stdout of test 'MAL-signatures` in direc
 [ "aggr",      "variancep",    "command aggr.variancep(b:bat[:any_2]):dbl ",   
"ALGvariancep;",        "Gives the variance of all tail values" ]
 [ "alarm",     "ctime",        "unsafe command alarm.ctime():str ",    
"ALARMctime;",  "Return the current time as a C-time string."   ]
 [ "alarm",     "epoch",        "unsafe command alarm.epoch():int ",    
"ALARMepoch;",  "Return time since Jan 1, 1970 in seconds."     ]
-[ "alarm",     "sleep",        "unsafe command alarm.sleep(secs:int):void ",   
"ALARMsleep;",  "Sleep a few seconds"   ]
+[ "alarm",     "sleep",        "unsafe pattern 
alarm.sleep(msecs:bat[:int]):bat[:int] ",       "ALARMsleep;",  "Sleep a few 
milliseconds and return the slept value"   ]
+[ "alarm",     "sleep",        "unsafe pattern alarm.sleep(msecs:int):int ",   
"ALARMsleep;",  "Sleep a few milliseconds and return the slept value"   ]
+[ "alarm",     "sleep",        "unsafe pattern alarm.sleep(msecs:int):void ",  
"ALARMsleep;",  "Sleep a few milliseconds"      ]
 [ "alarm",     "time", "unsafe command alarm.time():int ",     "ALARMtime;",   
"Return time since program start in milliseconds."      ]
 [ "alarm",     "usec", "unsafe command alarm.usec():lng ",     "ALARMusec;",   
"Return time since Jan 1, 1970 in microseconds."        ]
 [ "algebra",   "antijoin",     "function algebra.antijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) 
(X_0:bat[:oid], X_1:bat[:oid]);",   "",     ""      ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -674,7 +674,9 @@ stdout of test 'MAL-signatures` in direc
 [ "aggr",      "variancep",    "command aggr.variancep(b:bat[:any_2]):dbl ",   
"ALGvariancep;",        "Gives the variance of all tail values" ]
 [ "alarm",     "ctime",        "unsafe command alarm.ctime():str ",    
"ALARMctime;",  "Return the current time as a C-time string."   ]
 [ "alarm",     "epoch",        "unsafe command alarm.epoch():int ",    
"ALARMepoch;",  "Return time since Jan 1, 1970 in seconds."     ]
-[ "alarm",     "sleep",        "unsafe command alarm.sleep(secs:int):void ",   
"ALARMsleep;",  "Sleep a few seconds"   ]
+[ "alarm",     "sleep",        "unsafe pattern 
alarm.sleep(msecs:bat[:int]):bat[:int] ",       "ALARMsleep;",  "Sleep a few 
milliseconds and return the slept value"   ]
+[ "alarm",     "sleep",        "unsafe pattern alarm.sleep(msecs:int):int ",   
"ALARMsleep;",  "Sleep a few milliseconds and return the slept value"   ]
+[ "alarm",     "sleep",        "unsafe pattern alarm.sleep(msecs:int):void ",  
"ALARMsleep;",  "Sleep a few milliseconds"      ]
 [ "alarm",     "time", "unsafe command alarm.time():int ",     "ALARMtime;",   
"Return time since program start in milliseconds."      ]
 [ "alarm",     "usec", "unsafe command alarm.usec():lng ",     "ALARMusec;",   
"Return time since Jan 1, 1970 in microseconds."        ]
 [ "algebra",   "antijoin",     "function algebra.antijoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng) 
(X_0:bat[:oid], X_1:bat[:oid]);",   "",     ""      ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -745,7 +745,7 @@ str AGGRvariance3_dbl(bat *retval, const
 str AGGRvariancep3_dbl(bat *retval, const bat *bid, const bat *gid, const bat 
 str ALARMctime(str *res);
 str ALARMepoch(int *res);
-str ALARMsleep(void *res, int *secs);
+str ALARMsleep(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 str ALARMtime(int *res);
 str ALARMusec(lng *ret);
 str ALGbandjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat 
*slid, const bat *srid, const void *low, const void *high, const bit *li, const 
bit *hi, const lng *estimate);
diff --git a/clients/mapiclient/mhelp.c b/clients/mapiclient/mhelp.c
--- a/clients/mapiclient/mhelp.c
+++ b/clients/mapiclient/mhelp.c
@@ -508,7 +508,7 @@ SQLhelp sqlhelp1[] = {
         "Change transaction mode from auto-commit to user controlled 
-        "START TRANSACTION transactionmode",
+        "{ START | BEGIN } TRANSACTION transactionmode",
         "See also"},
        {"TABLE JOINS",
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -165,8 +165,8 @@ gdk_export const ptr ptr_nil;
  * In all algorithms across GDK, you will find switches on the types
  * (bte, sht, int, flt, dbl, lng, hge, str). They respectively
  * represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
- * 64-bit double, a 64-bit int, and a pointer-sized location of a
- * char-buffer (ended by a zero char).
+ * 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location
+ * of a char-buffer (ended by a zero char).
  * In contrast, the types (bit, ptr, bat, oid) are derived types. They
  * do not occur in the switches. The ATOMstorage macro maps them
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -384,7 +384,6 @@ canditer_init(struct canditer *ci, BAT *
                        } else {
                                /* why the vheap? */
                                ci->tpe = cand_dense;
-                               ci->oids = NULL;
                } else {
                        ci->tpe = cand_dense;
@@ -404,23 +403,6 @@ canditer_init(struct canditer *ci, BAT *
                ci->tpe = cand_dense;
        switch (ci->tpe) {
-       case cand_dense:
-       case_cand_dense:
-               if (b != NULL) {
-                       if (ci->seq + cnt <= b->hseqbase ||
-                           ci->seq >= b->hseqbase + BATcount(b)) {
-                               ci->ncand = 0;
-                               return 0;
-                       }
-                       if (b->hseqbase > ci->seq) {
-                               cnt -= b->hseqbase - ci->seq;
-                               ci->offset += b->hseqbase - ci->seq;
-                               ci->seq = b->hseqbase;
-                       }
-                       if (ci->seq + cnt > b->hseqbase + BATcount(b))
-                               cnt = b->hseqbase + BATcount(b) - ci->seq;
-               }
-               break;
        case cand_materialized:
                if (b != NULL) {
                        if (ci->oids[ci->noids - 1] < b->hseqbase) {
@@ -488,14 +470,9 @@ canditer_init(struct canditer *ci, BAT *
                while (ci->noids > 0 &&
                       ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1)
-               /* WARNING: don't reset ci->oids to NULL when setting
-                * ci->tpe to cand_dense below: BATprojectchain will
-                * fail */
-               if (ci->noids == 0) {
-                       ci->tpe = cand_dense;
-                       goto case_cand_dense;
-               }
-               if (b != NULL) {
+               if (ci->noids > 0) {
+                       if (b == NULL)
+                               break;
                        BUN p;
                        p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase);
                        if (p == ci->noids) {
@@ -505,6 +482,7 @@ canditer_init(struct canditer *ci, BAT *
                                ci->seq = b->hseqbase;
                                ci->noids = 0;
                                ci->tpe = cand_dense;
+                               ci->oids = NULL;
                        assert(b->hseqbase > ci->seq || p == 0);
@@ -532,10 +510,26 @@ canditer_init(struct canditer *ci, BAT *
                        while (ci->noids > 0 &&
                               ci->oids[ci->noids - 1] == ci->seq + cnt + 
ci->noids - 1)
-                       if (ci->noids == 0) {
-                               ci->tpe = cand_dense;
-                               goto case_cand_dense;
+                       if (ci->noids > 0)
+                               break;
+               }
+               ci->tpe = cand_dense;
+               ci->oids = NULL;
+               /* fall through */
+       case cand_dense:
+               if (b != NULL) {
+                       if (ci->seq + cnt <= b->hseqbase ||
+                           ci->seq >= b->hseqbase + BATcount(b)) {
+                               ci->ncand = 0;
+                               return 0;
+                       if (b->hseqbase > ci->seq) {
+                               cnt -= b->hseqbase - ci->seq;
+                               ci->offset += b->hseqbase - ci->seq;
+                               ci->seq = b->hseqbase;
+                       }
+                       if (ci->seq + cnt > b->hseqbase + BATcount(b))
+                               cnt = b->hseqbase + BATcount(b) - ci->seq;
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -649,13 +649,13 @@ BAThashsync(void *arg)
                        }                                               \
                        HASHputlink(h, p, hget);                        \
                        HASHput(h, c, p);                               \
-                       o = canditer_next(&ci);                         \
+                       o = canditer_next(ci);                          \
                }                                                       \
        } while (0)
 #define finishhash(TYPE)                                               \
        do {                                                            \
                const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
-               for (; p < cnt; p++) {                                  \
+               for (; p < ci->ncand; p++) {                            \
                        c = hash_##TYPE(h, v + o - b->hseqbase);        \
                        c = hash_##TYPE(h, v + o - b->hseqbase);        \
                        hget = HASHget(h, c);                           \
@@ -669,22 +669,21 @@ BAThashsync(void *arg)
                        h->nunique += hb == hnil;                       \
                        HASHputlink(h, p, hget);                        \
                        HASHput(h, c, p);                               \
-                       o = canditer_next(&ci);                         \
+                       o = canditer_next(ci);                          \
                }                                                       \
        } while (0)
- * The prime routine for the BAT layer is to create a new hash index.
- * Its argument is the element type and the maximum number of BUNs be
- * stored under the hash function.
- */
+/* Internal function to create a hash table for the given BAT b.
+ * If a candidate list s is also given, the hash table is specific for
+ * the combination of the two: only values from b that are referred to
+ * by s are included in the hash table, so if a result is found when
+ * searching the hash table, the result is a candidate. */
 Hash *
-BAThash_impl(BAT *b, BAT *s, const char *ext)
+BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char 
*restrict ext)
        lng t0 = 0;
        unsigned int tpe = ATOMbasetype(b->ttype);
-       BUN cnt, cnt1;
-       struct canditer ci;
+       BUN cnt1;
        BUN mask, maxmask = 0;
        BUN p, c;
        oid o;
@@ -693,6 +692,9 @@ BAThash_impl(BAT *b, BAT *s, const char 
        const char *nme = GDKinmemory() ? ":inmemory" : 
BBP_physical(b->batCacheid);
        BATiter bi = bat_iterator(b);
        PROPrec *prop;
+       bool hascand = ci->tpe != cand_dense || ci->ncand != BATcount(b);
+       assert(strcmp(ext, "thash") != 0 || !hascand);
        TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
@@ -710,8 +712,6 @@ BAThash_impl(BAT *b, BAT *s, const char 
                tpe = TYPE_void;
-       cnt = canditer_init(&ci, b, s);
        if ((h = GDKzalloc(sizeof(*h))) == NULL ||
            (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, 
hashheap)) < 0 ||
            (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, 
hashheap)) < 0) {
@@ -725,12 +725,12 @@ BAThash_impl(BAT *b, BAT *s, const char 
                      nme, ".", ext, "l", NULL);
        strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
                      nme, ".", ext, "b", NULL);
-       if (HEAPalloc(&h->heaplink, s ? cnt : BATcapacity(b),
+       if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
                      h->width) != GDK_SUCCEED) {
                return NULL;
-       h->heaplink.free = cnt * h->width;
+       h->heaplink.free = ci->ncand * h->width;
        h->Link = h->heaplink.base;
 #ifndef NDEBUG
        /* clear unused part of Link array */
@@ -747,35 +747,35 @@ BAThash_impl(BAT *b, BAT *s, const char 
        } else if (ATOMsize(tpe) == 2) {
                /* perfect hash for two-byte sized atoms */
                mask = (1 << 16);
-       } else if (b->tkey || cnt <= 4096) {
+       } else if (b->tkey || ci->ncand <= 4096) {
                /* if key, or if small, don't bother dynamically
                 * adjusting the hash mask */
-               mask = HASHmask(cnt);
-       } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != 
NULL) {
+               mask = HASHmask(ci->ncand);
+       } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) != 
NULL) {
                assert(prop->v.vtype == TYPE_oid);
                mask = prop->v.val.oval * 8 / 7;
-       } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) 
!= NULL) {
+       } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS)) 
!= NULL) {
                assert(prop->v.vtype == TYPE_oid);
                mask = prop->v.val.oval;
-               maxmask = HASHmask(cnt);
+               maxmask = HASHmask(ci->ncand);
                if (mask > maxmask)
                        mask = maxmask;
        } else {
-               /* dynamic hash: we start with HASHmask(cnt)/64, or,
-                * if cnt large enough, HASHmask(cnt)/256; if there
-                * are too many collisions we try HASHmask(cnt)/64,
-                * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally
-                * HASHmask(cnt), but we might skip some of these if
+               /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
+                * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
+                * are too many collisions we try HASHmask(ci->ncand)/64,
+                * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
+                * HASHmask(ci->ncand), but we might skip some of these if
                 * there are many distinct values.  */
-               maxmask = HASHmask(cnt);
+               maxmask = HASHmask(ci->ncand);
                mask = maxmask >> 6;
                while (mask > 4096)
                        mask >>= 2;
                /* try out on first 25% of b */
-               cnt1 = cnt >> 2;
+               cnt1 = ci->ncand >> 2;
-       o = canditer_next(&ci); /* always one ahead */
+       o = canditer_next(ci);  /* always one ahead */
        for (;;) {
                lng t1 = 0;
                TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
@@ -787,7 +787,7 @@ BAThash_impl(BAT *b, BAT *s, const char 
                HEAPfree(&h->heapbckt, true);
                /* create the hash structures */
                if (HASHnew(h, ATOMtype(b->ttype), BATcapacity(b),
-                           mask, cnt, true) != GDK_SUCCEED) {
+                           mask, ci->ncand, true) != GDK_SUCCEED) {
                        HEAPfree(&h->heaplink, true);
                        return NULL;
@@ -842,7 +842,7 @@ BAThash_impl(BAT *b, BAT *s, const char 
checkin-list mailing list

Reply via email to