Changeset: a9cb90dab9d3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a9cb90dab9d3
Added Files:
        sql/test/BugTracker-2017/Tests/side-effect.Bug-6397.sql
Modified Files:
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_firstn.c
        gdk/gdk_group.c
        gdk/gdk_imprints.c
        gdk/gdk_join.c
        gdk/gdk_logger.c
        gdk/gdk_sample.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
        monetdb5/extras/rapi/rapi.c
        monetdb5/modules/atoms/json.c
        monetdb5/modules/kernel/aggr.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/batmmath.c
        monetdb5/modules/kernel/batstr.c
        monetdb5/modules/kernel/group.c
        monetdb5/modules/kernel/microbenchmark.c
        monetdb5/modules/mal/batExtensions.c
        monetdb5/modules/mal/batcalc.c
        monetdb5/modules/mal/mat.c
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/sample.c
        monetdb5/modules/mal/txtsim.c
        sql/backends/monet5/UDF/pyapi/connection.c
        sql/backends/monet5/generator/generator.c
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_result.c
        sql/backends/monet5/vaults/bam/bam_lib.c
        sql/storage/bat/bat_storage.c
        sql/storage/bat/bat_table.c
        sql/storage/bat/bat_utils.c
        sql/test/leaks/Tests/check1.stable.out
        sql/test/leaks/Tests/check1.stable.out.int128
        sql/test/leaks/Tests/check2.stable.out
        sql/test/leaks/Tests/check3.stable.out
        sql/test/leaks/Tests/check4.stable.out
        sql/test/leaks/Tests/check5.stable.out
        sql/test/leaks/Tests/select1.stable.out
        sql/test/leaks/Tests/select1.stable.out.int128
        sql/test/leaks/Tests/select2.stable.out
        sql/test/leaks/Tests/select2.stable.out.int128
        sql/test/leaks/Tests/temp1.stable.out
Branch: data-vaults
Log Message:

Merge with default


diffs (truncated from 3560 to 300 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -246,10 +246,10 @@ BATdense(oid hseq, oid tseq, BUN cnt)
        BAT *bn;
 
        bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
-       if (bn == NULL)
-               return NULL;
-       BATtseqbase(bn, tseq);
-       BATsetcount(bn, cnt);
+       if (bn != NULL) {
+               BATtseqbase(bn, tseq);
+               BATsetcount(bn, cnt);
+       }
        return bn;
 }
 
@@ -1380,6 +1380,7 @@ BATsetcount(BAT *b, BUN cnt)
 {
        /* head column is always VOID, and some head properties never change */
        assert(b->hseqbase != oid_nil);
+       assert(cnt <= BUN_MAX);
 
        b->batCount = cnt;
        b->batDirtydesc = TRUE;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1306,21 +1306,17 @@ BATsort(BAT **sorted, BAT **order, BAT *
                        *sorted = bn;
                }
                if (order) {
-                       on = COLnew(b->hseqbase, TYPE_void, BATcount(b), TRANSIENT);
+                       on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
                        if (on == NULL)
                                goto error;
-                       BATsetcount(on, BATcount(b));
-                       BATtseqbase(on, b->hseqbase);
                        *order = on;
                }
                if (groups) {
                        if (BATtkey(b)) {
                                /* singleton groups */
-                               gn = COLnew(0, TYPE_void, BATcount(b), TRANSIENT);
+                               gn = BATdense(0, 0, BATcount(b));
                                if (gn == NULL)
                                        goto error;
-                               BATsetcount(gn, BATcount(b));
-                               BATtseqbase(gn, 0);
                        } else {
                                /* single group */
                                const oid *o = 0;
@@ -1822,15 +1818,9 @@ BATcount_no_nil(BAT *b)
 static BAT *
 newdensecand(oid first, oid last)
 {
-       BAT *bn;
-
-       if ((bn = COLnew(0, TYPE_void, 0, TRANSIENT)) == NULL)
-               return NULL;
        if (last < first)
                first = last = 0; /* empty range */
-       BATsetcount(bn, last - first);
-       BATtseqbase(bn, first);
-       return bn;
+       return BATdense(0, first, last - first);
 }
 
 /* merge two candidate lists and produce a new one
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -321,9 +321,18 @@ BBPselectfarm(int role, int type, enum h
 {
        int i;
 
-       assert(role >= 0 && role < 32);
        (void) type;            /* may use in future */
        (void) hptype;          /* may use in future */
+
+       assert(role >= 0 && role < 32);
+#ifndef PERSISTENTHASH
+       if (hptype == hashheap)
+               role = TRANSIENT;
+#endif
+#ifndef PERSISTENTIDX
+       if (hptype == orderidxheap)
+               role = TRANSIENT;
+#endif
        for (i = 0; i < MAXFARMS; i++)
                if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << role))
                        return i;
@@ -1909,7 +1918,9 @@ BBPdump(void)
                                vm += HEAPvmsize(b->thash->heap);
                        }
                }
-               fprintf(stderr, "\n");
+               fprintf(stderr, " role: %s, persistence: %s\n",
+                       b->batRole == PERSISTENT ? "persistent" : "transient",
+                       b->batPersistence == PERSISTENT ? "persistent" : "transient");
        }
        fprintf(stderr,
                "# %d bats: mem=" SZFMT ", vm=" SZFMT " %d cached bats: mem=" 
SZFMT ", vm=" SZFMT "\n",
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -102,7 +102,7 @@
 
 #define shuffle_unique(TYPE, OP)                                       \
        do {                                                            \
-               const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
+               const TYPE *restrict vals = (const TYPE *) Tloc(b, 0);  \
                heapify(OP##fix, SWAP1);                                \
                while (cand ? cand < candend : start < end) {           \
                        i = cand ? *cand++ : start++ + b->hseqbase;     \
@@ -119,9 +119,13 @@
  * refer to the N smallest/largest (depending on asc) tail values of b
  * (taking the optional candidate list s into account).  If there are
  * multiple equal values to take us past N, we return a subset of those.
+ *
+ * If lastp is non-NULL, it is filled in with the oid of the "last"
+ * value, i.e. the value of which there may be multiple occurrences
+ * that are not all included in the first N.
  */
 static BAT *
-BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc)
+BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp)
 {
        BAT *bn;
        BATiter bi = bat_iterator(b);
@@ -140,17 +144,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                if (n >= (BUN) (candend - cand)) {
                        /* trivial: return the candidate list (the
                         * part that refers to b, that is) */
+                       if (lastp)
+                               *lastp = 0;
                        return BATslice(s,
                                        (BUN) (cand - (const oid *) Tloc(s, 0)),
                                        (BUN) (candend - (const oid *) Tloc(s, 0)));
                }
        } else if (n >= cnt) {
                /* trivial: return everything */
-               bn = COLnew(0, TYPE_void, cnt, TRANSIENT);
+               bn = BATdense(0, start + b->hseqbase, cnt);
                if (bn == NULL)
                        return NULL;
-               BATsetcount(bn, cnt);
-               BATtseqbase(bn, start + b->hseqbase);
+               if (lastp)
+                       *lastp = 0;
                return bn;
        }
        /* note, we want to do both calls */
@@ -163,23 +169,27 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                                /* return copy of first relevant part
                                 * of candidate list */
                                i = (BUN) (cand - (const oid *) Tloc(s, 0));
+                               if (lastp)
+                                       *lastp = cand[n - 1];
                                return BATslice(s, i, i + n);
                        }
                        /* return copy of last relevant part of
                         * candidate list */
                        i = (BUN) (candend - (const oid *) Tloc(s, 0));
+                       if (lastp)
+                               *lastp = candend[-(ssize_t)n];
                        return BATslice(s, i - n, i);
                }
-               bn = COLnew(0, TYPE_void, n, TRANSIENT);
-               if (bn == NULL)
-                       return NULL;
-               BATsetcount(bn, n);
                if (asc ? b->tsorted : b->trevsorted) {
                        /* first n entries from b */
-                       BATtseqbase(bn, start + b->hseqbase);
+                       bn = BATdense(0, start + b->hseqbase, n);
+                       if (lastp)
+                               *lastp = start + b->hseqbase + n - 1;
                } else {
                        /* last n entries from b */
-                       BATtseqbase(bn, start + cnt + b->hseqbase - n);
+                       bn = BATdense(0, start + cnt + b->hseqbase - n, n);
+                       if (lastp)
+                               *lastp = start + cnt + b->hseqbase - n;
                }
                return bn;
        }
@@ -293,6 +303,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                        break;
                }
        }
+       if (lastp)
+               *lastp = oids[0]; /* store id of largest value */
        /* output must be sorted since it's a candidate list */
        GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
        bn->tsorted = 1;
@@ -356,8 +368,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                }                                                       \
        } while (0)
 
+/* This version of BATfirstn is like the one above, except that it
+ * also looks at groups.  The values of the group IDs are important:
+ * we return only the smallest N (i.e., not dependent on asc which
+ * refers only to the values in the BAT b).
+ *
+ * If lastp is non-NULL, it is filled in with the oid of the "last"
+ * value, i.e. the value of which there may be multiple occurrences
+ * that are not all included in the first N.  If lastgp is non-NULL,
+ * it is filled with the group ID (not the oid of the group ID) for
+ * that same value.
+ */
 static BAT *
-BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc)
+BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc, oid *lastp, oid *lastgp)
 {
        BAT *bn;
        BATiter bi = bat_iterator(b);
@@ -387,11 +410,7 @@ BATfirstn_unique_with_groups(BAT *b, BAT
        if (n == 0) {
                /* candidate list might refer only to values outside
                 * of the bat and hence be effectively empty */
-               bn = COLnew(0, TYPE_void, 0, TRANSIENT);
-               if (bn == NULL)
-                       return NULL;
-               BATtseqbase(bn, 0);
-               return bn;
+               return BATdense(0, 0, 0);
        }
 
        bn = COLnew(0, TYPE_oid, n, TRANSIENT);
@@ -533,6 +552,10 @@ BATfirstn_unique_with_groups(BAT *b, BAT
                        break;
                }
        }
+       if (lastp)
+               *lastp = oids[0];
+       if (lastgp)
+               *lastgp = goids[0];
        GDKfree(goids);
        /* output must be sorted since it's a candidate list */
        GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
@@ -545,597 +568,238 @@ BATfirstn_unique_with_groups(BAT *b, BAT
        return bn;
 }
 
-#define shuffle_grouped1_body(COMPARE, EQUAL)                          \
-       do {                                                            \
-               for (i = cand ? *cand++ - b->hseqbase : start;          \
-                    i < end;                                           \
-                    cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
-                       for (j = 0; j < n; j++) {                       \
-                               if (j == top) {                         \
-                                       assert(top < n);                \
-                                       groups[top].cnt = 1;            \
-                                       groups[top++].bun = i;          \
-                                       break;                          \
-                               } else {                                \
-                                       assert(j < top);                \
-                                       assert(groups[j].bun < i);      \
-                                       if (COMPARE) {                  \
-                                               if (top < n)            \
-                                                       top++;          \
-                                               for (k = top - 1; k > j; k--) { \
-                                                       groups[k] = groups[k - 1]; \
-                                               }                       \
-                                               groups[j].bun = i;      \
-                                               groups[j].cnt = 1;      \
-                                               break;                  \
-                                       } else if (EQUAL) {             \
-                                               groups[j].cnt++;        \
-                                               break;                  \
-                                       }                               \
-                               }                                       \
-                       }                                               \
-               }                                                       \
-       } while (0)
-
-#define shuffle_grouped1(TYPE, OPER)                                   \
-       do {                                                            \
-               const TYPE *restrict v = (const TYPE *) Tloc(b, 0);     \
-               shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]),     \
-                                     v[i] == v[groups[j].bun]);        \
-       } while (0)
-
-#define shuffle_grouped2(TYPE)                                         \
-       do {                                                            \
-               const TYPE *restrict v = (const TYPE *) Tloc(b, 0);     \
-               TYPE lastval = v[groups[top - 1].bun];                  \
-               for (i = cand ? *cand++ - b->hseqbase : start;          \
-                    i < end;                                           \
-                    cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
-                       if (asc ? v[i] > lastval : v[i] < lastval)      \
-                               continue;                               \
-                       for (j = 0; j < top; j++) {                     \
-                               if (v[i] == v[groups[j].bun]) {         \
-                                       if (bp)                         \
-                                               *bp++ = i + b->hseqbase; \
-                                       *gp++ = j;                      \
-                                       break;                          \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to