Changeset: a9cb90dab9d3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a9cb90dab9d3
Added Files:
    sql/test/BugTracker-2017/Tests/side-effect.Bug-6397.sql
Modified Files:
    gdk/gdk_bat.c
    gdk/gdk_batop.c
    gdk/gdk_bbp.c
    gdk/gdk_firstn.c
    gdk/gdk_group.c
    gdk/gdk_imprints.c
    gdk/gdk_join.c
    gdk/gdk_logger.c
    gdk/gdk_sample.c
    gdk/gdk_select.c
    gdk/gdk_unique.c
    monetdb5/extras/rapi/rapi.c
    monetdb5/modules/atoms/json.c
    monetdb5/modules/kernel/aggr.c
    monetdb5/modules/kernel/algebra.c
    monetdb5/modules/kernel/bat5.c
    monetdb5/modules/kernel/batmmath.c
    monetdb5/modules/kernel/batstr.c
    monetdb5/modules/kernel/group.c
    monetdb5/modules/kernel/microbenchmark.c
    monetdb5/modules/mal/batExtensions.c
    monetdb5/modules/mal/batcalc.c
    monetdb5/modules/mal/mat.c
    monetdb5/modules/mal/pcre.c
    monetdb5/modules/mal/sample.c
    monetdb5/modules/mal/txtsim.c
    sql/backends/monet5/UDF/pyapi/connection.c
    sql/backends/monet5/generator/generator.c
    sql/backends/monet5/sql.c
    sql/backends/monet5/sql_rank.c
    sql/backends/monet5/sql_result.c
    sql/backends/monet5/vaults/bam/bam_lib.c
    sql/storage/bat/bat_storage.c
    sql/storage/bat/bat_table.c
    sql/storage/bat/bat_utils.c
    sql/test/leaks/Tests/check1.stable.out
    sql/test/leaks/Tests/check1.stable.out.int128
    sql/test/leaks/Tests/check2.stable.out
    sql/test/leaks/Tests/check3.stable.out
    sql/test/leaks/Tests/check4.stable.out
    sql/test/leaks/Tests/check5.stable.out
    sql/test/leaks/Tests/select1.stable.out
    sql/test/leaks/Tests/select1.stable.out.int128
    sql/test/leaks/Tests/select2.stable.out
    sql/test/leaks/Tests/select2.stable.out.int128
    sql/test/leaks/Tests/temp1.stable.out
Branch: data-vaults
Log Message:
Merge with default diffs (truncated from 3560 to 300 lines): diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -246,10 +246,10 @@ BATdense(oid hseq, oid tseq, BUN cnt) BAT *bn; bn = COLnew(hseq, TYPE_void, 0, TRANSIENT); - if (bn == NULL) - return NULL; - BATtseqbase(bn, tseq); - BATsetcount(bn, cnt); + if (bn != NULL) { + BATtseqbase(bn, tseq); + BATsetcount(bn, cnt); + } return bn; } @@ -1380,6 +1380,7 @@ BATsetcount(BAT *b, BUN cnt) { /* head column is always VOID, and some head properties never change */ assert(b->hseqbase != oid_nil); + assert(cnt <= BUN_MAX); b->batCount = cnt; b->batDirtydesc = TRUE; diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -1306,21 +1306,17 @@ BATsort(BAT **sorted, BAT **order, BAT * *sorted = bn; } if (order) { - on = COLnew(b->hseqbase, TYPE_void, BATcount(b), TRANSIENT); + on = BATdense(b->hseqbase, b->hseqbase, BATcount(b)); if (on == NULL) goto error; - BATsetcount(on, BATcount(b)); - BATtseqbase(on, b->hseqbase); *order = on; } if (groups) { if (BATtkey(b)) { /* singleton groups */ - gn = COLnew(0, TYPE_void, BATcount(b), TRANSIENT); + gn = BATdense(0, 0, BATcount(b)); if (gn == NULL) goto error; - BATsetcount(gn, BATcount(b)); - BATtseqbase(gn, 0); } else { /* single group */ const oid *o = 0; @@ -1822,15 +1818,9 @@ BATcount_no_nil(BAT *b) static BAT * newdensecand(oid first, oid last) { - BAT *bn; - - if ((bn = COLnew(0, TYPE_void, 0, TRANSIENT)) == NULL) - return NULL; if (last < first) first = last = 0; /* empty range */ - BATsetcount(bn, last - first); - BATtseqbase(bn, first); - return bn; + return BATdense(0, first, last - first); } /* merge two candidate lists and produce a new one diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -321,9 +321,18 @@ BBPselectfarm(int role, int type, enum h { int i; - assert(role >= 0 && role < 32); (void) type; /* may use in future */ (void) hptype; /* may use in future 
*/ + + assert(role >= 0 && role < 32); +#ifndef PERSISTENTHASH + if (hptype == hashheap) + role = TRANSIENT; +#endif +#ifndef PERSISTENTIDX + if (hptype == orderidxheap) + role = TRANSIENT; +#endif for (i = 0; i < MAXFARMS; i++) if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << role)) return i; @@ -1909,7 +1918,9 @@ BBPdump(void) vm += HEAPvmsize(b->thash->heap); } } - fprintf(stderr, "\n"); + fprintf(stderr, " role: %s, persistence: %s\n", + b->batRole == PERSISTENT ? "persistent" : "transient", + b->batPersistence == PERSISTENT ? "persistent" : "transient"); } fprintf(stderr, "# %d bats: mem=" SZFMT ", vm=" SZFMT " %d cached bats: mem=" SZFMT ", vm=" SZFMT "\n", diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c --- a/gdk/gdk_firstn.c +++ b/gdk/gdk_firstn.c @@ -102,7 +102,7 @@ #define shuffle_unique(TYPE, OP) \ do { \ - const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ + const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ heapify(OP##fix, SWAP1); \ while (cand ? cand < candend : start < end) { \ i = cand ? *cand++ : start++ + b->hseqbase; \ @@ -119,9 +119,13 @@ * refer to the N smallest/largest (depending on asc) tail values of b * (taking the optional candidate list s into account). If there are * multiple equal values to take us past N, we return a subset of those. + * + * If lastp is non-NULL, it is filled in with the oid of the "last" + * value, i.e. the value of which there may be multiple occurrences + * that are not all included in the first N. 
*/ static BAT * -BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc) +BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp) { BAT *bn; BATiter bi = bat_iterator(b); @@ -140,17 +144,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, if (n >= (BUN) (candend - cand)) { /* trivial: return the candidate list (the * part that refers to b, that is) */ + if (lastp) + *lastp = 0; return BATslice(s, (BUN) (cand - (const oid *) Tloc(s, 0)), (BUN) (candend - (const oid *) Tloc(s, 0))); } } else if (n >= cnt) { /* trivial: return everything */ - bn = COLnew(0, TYPE_void, cnt, TRANSIENT); + bn = BATdense(0, start + b->hseqbase, cnt); if (bn == NULL) return NULL; - BATsetcount(bn, cnt); - BATtseqbase(bn, start + b->hseqbase); + if (lastp) + *lastp = 0; return bn; } /* note, we want to do both calls */ @@ -163,23 +169,27 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, /* return copy of first relevant part * of candidate list */ i = (BUN) (cand - (const oid *) Tloc(s, 0)); + if (lastp) + *lastp = cand[n - 1]; return BATslice(s, i, i + n); } /* return copy of last relevant part of * candidate list */ i = (BUN) (candend - (const oid *) Tloc(s, 0)); + if (lastp) + *lastp = candend[-(ssize_t)n]; return BATslice(s, i - n, i); } - bn = COLnew(0, TYPE_void, n, TRANSIENT); - if (bn == NULL) - return NULL; - BATsetcount(bn, n); if (asc ? 
b->tsorted : b->trevsorted) { /* first n entries from b */ - BATtseqbase(bn, start + b->hseqbase); + bn = BATdense(0, start + b->hseqbase, n); + if (lastp) + *lastp = start + b->hseqbase + n - 1; } else { /* last n entries from b */ - BATtseqbase(bn, start + cnt + b->hseqbase - n); + bn = BATdense(0, start + cnt + b->hseqbase - n, n); + if (lastp) + *lastp = start + cnt + b->hseqbase - n; } return bn; } @@ -293,6 +303,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, break; } } + if (lastp) + *lastp = oids[0]; /* store id of largest value */ /* output must be sorted since it's a candidate list */ GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid); bn->tsorted = 1; @@ -356,8 +368,19 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, } \ } while (0) +/* This version of BATfirstn is like the one above, except that it + * also looks at groups. The values of the group IDs are important: + * we return only the smallest N (i.e., not dependent on asc which + * refers only to the values in the BAT b). + * + * If lastp is non-NULL, it is filled in with the oid of the "last" + * value, i.e. the value of which there may be multiple occurrences + * that are not all included in the first N. If lastgp is non-NULL, + * it is filled with the group ID (not the oid of the group ID) for + * that same value. 
+ */ static BAT * -BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc) +BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc, oid *lastp, oid *lastgp) { BAT *bn; BATiter bi = bat_iterator(b); @@ -387,11 +410,7 @@ BATfirstn_unique_with_groups(BAT *b, BAT if (n == 0) { /* candidate list might refer only to values outside * of the bat and hence be effectively empty */ - bn = COLnew(0, TYPE_void, 0, TRANSIENT); - if (bn == NULL) - return NULL; - BATtseqbase(bn, 0); - return bn; + return BATdense(0, 0, 0); } bn = COLnew(0, TYPE_oid, n, TRANSIENT); @@ -533,6 +552,10 @@ BATfirstn_unique_with_groups(BAT *b, BAT break; } } + if (lastp) + *lastp = oids[0]; + if (lastgp) + *lastgp = goids[0]; GDKfree(goids); /* output must be sorted since it's a candidate list */ GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid); @@ -545,597 +568,238 @@ BATfirstn_unique_with_groups(BAT *b, BAT return bn; } -#define shuffle_grouped1_body(COMPARE, EQUAL) \ - do { \ - for (i = cand ? *cand++ - b->hseqbase : start; \ - i < end; \ - cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \ - for (j = 0; j < n; j++) { \ - if (j == top) { \ - assert(top < n); \ - groups[top].cnt = 1; \ - groups[top++].bun = i; \ - break; \ - } else { \ - assert(j < top); \ - assert(groups[j].bun < i); \ - if (COMPARE) { \ - if (top < n) \ - top++; \ - for (k = top - 1; k > j; k--) { \ - groups[k] = groups[k - 1]; \ - } \ - groups[j].bun = i; \ - groups[j].cnt = 1; \ - break; \ - } else if (EQUAL) { \ - groups[j].cnt++; \ - break; \ - } \ - } \ - } \ - } \ - } while (0) - -#define shuffle_grouped1(TYPE, OPER) \ - do { \ - const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \ - shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]), \ - v[i] == v[groups[j].bun]); \ - } while (0) - -#define shuffle_grouped2(TYPE) \ - do { \ - const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \ - TYPE lastval = v[groups[top - 1].bun]; \ - for (i = cand ? 
*cand++ - b->hseqbase : start; \ - i < end; \ - cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \ - if (asc ? v[i] > lastval : v[i] < lastval) \ - continue; \ - for (j = 0; j < top; j++) { \ - if (v[i] == v[groups[j].bun]) { \ - if (bp) \ - *bp++ = i + b->hseqbase; \ - *gp++ = j; \ - break; \ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list