Changeset: ed2982ec2945 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ed2982ec2945 Modified Files: clients/Tests/MAL-signatures.stable.out clients/Tests/MAL-signatures.stable.out.int128 clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_batop.c monetdb5/ChangeLog monetdb5/modules/kernel/bat5.c monetdb5/modules/kernel/bat5.h monetdb5/modules/kernel/bat5.mal monetdb5/optimizer/opt_candidates.c monetdb5/optimizer/opt_prelude.c monetdb5/optimizer/opt_prelude.h monetdb5/optimizer/opt_support.c Branch: context Log Message:
merged with default diffs (truncated from 380 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -612,6 +612,7 @@ stdout of test 'MAL-signatures` in direc [ "bat", "delete", "command bat.delete(b:bat[:any_1]):bat[:any_1] ", "BKCdelete_all;", "Delete all entries." ] [ "bat", "delete", "command bat.delete(b:bat[:any_1], d:bat[:oid]):bat[:any_1] ", "BKCdelete_multi;", "Delete multiple BUN, shifting BUNs up" ] [ "bat", "densebat", "command bat.densebat(sz:lng):bat[:oid] ", "BKCdensebat;", "Creates a new [void,void] BAT of size 'sz'." ] +[ "bat", "diffcand", "command bat.diffcand(a:bat[:oid], b:bat[:oid]):bat[:oid] ", "BKCdiffcand;", "Calculate difference of two candidate lists" ] [ "bat", "getAccess", "command bat.getAccess(b:bat[:any_1]):str ", "BKCgetAccess;", "Return the access mode attached to this BAT as a character." ] [ "bat", "getCapacity", "command bat.getCapacity(b:bat[:any_1]):lng ", "BKCgetCapacity;", "Returns the current allocation size (in max number of elements) of a BAT." ] [ "bat", "getColumnType", "command bat.getColumnType(b:bat[:any_1]):str ", "BKCgetColumnType;", "Returns the type of the tail column of a BAT, as an integer type number." ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -716,6 +716,7 @@ stdout of test 'MAL-signatures` in direc [ "bat", "delete", "command bat.delete(b:bat[:any_1]):bat[:any_1] ", "BKCdelete_all;", "Delete all entries." ] [ "bat", "delete", "command bat.delete(b:bat[:any_1], d:bat[:oid]):bat[:any_1] ", "BKCdelete_multi;", "Delete multiple BUN, shifting BUNs up" ] [ "bat", "densebat", "command bat.densebat(sz:lng):bat[:oid] ", "BKCdensebat;", "Creates a new [void,void] BAT of size 'sz'." ] +[ "bat", "diffcand", "command bat.diffcand(a:bat[:oid], b:bat[:oid]):bat[:oid] ", "BKCdiffcand;", "Calculate difference of two candidate lists" ] [ "bat", "getAccess", "command bat.getAccess(b:bat[:any_1]):str ", "BKCgetAccess;", "Return the access mode attached to this BAT as a character." ] [ "bat", "getCapacity", "command bat.getCapacity(b:bat[:any_1]):lng ", "BKCgetCapacity;", "Returns the current allocation size (in max number of elements) of a BAT." ] [ "bat", "getColumnType", "command bat.getColumnType(b:bat[:any_1]):str ", "BKCgetColumnType;", "Returns the type of the tail column of a BAT, as an integer type number." ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -114,6 +114,7 @@ BUN BATcount_no_nil(BAT *b); gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__)); BAT *BATdense(oid hseq, oid tseq, BUN cnt) __attribute__((__warn_unused_result__)); BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate); +BAT *BATdiffcand(BAT *a, BAT *b); gdk_return BATextend(BAT *b, BUN newcap) __attribute__((__warn_unused_result__)); void BATfakeCommit(BAT *b); gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__)); @@ -805,6 +806,7 @@ str BKCdelete(bat *r, const bat *bid, co str BKCdelete_all(bat *r, const bat *bid); str BKCdelete_multi(bat *r, const bat *bid, const bat *sid); str BKCdensebat(bat *ret, const lng *size); +str BKCdiffcand(bat *ret, const bat *aid, const bat *bid); str BKCgetAccess(str *res, const bat *bid); str BKCgetBBPname(str *ret, const bat *bid); str BKCgetCapacity(lng *res, const bat *bid); @@ -2095,6 +2097,7 @@ str deltaRef; str dense_rankRef; malType destinationType(MalBlkPtr mb, InstrPtr p); str diffRef; +str diffcandRef; str differenceRef; str disconnectRef; str divRef; diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2736,6 +2736,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s gdk_export BAT *BATmergecand(BAT *a, BAT *b); gdk_export BAT *BATintersectcand(BAT *a, BAT *b); +gdk_export BAT *BATdiffcand(BAT *a, BAT *b); gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__)); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -2164,8 +2164,8 @@ BATmergecand(BAT *a, BAT *b) if (bn == NULL) return NULL; p = (oid *) Tloc(bn, 0); - if (a->ttype == TYPE_void && b->ttype == TYPE_void) { - /* both lists are VOID */ + if (BATtdense(a) && BATtdense(b)) { + /* both lists are dense */ if (a->tseqbase > b->tseqbase) { BAT *t = a; @@ -2179,14 +2179,14 @@ BATmergecand(BAT *a, BAT *b) i < b->tseqbase + BATcount(b); i++) *p++ = i; - } else if (a->ttype == TYPE_void || b->ttype == TYPE_void) { - if (b->ttype == TYPE_void) { + } else if (BATtdense(a) || BATtdense(b)) { + if (BATtdense(b)) { BAT *t = a; a = b; b = t; } - /* a->ttype == TYPE_void, b->ttype == TYPE_oid */ + /* only a is dense */ bp = (const oid *) Tloc(b, 0); bpe = bp + BATcount(b); while (bp < bpe && *bp < a->tseqbase) @@ -2198,7 +2198,7 @@ BATmergecand(BAT *a, BAT *b) while (bp < bpe) *p++ = *bp++; } else { - /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */ + /* a and b are both not dense */ ap = (const oid *) Tloc(a, 0); ape = ap + BATcount(a); bp = (const oid *) Tloc(b, 0); @@ -2263,7 +2263,7 @@ BATintersectcand(BAT *a, BAT *b) bl = BUNtoid(b, BUNlast(b) - 1); if ((af + BATcount(a) - 1 == al) && (bf + BATcount(b) - 1 == bl)) { - /* both lists are VOID */ + /* both lists are dense */ return newdensecand(MAX(af, bf), MIN(al, bl) + 1); } @@ -2271,14 +2271,14 @@ BATintersectcand(BAT *a, BAT *b) if (bn == NULL) return NULL; p = (oid *) Tloc(bn, 0); - if (a->ttype == TYPE_void || b->ttype == TYPE_void) { - if (b->ttype == TYPE_void) { + if (BATtdense(a) || BATtdense(b)) { + if (BATtdense(b)) { BAT *t = a; a = b; b = t; } - /* a->ttype == TYPE_void, b->ttype == TYPE_oid */ + /* only a is dense */ bp = (const oid *) Tloc(b, 0); bpe = bp + BATcount(b); while (bp < bpe && *bp < a->tseqbase) @@ -2286,7 +2286,7 @@ BATintersectcand(BAT *a, BAT *b) while (bp < bpe && *bp < a->tseqbase + BATcount(a)) *p++ = *bp++; } else { - /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */ + /* a and b are both not dense */ ap = (const oid *) Tloc(a, 0); ape = ap + BATcount(a); bp = (const oid *) Tloc(b, 0); @@ -2312,3 +2312,123 @@ BATintersectcand(BAT *a, BAT *b) bn->tnonil = true; return virtualize(bn); } + +/* calculate the difference of two candidate lists and produce a new one + */ +BAT * +BATdiffcand(BAT *a, BAT *b) +{ + BAT *bn; + const oid *restrict ap, *restrict bp, *ape, *bpe; + oid *restrict p; + oid af, al, bf, bl; + + BATcheck(a, "BATdiffcand", NULL); + BATcheck(b, "BATdiffcand", NULL); + assert(ATOMtype(a->ttype) == TYPE_oid); + assert(ATOMtype(b->ttype) == TYPE_oid); + assert(a->tsorted); + assert(b->tsorted); + assert(a->tkey); + assert(b->tkey); + assert(a->tnonil); + assert(b->tnonil); + + if (BATcount(a) == 0) + return newdensecand(0, 0); + if (BATcount(b) == 0) + return COLcopy(a, a->ttype, false, TRANSIENT); + + af = BUNtoid(a, 0); + bf = BUNtoid(b, 0); + al = BUNtoid(a, BUNlast(a) - 1) + 1; + bl = BUNtoid(b, BUNlast(b) - 1) + 1; + + if (bf >= al || bl <= af) { + /* no overlap, return a */ + return COLcopy(a, a->ttype, false, TRANSIENT); + } + + if (BATtdense(a) && BATtdense(b)) { + /* both a and b are dense */ + if (af < bf) { + if (al <= bl) { + /* b overlaps with end of a */ + return newdensecand(af, bf); + } + /* b is subset of a */ + return doublerange(af, bf, bl, al); + } else { + /* af >= bf */ + if (al > bl) { + /* b overlaps with beginning of a */ + return newdensecand(bl, al); + } + /* a is subset f b */ + return newdensecand(0, 0); + } + } + bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT); + if (bn == NULL) + return NULL; + p = Tloc(bn, 0); + if (BATtdense(b)) { + BUN n; + /* b is dense and a is not: we can copy the part of a + * that is before the start of b and the part of a + * that is after the end of b */ + ap = Tloc(a, 0); + /* find where b starts in a */ + n = binsearch(NULL, 0, TYPE_oid, ap, NULL, SIZEOF_OID, 0, + BATcount(a), &bf, 1, 0); + if (n > 0) + memcpy(p, ap, n * SIZEOF_OID); + p += n; + n = binsearch(NULL, 0, TYPE_oid, ap, NULL, SIZEOF_OID, 0, + BATcount(a), &bl, 1, 0); + if (n < BATcount(a)) + memcpy(p, ap + n, (BATcount(a) - n) * SIZEOF_OID); + p += n; + } else { + /* b is not dense; find first position in b that is in + * range of a */ + bp = Tloc(b, 0); + bpe = bp + BATcount(b); + bp += binsearch(NULL, 0, TYPE_oid, bp, NULL, SIZEOF_OID, 0, + BATcount(b), &af, 1, 0); + if (BATtdense(a)) { + /* only a is dense */ + while (af < al) { + if (bp == bpe) + *p++ = af; + else if (af < *bp) + *p++ = af; + else + bp++; + af++; + } + } else { + /* a and b are both not dense */ + ap = Tloc(a, 0); + ape = ap + BATcount(a); + while (ap < ape) { + if (bp == bpe) + *p++ = *ap; + else if (*ap < *bp) + *p++ = *ap; + else + bp++; + ap++; + } + } + } + + /* properties */ + BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); + bn->trevsorted = BATcount(bn) <= 1; + bn->tsorted = true; + bn->tkey = true; + bn->tnil = false; + bn->tnonil = true; + return virtualize(bn); +} diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog --- a/monetdb5/ChangeLog +++ b/monetdb5/ChangeLog @@ -1,6 +1,10 @@ # ChangeLog file for MonetDB5 # This file is updated with Maddlog +* Mon Jul 1 2019 Sjoerd Mullender <sjo...@acm.org> +- Implemented a function bat.diffcand to calculate difference of two + candidate lists. + * Fri Jun 14 2019 Sjoerd Mullender <sjo...@acm.org> - The mtime module was completely rewritten, the atom types date, daytime, and timestamp were changed. Upgrade code for BATs diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c --- a/monetdb5/modules/kernel/bat5.c +++ b/monetdb5/modules/kernel/bat5.c @@ -1408,3 +1408,25 @@ BKCintersectcand(bat *ret, const bat *ai BBPkeepref(*ret); return MAL_SUCCEED; } + +str +BKCdiffcand(bat *ret, const bat *aid, const bat *bid) +{ + BAT *a, *b, *bn; + + if ((a = BATdescriptor(*aid)) == NULL) { + throw(MAL, "bat.diffcand", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + } + if ((b = BATdescriptor(*bid)) == NULL) { + BBPunfix(a->batCacheid); + throw(MAL, "bat.diffcand", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + } + bn = BATdiffcand(a, b); + BBPunfix(a->batCacheid); + BBPunfix(b->batCacheid); + if (bn == NULL) + throw(MAL, "bat.diffcand", OPERATION_FAILED); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list