Changeset: ed2982ec2945 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ed2982ec2945
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_batop.c
        monetdb5/ChangeLog
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/bat5.h
        monetdb5/modules/kernel/bat5.mal
        monetdb5/optimizer/opt_candidates.c
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        monetdb5/optimizer/opt_support.c
Branch: context
Log Message:

merged with default


diffs (truncated from 380 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -612,6 +612,7 @@ stdout of test 'MAL-signatures` in direc
 [ "bat",       "delete",       "command bat.delete(b:bat[:any_1]):bat[:any_1] 
",       "BKCdelete_all;",       "Delete all entries."   ]
 [ "bat",       "delete",       "command bat.delete(b:bat[:any_1], 
d:bat[:oid]):bat[:any_1] ",  "BKCdelete_multi;",     "Delete multiple BUN, 
shifting BUNs up" ]
 [ "bat",       "densebat",     "command bat.densebat(sz:lng):bat[:oid] ",      
"BKCdensebat;", "Creates a new [void,void] BAT of size 'sz'."   ]
+[ "bat",       "diffcand",     "command bat.diffcand(a:bat[:oid], 
b:bat[:oid]):bat[:oid] ",    "BKCdiffcand;", "Calculate difference of two 
candidate lists"   ]
 [ "bat",       "getAccess",    "command bat.getAccess(b:bat[:any_1]):str ",    
"BKCgetAccess;",        "Return the access mode attached to this BAT as a 
character."   ]
 [ "bat",       "getCapacity",  "command bat.getCapacity(b:bat[:any_1]):lng ",  
"BKCgetCapacity;",      "Returns the current allocation size (in max number of 
elements) of a BAT."     ]
 [ "bat",       "getColumnType",        "command 
bat.getColumnType(b:bat[:any_1]):str ",        "BKCgetColumnType;",    "Returns 
the type of the tail column of a BAT, as an integer type number."      ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -716,6 +716,7 @@ stdout of test 'MAL-signatures` in direc
 [ "bat",       "delete",       "command bat.delete(b:bat[:any_1]):bat[:any_1] 
",       "BKCdelete_all;",       "Delete all entries."   ]
 [ "bat",       "delete",       "command bat.delete(b:bat[:any_1], 
d:bat[:oid]):bat[:any_1] ",  "BKCdelete_multi;",     "Delete multiple BUN, 
shifting BUNs up" ]
 [ "bat",       "densebat",     "command bat.densebat(sz:lng):bat[:oid] ",      
"BKCdensebat;", "Creates a new [void,void] BAT of size 'sz'."   ]
+[ "bat",       "diffcand",     "command bat.diffcand(a:bat[:oid], 
b:bat[:oid]):bat[:oid] ",    "BKCdiffcand;", "Calculate difference of two 
candidate lists"   ]
 [ "bat",       "getAccess",    "command bat.getAccess(b:bat[:any_1]):str ",    
"BKCgetAccess;",        "Return the access mode attached to this BAT as a 
character."   ]
 [ "bat",       "getCapacity",  "command bat.getCapacity(b:bat[:any_1]):lng ",  
"BKCgetCapacity;",      "Returns the current allocation size (in max number of 
elements) of a BAT."     ]
 [ "bat",       "getColumnType",        "command 
bat.getColumnType(b:bat[:any_1]):str ",        "BKCgetColumnType;",    "Returns 
the type of the tail column of a BAT, as an integer type number."      ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -114,6 +114,7 @@ BUN BATcount_no_nil(BAT *b);
 gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
 BAT *BATdense(oid hseq, oid tseq, BUN cnt) 
__attribute__((__warn_unused_result__));
 BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, 
BUN estimate);
+BAT *BATdiffcand(BAT *a, BAT *b);
 gdk_return BATextend(BAT *b, BUN newcap) 
__attribute__((__warn_unused_result__));
 void BATfakeCommit(BAT *b);
 gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, 
BUN n, bool asc, bool nilslast, bool distinct) 
__attribute__((__warn_unused_result__));
@@ -805,6 +806,7 @@ str BKCdelete(bat *r, const bat *bid, co
 str BKCdelete_all(bat *r, const bat *bid);
 str BKCdelete_multi(bat *r, const bat *bid, const bat *sid);
 str BKCdensebat(bat *ret, const lng *size);
+str BKCdiffcand(bat *ret, const bat *aid, const bat *bid);
 str BKCgetAccess(str *res, const bat *bid);
 str BKCgetBBPname(str *ret, const bat *bid);
 str BKCgetCapacity(lng *res, const bat *bid);
@@ -2095,6 +2097,7 @@ str deltaRef;
 str dense_rankRef;
 malType destinationType(MalBlkPtr mb, InstrPtr p);
 str diffRef;
+str diffcandRef;
 str differenceRef;
 str disconnectRef;
 str divRef;
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2736,6 +2736,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s
 
 gdk_export BAT *BATmergecand(BAT *a, BAT *b);
 gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
+gdk_export BAT *BATdiffcand(BAT *a, BAT *b);
 
 gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, 
BAT *grps, BUN n, bool asc, bool nilslast, bool distinct)
        __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -2164,8 +2164,8 @@ BATmergecand(BAT *a, BAT *b)
        if (bn == NULL)
                return NULL;
        p = (oid *) Tloc(bn, 0);
-       if (a->ttype == TYPE_void && b->ttype == TYPE_void) {
-               /* both lists are VOID */
+       if (BATtdense(a) && BATtdense(b)) {
+               /* both lists are dense */
                if (a->tseqbase > b->tseqbase) {
                        BAT *t = a;
 
@@ -2179,14 +2179,14 @@ BATmergecand(BAT *a, BAT *b)
                     i < b->tseqbase + BATcount(b);
                     i++)
                        *p++ = i;
-       } else if (a->ttype == TYPE_void || b->ttype == TYPE_void) {
-               if (b->ttype == TYPE_void) {
+       } else if (BATtdense(a) || BATtdense(b)) {
+               if (BATtdense(b)) {
                        BAT *t = a;
 
                        a = b;
                        b = t;
                }
-               /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+               /* only a is dense */
                bp = (const oid *) Tloc(b, 0);
                bpe = bp + BATcount(b);
                while (bp < bpe && *bp < a->tseqbase)
@@ -2198,7 +2198,7 @@ BATmergecand(BAT *a, BAT *b)
                while (bp < bpe)
                        *p++ = *bp++;
        } else {
-               /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */
+               /* a and b are both not dense */
                ap = (const oid *) Tloc(a, 0);
                ape = ap + BATcount(a);
                bp = (const oid *) Tloc(b, 0);
@@ -2263,7 +2263,7 @@ BATintersectcand(BAT *a, BAT *b)
        bl = BUNtoid(b, BUNlast(b) - 1);
 
        if ((af + BATcount(a) - 1 == al) && (bf + BATcount(b) - 1 == bl)) {
-               /* both lists are VOID */
+               /* both lists are dense */
                return newdensecand(MAX(af, bf), MIN(al, bl) + 1);
        }
 
@@ -2271,14 +2271,14 @@ BATintersectcand(BAT *a, BAT *b)
        if (bn == NULL)
                return NULL;
        p = (oid *) Tloc(bn, 0);
-       if (a->ttype == TYPE_void || b->ttype == TYPE_void) {
-               if (b->ttype == TYPE_void) {
+       if (BATtdense(a) || BATtdense(b)) {
+               if (BATtdense(b)) {
                        BAT *t = a;
 
                        a = b;
                        b = t;
                }
-               /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+               /* only a is dense */
                bp = (const oid *) Tloc(b, 0);
                bpe = bp + BATcount(b);
                while (bp < bpe && *bp < a->tseqbase)
@@ -2286,7 +2286,7 @@ BATintersectcand(BAT *a, BAT *b)
                while (bp < bpe && *bp < a->tseqbase + BATcount(a))
                        *p++ = *bp++;
        } else {
-               /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */
+               /* a and b are both not dense */
                ap = (const oid *) Tloc(a, 0);
                ape = ap + BATcount(a);
                bp = (const oid *) Tloc(b, 0);
@@ -2312,3 +2312,123 @@ BATintersectcand(BAT *a, BAT *b)
        bn->tnonil = true;
        return virtualize(bn);
 }
+
+/* calculate the difference of two candidate lists and produce a new one
+ */
+BAT *
+BATdiffcand(BAT *a, BAT *b)
+{
+       BAT *bn;
+       const oid *restrict ap, *restrict bp, *ape, *bpe;
+       oid *restrict p;
+       oid af, al, bf, bl;
+
+       BATcheck(a, "BATdiffcand", NULL);
+       BATcheck(b, "BATdiffcand", NULL);
+       assert(ATOMtype(a->ttype) == TYPE_oid);
+       assert(ATOMtype(b->ttype) == TYPE_oid);
+       assert(a->tsorted);
+       assert(b->tsorted);
+       assert(a->tkey);
+       assert(b->tkey);
+       assert(a->tnonil);
+       assert(b->tnonil);
+
+       if (BATcount(a) == 0)
+               return newdensecand(0, 0);
+       if (BATcount(b) == 0)
+               return COLcopy(a, a->ttype, false, TRANSIENT);
+
+       af = BUNtoid(a, 0);
+       bf = BUNtoid(b, 0);
+       al = BUNtoid(a, BUNlast(a) - 1) + 1;
+       bl = BUNtoid(b, BUNlast(b) - 1) + 1;
+
+       if (bf >= al || bl <= af) {
+               /* no overlap, return a */
+               return COLcopy(a, a->ttype, false, TRANSIENT);
+       }
+
+       if (BATtdense(a) && BATtdense(b)) {
+               /* both a and b are dense */
+               if (af < bf) {
+                       if (al <= bl) {
+                               /* b overlaps with end of a */
+                               return newdensecand(af, bf);
+                       }
+                       /* b is subset of a */
+                       return doublerange(af, bf, bl, al);
+               } else {
+                       /* af >= bf */
+                       if (al > bl) {
+                               /* b overlaps with beginning of a */
+                               return newdensecand(bl, al);
+                       }
+                       /* a is subset f b */
+                       return newdensecand(0, 0);
+               }
+       }
+       bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT);
+       if (bn == NULL)
+               return NULL;
+       p = Tloc(bn, 0);
+       if (BATtdense(b)) {
+               BUN n;
+               /* b is dense and a is not: we can copy the part of a
+                * that is before the start of b and the part of a
+                * that is after the end of b */
+               ap = Tloc(a, 0);
+               /* find where b starts in a */
+               n = binsearch(NULL, 0, TYPE_oid, ap, NULL, SIZEOF_OID, 0,
+                             BATcount(a), &bf, 1, 0);
+               if (n > 0)
+                       memcpy(p, ap, n * SIZEOF_OID);
+               p += n;
+               n = binsearch(NULL, 0, TYPE_oid, ap, NULL, SIZEOF_OID, 0,
+                             BATcount(a), &bl, 1, 0);
+               if (n < BATcount(a))
+                       memcpy(p, ap + n, (BATcount(a) - n) * SIZEOF_OID);
+               p += n;
+       } else {
+               /* b is not dense; find first position in b that is in
+                * range of a */
+               bp = Tloc(b, 0);
+               bpe = bp + BATcount(b);
+               bp += binsearch(NULL, 0, TYPE_oid, bp, NULL, SIZEOF_OID, 0,
+                               BATcount(b), &af, 1, 0);
+               if (BATtdense(a)) {
+                       /* only a is dense */
+                       while (af < al) {
+                               if (bp == bpe)
+                                       *p++ = af;
+                               else if (af < *bp)
+                                       *p++ = af;
+                               else
+                                       bp++;
+                               af++;
+                       }
+               } else {
+                       /* a and b are both not dense */
+                       ap = Tloc(a, 0);
+                       ape = ap + BATcount(a);
+                       while (ap < ape) {
+                               if (bp == bpe)
+                                       *p++ = *ap;
+                               else if (*ap < *bp)
+                                       *p++ = *ap;
+                               else
+                                       bp++;
+                               ap++;
+                       }
+               }
+       }
+
+       /* properties */
+       BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
+       bn->trevsorted = BATcount(bn) <= 1;
+       bn->tsorted = true;
+       bn->tkey = true;
+       bn->tnil = false;
+       bn->tnonil = true;
+       return virtualize(bn);
+}
diff --git a/monetdb5/ChangeLog b/monetdb5/ChangeLog
--- a/monetdb5/ChangeLog
+++ b/monetdb5/ChangeLog
@@ -1,6 +1,10 @@
 # ChangeLog file for MonetDB5
 # This file is updated with Maddlog
 
+* Mon Jul  1 2019 Sjoerd Mullender <sjo...@acm.org>
+- Implemented a function bat.diffcand to calculate difference of two
+  candidate lists.
+
 * Fri Jun 14 2019 Sjoerd Mullender <sjo...@acm.org>
 - The mtime module was completely rewritten, the atom types date,
   daytime, and timestamp were changed.  Upgrade code for BATs
diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c
--- a/monetdb5/modules/kernel/bat5.c
+++ b/monetdb5/modules/kernel/bat5.c
@@ -1408,3 +1408,25 @@ BKCintersectcand(bat *ret, const bat *ai
        BBPkeepref(*ret);
        return MAL_SUCCEED;
 }
+
+str
+BKCdiffcand(bat *ret, const bat *aid, const bat *bid)
+{
+       BAT *a, *b, *bn;
+
+       if ((a = BATdescriptor(*aid)) == NULL) {
+               throw(MAL, "bat.diffcand", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+       }
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               BBPunfix(a->batCacheid);
+               throw(MAL, "bat.diffcand", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+       }
+       bn = BATdiffcand(a, b);
+       BBPunfix(a->batCacheid);
+       BBPunfix(b->batCacheid);
+       if (bn == NULL)
+               throw(MAL, "bat.diffcand", OPERATION_FAILED);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to