Changeset: 2d5ddc62f9e9 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2d5ddc62f9e9
Modified Files:
        clients/Tests/exports.stable.out
Branch: default
Log Message:

Merge groupjoin branch into default.


diffs (truncated from 5997 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -3494,6 +3494,26 @@ command algebra.likeselect(X_0:bat[:str]
 PCRElikeselect;
 Select all head values of the first input BAT for which the@tail value is 
"like" the given (SQL-style) pattern and for@which the head value occurs in the 
tail of the second input@BAT.@Input is a dense-headed BAT, output is a 
dense-headed BAT with in@the tail the head value of the input BAT for which 
the@relationship holds.  The output BAT is sorted on the tail value.
 algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:bit]) 
+ALGmark2join;
+Mark join with candidate lists
+algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmark3join;
+Mark join with candidate lists
+algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag; 
+algebra
+markselect
+command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
+ALGmarkselect;
+Group on group-ids, return aggregated anyequal or allnotequal
+algebra
 not_like
 command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit 
 PCREnotlike;
@@ -3504,6 +3524,11 @@ command algebra.orderidx(X_0:bat[:any_1]
 OIDXorderidx;
 Create an order index
 algebra
+outercrossproduct
+command algebra.outercrossproduct(X_0:bat[:any_1], X_1:bat[:any_2], 
X_2:bat[:oid], X_3:bat[:oid], X_4:bit) (X_5:bat[:oid], X_6:bat[:oid]) 
+ALGoutercrossproduct3;
+Compute the outer cross product of both input bats
+algebra
 outerjoin
 command algebra.outerjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:bit, X_6:lng):bat[:oid] 
 ALGouterjoin1;
@@ -3514,6 +3539,11 @@ command algebra.outerjoin(X_0:bat[:any_1
 ALGouterjoin;
 Left outer join with candidate lists
 algebra
+outerselect
+command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
+ALGouterselect;
+Per input lid return atleast one row, if none of the predicates (p) hold, 
return a nil, else 'all' true cases.
+algebra
 project
 pattern algebra.project(X_0:bat[:any_1], X_1:any_3):bat[:any_3] 
 ALGprojecttail;
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -2929,6 +2929,26 @@ command algebra.likeselect(X_0:bat[:str]
 PCRElikeselect;
 Select all head values of the first input BAT for which the@tail value is 
"like" the given (SQL-style) pattern and for@which the head value occurs in the 
tail of the second input@BAT.@Input is a dense-headed BAT, output is a 
dense-headed BAT with in@the tail the head value of the input BAT for which 
the@relationship holds.  The output BAT is sorted on the tail value.
 algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:bit]) 
+ALGmark2join;
+Mark join with candidate lists
+algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmark3join;
+Mark join with candidate lists
+algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag; 
+algebra
+markselect
+command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
+ALGmarkselect;
+Group on group-ids, return aggregated anyequal or allnotequal
+algebra
 not_like
 command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit 
 PCREnotlike;
@@ -2939,6 +2959,11 @@ command algebra.orderidx(X_0:bat[:any_1]
 OIDXorderidx;
 Create an order index
 algebra
+outercrossproduct
+command algebra.outercrossproduct(X_0:bat[:any_1], X_1:bat[:any_2], 
X_2:bat[:oid], X_3:bat[:oid], X_4:bit) (X_5:bat[:oid], X_6:bat[:oid]) 
+ALGoutercrossproduct3;
+Compute the outer cross product of both input bats
+algebra
 outerjoin
 command algebra.outerjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:bit, X_6:lng):bat[:oid] 
 ALGouterjoin1;
@@ -2949,6 +2974,11 @@ command algebra.outerjoin(X_0:bat[:any_1
 ALGouterjoin;
 Left outer join with candidate lists
 algebra
+outerselect
+command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
+ALGouterselect;
+Per input lid return atleast one row, if none of the predicates (p) hold, 
return a nil, else 'all' true cases.
+algebra
 project
 pattern algebra.project(X_0:bat[:any_1], X_1:any_3):bat[:any_3] 
 ALGprojecttail;
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -159,6 +159,7 @@ BAT *BATintersectcand(BAT *a, BAT *b);
 gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
 gdk_return BATkey(BAT *b, bool onoff);
 gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
+gdk_return BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, BUN estimate) __attribute__((__warn_unused_result__));
 BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected);
 void *BATmax(BAT *b, void *aggr);
 void *BATmax_skipnil(BAT *b, void *aggr, bit skipnil);
@@ -172,6 +173,7 @@ BAT *BATnil_grp(BAT *l, BAT *g, BAT *e, 
 bool BATordered(BAT *b);
 bool BATordered_rev(BAT *b);
 gdk_return BATorderidx(BAT *b, bool stable);
+gdk_return BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, bool max_one) __attribute__((__warn_unused_result__));
 gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, bool nil_matches, bool match_one, BUN estimate) 
__attribute__((__warn_unused_result__));
 gdk_return BATprint(stream *s, BAT *b);
 gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]);
@@ -1037,6 +1039,8 @@ const char *mal_version(void);
 stream *maleventstream;
 const char *manifoldRef;
 const char *mapiRef;
+const char *markjoinRef;
+const char *markselectRef;
 const char *maskRef;
 const char *matRef;
 const char *maxRef;
@@ -1112,7 +1116,9 @@ str operatorName(int i);
 void opt_pipes_reset(void);
 str optimizeMALBlock(Client cntxt, MalBlkPtr mb);
 const char *optimizerRef;
+const char *outercrossRef;
 const char *outerjoinRef;
+const char *outerselectRef;
 const char *packIncrementRef;
 const char *packRef;
 const char *parametersRef;
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2302,9 +2302,13 @@ gdk_export BAT *BATthetaselect(BAT *b, B
 gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t 
role);
 gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool max_one)
        __attribute__((__warn_unused_result__));
+gdk_export gdk_return BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool max_one)
+       __attribute__((__warn_unused_result__));
 
 gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool nil_matches, BUN estimate)
        __attribute__((__warn_unused_result__));
+gdk_export gdk_return BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, BUN estimate)
+       __attribute__((__warn_unused_result__));
 gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
        __attribute__((__warn_unused_result__));
 gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int op, bool nil_matches, BUN estimate)
diff --git a/gdk/gdk_cross.c b/gdk/gdk_cross.c
--- a/gdk/gdk_cross.c
+++ b/gdk/gdk_cross.c
@@ -17,29 +17,21 @@
  * The result is two bats r1 and r2 which contain the OID (head
  * values) of the input bats l and r.
  * If max_one is set, r can have at most one row. */
-gdk_return
-BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one)
+static gdk_return
+BATcrossci(BAT **r1p, BAT **r2p, struct canditer *ci1, struct canditer *ci2)
 {
        BAT *bn1, *bn2 = NULL;
-       struct canditer ci1, ci2;
        oid *restrict p;
        BUN i, j;
 
-       canditer_init(&ci1, l, sl);
-       canditer_init(&ci2, r, sr);
        lng timeoffset = 0;
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
        if (qry_ctx != NULL) {
                timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
        }
 
-       if (max_one && ci1.ncand > 0 && ci2.ncand > 1) {
-               GDKerror("more than one match");
-               return GDK_FAIL;
-       }
-
        /* first some special cases */
-       if (ci1.ncand == 0 || ci2.ncand == 0) {
+       if (ci1->ncand == 0 || ci2->ncand == 0) {
                if ((bn1 = BATdense(0, 0, 0)) == NULL)
                        return GDK_FAIL;
                if (r2p) {
@@ -52,14 +44,14 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
                *r1p = bn1;
                return GDK_SUCCEED;
        }
-       if (ci2.ncand == 1) {
-               if ((bn1 = canditer_slice(&ci1, 0, ci1.ncand)) == NULL)
+       if (ci2->ncand == 1) {
+               if ((bn1 = canditer_slice(ci1, 0, ci1->ncand)) == NULL)
                        return GDK_FAIL;
                if (r2p) {
-                       if (ci1.ncand == 1) {
-                               bn2 = canditer_slice(&ci2, 0, ci2.ncand);
+                       if (ci1->ncand == 1) {
+                               bn2 = canditer_slice(ci2, 0, ci2->ncand);
                        } else {
-                               bn2 = BATconstant(0, TYPE_oid, &ci2.seq, 
ci1.ncand, TRANSIENT);
+                               bn2 = BATconstant(0, TYPE_oid, &ci2->seq, 
ci1->ncand, TRANSIENT);
                        }
                        if (bn2 == NULL) {
                                BBPreclaim(bn1);
@@ -70,12 +62,12 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
                *r1p = bn1;
                return GDK_SUCCEED;
        }
-       if (ci1.ncand == 1) {
-               bn1 = BATconstant(0, TYPE_oid, &ci1.seq, ci2.ncand, TRANSIENT);
+       if (ci1->ncand == 1) {
+               bn1 = BATconstant(0, TYPE_oid, &ci1->seq, ci2->ncand, 
TRANSIENT);
                if (bn1 == NULL)
                        return GDK_FAIL;
                if (r2p) {
-                       bn2 = canditer_slice(&ci2, 0, ci2.ncand);
+                       bn2 = canditer_slice(ci2, 0, ci2->ncand);
                        if (bn2 == NULL) {
                                BBPreclaim(bn1);
                                return GDK_FAIL;
@@ -86,48 +78,48 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
                return GDK_SUCCEED;
        }
 
-       bn1 = COLnew(0, TYPE_oid, ci1.ncand * ci2.ncand, TRANSIENT);
+       bn1 = COLnew(0, TYPE_oid, ci1->ncand * ci2->ncand, TRANSIENT);
        if (r2p)
-               bn2 = COLnew(0, TYPE_oid, ci1.ncand * ci2.ncand, TRANSIENT);
+               bn2 = COLnew(0, TYPE_oid, ci1->ncand * ci2->ncand, TRANSIENT);
        if (!bn1 || (r2p && !bn2)) {
                BBPreclaim(bn1);
                if (bn2)
                        BBPreclaim(bn2);
                return GDK_FAIL;
        }
-       if (ci1.ncand > 0 && ci2.ncand > 0) {
-               BATsetcount(bn1, ci1.ncand * ci2.ncand);
+       if (ci1->ncand > 0 && ci2->ncand > 0) {
+               BATsetcount(bn1, ci1->ncand * ci2->ncand);
                bn1->tsorted = true;
-               bn1->trevsorted = ci1.ncand <= 1;
-               bn1->tkey = ci2.ncand <= 1;
+               bn1->trevsorted = ci1->ncand <= 1;
+               bn1->tkey = ci2->ncand <= 1;
                bn1->tnil = false;
                bn1->tnonil = true;
                p = (oid *) Tloc(bn1, 0);
-               for (i = 0; i < ci1.ncand; i++) {
+               for (i = 0; i < ci1->ncand; i++) {
                        GDK_CHECK_TIMEOUT_BODY(timeoffset, 
GOTO_LABEL_TIMEOUT_HANDLER(bailout));
-                       oid x = canditer_next(&ci1);
-                       for (j = 0; j < ci2.ncand; j++) {
+                       oid x = canditer_next(ci1);
+                       for (j = 0; j < ci2->ncand; j++) {
                                *p++ = x;
                        }
                }
-               BATtseqbase(bn1, ci2.ncand == 1 ? *(oid *) Tloc(bn1, 0) : 
oid_nil);
+               BATtseqbase(bn1, ci2->ncand == 1 ? *(oid *) Tloc(bn1, 0) : 
oid_nil);
 
                if (bn2) {
-                       BATsetcount(bn2, ci1.ncand * ci2.ncand);
-                       bn2->tsorted = ci1.ncand <= 1 || ci2.ncand <= 1;
-                       bn2->trevsorted = ci2.ncand <= 1;
-                       bn2->tkey = ci1.ncand <= 1;
+                       BATsetcount(bn2, ci1->ncand * ci2->ncand);
+                       bn2->tsorted = ci1->ncand <= 1 || ci2->ncand <= 1;
+                       bn2->trevsorted = ci2->ncand <= 1;
+                       bn2->tkey = ci1->ncand <= 1;
                        bn2->tnil = false;
                        bn2->tnonil = true;
                        p = (oid *) Tloc(bn2, 0);
-                       for (i = 0; i < ci1.ncand; i++) {
+                       for (i = 0; i < ci1->ncand; i++) {
                                GDK_CHECK_TIMEOUT_BODY(timeoffset, 
GOTO_LABEL_TIMEOUT_HANDLER(bailout));
-                               for (j = 0; j < ci2.ncand; j++) {
-                                       *p++ = canditer_next(&ci2);
+                               for (j = 0; j < ci2->ncand; j++) {
+                                       *p++ = canditer_next(ci2);
                                }
-                               canditer_reset(&ci2);
+                               canditer_reset(ci2);
                        }
-                       BATtseqbase(bn2, ci1.ncand == 1 ? *(oid *) Tloc(bn2, 0) 
: oid_nil);
+                       BATtseqbase(bn2, ci1->ncand == 1 ? *(oid *) Tloc(bn2, 
0) : oid_nil);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to