Changeset: 2d5ddc62f9e9 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/2d5ddc62f9e9 Modified Files: clients/Tests/exports.stable.out Branch: default Log Message:
Merge groupjoin branch into default. diffs (truncated from 5997 to 300 lines): diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test --- a/clients/Tests/MAL-signatures-hge.test +++ b/clients/Tests/MAL-signatures-hge.test @@ -3494,6 +3494,26 @@ command algebra.likeselect(X_0:bat[:str] PCRElikeselect; Select all head values of the first input BAT for which the@tail value is "like" the given (SQL-style) pattern and for@which the head value occurs in the tail of the second input@BAT.@Input is a dense-headed BAT, output is a dense-headed BAT with in@the tail the head value of the input BAT for which the@relationship holds. The output BAT is sorted on the tail value. algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:bit]) +ALGmark2join; +Mark join with candidate lists +algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:oid], X_7:bat[:bit]) +ALGmark3join; +Mark join with candidate lists +algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) +ALGmarkjoin; +Left mark join with candidate lists, produces left output and mark flag; +algebra +markselect +command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) +ALGmarkselect; +Group on group-ids, return aggregated anyequal or allnotequal +algebra not_like command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit PCREnotlike; @@ -3504,6 +3524,11 @@ command algebra.orderidx(X_0:bat[:any_1] OIDXorderidx; Create an order index algebra +outercrossproduct +command algebra.outercrossproduct(X_0:bat[:any_1], X_1:bat[:any_2], X_2:bat[:oid], X_3:bat[:oid], X_4:bit) (X_5:bat[:oid], X_6:bat[:oid]) +ALGoutercrossproduct3; +Compute the outer cross product of both input bats +algebra outerjoin command algebra.outerjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:bit, X_6:lng):bat[:oid] ALGouterjoin1; @@ -3514,6 +3539,11 @@ command algebra.outerjoin(X_0:bat[:any_1 ALGouterjoin; Left outer join with candidate lists algebra +outerselect +command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) +ALGouterselect; +Per input lid return atleast one row, if none of the predicates (p) hold, return a nil, else 'all' true cases. +algebra project pattern algebra.project(X_0:bat[:any_1], X_1:any_3):bat[:any_3] ALGprojecttail; diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test --- a/clients/Tests/MAL-signatures.test +++ b/clients/Tests/MAL-signatures.test @@ -2929,6 +2929,26 @@ command algebra.likeselect(X_0:bat[:str] PCRElikeselect; Select all head values of the first input BAT for which the@tail value is "like" the given (SQL-style) pattern and for@which the head value occurs in the tail of the second input@BAT.@Input is a dense-headed BAT, output is a dense-headed BAT with in@the tail the head value of the input BAT for which the@relationship holds. The output BAT is sorted on the tail value. algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:bit]) +ALGmark2join; +Mark join with candidate lists +algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:lng) (X_5:bat[:oid], X_6:bat[:oid], X_7:bat[:bit]) +ALGmark3join; +Mark join with candidate lists +algebra +markjoin +command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) +ALGmarkjoin; +Left mark join with candidate lists, produces left output and mark flag; +algebra +markselect +command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) +ALGmarkselect; +Group on group-ids, return aggregated anyequal or allnotequal +algebra not_like command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit PCREnotlike; @@ -2939,6 +2959,11 @@ command algebra.orderidx(X_0:bat[:any_1] OIDXorderidx; Create an order index algebra +outercrossproduct +command algebra.outercrossproduct(X_0:bat[:any_1], X_1:bat[:any_2], X_2:bat[:oid], X_3:bat[:oid], X_4:bit) (X_5:bat[:oid], X_6:bat[:oid]) +ALGoutercrossproduct3; +Compute the outer cross product of both input bats +algebra outerjoin command algebra.outerjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], X_3:bat[:oid], X_4:bit, X_5:bit, X_6:lng):bat[:oid] ALGouterjoin1; @@ -2949,6 +2974,11 @@ command algebra.outerjoin(X_0:bat[:any_1 ALGouterjoin; Left outer join with candidate lists algebra +outerselect +command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) +ALGouterselect; +Per input lid return atleast one row, if none of the predicates (p) hold, return a nil, else 'all' true cases. +algebra project pattern algebra.project(X_0:bat[:any_1], X_1:any_3):bat[:any_3] ALGprojecttail; diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -159,6 +159,7 @@ BAT *BATintersectcand(BAT *a, BAT *b); gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATkey(BAT *b, bool onoff); gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); +gdk_return BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate) __attribute__((__warn_unused_result__)); BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected); void *BATmax(BAT *b, void *aggr); void *BATmax_skipnil(BAT *b, void *aggr, bit skipnil); @@ -172,6 +173,7 @@ BAT *BATnil_grp(BAT *l, BAT *g, BAT *e, bool BATordered(BAT *b); bool BATordered_rev(BAT *b); gdk_return BATorderidx(BAT *b, bool stable); +gdk_return BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one) __attribute__((__warn_unused_result__)); gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATprint(stream *s, BAT *b); gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]); @@ -1037,6 +1039,8 @@ const char *mal_version(void); stream *maleventstream; const char *manifoldRef; const char *mapiRef; +const char *markjoinRef; +const char *markselectRef; const char *maskRef; const char *matRef; const char *maxRef; @@ -1112,7 +1116,9 @@ str operatorName(int i); void opt_pipes_reset(void); str optimizeMALBlock(Client cntxt, MalBlkPtr mb); const char *optimizerRef; +const char *outercrossRef; const char *outerjoinRef; +const char *outerselectRef; const char *packIncrementRef; const char *packRef; const char *parametersRef; diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2302,9 +2302,13 @@ gdk_export BAT *BATthetaselect(BAT *b, B gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role); gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one) __attribute__((__warn_unused_result__)); +gdk_export gdk_return BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one) + __attribute__((__warn_unused_result__)); gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); +gdk_export gdk_return BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate) + __attribute__((__warn_unused_result__)); gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate) __attribute__((__warn_unused_result__)); gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate) diff --git a/gdk/gdk_cross.c b/gdk/gdk_cross.c --- a/gdk/gdk_cross.c +++ b/gdk/gdk_cross.c @@ -17,29 +17,21 @@ * The result is two bats r1 and r2 which contain the OID (head * values) of the input bats l and r. * If max_one is set, r can have at most one row. */ -gdk_return -BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool max_one) +static gdk_return +BATcrossci(BAT **r1p, BAT **r2p, struct canditer *ci1, struct canditer *ci2) { BAT *bn1, *bn2 = NULL; - struct canditer ci1, ci2; oid *restrict p; BUN i, j; - canditer_init(&ci1, l, sl); - canditer_init(&ci2, r, sr); lng timeoffset = 0; QryCtx *qry_ctx = MT_thread_get_qry_ctx(); if (qry_ctx != NULL) { timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0; } - if (max_one && ci1.ncand > 0 && ci2.ncand > 1) { - GDKerror("more than one match"); - return GDK_FAIL; - } - /* first some special cases */ - if (ci1.ncand == 0 || ci2.ncand == 0) { + if (ci1->ncand == 0 || ci2->ncand == 0) { if ((bn1 = BATdense(0, 0, 0)) == NULL) return GDK_FAIL; if (r2p) { @@ -52,14 +44,14 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l *r1p = bn1; return GDK_SUCCEED; } - if (ci2.ncand == 1) { - if ((bn1 = canditer_slice(&ci1, 0, ci1.ncand)) == NULL) + if (ci2->ncand == 1) { + if ((bn1 = canditer_slice(ci1, 0, ci1->ncand)) == NULL) return GDK_FAIL; if (r2p) { - if (ci1.ncand == 1) { - bn2 = canditer_slice(&ci2, 0, ci2.ncand); + if (ci1->ncand == 1) { + bn2 = canditer_slice(ci2, 0, ci2->ncand); } else { - bn2 = BATconstant(0, TYPE_oid, &ci2.seq, ci1.ncand, TRANSIENT); + bn2 = BATconstant(0, TYPE_oid, &ci2->seq, ci1->ncand, TRANSIENT); } if (bn2 == NULL) { BBPreclaim(bn1); @@ -70,12 +62,12 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l *r1p = bn1; return GDK_SUCCEED; } - if (ci1.ncand == 1) { - bn1 = BATconstant(0, TYPE_oid, &ci1.seq, ci2.ncand, TRANSIENT); + if (ci1->ncand == 1) { + bn1 = BATconstant(0, TYPE_oid, &ci1->seq, ci2->ncand, TRANSIENT); if (bn1 == NULL) return GDK_FAIL; if (r2p) { - bn2 = canditer_slice(&ci2, 0, ci2.ncand); + bn2 = canditer_slice(ci2, 0, ci2->ncand); if (bn2 == NULL) { BBPreclaim(bn1); return GDK_FAIL; @@ -86,48 +78,48 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l return GDK_SUCCEED; } - bn1 = COLnew(0, TYPE_oid, ci1.ncand * ci2.ncand, TRANSIENT); + bn1 = COLnew(0, TYPE_oid, ci1->ncand * ci2->ncand, TRANSIENT); if (r2p) - bn2 = COLnew(0, TYPE_oid, ci1.ncand * ci2.ncand, TRANSIENT); + bn2 = COLnew(0, TYPE_oid, ci1->ncand * ci2->ncand, TRANSIENT); if (!bn1 || (r2p && !bn2)) { BBPreclaim(bn1); if (bn2) BBPreclaim(bn2); return GDK_FAIL; } - if (ci1.ncand > 0 && ci2.ncand > 0) { - BATsetcount(bn1, ci1.ncand * ci2.ncand); + if (ci1->ncand > 0 && ci2->ncand > 0) { + BATsetcount(bn1, ci1->ncand * ci2->ncand); bn1->tsorted = true; - bn1->trevsorted = ci1.ncand <= 1; - bn1->tkey = ci2.ncand <= 1; + bn1->trevsorted = ci1->ncand <= 1; + bn1->tkey = ci2->ncand <= 1; bn1->tnil = false; bn1->tnonil = true; p = (oid *) Tloc(bn1, 0); - for (i = 0; i < ci1.ncand; i++) { + for (i = 0; i < ci1->ncand; i++) { GDK_CHECK_TIMEOUT_BODY(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); - oid x = canditer_next(&ci1); - for (j = 0; j < ci2.ncand; j++) { + oid x = canditer_next(ci1); + for (j = 0; j < ci2->ncand; j++) { *p++ = x; } } - BATtseqbase(bn1, ci2.ncand == 1 ? *(oid *) Tloc(bn1, 0) : oid_nil); + BATtseqbase(bn1, ci2->ncand == 1 ? *(oid *) Tloc(bn1, 0) : oid_nil); if (bn2) { - BATsetcount(bn2, ci1.ncand * ci2.ncand); - bn2->tsorted = ci1.ncand <= 1 || ci2.ncand <= 1; - bn2->trevsorted = ci2.ncand <= 1; - bn2->tkey = ci1.ncand <= 1; + BATsetcount(bn2, ci1->ncand * ci2->ncand); + bn2->tsorted = ci1->ncand <= 1 || ci2->ncand <= 1; + bn2->trevsorted = ci2->ncand <= 1; + bn2->tkey = ci1->ncand <= 1; bn2->tnil = false; bn2->tnonil = true; p = (oid *) Tloc(bn2, 0); - for (i = 0; i < ci1.ncand; i++) { + for (i = 0; i < ci1->ncand; i++) { GDK_CHECK_TIMEOUT_BODY(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); - for (j = 0; j < ci2.ncand; j++) { - *p++ = canditer_next(&ci2); + for (j = 0; j < ci2->ncand; j++) { + *p++ = canditer_next(ci2); } - canditer_reset(&ci2); + canditer_reset(ci2); } - BATtseqbase(bn2, ci1.ncand == 1 ? *(oid *) Tloc(bn2, 0) : oid_nil); + BATtseqbase(bn2, ci1->ncand == 1 ? *(oid *) Tloc(bn2, 0) : oid_nil); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org