Changeset: 3dd6bf4ad4f4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3dd6bf4ad4f4
Modified Files:
	clients/Tests/exports.stable.out
	gdk/gdk.h
	gdk/gdk_firstn.c
	gdk/gdk_join.c
	monetdb5/modules/kernel/algebra.c
	sql/backends/monet5/sql.c
Branch: default
Log Message:
New function BATintersect. This is equivalent to BATsemijoin with a single output and similar to BATdiff. diffs (174 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -139,6 +139,7 @@ BUN BATgrows(BAT *b); gdk_return BAThash(BAT *b, BUN masksize); void BAThseqbase(BAT *b, oid o); gdk_return BATimprints(BAT *b); +BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate); BAT *BATintersectcand(BAT *a, BAT *b); gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATkey(BAT *b, bool onoff); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2746,6 +2746,7 @@ gdk_export gdk_return BATthetajoin(BAT * __attribute__ ((__warn_unused_result__)); gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) __attribute__ ((__warn_unused_result__)); +gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate); gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate); gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) __attribute__ ((__warn_unused_result__)); diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c --- a/gdk/gdk_firstn.c +++ b/gdk/gdk_firstn.c @@ -653,9 +653,9 @@ BATfirstn_grouped(BAT **topn, BAT **gids bn1 = bn; BBPunfix(s->batCacheid); - rc = BATsemijoin(&bn, NULL, b, b, su, bn1, 1, BUN_NONE); + bn = BATintersect(b, b, su, bn1, 1, BUN_NONE); BBPunfix(bn1->batCacheid); - if (rc != GDK_SUCCEED) + if (bn == NULL) return GDK_FAIL; } else { BATiter bi = bat_iterator(b); @@ -725,9 +725,9 @@ BATfirstn_grouped_with_groups(BAT **topn BBPunfix(bn2->batCacheid); return GDK_FAIL; } - rc = BATsemijoin(&bn4, NULL, s, bn2, NULL, 
NULL, 0, BUN_NONE); + bn4 = BATintersect(s, bn2, NULL, NULL, 0, BUN_NONE); BBPunfix(bn2->batCacheid); - if (rc != GDK_SUCCEED) { + if (bn4 == NULL) { BBPunfix(bn1->batCacheid); return GDK_FAIL; } @@ -801,7 +801,7 @@ BATfirstn_grouped_with_groups(BAT **topn if (gids) { BAT *bn1, *bn2, *bn3, *bn4, *bn5, *bn6, *bn7, *bn8; - if (BATsemijoin(&bn1, NULL, s, bn, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) { + if ((bn1 = BATintersect(s, bn, NULL, NULL, 0, BUN_NONE)) == NULL) { BBPunfix(bn->batCacheid); return GDK_FAIL; } diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -60,12 +60,14 @@ * right values; two extra Boolean parameters, li and hi, * indicate whether equal values match * - * In addition to these functions, there is one more functions that is - * closely related: + * In addition to these functions, there are two more functions that + * are closely related: + * BATintersect + * intersection: return a candidate list with OIDs of tuples in + * the left input whose value occurs in the right input * BATdiff - * difference: return a candidate list compatible list of OIDs of - * tuples in the left input whose value does not occur in the - * right input + * difference: return a candidate list with OIDs of tuples in the + * left input whose value does not occur in the right input */ /* Perform a bunch of sanity checks on the inputs to a join. */ @@ -3864,9 +3866,10 @@ BATouterjoin(BAT **r1p, BAT **r2p, BAT * GDKdebug & ALGOMASK ? GDKusec() : 0); } -/* Perform a semi-join over l and r. Returns two new, aligned, bats +/* Perform a semi-join over l and r. Returns one or two new, bats * with the oids of matching tuples. The result is in the same order - * as l (i.e. r1 is sorted). */ + * as l (i.e. r1 is sorted). If a single bat is returned, it is a + * candidate list. 
*/ gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) { @@ -3875,10 +3878,23 @@ BATsemijoin(BAT **r1p, BAT **r2p, BAT *l GDKdebug & ALGOMASK ? GDKusec() : 0); } +/* Return a candidate list with the list of rows in l whose value also + * occurs in r. This is just the left output of a semi-join. */ +BAT * +BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) +{ + BAT *bn; + + if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches, + false, true, false, estimate, "BATintersect", + GDKdebug & ALGOMASK ? GDKusec() : 0) == GDK_SUCCEED) + return virtualize(bn); + return NULL; +} + /* Return the difference of l and r. The result is a BAT with the * oids of those values in l that do not occur in r. This is what you - * might call an anti-semi-join. The result can be used as a - * candidate list. */ + * might call an anti-semi-join. The result is a candidate list. */ BAT * BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) { @@ -3887,7 +3903,7 @@ BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches, false, false, true, estimate, "BATdiff", GDKdebug & ALGOMASK ? 
GDKusec() : 0) == GDK_SUCCEED) - return bn; + return virtualize(bn); return NULL; } diff --git a/monetdb5/modules/kernel/algebra.c b/monetdb5/modules/kernel/algebra.c --- a/monetdb5/modules/kernel/algebra.c +++ b/monetdb5/modules/kernel/algebra.c @@ -508,7 +508,7 @@ ALGintersect(bat *r1, const bat *lid, co { return do_join(r1, NULL, lid, rid, NULL, slid, srid, 0, NULL, NULL, 0, 0, nil_matches, estimate, - BATsemijoin, NULL, NULL, NULL, NULL, "algebra.intersect"); + NULL, NULL, NULL, NULL, BATintersect, "algebra.intersect"); } /* algebra.firstn(b:bat[:any], diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -1537,9 +1537,9 @@ DELTAsub(bat *result, const bat *col, co BBPunfix(u->batCacheid); throw(MAL, "sql.delta", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } - ret = BATsemijoin(&cminu, NULL, u, c_ids, NULL, NULL, 0, BUN_NONE); + cminu = BATintersect(u, c_ids, NULL, NULL, 0, BUN_NONE); BBPunfix(c_ids->batCacheid); - if (ret != GDK_SUCCEED) { + if (cminu == NULL) { BBPunfix(c->batCacheid); BBPunfix(u->batCacheid); throw(MAL, "sql.delta", SQLSTATE(HY001) MAL_MALLOC_FAIL); @@ -1704,7 +1704,7 @@ DELTAproject(bat *result, const bat *sub /* create subsets of u_id and u_val where the tail * values of u_id are also in s, and where those tail * values occur as head value in res */ - if (BATsemijoin(&o, NULL, u_id, s, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) { + if ((o = BATintersect(u_id, s, NULL, NULL, 0, BUN_NONE)) == NULL) { BBPunfix(s->batCacheid); BBPunfix(res->batCacheid); BBPunfix(u_id->batCacheid); @@ -1720,7 +1720,7 @@ DELTAproject(bat *result, const bat *sub if (nu_id == NULL || nu_val == NULL || tres == NULL || - BATsemijoin(&o, NULL, nu_id, tres, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) { + (o = BATintersect(nu_id, tres, NULL, NULL, 0, BUN_NONE)) == NULL) { BBPunfix(s->batCacheid); BBPunfix(res->batCacheid); BBPreclaim(nu_id); _______________________________________________ 
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list