Changeset: 3dd6bf4ad4f4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3dd6bf4ad4f4
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_firstn.c
        gdk/gdk_join.c
        monetdb5/modules/kernel/algebra.c
        sql/backends/monet5/sql.c
Branch: default
Log Message:

New function BATintersect.
This is equivalent to BATsemijoin with a single output and similar to BATdiff.


diffs (174 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -139,6 +139,7 @@ BUN BATgrows(BAT *b);
 gdk_return BAThash(BAT *b, BUN masksize);
 void BAThseqbase(BAT *b, oid o);
 gdk_return BATimprints(BAT *b);
+BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 BAT *BATintersectcand(BAT *a, BAT *b);
 gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
 gdk_return BATkey(BAT *b, bool onoff);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2746,6 +2746,7 @@ gdk_export gdk_return BATthetajoin(BAT *
        __attribute__ ((__warn_unused_result__));
 gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
        __attribute__ ((__warn_unused_result__));
+gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
        __attribute__ ((__warn_unused_result__));
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -653,9 +653,9 @@ BATfirstn_grouped(BAT **topn, BAT **gids
 
                        bn1 = bn;
                        BBPunfix(s->batCacheid);
-                       rc = BATsemijoin(&bn, NULL, b, b, su, bn1, 1, BUN_NONE);
+                       bn = BATintersect(b, b, su, bn1, 1, BUN_NONE);
                        BBPunfix(bn1->batCacheid);
-                       if (rc != GDK_SUCCEED)
+                       if (bn == NULL)
                                return GDK_FAIL;
                } else {
                        BATiter bi = bat_iterator(b);
@@ -725,9 +725,9 @@ BATfirstn_grouped_with_groups(BAT **topn
                        BBPunfix(bn2->batCacheid);
                        return GDK_FAIL;
                }
-               rc = BATsemijoin(&bn4, NULL, s, bn2, NULL, NULL, 0, BUN_NONE);
+               bn4 = BATintersect(s, bn2, NULL, NULL, 0, BUN_NONE);
                BBPunfix(bn2->batCacheid);
-               if (rc != GDK_SUCCEED) {
+               if (bn4 == NULL) {
                        BBPunfix(bn1->batCacheid);
                        return GDK_FAIL;
                }
@@ -801,7 +801,7 @@ BATfirstn_grouped_with_groups(BAT **topn
        if (gids) {
                BAT *bn1, *bn2, *bn3, *bn4, *bn5, *bn6, *bn7, *bn8;
 
-               if (BATsemijoin(&bn1, NULL, s, bn, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) {
+               if ((bn1 = BATintersect(s, bn, NULL, NULL, 0, BUN_NONE)) == NULL) {
                        BBPunfix(bn->batCacheid);
                        return  GDK_FAIL;
                }
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -60,12 +60,14 @@
  *     right values; two extra Boolean parameters, li and hi,
  *     indicate whether equal values match
  *
- * In addition to these functions, there is one more functions that is
- * closely related:
+ * In addition to these functions, there are two more functions that
+ * are closely related:
+ * BATintersect
+ *     intersection: return a candidate list with OIDs of tuples in
+ *     the left input whose value occurs in the right input
  * BATdiff
- *     difference: return a candidate list compatible list of OIDs of
- *     tuples in the left input whose value does not occur in the
- *     right input
+ *     difference: return a candidate list with OIDs of tuples in the
+ *     left input whose value does not occur in the right input
  */
 
 /* Perform a bunch of sanity checks on the inputs to a join. */
@@ -3864,9 +3866,10 @@ BATouterjoin(BAT **r1p, BAT **r2p, BAT *
                        GDKdebug & ALGOMASK ? GDKusec() : 0);
 }
 
-/* Perform a semi-join over l and r.  Returns two new, aligned, bats
+/* Perform a semi-join over l and r.  Returns one or two new bats
  * with the oids of matching tuples.  The result is in the same order
- * as l (i.e. r1 is sorted). */
+ * as l (i.e. r1 is sorted).  If a single bat is returned, it is a
+ * candidate list. */
 gdk_return
 BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
 {
@@ -3875,10 +3878,23 @@ BATsemijoin(BAT **r1p, BAT **r2p, BAT *l
                        GDKdebug & ALGOMASK ? GDKusec() : 0);
 }
 
+/* Return a candidate list with the OIDs of the rows in l whose value
+ * also occurs in r.  This is just the left output of a semi-join. */
+BAT *
+BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
+{
+       BAT *bn;
+
+       if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches,
+                    false, true, false, estimate, "BATintersect",
+                    GDKdebug & ALGOMASK ? GDKusec() : 0) == GDK_SUCCEED)
+               return virtualize(bn);
+       return NULL;
+}
+
 /* Return the difference of l and r.  The result is a BAT with the
  * oids of those values in l that do not occur in r.  This is what you
- * might call an anti-semi-join.  The result can be used as a
- * candidate list. */
+ * might call an anti-semi-join.  The result is a candidate list. */
 BAT *
 BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
 {
@@ -3887,7 +3903,7 @@ BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr
        if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches,
                     false, false, true, estimate, "BATdiff",
                     GDKdebug & ALGOMASK ? GDKusec() : 0) == GDK_SUCCEED)
-               return bn;
+               return virtualize(bn);
        return NULL;
 }
 
diff --git a/monetdb5/modules/kernel/algebra.c b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -508,7 +508,7 @@ ALGintersect(bat *r1, const bat *lid, co
 {
        return do_join(r1, NULL, lid, rid, NULL, slid, srid, 0, NULL, NULL, 0, 0,
                                   nil_matches, estimate,
-                                  BATsemijoin, NULL, NULL, NULL, NULL, "algebra.intersect");
+                                  NULL, NULL, NULL, NULL, BATintersect, "algebra.intersect");
 }
 
 /* algebra.firstn(b:bat[:any],
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -1537,9 +1537,9 @@ DELTAsub(bat *result, const bat *col, co
                                BBPunfix(u->batCacheid);
                                throw(MAL, "sql.delta", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
                        }
-                       ret = BATsemijoin(&cminu, NULL, u, c_ids, NULL, NULL, 0, BUN_NONE);
+                       cminu = BATintersect(u, c_ids, NULL, NULL, 0, BUN_NONE);
                        BBPunfix(c_ids->batCacheid);
-                       if (ret != GDK_SUCCEED) {
+                       if (cminu == NULL) {
                                BBPunfix(c->batCacheid);
                                BBPunfix(u->batCacheid);
                                throw(MAL, "sql.delta", SQLSTATE(HY001) MAL_MALLOC_FAIL);
@@ -1704,7 +1704,7 @@ DELTAproject(bat *result, const bat *sub
                /* create subsets of u_id and u_val where the tail
                 * values of u_id are also in s, and where those tail
                 * values occur as head value in res */
-               if (BATsemijoin(&o, NULL, u_id, s, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) {
+               if ((o = BATintersect(u_id, s, NULL, NULL, 0, BUN_NONE)) == NULL) {
                        BBPunfix(s->batCacheid);
                        BBPunfix(res->batCacheid);
                        BBPunfix(u_id->batCacheid);
@@ -1720,7 +1720,7 @@ DELTAproject(bat *result, const bat *sub
                if (nu_id == NULL ||
                    nu_val == NULL ||
                    tres == NULL ||
-                   BATsemijoin(&o, NULL, nu_id, tres, NULL, NULL, 0, BUN_NONE) != GDK_SUCCEED) {
+                   (o = BATintersect(nu_id, tres, NULL, NULL, 0, BUN_NONE)) == NULL) {
                        BBPunfix(s->batCacheid);
                        BBPunfix(res->batCacheid);
                        BBPreclaim(nu_id);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to