Changeset: a9bc9d620277 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a9bc9d620277 Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_batop.c monetdb5/modules/kernel/bat5.c monetdb5/modules/kernel/bat5.h monetdb5/modules/kernel/bat5.mal monetdb5/modules/mal/Tests/inspect05.stable.out Branch: default Log Message:
Implemented bat.mergecand() and bat.intersectcand(). These functions merge/intersect two candidates lists. diffs (truncated from 338 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -130,6 +130,7 @@ BAT *BAThash(BAT *b, BUN masksize); BAT *BAThashjoin(BAT *l, BAT *r, BUN estimate); BAT *BAThistogram(BAT *b); BAT *BATins(BAT *b, BAT *c, bit force); +BAT *BATintersectcand(BAT *a, BAT *b); BAT *BATjoin(BAT *l, BAT *r, BUN estimate); BAT *BATkdiff(BAT *b, BAT *c); BAT *BATkey(BAT *b, int onoff); @@ -144,6 +145,7 @@ BAT *BATmark_grp(BAT *b, BAT *g, oid *ba BAT *BATmaterialize(BAT *b); BAT *BATmaterializeh(BAT *b); size_t BATmemsize(BAT *b, int dirty); +BAT *BATmergecand(BAT *a, BAT *b); BAT *BATmergejoin(BAT *l, BAT *r, BUN estimate); int BATmmap(BAT *b, int hb, int tb, int hh, int th, int force); BAT *BATmode(BAT *b, int onoff); @@ -983,6 +985,7 @@ str BKCinsert_bat(int *r, int *bid, int str BKCinsert_bat_force(int *r, int *bid, int *sid, bit *force); char *BKCinsert_bun(int *r, int *bid, ptr h, ptr t); char *BKCinsert_bun_force(int *r, int *bid, ptr h, ptr t, bit *force); +str BKCintersectcand(bat *ret, bat *aid, bat *bid); str BKCisCached(bit *res, int *bid); str BKCisPersistent(bit *res, int *bid); str BKCisSorted(bit *res, int *bid); @@ -993,6 +996,7 @@ str BKCisaSet(bit *res, int *bid); str BKCload(int *res, str *input); str BKCmadvise(bit *res, int *bid, int *hbns, int *tbns, int *hhp, int *thp); str BKCmadvise2(bit *res, int *bid, int *mode); +str BKCmergecand(bat *ret, bat *aid, bat *bid); str BKCmirror(int *ret, int *bid); str BKCmmap(bit *res, int *bid, int *hbns, int *tbns, int *hhp, int *thp); str BKCmmap2(bit *res, int *bid, int *bns); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -3148,6 +3148,9 @@ gdk_export BAT *BATkunion(BAT *b, BAT *c gdk_export BAT *BATsdiff(BAT *b, BAT *c); gdk_export BAT *BATkdiff(BAT *b, BAT *c); +gdk_export BAT *BATmergecand(BAT *a, BAT *b); +gdk_export BAT *BATintersectcand(BAT *a, BAT *b); + #include "gdk_calc.h" /* diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -2192,3 +2192,192 @@ BATcount_no_nil(BAT *b) } return cnt; } + +/* merge two candidate lists and produce a new one + * + * candidate lists are VOID-headed BATs with an OID tail which is + * sorted and unique. + */ +BAT * +BATmergecand(BAT *a, BAT *b) +{ + BAT *bn; + const oid *ap, *bp, *ape, *bpe; + oid *p, i; + + BATcheck(a, "BATmergecand"); + BATcheck(b, "BATmergecand"); + assert(a->htype == TYPE_void); + assert(b->htype == TYPE_void); + assert(ATOMtype(a->htype) == TYPE_oid); + assert(ATOMtype(b->htype) == TYPE_oid); + assert(a->tsorted); + assert(b->tsorted); + assert(a->tkey); + assert(b->tkey); + assert(a->T->nonil); + assert(b->T->nonil); + + /* XXX we could return a if b is empty (and v.v.) */ + + bn = BATnew(TYPE_void, TYPE_oid, BATcount(a) + BATcount(b)); + if (bn == NULL) + return NULL; + p = (oid *) Tloc(bn, BUNfirst(bn)); + if (a->ttype == TYPE_void && b->ttype == TYPE_void) { + /* both lists are VOID */ + if (a->tseqbase > b->tseqbase) { + BAT *t = a; + a = b; + b = t; + } + /* a->tseqbase <= b->tseqbase */ + for (i = a->tseqbase; i < a->tseqbase + BATcount(a); i++) + *p++ = i; + for (i = MAX(b->tseqbase, i); + i < b->tseqbase + BATcount(b); + i++) + *p++ = i; + } else if (a->ttype == TYPE_void || b->ttype == TYPE_void) { + if (b->ttype == TYPE_void) { + BAT *t = a; + a = b; + b = t; + } + /* a->ttype == TYPE_void, b->ttype == TYPE_oid */ + bp = (const oid *) Tloc(b, BUNfirst(b)); + bpe = bp + BATcount(b); + while (bp < bpe && *bp < a->tseqbase) + *p++ = *bp++; + for (i = a->tseqbase; i < a->tseqbase + BATcount(a); i++) + *p++ = i; + while (bp < bpe && *bp < i) + bp++; + while (bp < bpe) + *p++ = *bp++; + } else { + /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */ + ap = (const oid *) Tloc(a, BUNfirst(a)); + ape = ap + BATcount(a); + bp = (const oid *) Tloc(b, BUNfirst(b)); + bpe = bp + BATcount(b); + while (ap < ape && bp < bpe) { + if (*ap < *bp) + *p++ = *ap++; + else if (*ap > *bp) + *p++ = *bp++; + else { + *p++ = *ap++; + bp++; + } + } + while (ap < ape) + *p++ = *ap++; + while (bp < bpe) + *p++ = *bp++; + } + + /* properties */ + BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, BUNfirst(bn)))); + BATseqbase(bn, 0); + bn->tsorted = 1; + bn->tkey = 1; + bn->T->nil = 0; + bn->T->nonil = 1; + return bn; +} + +/* intersect two candidate lists and produce a new one + * + * candidate lists are VOID-headed BATs with an OID tail which is + * sorted and unique. + */ +BAT * +BATintersectcand(BAT *a, BAT *b) +{ + BAT *bn; + const oid *ap, *bp, *ape, *bpe; + oid *p, i; + + BATcheck(a, "BATintersectcand"); + BATcheck(b, "BATintersectcand"); + assert(a->htype == TYPE_void); + assert(b->htype == TYPE_void); + assert(ATOMtype(a->htype) == TYPE_oid); + assert(ATOMtype(b->htype) == TYPE_oid); + assert(a->tsorted); + assert(b->tsorted); + assert(a->tkey); + assert(b->tkey); + assert(a->T->nonil); + assert(b->T->nonil); + + if (BATcount(a) == 0 || BATcount(b) == 0) { + bn = BATnew(TYPE_void, TYPE_void, 0); + BATseqbase(bn, 0); + BATseqbase(BATmirror(bn), 0); + return bn; + } + + if (a->ttype == TYPE_void && b->ttype == TYPE_void) { + /* both lists are VOID */ + bn = BATnew(TYPE_void, TYPE_void, 0); + if (bn == NULL) + return NULL; + i = MAX(a->tseqbase, b->tseqbase); + if (a->tseqbase + BATcount(a) <= b->tseqbase || + b->tseqbase + BATcount(b) <= a->tseqbase) { + /* no overlap */ + BATsetcount(bn, 0); + } else { + BATsetcount(bn, MIN(a->tseqbase + BATcount(a) - i, + b->tseqbase + BATcount(b) - i)); + } + BATseqbase(BATmirror(bn), i); + return bn; + } + + bn = BATnew(TYPE_void, TYPE_oid, MIN(BATcount(a), BATcount(b))); + if (bn == NULL) + return NULL; + p = (oid *) Tloc(bn, BUNfirst(bn)); + if (a->ttype == TYPE_void || b->ttype == TYPE_void) { + if (b->ttype == TYPE_void) { + BAT *t = a; + a = b; + b = t; + } + /* a->ttype == TYPE_void, b->ttype == TYPE_oid */ + bp = (const oid *) Tloc(b, BUNfirst(b)); + bpe = bp + BATcount(b); + while (bp < bpe && *bp < a->tseqbase) + bp++; + while (bp < bpe && *bp < a->tseqbase + BATcount(a)) + *p++ = *bp++; + } else { + /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */ + ap = (const oid *) Tloc(a, BUNfirst(a)); + ape = ap + BATcount(a); + bp = (const oid *) Tloc(b, BUNfirst(b)); + bpe = bp + BATcount(b); + while (ap < ape && bp < bpe) { + if (*ap < *bp) + ap++; + else if (*ap > *bp) + bp++; + else { + *p++ = *ap++; + bp++; + } + } + } + + /* properties */ + BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, BUNfirst(bn)))); + BATseqbase(bn, 0); + bn->tsorted = 1; + bn->tkey = 1; + bn->T->nil = 0; + bn->T->nonil = 1; + return bn; +} diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c --- a/monetdb5/modules/kernel/bat5.c +++ b/monetdb5/modules/kernel/bat5.c @@ -2418,3 +2418,46 @@ BKCreuseBATmap(int *ret, int *bid, int * return MAL_SUCCEED; } +str +BKCmergecand(bat *ret, bat *aid, bat *bid) +{ + BAT *a, *b, *bn; + + if ((a = BATdescriptor(*aid)) == NULL) { + throw(MAL, "bat.mergecand", RUNTIME_OBJECT_MISSING); + } + if ((b = BATdescriptor(*bid)) == NULL) { + BBPreleaseref(a->batCacheid); + throw(MAL, "bat.mergecand", RUNTIME_OBJECT_MISSING); + } + bn = BATmergecand(a, b); + BBPreleaseref(a->batCacheid); + BBPreleaseref(b->batCacheid); + if (bn == NULL) + throw(MAL, "bat.mergecand", OPERATION_FAILED); + *ret = bn->batCacheid; + BBPkeepref(*ret); + return MAL_SUCCEED; +} + +str +BKCintersectcand(bat *ret, bat *aid, bat *bid) +{ + BAT *a, *b, *bn; + + if ((a = BATdescriptor(*aid)) == NULL) { + throw(MAL, "bat.intersectcand", RUNTIME_OBJECT_MISSING); + } + if ((b = BATdescriptor(*bid)) == NULL) { + BBPreleaseref(a->batCacheid); + throw(MAL, "bat.intersectcand", RUNTIME_OBJECT_MISSING); + } + bn = BATintersectcand(a, b); + BBPreleaseref(a->batCacheid); + BBPreleaseref(b->batCacheid); + if (bn == NULL) + throw(MAL, "bat.intersectcand", OPERATION_FAILED); + *ret = bn->batCacheid; + BBPkeepref(*ret); + return MAL_SUCCEED; +} diff --git a/monetdb5/modules/kernel/bat5.h b/monetdb5/modules/kernel/bat5.h --- a/monetdb5/modules/kernel/bat5.h +++ b/monetdb5/modules/kernel/bat5.h @@ -131,4 +131,7 @@ bat5_export str BKCsetReadMode(int *res, bat5_export str BKChasReadMode(bit *res, int *bid); bat5_export str BKCsetAppendMode(int *res, int *bid) ; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list