Changeset: aa20ce0fc12a for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/aa20ce0fc12a Modified Files: monetdb5/modules/atoms/str.c Branch: Jun2023 Log Message:
Strimps working in normal join. Anti join still defaults to no Strimps. diffs (218 lines): diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -5230,32 +5230,35 @@ STRcontainsselect(bat *ret, const bat *b B->tseqbase = 0; \ } while (0) -#define batstr_join_loop(STRCMP, STR_LEN, WITH_STRIMPS) \ +#define str_join_loop(STRCMP, STR_LEN) \ do { \ for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \ - GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit));\ + GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \ ro = canditer_next(&rci); \ vr = VALUE(r, ro - rbase); \ - rlen = str_strlen(vr); \ + rlen = STR_LEN; \ nl = 0; \ - canditer_reset(&lci); \ - if (with_strimps) { \ - if(!(filtered_sl = STRMPfilter(l, sl, vr, anti))) \ - sl = filtered_sl; \ - } \ + if (with_strimps) \ + filtered_sl = STRMPfilter(l, sl, vr, anti); \ + if (filtered_sl) \ + canditer_init(&lci, l, filtered_sl); \ + else \ + canditer_init(&lci, l, sl); \ for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \ lo = canditer_next(&lci); \ vl = VALUE(l, lo - lbase); \ if (strNil(vl)) { \ continue; \ - } else if (!(STRCMP)) \ + } else if (!(STRCMP)) { \ continue; \ + } \ if (BATcount(r1) == BATcapacity(r1)) { \ newcap = BATgrows(r1); \ BATsetcount(r1, BATcount(r1)); \ if (r2) \ BATsetcount(r2, BATcount(r2)); \ - if (BATextend(r1, newcap) != GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \ + if (BATextend(r1, newcap) != GDK_SUCCEED || \ + (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \ msg = createException(MAL, "str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \ goto exit; \ } \ @@ -5282,9 +5285,75 @@ STRcontainsselect(bat *ret, const bat *b APPEND(r2, ro); \ lastl = lo; \ nl++; \ - if (with_strimps) { \ - sl = original_sl; \ + } \ + if (with_strimps && filtered_sl) \ + BBPreclaim(filtered_sl); \ + if (r2) { \ + if (nl > 1) { \ + r2->tkey = false; \ + r2->tseqbase = 
oid_nil; \ + r1->trevsorted = false; \ + } else if (nl == 0) { \ + rskipped = BATcount(r2) > 0; \ + } else if (rskipped) { \ + r2->tseqbase = oid_nil; \ } \ + } else if (nl > 1) { \ + r1->trevsorted = false; \ + } \ + } \ + } while (0) + +#define str_antijoin_loop(STRCMP, STR_LEN) \ + do { \ + for (BUN ridx = 0; ridx < rci.ncand; ridx++) { \ + GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(exit)); \ + ro = canditer_next(&rci); \ + vr = VALUE(r, ro - rbase); \ + rlen = STR_LEN; \ + nl = 0; \ + canditer_init(&lci, l, sl); \ + for (BUN lidx = 0; lidx < lci.ncand; lidx++) { \ + lo = canditer_next(&lci); \ + vl = VALUE(l, lo - lbase); \ + if (strNil(vl)) { \ + continue; \ + } else if (!(STRCMP)) { \ + continue; \ + } \ + if (BATcount(r1) == BATcapacity(r1)) { \ + newcap = BATgrows(r1); \ + BATsetcount(r1, BATcount(r1)); \ + if (r2) \ + BATsetcount(r2, BATcount(r2)); \ + if (BATextend(r1, newcap) != GDK_SUCCEED || \ + (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \ + msg = createException(MAL, "str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \ + goto exit; \ + } \ + assert(!r2 || BATcapacity(r1) == BATcapacity(r2)); \ + } \ + if (BATcount(r1) > 0) { \ + if (lastl + 1 != lo) \ + r1->tseqbase = oid_nil; \ + if (nl == 0) { \ + if (r2) \ + r2->trevsorted = false; \ + if (lastl > lo) { \ + r1->tsorted = false; \ + r1->tkey = false; \ + } else if (lastl < lo) { \ + r1->trevsorted = false; \ + } else { \ + r1->tkey = false; \ + } \ + } \ + } \ + APPEND(r1, lo); \ + if (r2) \ + APPEND(r2, ro); \ + lastl = lo; \ + nl++; \ } \ if (r2) { \ if (nl > 1) { \ @@ -5299,18 +5368,6 @@ STRcontainsselect(bat *ret, const bat *b } else if (nl > 1) { \ r1->trevsorted = false; \ } \ - if (with_strimps && anti) { \ - BAT *rev; \ - if (original_sl) { \ - rev = BATdiffcand(original_sl, r1); \ - assert (BATintersectcand(original_sl, r1)->batCount == r1->batCount); \ - assert (rev->batCount == original_sl->batCount - r1->batCount); \ - } \ - else \ - rev = 
BATnegcands(l->batCount, r1); \ - BBPreclaim(r1); \ - r1 = rev; \ - } \ } \ } while (0) @@ -5318,12 +5375,13 @@ static str strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, bit anti, bit (*str_cmp)(const char*, const char*, int), const str fname) { + (void)anti; struct canditer lci, rci; const char *lvals, *rvals, *lvars, *rvars, *vl, *vr; int rskipped = 0, rlen = 0; oid lbase, rbase, lo, ro, lastl = 0; BUN nl, newcap; - BAT *original_sl = sl, *filtered_sl = NULL; + BAT *filtered_sl = NULL; bool with_strimps = false; char *msg = MAL_SUCCEED; @@ -5335,8 +5393,10 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r (qry_ctx->starttime + qry_ctx->querytimeout) : 0; if (BAThasstrimps(l)) { - if (STRMPcreate(l, NULL) == GDK_SUCCEED) + if (STRMPcreate(l, NULL) == GDK_SUCCEED){ + /* original_sl = sl; */ with_strimps = true; + } /* else throw the GDK error and default to nested loop without filters */ } @@ -5361,7 +5421,7 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); assert(ATOMtype(l->ttype) == TYPE_str); - canditer_init(&lci, l, sl); + /* canditer_init(&lci, l, sl); */ canditer_init(&rci, r, sr); BATiter li = bat_iterator(l); @@ -5374,19 +5434,11 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r lvars = li.vh->base; rvars = ri.vh->base; - set_empty_bat_props(r1); - if (r2) - set_empty_bat_props(r2); - - /* if (anti) */ - /* batstr_join_loop(str_cmp(vl, vr, rlen) == 0, str_strlen(vr), with_strimps); */ - /* else */ - /* batstr_join_loop(str_cmp(vl, vr, rlen) != 0, str_strlen(vr), with_strimps); */ - - batstr_join_loop(anti && !with_strimps ? 
- (str_cmp(vl, vr, rlen) == 0) : - (str_cmp(vl, vr, rlen) != 0), - str_strlen(vr), with_strimps); + if (anti) + str_antijoin_loop(str_cmp(vl, vr, rlen) == 0, str_strlen(vr)); + else { + str_join_loop(str_cmp(vl, vr, rlen) != 0, str_strlen(vr)); + } assert(!r2 || BATcount(r1) == BATcount(r2)); BATsetcount(r1, BATcount(r1)); @@ -5423,10 +5475,6 @@ strjoin(BAT *r1, BAT *r2, BAT *l, BAT *r r1->tsorted ? "-sorted" : "", r1->trevsorted ? "-revsorted" : ""); exit: - if (with_strimps) { - BBPreclaim(filtered_sl); - BBPreclaim(original_sl); - } bat_iterator_end(&li); bat_iterator_end(&ri); return msg; _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-leave@monetdb.org