Changeset: 96eb46ae8f92 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/96eb46ae8f92
Branch: pushcands
Log Message:
merged diffs (256 lines): diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c --- a/monetdb5/modules/mal/pcre.c +++ b/monetdb5/modules/mal/pcre.c @@ -1566,14 +1566,16 @@ pcre_clean( static str BATPCRElike_imp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, const str *esc, const bit *isens, const bit *not) { - str msg = MAL_SUCCEED, input = NULL, pat = NULL; - BAT *b = NULL, *pbn = NULL, *bn = NULL; + str msg = MAL_SUCCEED; + BAT *b = NULL, *pbn = NULL, *bn = NULL, *s1 = NULL, *s2 = NULL; char *ppat = NULL; bool use_re = false, use_strcmp = false, empty = false, isensitive = (bool) *isens, anti = (bool) *not, has_nil = false, input_is_a_bat = isaBatType(getArgType(mb, pci, 1)), pattern_is_a_bat = isaBatType(getArgType(mb, pci, 2)); - bat *r = getArgReference_bat(stk, pci, 0); + bat *r = getArgReference_bat(stk, pci, 0), *sid1 = pci->argc > 5 ? getArgReference_bat(stk, pci, 5) : NULL, *sid2 = pci->argc == 7 ? getArgReference_bat(stk, pci, 6) : NULL; BUN q = 0; bit *ret = NULL; + struct canditer ci1 = {0}, ci2 = {0}; + oid off1, off2; #ifdef HAVE_LIBPCRE pcre *re = NULL; pcre_extra *ex = NULL; @@ -1583,66 +1585,108 @@ BATPCRElike_imp(Client cntxt, MalBlkPtr #endif struct RE *re_simple = NULL; uint32_t *wpat = NULL; - BATiter bi = (BATiter) {0}, pi; (void) cntxt; if (input_is_a_bat) { bat *bid = getArgReference_bat(stk, pci, 1); if (!(b = BATdescriptor(*bid))) { - msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); + msg = createException(MAL, "batalgebra.batpcrelike", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); goto bailout; } } if (pattern_is_a_bat) { bat *pb = getArgReference_bat(stk, pci, 2); if (!(pbn = BATdescriptor(*pb))) { - msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); + msg = createException(MAL, "batalgebra.batpcrelike", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); goto bailout; } } assert((!b || ATOMstorage(b->ttype) == TYPE_str) && (!pbn || 
ATOMstorage(pbn->ttype) == TYPE_str)); + if (sid1 && !is_bat_nil(*sid1) && !(s1 = BATdescriptor(*sid1))) { + msg = createException(MAL, "batalgebra.batpcrelike", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); + goto bailout; + } + if (sid2 && !is_bat_nil(*sid2) && !(s2 = BATdescriptor(*sid2))) { + msg = createException(MAL, "batalgebra.batpcrelike", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING); + goto bailout; + } - q = BATcount(b ? b : pbn); - if (!(bn = COLnew(b ? b->hseqbase : pbn->hseqbase, TYPE_bit, q, TRANSIENT))) { - msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY013) MAL_MALLOC_FAIL); + q = canditer_init(&ci1, b ? b : pbn, s1); + if (b && pbn && (canditer_init(&ci2, pbn, s2) != q || ci1.hseq != ci2.hseq)) { + msg = createException(MAL, "batalgebra.batpcrelike", ILLEGAL_ARGUMENT " Requires bats of identical size"); + goto bailout; + } + if (!(bn = COLnew(ci1.hseq, TYPE_bit, q, TRANSIENT))) { + msg = createException(MAL, "batalgebra.batpcrelike", SQLSTATE(HY013) MAL_MALLOC_FAIL); goto bailout; } ret = (bit*) Tloc(bn, 0); + off1 = b ? b->hseqbase : pbn->hseqbase; if (pattern_is_a_bat) { - pi = bat_iterator(pbn); - if (b) - bi = bat_iterator(b); - else - input = *getArgReference_str(stk, pci, 1); + BATiter pi = bat_iterator(pbn); + + if (b) { + BATiter bi = bat_iterator(b); + off2 = pbn->hseqbase; - for (BUN p = 0; p < q; p++) { - const str next_input = b ? 
BUNtail(bi, p) : input, np = BUNtail(pi, p); + for (BUN p = 0; p < q; p++) { + oid p1 = (canditer_next(&ci1) - off1), p2 = (canditer_next(&ci2) - off2); + const str input = BUNtvar(bi, p1), np = BUNtvar(pi, p2); - if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &empty, &np, esc)) != MAL_SUCCEED) - goto bailout; - - if (use_re) { - if ((msg = re_like_build(&re_simple, &wpat, np, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED) + if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &empty, &np, esc)) != MAL_SUCCEED) goto bailout; - ret[p] = re_like_proj_apply(next_input, re_simple, wpat, np, isensitive, anti, use_strcmp); - re_like_clean(&re_simple, &wpat); - } else if (empty) { - ret[p] = bit_nil; - } else { - if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, 1)) != MAL_SUCCEED) + + if (use_re) { + if ((msg = re_like_build(&re_simple, &wpat, np, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED) + goto bailout; + ret[p] = re_like_proj_apply(input, re_simple, wpat, np, isensitive, anti, use_strcmp); + re_like_clean(&re_simple, &wpat); + } else if (empty) { + ret[p] = bit_nil; + } else { + if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, 1)) != MAL_SUCCEED) + goto bailout; + if ((msg = pcre_like_apply(&(ret[p]), input, re, ex, ppat, anti)) != MAL_SUCCEED) + goto bailout; + pcre_clean(&re, &ex); + } + has_nil |= is_bit_nil(ret[p]); + GDKfree(ppat); + ppat = NULL; + } + } else { + const str input = *getArgReference_str(stk, pci, 1); + + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next(&ci1) - off1); + const str np = BUNtvar(pi, p); + + if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &empty, &np, esc)) != MAL_SUCCEED) goto bailout; - if ((msg = pcre_like_apply(&(ret[p]), next_input, re, ex, ppat, anti)) != MAL_SUCCEED) - goto bailout; - pcre_clean(&re, &ex); + + if (use_re) { + if ((msg = re_like_build(&re_simple, &wpat, np, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED) + goto bailout; + 
ret[p] = re_like_proj_apply(input, re_simple, wpat, np, isensitive, anti, use_strcmp); + re_like_clean(&re_simple, &wpat); + } else if (empty) { + ret[p] = bit_nil; + } else { + if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, 1)) != MAL_SUCCEED) + goto bailout; + if ((msg = pcre_like_apply(&(ret[p]), input, re, ex, ppat, anti)) != MAL_SUCCEED) + goto bailout; + pcre_clean(&re, &ex); + } + has_nil |= is_bit_nil(ret[p]); + GDKfree(ppat); + ppat = NULL; } - has_nil |= is_bit_nil(ret[p]); - GDKfree(ppat); - ppat = NULL; } } else { - bi = bat_iterator(b); - pat = *getArgReference_str(stk, pci, 2); + const str pat = *getArgReference_str(stk, pci, 2); + BATiter bi = bat_iterator(b); if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &empty, &pat, esc)) != MAL_SUCCEED) goto bailout; @@ -1652,23 +1696,53 @@ BATPCRElike_imp(Client cntxt, MalBlkPtr if (use_re) { if ((msg = re_like_build(&re_simple, &wpat, pat, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED) goto bailout; - for (BUN p = 0; p < q; p++) { - const str s = BUNtail(bi, p); - ret[p] = re_like_proj_apply(s, re_simple, wpat, pat, isensitive, anti, use_strcmp); - has_nil |= is_bit_nil(ret[p]); + if (ci1.tpe == cand_dense) { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next_dense(&ci1) - off1); + const str s = BUNtvar(bi, p); + ret[p] = re_like_proj_apply(s, re_simple, wpat, pat, isensitive, anti, use_strcmp); + has_nil |= is_bit_nil(ret[p]); + } + } else { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next(&ci1) - off1); + const str s = BUNtvar(bi, p); + ret[p] = re_like_proj_apply(s, re_simple, wpat, pat, isensitive, anti, use_strcmp); + has_nil |= is_bit_nil(ret[p]); + } } } else if (empty) { - for (BUN p = 0; p < q; p++) - ret[p] = bit_nil; + if (ci1.tpe == cand_dense) { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next_dense(&ci1) - off1); + ret[p] = bit_nil; + } + } else { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next(&ci1) - off1); + ret[p] = bit_nil; + 
} + } has_nil = true; } else { if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, q)) != MAL_SUCCEED) goto bailout; - for (BUN p = 0; p < q; p++) { - const str s = BUNtail(bi, p); - if ((msg = pcre_like_apply(&(ret[p]), s, re, ex, ppat, anti)) != MAL_SUCCEED) - goto bailout; - has_nil |= is_bit_nil(ret[p]); + if (ci1.tpe == cand_dense) { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next_dense(&ci1) - off1); + const str s = BUNtvar(bi, p); + if ((msg = pcre_like_apply(&(ret[p]), s, re, ex, ppat, anti)) != MAL_SUCCEED) + goto bailout; + has_nil |= is_bit_nil(ret[p]); + } + } else { + for (BUN p = 0; p < q; p++) { + oid p = (canditer_next(&ci1) - off1); + const str s = BUNtvar(bi, p); + if ((msg = pcre_like_apply(&(ret[p]), s, re, ex, ppat, anti)) != MAL_SUCCEED) + goto bailout; + has_nil |= is_bit_nil(ret[p]); + } } } } @@ -1691,6 +1765,10 @@ bailout: BBPunfix(b->batCacheid); if (pbn) BBPunfix(pbn->batCacheid); + if (s1) + BBPunfix(s1->batCacheid); + if (s2) + BBPunfix(s2->batCacheid); return msg; } @@ -2331,9 +2409,15 @@ mel_func pcre_init_funcs[] = { pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))), pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))), pattern("batalgebra", "like", BATPCRElike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))), + pattern("batalgebra", "like", BATPCRElike, false, "", args(1,6, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s",oid))), + pattern("batalgebra", "like", BATPCRElike, false, "", args(1,6, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s",oid))), + pattern("batalgebra", "like", BATPCRElike, false, "", args(1,7, 
batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s1",oid),batarg("s2",oid))), pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit))), pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))), pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,5, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit))), + pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,6, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s",oid))), + pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,6, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s",oid))), + pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,7, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str),arg("caseignore",bit),batarg("s1",oid),batarg("s2",oid))), command("algebra", "likeselect", PCRElikeselect, false, "Select all head values of the first input BAT for which the\ntail value is \"like\" the given (SQL-style) pattern and for\nwhich the head value occurs in the tail of the second input\nBAT.\nInput is a dense-headed BAT, output is a dense-headed BAT with in\nthe tail the head value of the input BAT for which the\nrelationship holds. 
The output BAT is sorted on the tail value.", args(1,7, batarg("",oid),batarg("b",str),batarg("s",oid),arg("pat",str),arg("esc",str),arg("caseignore",bit),arg("anti",bit))), command("algebra", "likejoin", LIKEjoin, false, "Join the string bat L with the pattern bat R\nwith optional candidate lists SL and SR using pattern escape string ESC\nand doing a case sensitive match.\nThe result is two aligned bats with oids of matching rows.", args(2,11, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))), command("algebra", "likejoin", LIKEjoin1, false, "The same as LIKEjoin_esc, but only produce one output", args(1,10,batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("caseignore",bit),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))), _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list