Changeset: d5d8593eecaf for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d5d8593eecaf
Modified Files:
    monetdb5/modules/atoms/str.c
    monetdb5/modules/kernel/batstr.c
Branch: sw_ew_c_sorting
Log Message:
Cleanup MAL str functions plus make proper comparisons with ext string functions. diffs (truncated from 681 to 300 lines): diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -3766,25 +3766,6 @@ str_is_iprefix(const char *s, const char return utf8ncasecmp(s, prefix, plen); } -static str -STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) -{ - (void) cntxt; - (void) mb; - bit *res = getArgReference(stk, pci, 0); - const str *arg1 = getArgReference(stk, pci, 1), - *arg2 = getArgReference(stk, pci, 2); - bit icase = pci->argc == 4 - && *getArgReference_bit(stk, pci, 3) ? true : false; - str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED; - int plen = str_strlen(prefix); - - *res = (strNil(s) || strNil(prefix)) ? bit_nil : - icase ? str_is_iprefix(s, prefix, plen) : str_is_prefix(s, prefix, - plen); - return msg; -} - bit str_is_suffix(const char *s, const char *suffix, int sul) { @@ -3807,27 +3788,6 @@ str_is_isuffix(const char *s, const char return utf8casecmp(s + sl - sul, suffix); } - -/* returns whether arg1 ends with arg2 */ -static str -STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) -{ - (void) cntxt; - (void) mb; - bit *res = getArgReference(stk, pci, 0); - const str *arg1 = getArgReference(stk, pci, 1), - *arg2 = getArgReference(stk, pci, 2); - bit icase = pci->argc == 4 - && *getArgReference_bit(stk, pci, 3) ? true : false; - str s = *arg1, suffix = *arg2, msg = MAL_SUCCEED; - int sul = str_strlen(suffix); - - *res = (strNil(s) || strNil(suffix)) ? bit_nil : - icase ? str_is_isuffix(s, suffix, sul) : str_is_suffix(s, suffix, - sul); - return msg; -} - bit str_contains(const char *h, const char *n, int nlen) { @@ -3844,25 +3804,69 @@ str_icontains(const char *h, const char return utf8casestr(h, n) ? 
0 : 1; } +#define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE) \ + do{ \ + R = getArgReference(STK, PCI, 0); \ + S1 = *getArgReference_str(STK, PCI, 1); \ + S2 = *getArgReference_str(STK, PCI, 2); \ + icase = PCI->argc == 4 && \ + *getArgReference_bit(STK, PCI, 3) ? true : false; \ + \ + } while(0) + +static str +STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void) cntxt; + (void) mb; + + str s1, s2; + bit *r, icase; + + STR_MAPARGS(stk, pci, r, s1, s2, icase); + + int s2_len = str_strlen(s2); + *r = (strNil(s1) || strNil(s2)) ? bit_nil : + icase ? str_is_iprefix(s1, s2, s2_len) == 0 : + str_is_prefix(s1, s2, s2_len) == 0; + return MAL_SUCCEED; +} + +static str +STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void) cntxt; + (void) mb; + + str s1, s2; + bit *r, icase; + + STR_MAPARGS(stk, pci, r, s1, s2, icase); + + int s2_len = str_strlen(s2); + *r = (strNil(s1) || strNil(s2)) ? bit_nil : + icase ? str_is_isuffix(s1, s2, s2_len) == 0 : + str_is_suffix(s1, s2, s2_len) == 0; + return MAL_SUCCEED; +} + /* returns whether haystack contains needle */ static str STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { (void) cntxt; (void) mb; - bit *res = getArgReference(stk, pci, 0); - const str *arg1 = getArgReference(stk, pci, 1), - *arg2 = getArgReference(stk, pci, 2); - bit icase = pci->argc == 4 - && *getArgReference_bit(stk, pci, 3) ? true : false; - str haystack = *arg1, needle = *arg2, msg = MAL_SUCCEED; - int needle_len = str_strlen(needle); - - *res = (strNil(haystack) || strNil(needle)) ? bit_nil : - icase ? str_icontains(haystack, needle, - needle_len) : str_contains(haystack, needle, - needle_len); - return msg; + + str s1, s2; + bit *r, icase; + + STR_MAPARGS(stk, pci, r, s1, s2, icase); + + int s2_len = str_strlen(s2); + *r = (strNil(s1) || strNil(s2)) ? bit_nil : + icase ? 
str_icontains(s1, s2, s2_len) == 0 : + str_contains(s1, s2, s2_len) == 0; + return MAL_SUCCEED; } int @@ -5150,7 +5154,7 @@ BBPnreclaim(int nargs, ...) } while (0) static str -do_string_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, +str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, BUN *rcnt, const char *key, bool anti, bit (*str_cmp)(const char *, const char *, int), bool keep_nulls) @@ -5170,10 +5174,9 @@ do_string_select(BAT *bn, BAT *b, BAT *s qry_ctx->querytimeout) : 0; if (anti) /* keep nulls ? (use false for now) */ - scanloop_anti(v && *v != '\200' - && str_cmp(v, key, klen) == 0, keep_nulls); + scanloop_anti(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls); else - scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls); + scanloop(v && *v != '\200' && str_cmp(v, key, klen) == 0, keep_nulls); bailout: bat_iterator_end(&bi); @@ -5182,41 +5185,40 @@ do_string_select(BAT *bn, BAT *b, BAT *s } static str -string_select(bat *ret, const bat *bid, const bat *sid, const str *key, - const bit *anti, bit (*str_cmp)(const char *, const char *, int), +STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key, + const bit anti, bit (*str_cmp)(const char *, const char *, int), const str fname) { - BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;; str msg = MAL_SUCCEED; + + BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;; BUN p = 0, q = 0, rcnt = 0; struct canditer ci; - bool with_strimps = false, with_strimps_anti = false; - - if ((b = BATdescriptor(*bid)) == NULL) { - msg = createException(MAL, fname, - SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); - goto bailout; - } - if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) { - msg = createException(MAL, fname, - SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); - goto bailout; + bool with_strimps = false, + with_strimps_anti = false; + + if (!(b = BATdescriptor(*b_id))) + throw(MAL, fname, RUNTIME_OBJECT_MISSING); + + if (cb_id && 
!is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) { + BBPreclaim(b); + throw(MAL, fname, RUNTIME_OBJECT_MISSING); } assert(ATOMstorage(b->ttype) == TYPE_str); if (BAThasstrimps(b)) { if (STRMPcreate(b, NULL) == GDK_SUCCEED) { - BAT *tmp_s = STRMPfilter(b, s, *key, *anti); + BAT *tmp_s = STRMPfilter(b, cb, key, anti); if (tmp_s) { - old_s = s; - s = tmp_s; - if (!*anti) + old_s = cb; + cb = tmp_s; + if (!anti) with_strimps = true; else with_strimps_anti = true; } - } else { /* If we cannot filter with the strimp just continue normally */ + } else { GDKclrerr(); } } @@ -5225,20 +5227,19 @@ string_select(bat *ret, const bat *bid, "string_select: strcmp function using strimps" : (with_strimps_anti ? "string_select: strcmp function using strimps anti" - : - "string_select: strcmp function with no accelerator")); - - canditer_init(&ci, b, s); - if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { - msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); - goto bailout; + : "string_select: strcmp function with no accelerator")); + + canditer_init(&ci, b, cb); + if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { + BBPnreclaim(2, b, cb); + throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); } - if (!s || BATtdense(s)) { - if (s) { - assert(BATtdense(s)); - p = (BUN) s->tseqbase; - q = p + BATcount(s); + if (!cb || BATtdense(cb)) { + if (cb) { + assert(BATtdense(cb)); + p = (BUN) cb->tseqbase; + q = p + BATcount(cb); if ((oid) p < b->hseqbase) p = b->hseqbase; if ((oid) q > b->hseqbase + BATcount(b)) @@ -5249,101 +5250,131 @@ string_select(bat *ret, const bat *bid, } } - msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti - && !with_strimps_anti, str_cmp, with_strimps_anti); - - if (!msg) { /* set some properties */ - BATsetcount(bn, rcnt); - bn->tsorted = true; - bn->trevsorted = bn->batCount <= 1; - bn->tkey = true; - bn->tnil = false; - bn->tnonil = true; - bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? 
*(const oid *) Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil; + msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti + && !with_strimps_anti, str_cmp, with_strimps_anti); + + if (!msg) { + BATsetcount(r, rcnt); + r->tsorted = r->batCount <= 1; + r->trevsorted = r->batCount <= 1; + r->tkey = false; + r->tnil = false; + r->tnonil = true; + r->tseqbase = rcnt == 0 ? + 0 : rcnt == 1 ? + *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil; + if (with_strimps_anti) { BAT *rev; if (old_s) { - rev = BATdiffcand(old_s, bn); + rev = BATdiffcand(old_s, r); #ifndef NDEBUG - BAT *is = BATintersectcand(old_s, bn); + BAT *is = BATintersectcand(old_s, r); if (is) { - assert(is->batCount == bn->batCount); + assert(is->batCount == r->batCount); BBPreclaim(is); } - assert(rev->batCount == old_s->batCount - bn->batCount); + assert(rev->batCount == old_s->batCount - r->batCount); #endif - } _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org