Changeset: de1c35aedd6f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/de1c35aedd6f
Modified Files:
	clients/Tests/exports.stable.out
	gdk/gdk.h
	gdk/gdk_string.c
	monetdb5/modules/atoms/str.c
	monetdb5/modules/atoms/str.h
	monetdb5/modules/kernel/batstr.c
Branch: ascii-flag
Log Message:
Move case conversion fully to GDK. diffs (242 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -206,6 +206,8 @@ gdk_return BATsum(void *res, int tp, BAT const char *BATtailname(const BAT *b); gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op); +BAT *BATtolower(BAT *b, BAT *s); +BAT *BATtoupper(BAT *b, BAT *s); void BATtseqbase(BAT *b, oid o); BAT *BATunique(BAT *b, BAT *s); BAT *BATunmask(BAT *b); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2346,6 +2346,8 @@ gdk_export gdk_return GDKtolower(char ** gdk_export int GDKstrncasecmp(const char *str1, const char *str2, size_t l1, size_t l2); gdk_export int GDKstrcasecmp(const char *s1, const char *s2); gdk_export char *GDKstrcasestr(const char *haystack, const char *needle); +gdk_export BAT *BATtolower(BAT *b, BAT *s); +gdk_export BAT *BATtoupper(BAT *b, BAT *s); /* * @- BAT sample operators diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c --- a/gdk/gdk_string.c +++ b/gdk/gdk_string.c @@ -4501,7 +4501,7 @@ convertcase(char **buf, size_t *buflen, size_t dstoff = 0; size_t bl = *buflen; - if (buf == NULL) + if (*buf == NULL) bl = 0; while (*s) { /* we are at the start of a Unicode codepoint encoded in @@ -4553,6 +4553,16 @@ convertcase(char **buf, size_t *buflen, dst[dstoff++] = 0x80 | (v & 0x3F); } } + if (dstoff + 1 > bl) { + size_t newlen = dstoff + 1; + dst = GDKrealloc(*buf, newlen); + if (dst == NULL) { + *buflen = bl; + return GDK_FAIL; + } + *buf = (char *) dst; + bl = newlen; + } dst[dstoff] = '\0'; *buflen = bl; return GDK_SUCCEED; @@ -4572,6 +4582,64 @@ GDKtolower(char **buf, size_t *buflen, c return convertcase(buf, buflen, (const uint8_t *) s, lowercase); } +static BAT * 
+BATcaseconvert(BAT *b, BAT *s, const int *convtab, const char *func) +{ + lng t0 = 0; + BAT *bn; + struct canditer ci; + BATiter bi; + oid bhseqbase = b->hseqbase; + QryCtx *qry_ctx = MT_thread_get_qry_ctx(); + qry_ctx = qry_ctx ? qry_ctx : &(QryCtx) {.endtime = 0}; + + TRC_DEBUG_IF(ALGO) t0 = GDKusec(); + BATcheck(b, NULL); + canditer_init(&ci, b, s); + bn = COLnew(ci.hseq, TYPE_str, ci.ncand, TRANSIENT); + if (bn == NULL) + return NULL; + bi = bat_iterator(b); + char *buf = NULL; + size_t buflen = 0; + TIMEOUT_LOOP(ci.ncand, qry_ctx) { + BUN x = canditer_next(&ci) - bhseqbase; + if (convertcase(&buf, &buflen, (const uint8_t *) BUNtvar(bi, x), + convtab) != GDK_SUCCEED || + BUNappend(bn, buf, false) != GDK_SUCCEED) { + goto bailout; + } + } + GDKfree(buf); + bat_iterator_end(&bi); + TIMEOUT_CHECK(qry_ctx, + GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); + TRC_DEBUG(ALGO, "%s: b=" ALGOBATFMT ",s=" ALGOOPTBATFMT + " -> " ALGOOPTBATFMT " " LLFMT "usec\n", + func, + ALGOBATPAR(b), ALGOOPTBATPAR(s), + ALGOOPTBATPAR(bn), GDKusec() - t0); + return bn; + + bailout: + GDKfree(buf); + bat_iterator_end(&bi); + BBPreclaim(bn); + return NULL; +} + +BAT * +BATtolower(BAT *b, BAT *s) +{ + return BATcaseconvert(b, s, lowercase, __func__); +} + +BAT * +BATtoupper(BAT *b, BAT *s) +{ + return BATcaseconvert(b, s, uppercase, __func__); +} + /* Unicode-aware case insensitive string comparison of two UTF-8 encoded * string; do not look beyond the lengths in bytes of the two strings; * if either one reaches the end of the buffer (as given by the length diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -719,14 +719,6 @@ STRWChrAt(int *res, const str *arg1, con return str_wchr_at(res, *arg1, *at); } -str -str_lower(str *buf, size_t *buflen, const char *s) -{ - if (GDKtolower(buf, buflen, s) != GDK_SUCCEED) - throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL); - return MAL_SUCCEED; -} - 
static inline str STRlower(str *res, const str *arg1) { @@ -741,10 +733,9 @@ STRlower(str *res, const str *arg1) *res = NULL; if (!(buf = GDKmalloc(buflen))) throw(MAL, "str.lower", SQLSTATE(HY013) MAL_MALLOC_FAIL); - msg = str_lower(&buf, &buflen, s); - if (msg != MAL_SUCCEED) { + if (GDKtolower(&buf, &buflen, s) != GDK_SUCCEED) { GDKfree(buf); - return msg; + throw(MAL, "str.lower", GDK_EXCEPTION); } *res = GDKstrdup(buf); } @@ -756,14 +747,6 @@ STRlower(str *res, const str *arg1) return msg; } -str -str_upper(str *buf, size_t *buflen, const char *s) -{ - if (GDKtoupper(buf, buflen, s) != GDK_SUCCEED) - throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL); - return MAL_SUCCEED; -} - static str STRupper(str *res, const str *arg1) { @@ -778,10 +761,9 @@ STRupper(str *res, const str *arg1) *res = NULL; if (!(buf = GDKmalloc(buflen))) throw(MAL, "str.upper", SQLSTATE(HY013) MAL_MALLOC_FAIL); - msg = str_upper(&buf, &buflen, s); - if (msg != MAL_SUCCEED) { + if (GDKtoupper(&buf, &buflen, s) != GDK_SUCCEED) { GDKfree(buf); - return msg; + throw(MAL, "str.upper", GDK_EXCEPTION); } *res = GDKstrdup(buf); } diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h --- a/monetdb5/modules/atoms/str.h +++ b/monetdb5/modules/atoms/str.h @@ -185,13 +185,6 @@ extern str str_suffix(str *buf, size_t * extern str str_repeat(str *buf, size_t *buflen, const char *s, int c) __attribute__((__visibility__("hidden"))); -/* Make sure the UTF8_toLowerFrom hash is locked! */ -extern str str_lower(str *buf, size_t *buflen, const char *s) - __attribute__((__visibility__("hidden"))); -/* Make sure the UTF8_toUpperFrom hash is locked! 
*/ -extern str str_upper(str *buf, size_t *buflen, const char *s) - __attribute__((__visibility__("hidden"))); - extern str str_strip(str *buf, size_t *buflen, const char *s) __attribute__((__visibility__("hidden"))); extern str str_ltrim(str *buf, size_t *buflen, const char *s) diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c --- a/monetdb5/modules/kernel/batstr.c +++ b/monetdb5/modules/kernel/batstr.c @@ -1605,15 +1605,42 @@ do_batstr_batint_batstr_str(Client cntxt } static str +STRbatConvert(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, + BAT *(*func)(BAT *, BAT *), const char *malfunc) +{ + BAT *bn = NULL, *b = NULL, *bs = NULL; + bat *res = getArgReference_bat(stk, pci, 0), + *bid = getArgReference_bat(stk, pci, 1), + *sid1 = pci->argc == 3 ? getArgReference_bat(stk, pci, 2) : NULL; + + (void) cntxt; + (void) mb; + if (!(b = BATdescriptor(*bid))) { + throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + } + if (sid1 && !is_bat_nil(*sid1) && !(bs = BATdescriptor(*sid1))) { + BBPreclaim(b); + throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + } + bn = (*func)(b, bs); + unfix_inputs(2, b, bs); + if (bn == NULL) + throw(MAL, malfunc, GDK_EXCEPTION); + *res = bn->batCacheid; + BBPkeepref(bn); + return MAL_SUCCEED; +} + +static str STRbatLower(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - return do_batstr_str(cntxt, mb, stk, pci, "batstr.lower", str_lower); + return STRbatConvert(cntxt, mb, stk, pci, BATtolower, "batstr.toLower"); } static str STRbatUpper(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - return do_batstr_str(cntxt, mb, stk, pci, "batstr.upper", str_upper); + return STRbatConvert(cntxt, mb, stk, pci, BATtoupper, "batstr.toUpper"); } static str _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org