Changeset: 1d869b0d669f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1d869b0d669f
Modified Files:
	monetdb5/modules/atoms/str.c
	monetdb5/modules/atoms/str.h
	monetdb5/modules/kernel/batstr.c
	sql/backends/monet5/UDF/udf/udf.c
Branch: alloc-less-str
Log Message:
Re-allocate the string buffer in a multiple of 1024 bytes, look for nils in reverse function and give a larger initial buffer for ltrim2, rtrim2 and strip2 functions because of codepoints calculation diffs (209 lines): diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c --- a/monetdb5/modules/atoms/str.c +++ b/monetdb5/modules/atoms/str.c @@ -3934,7 +3934,7 @@ trimchars(str *buf, size_t *buflen, size int c, *cbuf; CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc); - cbuf = *((int**)buf); + cbuf = *(int**)buf; while (*s) { UTF8_GETCHAR(c, s); @@ -3965,10 +3965,10 @@ str_strip2(str *buf, size_t *buflen, str if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED) return msg; len = strlen(s); - n = lstrip(s, len, *((int**)buf), n3); + n = lstrip(s, len, *(int**)buf, n3); s += n; len -= n; - n = rstrip(s, len, *((int**)buf), n3); + n = rstrip(s, len, *(int**)buf, n3); n++; CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2"); @@ -3982,7 +3982,7 @@ str_strip2(str *buf, size_t *buflen, str static str STRStrip2(str *res, const str *arg1, const str *arg2) { - size_t buflen = INITIAL_STR_BUFFER_LENGTH; + size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int); str buf = GDKmalloc(buflen), msg; *res = NULL; @@ -4015,7 +4015,7 @@ str_ltrim2(str *buf, size_t *buflen, str if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED) return msg; len = strlen(s); - n = lstrip(s, len, *((int**)buf), n3); + n = lstrip(s, len, *(int**)buf, n3); nallocate = len - n + 1; CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2"); @@ -4029,7 +4029,7 @@ str_ltrim2(str *buf, size_t *buflen, str static str STRLtrim2(str *res, const str *arg1, const str *arg2) { - size_t buflen = INITIAL_STR_BUFFER_LENGTH; + size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int); str buf = GDKmalloc(buflen), msg; *res = NULL; @@ -4062,7 +4062,7 @@ str_rtrim2(str *buf, size_t *buflen, str if ((msg = trimchars(buf, buflen, &n3, s2, n2, 
"str.ltrim2")) != MAL_SUCCEED) return msg; len = strlen(s); - n = rstrip(s, len, *((int**)buf), n3); + n = rstrip(s, len, *(int**)buf, n3); n++; CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2"); @@ -4076,7 +4076,7 @@ str_rtrim2(str *buf, size_t *buflen, str static str STRRtrim2(str *res, const str *arg1, const str *arg2) { - size_t buflen = INITIAL_STR_BUFFER_LENGTH; + size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int); str buf = GDKmalloc(buflen), msg; *res = NULL; diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h --- a/monetdb5/modules/atoms/str.h +++ b/monetdb5/modules/atoms/str.h @@ -15,14 +15,14 @@ /* The batstr module functions use a single buffer to avoid malloc/free overhead. Note the buffer should be always large enough to hold null strings, so less testing will be required */ -#define INITIAL_STR_BUFFER_LENGTH MAX(strlen(str_nil) + 1, 1024) +#define INITIAL_STR_BUFFER_LENGTH (MAX(strlen(str_nil) + 1, 1024)) /* The batstr module functions use a single buffer to avoid malloc/free overhead. 
Note the buffer should be always large enough to hold null strings, so less testing will be required */ #define CHECK_STR_BUFFER_LENGTH(BUFFER, BUFFER_LEN, NEXT_LEN, OP) \ - do { \ - if (NEXT_LEN > *BUFFER_LEN) { \ - size_t newlen = NEXT_LEN + 1024; \ + do { \ + if ((NEXT_LEN) > *BUFFER_LEN) { \ + size_t newlen = (((NEXT_LEN) + 1023) & ~1023); /* align to a multiple of 1024 bytes */ \ str newbuf = GDKmalloc(newlen); \ if (!newbuf) \ throw(MAL, OP, SQLSTATE(HY013) MAL_MALLOC_FAIL); \ diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c --- a/monetdb5/modules/kernel/batstr.c +++ b/monetdb5/modules/kernel/batstr.c @@ -296,12 +296,11 @@ bailout: * Output type: str (a BAT of strings) */ static str -do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char *name, str (*func)(str*, size_t*, str, str)) +do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char *name, size_t buflen, str (*func)(str*, size_t*, str, str)) { BATiter bi; BAT *bn = NULL, *b = NULL; BUN p, q; - size_t buflen = INITIAL_STR_BUFFER_LENGTH; str x, y = *s2, buf = GDKmalloc(buflen), msg = MAL_SUCCEED; bool nils = false; @@ -353,12 +352,11 @@ bailout: * Output type: str (a BAT of strings) */ static str -do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name, str (*func)(str*, size_t*, str, str)) +do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name, size_t buflen, str (*func)(str*, size_t*, str, str)) { BATiter lefti, righti; BAT *bn = NULL, *left = NULL, *right = NULL; BUN p, q; - size_t buflen = INITIAL_STR_BUFFER_LENGTH; str x, y, buf = GDKmalloc(buflen), msg = MAL_SUCCEED; bool nils = false; @@ -828,37 +826,37 @@ STRbatRtrim(bat *ret, const bat *l) static str STRbatStrip2_const(bat *ret, const bat *l, const str *s2) { - return do_batstr_conststr_str(ret, l, s2, "batstr.strip", str_strip2); + return do_batstr_conststr_str(ret, l, s2, "batstr.strip", INITIAL_STR_BUFFER_LENGTH * sizeof(int), 
str_strip2); } static str STRbatLtrim2_const(bat *ret, const bat *l, const str *s2) { - return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim", str_ltrim2); + return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2); } static str STRbatRtrim2_const(bat *ret, const bat *l, const str *s2) { - return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim", str_rtrim2); + return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2); } static str STRbatStrip2_bat(bat *ret, const bat *l, const bat *l2) { - return do_batstr_batstr_str(ret, l, l2, "batstr.strip", str_strip2); + return do_batstr_batstr_str(ret, l, l2, "batstr.strip", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_strip2); } static str STRbatLtrim2_bat(bat *ret, const bat *l, const bat *l2) { - return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim", str_ltrim2); + return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2); } static str STRbatRtrim2_bat(bat *ret, const bat *l, const bat *l2) { - return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim", str_rtrim2); + return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2); } static str diff --git a/sql/backends/monet5/UDF/udf/udf.c b/sql/backends/monet5/UDF/udf/udf.c --- a/sql/backends/monet5/UDF/udf/udf.c +++ b/sql/backends/monet5/UDF/udf/udf.c @@ -118,6 +118,7 @@ UDFBATreverse_(BAT **ret, BAT *src) BUN p = 0, q = 0; size_t buflen = INITIAL_STR_BUFFER_LENGTH; str msg = MAL_SUCCEED, buf; + bool nils = false; /* assert calling sanity */ assert(ret); @@ -158,14 +159,15 @@ UDFBATreverse_(BAT **ret, BAT *src) msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL); goto bailout; } + nils |= strNil(buf); } bailout: GDKfree(buf); if (bn && !msg) { BATsetcount(bn, q); - bn->tnil = src->tnil; - bn->tnonil = src->tnonil; + bn->tnil = nils; + 
bn->tnonil = !nils; bn->tkey = BATcount(bn) <= 1; bn->tsorted = BATcount(bn) <= 1; bn->trevsorted = BATcount(bn) <= 1; @@ -189,10 +191,10 @@ UDFBATreverse(bat *ret, const bat *arg) /* bat-id -> BAT-descriptor */ if ((src = BATdescriptor(*arg)) == NULL) - throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); /* do the work */ - msg = UDFBATreverse_ ( &res, src ); + msg = UDFBATreverse_( &res, src ); /* release input BAT-descriptor */ BBPunfix(src->batCacheid); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list