Changeset: 1d869b0d669f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1d869b0d669f
Modified Files:
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/kernel/batstr.c
        sql/backends/monet5/UDF/udf/udf.c
Branch: alloc-less-str
Log Message:

Re-allocate the string buffer in multiples of 1024 bytes, look for nils in the
reverse function, and give the ltrim2, rtrim2 and strip2 functions a larger
initial buffer because of the code point calculation.


diffs (209 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3934,7 +3934,7 @@ trimchars(str *buf, size_t *buflen, size
        int c, *cbuf;
 
        CHECK_STR_BUFFER_LENGTH(buf, buflen, nlen, malfunc);
-       cbuf = *((int**)buf);
+       cbuf = *(int**)buf;
 
        while (*s) {
                UTF8_GETCHAR(c, s);
@@ -3965,10 +3965,10 @@ str_strip2(str *buf, size_t *buflen, str
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = lstrip(s, len, *((int**)buf), n3);
+               n = lstrip(s, len, *(int**)buf, n3);
                s += n;
                len -= n;
-               n = rstrip(s, len, *((int**)buf), n3);
+               n = rstrip(s, len, *(int**)buf, n3);
 
                n++;
                CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
@@ -3982,7 +3982,7 @@ str_strip2(str *buf, size_t *buflen, str
 static str
 STRStrip2(str *res, const str *arg1, const str *arg2)
 {
-       size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+       size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
        str buf = GDKmalloc(buflen), msg;
 
        *res = NULL;
@@ -4015,7 +4015,7 @@ str_ltrim2(str *buf, size_t *buflen, str
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = lstrip(s, len, *((int**)buf), n3);
+               n = lstrip(s, len, *(int**)buf, n3);
                nallocate = len - n + 1;
 
                CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
@@ -4029,7 +4029,7 @@ str_ltrim2(str *buf, size_t *buflen, str
 static str
 STRLtrim2(str *res, const str *arg1, const str *arg2)
 {
-       size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+       size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
        str buf = GDKmalloc(buflen), msg;
 
        *res = NULL;
@@ -4062,7 +4062,7 @@ str_rtrim2(str *buf, size_t *buflen, str
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = rstrip(s, len, *((int**)buf), n3);
+               n = rstrip(s, len, *(int**)buf, n3);
                n++;
 
                CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
@@ -4076,7 +4076,7 @@ str_rtrim2(str *buf, size_t *buflen, str
 static str
 STRRtrim2(str *res, const str *arg1, const str *arg2)
 {
-       size_t buflen = INITIAL_STR_BUFFER_LENGTH;
+       size_t buflen = INITIAL_STR_BUFFER_LENGTH * sizeof(int);
        str buf = GDKmalloc(buflen), msg;
 
        *res = NULL;
diff --git a/monetdb5/modules/atoms/str.h b/monetdb5/modules/atoms/str.h
--- a/monetdb5/modules/atoms/str.h
+++ b/monetdb5/modules/atoms/str.h
@@ -15,14 +15,14 @@
 
 /* The batstr module functions use a single buffer to avoid malloc/free overhead.
    Note the buffer should be always large enough to hold null strings, so less testing will be required */
-#define INITIAL_STR_BUFFER_LENGTH  MAX(strlen(str_nil) + 1, 1024)
+#define INITIAL_STR_BUFFER_LENGTH (MAX(strlen(str_nil) + 1, 1024))
 
 /* The batstr module functions use a single buffer to avoid malloc/free overhead.
    Note the buffer should be always large enough to hold null strings, so less testing will be required */
 #define CHECK_STR_BUFFER_LENGTH(BUFFER, BUFFER_LEN, NEXT_LEN, OP) \
-       do {  \
-               if (NEXT_LEN > *BUFFER_LEN) { \
-                       size_t newlen = NEXT_LEN + 1024; \
+       do { \
+               if ((NEXT_LEN) > *BUFFER_LEN) { \
+                       size_t newlen = (((NEXT_LEN) + 1023) & ~1023); /* align to a multiple of 1024 bytes */ \
                        str newbuf = GDKmalloc(newlen); \
                        if (!newbuf) \
                                throw(MAL, OP, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -296,12 +296,11 @@ bailout:
  * Output type: str (a BAT of strings)
  */
 static str
-do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char *name, str (*func)(str*, size_t*, str, str))
+do_batstr_conststr_str(bat *res, const bat *l, const str *s2, const char *name, size_t buflen, str (*func)(str*, size_t*, str, str))
 {
        BATiter bi;
        BAT *bn = NULL, *b = NULL;
        BUN p, q;
-       size_t buflen = INITIAL_STR_BUFFER_LENGTH;
        str x, y = *s2, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
        bool nils = false;
 
@@ -353,12 +352,11 @@ bailout:
  * Output type: str (a BAT of strings)
  */
 static str
-do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name, str (*func)(str*, size_t*, str, str))
+do_batstr_batstr_str(bat *res, const bat *l, const bat *l2, const char *name, size_t buflen, str (*func)(str*, size_t*, str, str))
 {
        BATiter lefti, righti;
        BAT *bn = NULL, *left = NULL, *right = NULL;
        BUN p, q;
-       size_t buflen = INITIAL_STR_BUFFER_LENGTH;
        str x, y, buf = GDKmalloc(buflen), msg = MAL_SUCCEED;
        bool nils = false;
 
@@ -828,37 +826,37 @@ STRbatRtrim(bat *ret, const bat *l)
 static str
 STRbatStrip2_const(bat *ret, const bat *l, const str *s2)
 {
-       return do_batstr_conststr_str(ret, l, s2, "batstr.strip", str_strip2);
+       return do_batstr_conststr_str(ret, l, s2, "batstr.strip", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_strip2);
 }
 
 static str
 STRbatLtrim2_const(bat *ret, const bat *l, const str *s2)
 {
-       return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim", str_ltrim2);
+       return do_batstr_conststr_str(ret, l, s2, "batstr.ltrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2);
 }
 
 static str
 STRbatRtrim2_const(bat *ret, const bat *l, const str *s2)
 {
-       return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim", str_rtrim2);
+       return do_batstr_conststr_str(ret, l, s2, "batstr.rtrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2);
 }
 
 static str
 STRbatStrip2_bat(bat *ret, const bat *l, const bat *l2)
 {
-       return do_batstr_batstr_str(ret, l, l2, "batstr.strip", str_strip2);
+       return do_batstr_batstr_str(ret, l, l2, "batstr.strip", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_strip2);
 }
 
 static str
 STRbatLtrim2_bat(bat *ret, const bat *l, const bat *l2)
 {
-       return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim", str_ltrim2);
+       return do_batstr_batstr_str(ret, l, l2, "batstr.ltrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_ltrim2);
 }
 
 static str
 STRbatRtrim2_bat(bat *ret, const bat *l, const bat *l2)
 {
-       return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim", str_rtrim2);
+       return do_batstr_batstr_str(ret, l, l2, "batstr.rtrim", INITIAL_STR_BUFFER_LENGTH * sizeof(int), str_rtrim2);
 }
 
 static str
diff --git a/sql/backends/monet5/UDF/udf/udf.c b/sql/backends/monet5/UDF/udf/udf.c
--- a/sql/backends/monet5/UDF/udf/udf.c
+++ b/sql/backends/monet5/UDF/udf/udf.c
@@ -118,6 +118,7 @@ UDFBATreverse_(BAT **ret, BAT *src)
        BUN p = 0, q = 0;
        size_t buflen = INITIAL_STR_BUFFER_LENGTH;
        str msg = MAL_SUCCEED, buf;
+       bool nils = false;
 
        /* assert calling sanity */
        assert(ret);
@@ -158,14 +159,15 @@ UDFBATreverse_(BAT **ret, BAT *src)
                        msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
                        goto bailout;
                }
+               nils |= strNil(buf);
        }
 
 bailout:
        GDKfree(buf);
        if (bn && !msg) {
                BATsetcount(bn, q);
-               bn->tnil = src->tnil;
-               bn->tnonil = src->tnonil;
+               bn->tnil = nils;
+               bn->tnonil = !nils;
                bn->tkey = BATcount(bn) <= 1;
                bn->tsorted = BATcount(bn) <= 1;
                bn->trevsorted = BATcount(bn) <= 1;
@@ -189,10 +191,10 @@ UDFBATreverse(bat *ret, const bat *arg)
 
        /* bat-id -> BAT-descriptor */
        if ((src = BATdescriptor(*arg)) == NULL)
-               throw(MAL, "batudf.reverse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+               throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
 
        /* do the work */
-       msg = UDFBATreverse_ ( &res, src );
+       msg = UDFBATreverse_( &res, src );
 
        /* release input BAT-descriptor */
        BBPunfix(src->batCacheid);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to