Changeset: d5c304dce084 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d5c304dce084
Modified Files:
        monetdb5/modules/atoms/str.c
Branch: ascii-flag
Log Message:

Add line and paragraph separators to the list of whitespace codepoints.


diffs (79 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1057,9 +1057,9 @@ STRsplitpart(str *res, str *haystack, st
 
 /* returns number of bytes to remove from left to strip the codepoints in rm */
 static size_t
-lstrip(const char *s, size_t len, const int *rm, size_t nrm)
+lstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
 {
-       int c;
+       uint32_t c;
        size_t i, n, skip = 0;
 
        while (len > 0) {
@@ -1082,9 +1082,9 @@ lstrip(const char *s, size_t len, const 
 /* returns the resulting length of s after stripping codepoints in rm
  * from the right */
 static size_t
-rstrip(const char *s, size_t len, const int *rm, size_t nrm)
+rstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
 {
-       int c;
+       uint32_t c;
        size_t i, n;
 
        while (len > 0) {
@@ -1102,7 +1102,7 @@ rstrip(const char *s, size_t len, const 
        return len;
 }
 
-const int whitespace[] = {
+const uint32_t whitespace[] = {
        ' ',                                            /* space */
        '\t',                                           /* tab (character tabulation) */
        '\n',                                           /* line feed */
@@ -1126,6 +1126,11 @@ const int whitespace[] = {
        0x202F,                                         /* narrow no-break space */
        0x205F,                                         /* medium mathematical space */
        0x3000,                                         /* ideographic space */
+/* below the code points that have the Unicode Zl (line separator) property */
+       0x2028,                                         /* line separator */
+/* below the code points that have the Unicode Zp (paragraph separator)
+ * property */
+       0x2029,                                         /* paragraph separator */
 };
 
 #define NSPACES                (sizeof(whitespace) / sizeof(whitespace[0]))
@@ -1294,10 +1299,10 @@ str_strip2(str *buf, size_t *buflen, con
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = lstrip(s, len, *(int **) buf, n3);
+               n = lstrip(s, len, *(uint32_t **) buf, n3);
                s += n;
                len -= n;
-               n = rstrip(s, len, *(int **) buf, n3);
+               n = rstrip(s, len, *(uint32_t **) buf, n3);
 
                n++;
                CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
@@ -1351,7 +1356,7 @@ str_ltrim2(str *buf, size_t *buflen, con
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = lstrip(s, len, *(int **) buf, n3);
+               n = lstrip(s, len, *(uint32_t **) buf, n3);
                nallocate = len - n + 1;
 
                CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
@@ -1405,7 +1410,7 @@ str_rtrim2(str *buf, size_t *buflen, con
                if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2")) != MAL_SUCCEED)
                        return msg;
                len = strlen(s);
-               n = rstrip(s, len, *(int **) buf, n3);
+               n = rstrip(s, len, *(uint32_t **) buf, n3);
                n++;
 
                CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to