Changeset: 431bfeaaa76b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/431bfeaaa76b
Branch: default
Log Message:

Merges regexp branch


diffs (101 lines):

diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -733,6 +733,7 @@ single_replace(pcre *pcre_code, pcre_ext
        int offset = 0;
        int len_result = 0;
        int addlen;
+       int empty_match_correction = 0;
        char *tmp;
 
        do {
@@ -740,7 +741,12 @@ single_replace(pcre *pcre_code, pcre_ext
                                                  exec_options, ovector, 
ovecsize);
                if (j <= 0)
                        break;
-               addlen = ovector[0] - offset + (nbackrefs == 0 ? 
len_replacement : 0);
+
+               empty_match_correction = ovector[0] == ovector[1] ? 1 : 0;
+
+               // calculate the length of the string that will be appended to result
+               addlen = ovector[0] - offset
+                               + (nbackrefs == 0 ? len_replacement : 0) + 
empty_match_correction;
                if (len_result + addlen >= *max_result) {
                        tmp = GDKrealloc(result, len_result + addlen + 1);
                        if (tmp == NULL) {
@@ -750,11 +756,13 @@ single_replace(pcre *pcre_code, pcre_ext
                        result = tmp;
                        *max_result = len_result + addlen + 1;
                }
+               // append to the result the parts of the original string that are left unchanged
                if (ovector[0] > offset) {
                        strncpy(result + len_result, origin_str + offset,
                                        ovector[0] - offset);
                        len_result += ovector[0] - offset;
                }
+               // append to the result the replacement of the matched string
                if (nbackrefs == 0) {
                        strncpy(result + len_result, replacement, 
len_replacement);
                        len_result += len_replacement;
@@ -807,8 +815,18 @@ single_replace(pcre *pcre_code, pcre_ext
                                len_result += addlen;
                        }
                }
-               offset = ovector[1];
-       } while (offset < len_origin_str && global);
+               // In case of an empty match just advance the offset by 1
+               offset = ovector[1] + empty_match_correction;
+               // and copy the character that we just advanced over
+               if (empty_match_correction) {
+                       strncpy(result + len_result, origin_str + ovector[1], 
1);
+                       ++len_result;
+               }
+               // before we loop around, check against offset - 1 if we had an empty match,
+               // since we manually advanced the offset by one; otherwise we would skip a
+               // replacement at the end of the string
+       } while ((offset - empty_match_correction) < len_origin_str && global);
+
        if (offset < len_origin_str) {
                addlen = len_origin_str - offset;
                if (len_result + addlen >= *max_result) {
diff --git a/sql/test/Tests/regexp.test b/sql/test/Tests/regexp.test
--- a/sql/test/Tests/regexp.test
+++ b/sql/test/Tests/regexp.test
@@ -54,9 +54,36 @@ select regexp_replace('foo', 'f o o', 'X
 ----
 foo
 
-# regex option - not extended
+# regex option - extended
 query T rowsort
 select regexp_replace('foo', 'f o o', 'XYZ', 'x')
 ----
 XYZ
 
+# regex option - not empty match
+query T rowsort
+select regexp_replace('foobar', 'k?', 'XY')
+----
+foobar
+
+# regex option - empty match
+query T rowsort
+select regexp_replace('foobar', 'k?', '-', 'e')
+----
+-f-o-o-b-a-r-
+
+# regex option - empty match w alternative v1
+query T rowsort
+select regexp_replace('abc', 'b|k?', '-', 'e')
+----
+-a--c-
+
+# regex option - empty match w alternative v2
+# even though you would expect -a--c- the pcre lib does not return 
+# the longest match for this particular pattern in offset 1 ('b') but 
+# an empty string match ¯\_(ツ)_/¯ 
+query T rowsort
+select regexp_replace('abc', 'k?|b', '-', 'e')
+----
+-a-b-c-
+
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to