Hello Serg, Please review this patch for MDEV-6027. It implements a new system variable, default_regex_flags, which addresses the remaining incompatibilities between PCRE and the old regex library.
Greetings.
=== modified file 'mysql-test/r/func_regexp_pcre.result' --- mysql-test/r/func_regexp_pcre.result 2013-10-08 14:25:17 +0000 +++ mysql-test/r/func_regexp_pcre.result 2014-04-17 12:19:16 +0000 @@ -754,3 +754,88 @@ DROP TABLE t1; SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*'); REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*') https://mariadb.org +# +# MDEV-6027 RLIKE: "." no longer matching new line +# +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +'cat and\ndog' RLIKE 'cat.*dog' +0 +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +'cat and\r\ndog' RLIKE 'cat.*dog' +0 +SELECT 'a\nb' RLIKE 'a.b'; +'a\nb' RLIKE 'a.b' +0 +SELECT 'a\nb' RLIKE '(?-s)a.b'; +'a\nb' RLIKE '(?-s)a.b' +0 +SET default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +'cat and\ndog' RLIKE 'cat.*dog' +1 +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +'cat and\r\ndog' RLIKE 'cat.*dog' +1 +SELECT 'a\nb' RLIKE 'a.b'; +'a\nb' RLIKE 'a.b' +1 +SELECT 'a\nb' RLIKE '(?-s)a.b'; +'a\nb' RLIKE '(?-s)a.b' +0 +SET default_regex_flags=DEFAULT; +SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp +SET default_regex_flags='DUPNAMES'; +SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$') +Monday Mon +SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$') +Tuesday Tue +SET default_regex_flags=DEFAULT; +SELECT 'AB' RLIKE 'A B'; +'AB' RLIKE 'A B' +0 +SELECT 'AB' RLIKE 'A# this is a comment\nB'; +'AB' RLIKE 'A# this is a comment\nB' +0 +SET default_regex_flags='EXTENDED'; +SELECT 'AB' RLIKE 'A B'; +'AB' RLIKE 'A B' +1 
+SELECT 'AB' RLIKE 'A# this is a comment\nB'; +'AB' RLIKE 'A# this is a comment\nB' +1 +SET default_regex_flags=DEFAULT; +SELECT 'Aq' RLIKE 'A\\q'; +'Aq' RLIKE 'A\\q' +1 +SET default_regex_flags='EXTRA'; +SELECT 'Aq' RLIKE 'A\\q'; +ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp +SET default_regex_flags=DEFAULT; +SELECT 'a\nb\nc' RLIKE '^b$'; +'a\nb\nc' RLIKE '^b$' +0 +SET default_regex_flags='MULTILINE'; +SELECT 'a\nb\nc' RLIKE '^b$'; +'a\nb\nc' RLIKE '^b$' +1 +SET default_regex_flags=DEFAULT; +SELECT REGEXP_SUBSTR('abc','.+'); +REGEXP_SUBSTR('abc','.+') +abc +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2') +abc/ +SET default_regex_flags='UNGREEDY'; +SELECT REGEXP_SUBSTR('abc','.+'); +REGEXP_SUBSTR('abc','.+') +a +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2') +/abc +SET default_regex_flags=DEFAULT; === modified file 'mysql-test/r/mysqld--help.result' --- mysql-test/r/mysqld--help.result 2014-03-28 07:31:24 +0000 +++ mysql-test/r/mysqld--help.result 2014-04-17 12:17:07 +0000 @@ -136,6 +136,10 @@ --deadlock-timeout-short=# Short timeout for the two-step deadlock detection (in microseconds) + --default-regex-flags=name + Default flags for the regex library. Syntax: + default-regex-flags='[flag[,flag[,flag...]]]'. 
See the + manual for the complete list of valid flags --default-storage-engine=name The default storage engine for new tables --default-time-zone=name @@ -1079,6 +1083,7 @@ deadlock-search-depth-long 15 deadlock-search-depth-short 4 deadlock-timeout-long 50000000 deadlock-timeout-short 10000 +default-regex-flags default-storage-engine myisam default-time-zone (No default value) default-week-format 0 === added file 'mysql-test/suite/sys_vars/r/default_regex_flags_basic.result' --- mysql-test/suite/sys_vars/r/default_regex_flags_basic.result 1970-01-01 00:00:00 +0000 +++ mysql-test/suite/sys_vars/r/default_regex_flags_basic.result 2014-04-17 12:19:56 +0000 @@ -0,0 +1,57 @@ +SET default_regex_flags=''; +SELECT @@default_regex_flags; +@@default_regex_flags + +SET default_regex_flags=DEFAULT; +SELECT @@default_regex_flags; +@@default_regex_flags + +SET default_regex_flags=NULL; +ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL' +SELECT @@default_regex_flags; +@@default_regex_flags + +SET default_regex_flags='UNKNOWN'; +ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN' +SET default_regex_flags=123; +ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123' +SET default_regex_flags=123.0; +ERROR 42000: Incorrect argument type to variable 'default_regex_flags' +SET default_regex_flags=123e0; +ERROR 42000: Incorrect argument type to variable 'default_regex_flags' +SET default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL +SET default_regex_flags=NULL; +ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL' +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL +SET @@default_regex_flags=63; +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY +SET @@default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL +SET 
@@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY'; +SELECT @@default_regex_flags; +@@default_regex_flags +DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY +SET @@default_regex_flags=DEFAULT; +SET @@global.default_regex_flags='MULTILINE'; +SELECT @@session.default_regex_flags; +@@session.default_regex_flags + +# connection con1 +SELECT @@session.default_regex_flags; +@@session.default_regex_flags +MULTILINE +# connection default +SELECT @@session.default_regex_flags; +@@session.default_regex_flags + +SET @@global.default_regex_flags=DEFAULT; +SET default_regex_flags=DEFAULT; === added file 'mysql-test/suite/sys_vars/t/default_regex_flags_basic.test' --- mysql-test/suite/sys_vars/t/default_regex_flags_basic.test 1970-01-01 00:00:00 +0000 +++ mysql-test/suite/sys_vars/t/default_regex_flags_basic.test 2014-04-17 12:17:43 +0000 @@ -0,0 +1,43 @@ + +SET default_regex_flags=''; +SELECT @@default_regex_flags; +SET default_regex_flags=DEFAULT; +SELECT @@default_regex_flags; +--error ER_WRONG_VALUE_FOR_VAR +SET default_regex_flags=NULL; +SELECT @@default_regex_flags; +--error ER_WRONG_VALUE_FOR_VAR +SET default_regex_flags='UNKNOWN'; +--error ER_WRONG_VALUE_FOR_VAR +SET default_regex_flags=123; +--error ER_WRONG_TYPE_FOR_VAR +SET default_regex_flags=123.0; +--error ER_WRONG_TYPE_FOR_VAR +SET default_regex_flags=123e0; +SET default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +--error ER_WRONG_VALUE_FOR_VAR +SET default_regex_flags=NULL; +SELECT @@default_regex_flags; + +SET @@default_regex_flags=63; +SELECT @@default_regex_flags; +SET @@default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY'; +SELECT @@default_regex_flags; +SET @@default_regex_flags=DEFAULT; + +SET @@global.default_regex_flags='MULTILINE'; +SELECT @@session.default_regex_flags; +connect (con1,localhost,root,,); +--echo # connection con1 +connection con1; +SELECT @@session.default_regex_flags; 
+connection default; +--echo # connection default +disconnect con1; +SELECT @@session.default_regex_flags; +SET @@global.default_regex_flags=DEFAULT; + +SET default_regex_flags=DEFAULT; === modified file 'mysql-test/t/func_regexp_pcre.test' --- mysql-test/t/func_regexp_pcre.test 2013-10-08 14:25:17 +0000 +++ mysql-test/t/func_regexp_pcre.test 2014-04-17 12:17:49 +0000 @@ -349,3 +349,51 @@ DROP TABLE t1; SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*'); + + +--echo # +--echo # MDEV-6027 RLIKE: "." no longer matching new line +--echo # +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +SELECT 'a\nb' RLIKE 'a.b'; +SELECT 'a\nb' RLIKE '(?-s)a.b'; +SET default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +SELECT 'a\nb' RLIKE 'a.b'; +SELECT 'a\nb' RLIKE '(?-s)a.b'; +SET default_regex_flags=DEFAULT; + +--error ER_REGEXP_ERROR +SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +SET default_regex_flags='DUPNAMES'; +SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); +SET default_regex_flags=DEFAULT; + +SELECT 'AB' RLIKE 'A B'; +SELECT 'AB' RLIKE 'A# this is a comment\nB'; +SET default_regex_flags='EXTENDED'; +SELECT 'AB' RLIKE 'A B'; +SELECT 'AB' RLIKE 'A# this is a comment\nB'; +SET default_regex_flags=DEFAULT; + +SELECT 'Aq' RLIKE 'A\\q'; +SET default_regex_flags='EXTRA'; +--error ER_REGEXP_ERROR +SELECT 'Aq' RLIKE 'A\\q'; +SET default_regex_flags=DEFAULT; + +SELECT 'a\nb\nc' RLIKE '^b$'; +SET default_regex_flags='MULTILINE'; +SELECT 'a\nb\nc' RLIKE '^b$'; +SET default_regex_flags=DEFAULT; + +SELECT REGEXP_SUBSTR('abc','.+'); +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +SET default_regex_flags='UNGREEDY'; +SELECT 
REGEXP_SUBSTR('abc','.+'); +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +SET default_regex_flags=DEFAULT; === modified file 'sql/item_cmpfunc.cc' --- sql/item_cmpfunc.cc 2014-03-26 21:25:38 +0000 +++ sql/item_cmpfunc.cc 2014-04-16 12:53:20 +0000 @@ -32,6 +32,7 @@ #include "sql_parse.h" // check_stack_overrun #include "sql_time.h" // make_truncated_value_warning #include "sql_base.h" // dynamic_column_error_message +#include "sys_vars_shared.h" // default_regex_flags_pcre static Item_result item_store_type(Item_result a, Item *item, my_bool unsigned_flag) @@ -5055,6 +5056,11 @@ bool Item_func_like::find_selective_pred } +int Regexp_processor_pcre::default_regex_flags() +{ + return default_regex_flags_pcre(current_thd); +} + /** Convert string to lib_charset, if needed. === modified file 'sql/item_cmpfunc.h' --- sql/item_cmpfunc.h 2014-03-26 21:25:38 +0000 +++ sql/item_cmpfunc.h 2014-04-16 12:24:41 +0000 @@ -1513,9 +1513,10 @@ class Regexp_processor_pcre m_library_charset(&my_charset_utf8_general_ci), m_subpatterns_needed(0) {} + int default_regex_flags(); void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns) { - m_library_flags= extra_flags | + m_library_flags= default_regex_flags() | extra_flags | (data_charset != &my_charset_bin ? 
(PCRE_UTF8 | PCRE_UCP) : 0) | ((data_charset->state & === modified file 'sql/sql_class.h' --- sql/sql_class.h 2014-04-15 07:29:57 +0000 +++ sql/sql_class.h 2014-04-16 12:17:43 +0000 @@ -516,6 +516,7 @@ typedef struct system_variables ulonglong join_buff_size; ulonglong sortbuff_size; ulonglong group_concat_max_len; + ulonglong default_regex_flags; ha_rows select_limit; ha_rows max_join_size; ha_rows expensive_subquery_limit; === modified file 'sql/sys_vars.cc' --- sql/sys_vars.cc 2014-03-28 07:31:24 +0000 +++ sql/sys_vars.cc 2014-04-17 12:23:12 +0000 @@ -4568,6 +4568,46 @@ static Sys_var_set Sys_log_slow_filter( log_slow_filter_names, DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1))); +static const char *default_regex_flags_names[]= +{ + "DOTALL", // (?s) . matches anything including NL + "DUPNAMES", // (?J) Allow duplicate names for subpatterns + "EXTENDED", // (?x) Ignore white space and # comments + "EXTRA", // (?X) extra features (e.g. error on unknown escape character) + "MULTILINE", // (?m) ^ and $ match newlines within data + "UNGREEDY", // (?U) Invert greediness of quantifiers + 0 +}; +static const int default_regex_flags_to_pcre[]= +{ + PCRE_DOTALL, + PCRE_DUPNAMES, + PCRE_EXTENDED, + PCRE_EXTRA, + PCRE_MULTILINE, + PCRE_UNGREEDY, + 0 +}; +int default_regex_flags_pcre(const THD *thd) +{ + ulonglong src= thd->variables.default_regex_flags; + int i, res= 0; + for (i= res= 0; default_regex_flags_to_pcre[i]; i++) + { + if (src & (1 << i)) + res|= default_regex_flags_to_pcre[i]; + } + return res; +} +static Sys_var_set Sys_default_regex_flags( + "default_regex_flags", + "Default flags for the regex library. " + "Syntax: default-regex-flags='[flag[,flag[,flag...]]]'. " + "See the manual for the complete list of valid flags", + SESSION_VAR(default_regex_flags), CMD_LINE(REQUIRED_ARG), + default_regex_flags_names, + DEFAULT(0)); + static Sys_var_ulong Sys_log_slow_rate_limit( "log_slow_rate_limit", "Write to slow log every #th slow query. 
Set to 1 to log everything. " === modified file 'sql/sys_vars_shared.h' --- sql/sys_vars_shared.h 2011-06-30 15:46:53 +0000 +++ sql/sys_vars_shared.h 2014-04-16 12:51:45 +0000 @@ -36,6 +36,8 @@ extern sys_var *intern_find_sys_var(cons extern sys_var_chain all_sys_vars; +extern int default_regex_flags_pcre(const THD *thd); + /** wrapper to hide a mutex and an rwlock under a common interface */ class PolyLock {
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp