Here's the patch. I ran a quick test against 28_regex/*. A full bootstrap may still be needed, but I can't run one right now.
-- Regards, Tim Shen
commit 2fd6be816c1d1797b3aad228b9fb2cfb7374483c Author: tim <timshe...@gmail.com> Date: Mon May 19 10:40:16 2014 -0400 2014-05-19 Tim Shen <timshe...@gmail.com> PR libstdc++/61227 * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_character_class): Add negative character class support. * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply): Likewise. * testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc: New testcase. * testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc: New testcase. diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index d7e2162..52f7235 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -369,15 +369,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif } + // __neg should be true for \D, \S and \W only. void - _M_add_character_class(const _StringT& __s) + _M_add_character_class(const _StringT& __s, bool __neg) { auto __mask = _M_traits.lookup_classname(__s.data(), __s.data() + __s.size(), __icase); if (__mask == 0) __throw_regex_error(regex_constants::error_ctype); - _M_class_set |= __mask; + if (!__neg) + _M_class_set |= __mask; + else + _M_neg_class_set.push_back(__mask); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -387,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_make_range(_CharT __l, _CharT __r) { _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), - _M_translator._M_transform(__r))); + _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -435,6 +439,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::vector<_CharT> _M_char_set; std::vector<_StringT> _M_equiv_set; std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; + std::vector<_CharClassT> _M_neg_class_set; _CharClassT _M_class_set; _TransT _M_translator; const _TraitsT& _M_traits; diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 
3cf9e457..a3a4945 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -397,7 +397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1); _BracketMatcher<_TraitsT, __icase, __collate> __matcher (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); __matcher._M_ready(); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(std::move(__matcher)))); @@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) __matcher._M_add_equivalence_class(_M_value); else if (_M_match_token(_ScannerT::_S_token_char_class_name)) - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); else if (_M_try_char()) // [a { auto __ch = _M_value[0]; @@ -451,6 +451,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } __matcher._M_add_char(__ch); } + else if (_M_match_token(_ScannerT::_S_token_quoted_class)) + __matcher._M_add_character_class(_M_value, + _M_ctype.is(_CtypeT::upper, + _M_value[0])); else __throw_regex_error(regex_constants::error_brack); } @@ -508,6 +512,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_apply(_CharT __ch, false_type) const { bool __ret = false; + // TODO Refactor this piece of junk. 
if (std::find(_M_char_set.begin(), _M_char_set.end(), _M_translator._M_translate(__ch)) != _M_char_set.end()) @@ -527,6 +532,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_traits.transform_primary(&__ch, &__ch+1)) != _M_equiv_set.end()) __ret = true; + else + { + for (auto& __it : _M_neg_class_set) + if (!_M_traits.isctype(__ch, __it)) + { + __ret = true; + break; + } + } } if (_M_is_non_matching) return !__ret; diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc new file mode 100644 index 0000000..2d854f6 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc @@ -0,0 +1,47 @@ +// { dg-options "-std=gnu++11" } + +// +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +// 28.11.2 regex_match + +#include <regex> +#include <testsuite_hooks.h> +#include <testsuite_regex.h> + +using namespace __gnu_test; +using namespace std; + +// libstdc++/61227 +void +test01() +{ + std::regex r1{R"([^\w ])"}; + std::regex r2{R"(\b\w+\b)"}; + std::regex r3{R"(\b\w+\b)"}; + std::regex r4{"//.*$"}; + std::regex r5{R"((?:[^;"]|"[^"]*")+)"}; + std::regex r6{R"~~(([^\s"]+)|"([^"]*)")~~"}; +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc index e7280ac..8641732 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc @@ -44,6 +44,16 @@ test01() VERIFY(regex_match_debug("_az", regex("\\w*"))); VERIFY(regex_match_debug("!@#$%", regex("\\W*"))); VERIFY(!regex_match_debug("_01234", regex("\\W*"))); + + VERIFY(regex_match_debug("01", regex("[\\d]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\D]*"))); + VERIFY(!regex_match_debug("asdfjkl0", regex("[\\D]*"))); + VERIFY(regex_match_debug("\r\t\v\f ", regex("[\\s]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\S]*"))); + VERIFY(!regex_match_debug("asdfjkl\r", regex("[\\S]*"))); + VERIFY(regex_match_debug("_az", regex("[\\w]*"))); + VERIFY(regex_match_debug("!@#$%", regex("[\\W]*"))); + VERIFY(!regex_match_debug("_01234", regex("[\\W]*"))); } int