On 27/08/15 22:18 -0700, Tim Shen wrote:
Bootstrapped and tested.
Thanks!
--
Regards,
Tim Shen
commit 53c1caff442e97a18652ec8b1d984341168fd98d
Author: Tim Shen <tims...@google.com>
Date: Thu Aug 27 21:42:40 2015 -0700
PR libstdc++/67361
* include/bits/regex_error.h: Add __throw_regex_error that
supports string.
* include/bits/regex_automaton.h: Add more specific exception
messages.
* include/bits/regex_automaton.tcc: Likewise.
* include/bits/regex_compiler.h: Likewise.
* include/bits/regex_compiler.tcc: Likewise.
* include/bits/regex_scanner.h: Likewise.
* include/bits/regex_scanner.tcc: Likewise.
Nice, thanks for doing this!
@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
// time, "\\2" is valid, but "\\1" and "\\3" are not.
if (__index >= _M_subexpr_count)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference index exceeds current sub-expression count.");
for (auto __it : this->_M_paren_stack)
if (__index == __it)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference refered to an opened sub-expression.");
Should be "referred".
And one of the other strings in another throw says "befoer".
diff --git a/libstdc++-v3/include/bits/regex_compiler.h
b/libstdc++-v3/include/bits/regex_compiler.h
index 0cb0c04..12ffabe 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid collate element: "));
_M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
There seems to be no need to construct a std::string here, just pass a
const char* (see below).
Also, this string ends in a colon, whereas most end in a period. Any
reason for the difference?
@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid equivalence class."));
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.push_back(__st);
Just pass const char*.
@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size(),
__icase);
if (__mask == 0)
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid character class."));
if (!__neg)
_M_class_set |= __mask;
else
Ditto.
@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_make_range(_CharT __l, _CharT __r)
{
if (__l > __r)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(regex_constants::error_range,
+ string("Invalid range in bracket expression."));
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
Ditto.
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc
b/libstdc++-v3/include/bits/regex_compiler.tcc
index 9a62311..019ca42 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -77,16 +77,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_traits(_M_nfa->_M_traits),
_M_ctype(std::use_facet<_CtypeT>(__loc))
{
- _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
- __r._M_append(_M_nfa->_M_insert_subexpr_begin());
- this->_M_disjunction();
- if (!_M_match_token(_ScannerT::_S_token_eof))
- __throw_regex_error(regex_constants::error_paren);
- __r._M_append(_M_pop());
- _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
- __r._M_append(_M_nfa->_M_insert_subexpr_end());
- __r._M_append(_M_nfa->_M_insert_accept());
- _M_nfa->_M_eliminate_dummy();
+ __try
+ {
+ _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
+ __r._M_append(_M_nfa->_M_insert_subexpr_begin());
+ this->_M_disjunction();
+ if (!_M_match_token(_ScannerT::_S_token_eof))
+ __throw_regex_error(regex_constants::error_paren,
+ "Unexpected end of regex.");
+ __r._M_append(_M_pop());
+ _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
+ __r._M_append(_M_nfa->_M_insert_subexpr_end());
+ __r._M_append(_M_nfa->_M_insert_accept());
+ _M_nfa->_M_eliminate_dummy();
+ }
+ __catch(std::regex_error __e)
+ {
+ __throw_regex_error(__e.code(),
+ string(__e.what()) + " Location: \""
+ + _M_scanner._M_get_location_string() + "\"");
+ }
}
template<typename _TraitsT>
I wonder if we want to make this more efficient by adding a private
member to regex_error that would allow information to be appended to
the string, rather then creating a new regex_error with a new string.
diff --git a/libstdc++-v3/include/bits/regex_error.h
b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..0dd1fdf 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -155,6 +155,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::error_type
code() const
{ return _M_code; }
+
+ private:
+ regex_error(regex_constants::error_type __ecode, const string& __what)
+ : std::runtime_error(__what), _M_code(__ecode) { }
+
+ friend void __throw_regex_error(regex_constants::error_type, const
string&);
};
//@} // group regex
@@ -162,5 +168,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
__throw_regex_error(regex_constants::error_type __ecode);
+ inline void
+ __throw_regex_error(regex_constants::error_type __ecode, const string&
__what)
+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
I suggest adding another overload that takes a const char* rather than
std::string. The reason is that when using the new ABI this function
will take a std::__cxx11::string, so calling it will allocate memory
for the string data, then that string is passed to the regex_error
constructor which has to convert it internally to an old std::string,
which has to allocate a second time.
If there is an overload taking a const char* then that can be passed
to the regex_error constructor and only one allocation will be done.
(I have considered making it possible for exceptions to move the
memory from a new string into an their old string member, but that
isn't done currently.)
diff --git a/libstdc++-v3/include/bits/regex_scanner.h
b/libstdc++-v3/include/bits/regex_scanner.h
index b47103e..7795dd2 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_get_value() const
{ return _M_value; }
+ string
+ _M_get_location_string() const
+ {
+ auto __left = std::max(_M_begin, _M_current - 2);
+ auto __right = std::min(_M_end, _M_current + 3);
+ static constexpr char __here[] = ">>><<<";
I don't think there's any advantage to using a static here, it doesn't
need to be a global symbol, and with optimisation enabled we get the
same code from just const char __here[] = ">>><<<";
@@ -247,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_eat_class(char);
+ _IterT _M_begin;
_IterT _M_current;
_IterT _M_end;
_CtypeT& _M_ctype;
This looks like an ABI change, as the size of the type changes.
If I understand correctly this is only needed for the location info,
we could still have nice human readable text in the exceptions without
this, right?