pammon created this revision. Herald added a reviewer: EricWF. In regex, forward (lookahead) assertions like '(?=stuff)' are implemented by constructing a child regular expression 'stuff' and matching that. If the child regular expression contains a backreference, matching it would trip an assertion or reference the wrong capture group, because the child was unaware of the capture groups of its parent. For example, /(x)(?=\1)/ would trip an assertion.
Address this by propagating the parent's submatches into the child, so that backreferences refer to the correct capture groups. This also allows us to eliminate the __mexp_ field, because the child expression shares the entire submatch array with the parent. https://reviews.llvm.org/D32635 Files: include/regex test/std/re/re.alg/re.alg.match/ecma.pass.cpp
Index: test/std/re/re.alg/re.alg.match/ecma.pass.cpp =================================================================== --- test/std/re/re.alg/re.alg.match/ecma.pass.cpp +++ test/std/re/re.alg/re.alg.match/ecma.pass.cpp @@ -637,6 +637,22 @@ assert(m.str(0) == s); } { + std::cmatch m; + const char s[] = "abcabc"; + assert(std::regex_match(s, m, std::regex("(.+)(?=\\1)(\\1)"))); + assert(m.size() == 3); + assert(m.str(1) == "abc"); + assert(m.str(2) == "abc"); + } + { + std::cmatch m; + const char s[] = "aa"; + assert(std::regex_match(s, m, std::regex("(a+)(?!\\1)(a*)"))); + assert(m.size() == 3); + assert(m.str(1) == "aa"); + assert(m.str(2) == ""); + } + { std::cmatch m; const char s[] = "foobar"; assert(std::regex_match(s, m, std::regex("[^\\0]*"))); Index: include/regex =================================================================== --- include/regex +++ include/regex @@ -2826,7 +2826,7 @@ void __push_end_marked_subexpression(unsigned); void __push_empty(); void __push_word_boundary(bool); - void __push_lookahead(const basic_regex&, bool, unsigned); + void __push_lookahead(basic_regex, bool); template <class _Allocator> bool @@ -2843,6 +2843,7 @@ bool __match_at_start_ecma(const _CharT* __first, const _CharT* __last, match_results<const _CharT*, _Allocator>& __m, + const vector<sub_match<const _CharT *>> &incoming_sub_matches, regex_constants::match_flag_type __flags, bool) const; template <class _Allocator> bool @@ -2964,17 +2965,16 @@ typedef __owns_one_state<_CharT> base; basic_regex<_CharT, _Traits> __exp_; - unsigned __mexp_; bool __invert_; __lookahead(const __lookahead&); __lookahead& operator=(const __lookahead&); public: typedef _VSTD::__state<_CharT> __state; _LIBCPP_INLINE_VISIBILITY - __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp) - : base(__s), __exp_(__exp), __mexp_(__mexp), __invert_(__invert) {} + __lookahead(basic_regex<_CharT, _Traits> __exp, bool __invert, __node<_CharT>* __s) + 
: base(__s), __exp_(move(__exp)), __invert_(__invert) {} virtual void __exec(__state&) const; }; @@ -2987,16 +2987,18 @@ __m.__init(1 + __exp_.mark_count(), __s.__current_, __s.__last_); bool __matched = __exp_.__match_at_start_ecma( __s.__current_, __s.__last_, - __m, + __m, __s.__sub_matches_, (__s.__flags_ | regex_constants::match_continuous) & ~regex_constants::__full_match, __s.__at_first_ && __s.__current_ == __s.__first_); if (__matched != __invert_) { __s.__do_ = __state::__accept_but_not_consume; __s.__node_ = this->first(); - for (unsigned __i = 1; __i < __m.size(); ++__i) { - __s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i]; + if (__matched) { + for (unsigned __i = 1; __i < __m.size(); ++__i) { + __s.__sub_matches_[__i - 1] = __m.__matches_[__i]; + } } } else @@ -4168,26 +4170,16 @@ switch (*__temp) { case '=': - { - basic_regex __exp; - __exp.__flags_ = __flags_; - __temp = __exp.__parse(++__temp, __last); - unsigned __mexp = __exp.__marked_count_; - __push_lookahead(_VSTD::move(__exp), false, __marked_count_); - __marked_count_ += __mexp; - if (__temp == __last || *__temp != ')') - __throw_regex_error<regex_constants::error_paren>(); - __first = ++__temp; - } - break; case '!': { + bool __invert = (*__temp == '!'); basic_regex __exp; __exp.__flags_ = __flags_; + __exp.__marked_count_ = __marked_count_; __temp = __exp.__parse(++__temp, __last); unsigned __mexp = __exp.__marked_count_; - __push_lookahead(_VSTD::move(__exp), true, __marked_count_); - __marked_count_ += __mexp; + __push_lookahead(_VSTD::move(__exp), __invert); + __marked_count_ = __mexp; if (__temp == __last || *__temp != ')') __throw_regex_error<regex_constants::error_paren>(); __first = ++__temp; @@ -4749,12 +4741,11 @@ template <class _CharT, class _Traits> void -basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp, - bool __invert, - unsigned __mexp) +basic_regex<_CharT, _Traits>::__push_lookahead(basic_regex __exp, + bool __invert) { - __end_->first() = 
new __lookahead<_CharT, _Traits>(__exp, __invert, - __end_->first(), __mexp); + __end_->first() = new __lookahead<_CharT, _Traits>(move(__exp), __invert, + __end_->first()); __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); } @@ -5529,6 +5520,7 @@ basic_regex<_CharT, _Traits>::__match_at_start_ecma( const _CharT* __first, const _CharT* __last, match_results<const _CharT*, _Allocator>& __m, + const vector<sub_match<const _CharT *>> &incoming_sub_matches, regex_constants::match_flag_type __flags, bool __at_first) const { vector<__state> __states; @@ -5539,13 +5531,15 @@ __unmatched.first = __last; __unmatched.second = __last; __unmatched.matched = false; + vector<sub_match<const _CharT *>> sub_matches = incoming_sub_matches; + sub_matches.resize(mark_count(), __unmatched); __states.push_back(__state()); __states.back().__do_ = 0; __states.back().__first_ = __first; __states.back().__current_ = __first; __states.back().__last_ = __last; - __states.back().__sub_matches_.resize(mark_count(), __unmatched); + __states.back().__sub_matches_ = move(sub_matches); __states.back().__loop_data_.resize(__loop_count()); __states.back().__node_ = __st; __states.back().__flags_ = __flags; @@ -5798,7 +5792,7 @@ regex_constants::match_flag_type __flags, bool __at_first) const { if ((__flags_ & 0x1F0) == ECMAScript) - return __match_at_start_ecma(__first, __last, __m, __flags, __at_first); + return __match_at_start_ecma(__first, __last, __m, {}, __flags, __at_first); if (mark_count() == 0) return __match_at_start_posix_nosubs(__first, __last, __m, __flags, __at_first); return __match_at_start_posix_subs(__first, __last, __m, __flags, __at_first);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits