Bill Fisher: This patch fixes a bug where std::regex in ECMAScript mode was ignoring capture groups inside lookahead assertions.
For example, matching /(?=(a))(a)/ against "a" should yield two captures: \1 = "a" and \2 = "a".
git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@186954 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/regex b/include/regex
index 3ec2ff9..bde3af7 100644
--- a/include/regex
+++ b/include/regex
@@ -2769,7 +2769,7 @@
void __push_end_marked_subexpression(unsigned);
void __push_empty();
void __push_word_boundary(bool);
- void __push_lookahead(const basic_regex&, bool);
+ void __push_lookahead(const basic_regex&, bool, unsigned);
template <class _Allocator>
bool
@@ -2907,6 +2907,7 @@
typedef __owns_one_state<_CharT> base;
basic_regex<_CharT, _Traits> __exp_;
+ unsigned __mexp_;
bool __invert_;
__lookahead(const __lookahead&);
@@ -2915,8 +2916,8 @@
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
- __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s)
- : base(__s), __exp_(__exp), __invert_(__invert) {}
+ __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
+ : base(__s), __exp_(__exp), __mexp_(__mexp), __invert_(__invert) {} // initializer order mirrors member declaration order
virtual void __exec(__state&) const;
};
@@ -2935,6 +2936,9 @@
{
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
+ for (unsigned __i = 1; __i < __m.size(); ++__i) {
+ __s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
+ }
}
else
{
@@ -4168,7 +4172,9 @@
basic_regex __exp;
__exp.__flags_ = __flags_;
__temp = __exp.__parse(++__temp, __last);
- __push_lookahead(_VSTD::move(__exp), false);
+ unsigned __mexp = __exp.__marked_count_;
+ __push_lookahead(_VSTD::move(__exp), false, __marked_count_);
+ __marked_count_ += __mexp;
#ifndef _LIBCPP_NO_EXCEPTIONS
if (__temp == __last || *__temp != ')')
throw regex_error(regex_constants::error_paren);
@@ -4181,7 +4187,9 @@
basic_regex __exp;
__exp.__flags_ = __flags_;
__temp = __exp.__parse(++__temp, __last);
- __push_lookahead(_VSTD::move(__exp), true);
+ unsigned __mexp = __exp.__marked_count_;
+ __push_lookahead(_VSTD::move(__exp), true, __marked_count_);
+ __marked_count_ += __mexp;
#ifndef _LIBCPP_NO_EXCEPTIONS
if (__temp == __last || *__temp != ')')
throw regex_error(regex_constants::error_paren);
@@ -4759,10 +4767,11 @@
template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp,
- bool __invert)
+ bool __invert,
+ unsigned __mexp) // forwarded to the __lookahead node, which uses it as the base index when copying its captures into the outer sub-matches
{
__end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert,
- __end_->first());
+ __end_->first(), __mexp);
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
diff --git a/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp b/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp
new file mode 100644
index 0000000..78e1e65
--- /dev/null
+++ b/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <regex>
+
+// template <class BidirectionalIterator, class Allocator, class charT, class traits>
+// bool
+// regex_match(BidirectionalIterator first, BidirectionalIterator last,
+// match_results<BidirectionalIterator, Allocator>& m,
+// const basic_regex<charT, traits>& e,
+// regex_constants::match_flag_type flags = regex_constants::match_default);
+
+// std::regex in ECMAScript mode should not ignore capture groups inside lookahead assertions.
+// For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 = "a", \2 = "a"
+
+#include <regex>
+#include <cassert>
+
+#include "test_iterators.h"
+
+int main() // Checks mark_count() and the sub-matches reported for capture groups placed inside lookahead assertions.
+{
+ {
+ std::regex re{"^(?=(.))a$"}; // group 1 is inside a positive lookahead
+ assert(re.mark_count() == 1);
+
+ std::string s{"a"};
+ std::smatch m;
+ assert(std::regex_match(s, m, re));
+ assert(m.size() == 2);
+ assert(m[0] == "a");
+ assert(m[1] == "a"); // the lookahead's capture is reported
+ }
+
+ {
+ std::regex re{"^(a)(?=(.))(b)$"}; // lookahead group 2 sits between ordinary groups 1 and 3
+ assert(re.mark_count() == 3);
+
+ std::string s{"ab"};
+ std::smatch m;
+ assert(std::regex_match(s, m, re));
+ assert(m.size() == 4);
+ assert(m[0] == "ab");
+ assert(m[1] == "a");
+ assert(m[2] == "b"); // captured by the lookahead, which does not consume input
+ assert(m[3] == "b"); // the same character, captured again by the consuming group
+ }
+
+ {
+ std::regex re{"^(.)(?=(.)(?=.(.)))(...)$"}; // nested lookaheads: group 2 in the outer one, group 3 in the inner one
+ assert(re.mark_count() == 4);
+
+ std::string s{"abcd"};
+ std::smatch m;
+ assert(std::regex_match(s, m, re));
+ assert(m.size() == 5);
+ assert(m[0] == "abcd");
+ assert(m[1] == "a");
+ assert(m[2] == "b");
+ assert(m[3] == "d");
+ assert(m[4] == "bcd");
+ }
+
+ {
+ std::regex re{"^(a)(?!([^b]))(.c)$"}; // group 2 is inside a negative lookahead
+ assert(re.mark_count() == 3);
+
+ std::string s{"abc"};
+ std::smatch m;
+ assert(std::regex_match(s, m, re));
+ assert(m.size() == 4);
+ assert(m[0] == "abc");
+ assert(m[1] == "a");
+ assert(m[2] == ""); // expected empty: the group is counted but holds no text
+ assert(m[3] == "bc");
+ }
+
+ {
+ std::regex re{"^(?!((b)))(?=(.))(?!(abc)).b$"}; // groups 1, 2 and 4 in negative lookaheads, group 3 in a positive one
+ assert(re.mark_count() == 4);
+
+ std::string s{"ab"};
+ std::smatch m;
+ assert(std::regex_match(s, m, re));
+ assert(m.size() == 5);
+ assert(m[0] == "ab");
+ assert(m[1] == "");
+ assert(m[2] == "");
+ assert(m[3] == "a"); // only the positive lookahead's group holds text
+ assert(m[4] == "");
+ }
+}