A good start on ecma regex's. Maybe even feature complete, not sure yet. Also an unrelated fix to is_constructible thanks to Daniel Krugler.
git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@109479 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/regex b/include/regex
index c2d6a51..b60e776 100644
--- a/include/regex
+++ b/include/regex
@@ -1223,6 +1223,10 @@
template <class _BidirectionalIterator> class sub_match;
+template <class _BidirectionalIterator,
+ class _Allocator = allocator<sub_match<_BidirectionalIterator> > >
+class match_results;
+
template <class _CharT>
struct __state
{
@@ -1846,6 +1850,84 @@
}
}
+// __word_boundary
+
+template <class _CharT, class _Traits>
+class __word_boundary
+ : public __owns_one_state<_CharT>
+{
+ typedef __owns_one_state<_CharT> base;
+
+ _Traits __traits_;
+ bool __invert_;
+public:
+ typedef _STD::__state<_CharT> __state;
+
+ explicit __word_boundary(const _Traits& __traits, bool __invert,
+ __node<_CharT>* __s)
+ : base(__s), __traits_(__traits), __invert_(__invert) {}
+
+ virtual void __exec(__state&) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ if (__invert_)
+ os << "__word_boundary";
+ else
+ os << "not __word_boundary";
+ return os.str();
+ }
+};
+
+template <class _CharT, class _Traits>
+void
+__word_boundary<_CharT, _Traits>::__exec(__state& __s) const
+{
+ bool __is_word_b = false;
+ if (__s.__first_ != __s.__last_)
+ {
+ if (__s.__current_ == __s.__last_)
+ {
+ if (!(__s.__flags_ & regex_constants::match_not_eow))
+ {
+ _CharT __c = __s.__current_[-1];
+ __is_word_b = __c == '_' ||
+ __traits_.isctype(__c, ctype_base::alnum);
+ }
+ }
+ else if (__s.__current_ == __s.__first_)
+ {
+ if (!(__s.__flags_ & regex_constants::match_not_bow))
+ {
+ _CharT __c = *__s.__current_;
+ __is_word_b = __c == '_' ||
+ __traits_.isctype(__c, ctype_base::alnum);
+ }
+ }
+ else
+ {
+ _CharT __c1 = __s.__current_[-1];
+ _CharT __c2 = *__s.__current_;
+ bool __is_c1_b = __c1 == '_' ||
+ __traits_.isctype(__c1, ctype_base::alnum);
+ bool __is_c2_b = __c2 == '_' ||
+ __traits_.isctype(__c2, ctype_base::alnum);
+ __is_word_b = __is_c1_b != __is_c2_b;
+ }
+ }
+ if (__is_word_b != __invert_)
+ {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ }
+ else
+ {
+ __s.__do_ = __state::__reject;
+ __s.__node_ = nullptr;
+ }
+}
+
// __r_anchor
template <class _CharT>
@@ -1927,6 +2009,30 @@
}
}
+// __match_any_but_newline
+
+template <class _CharT>
+class __match_any_but_newline
+ : public __owns_one_state<_CharT>
+{
+ typedef __owns_one_state<_CharT> base;
+
+public:
+ typedef _STD::__state<_CharT> __state;
+
+ __match_any_but_newline(__node<_CharT>* __s)
+ : base(__s) {}
+
+ virtual void __exec(__state&) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "match any but newline";
+ return os.str();
+ }
+};
+
// __match_char
template <class _CharT>
@@ -2299,7 +2405,57 @@
}
}
-template <class, class> class match_results;
+// __lookahead
+
+template <class _CharT, class _Traits>
+class __lookahead
+ : public __owns_one_state<_CharT>
+{
+ typedef __owns_one_state<_CharT> base;
+
+ _Traits __traits_;
+ bool __invert_;
+
+ __lookahead(const __lookahead&);
+ __lookahead& operator=(const __lookahead&);
+public:
+ typedef _STD::__state<_CharT> __state;
+
+ __lookahead(const _Traits& __traits, bool __invert, __node<_CharT>* __s)
+ : base(__s), __traits_(__traits), __invert_(__invert) {}
+
+ virtual void __exec(__state&) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ if (__invert_)
+ os << "lookahead";
+ else
+ os << "not lookahead";
+ return os.str();
+ }
+};
+
+template <class _CharT, class _Traits>
+void
+__lookahead<_CharT, _Traits>::__exec(__state& __s) const
+{
+// match_results<const _CharT*> __m;
+// __m.__init(1 + mark_count(), __s.__current_, __s.__last_);
+// bool __matched = __exp_.__match_at_start_ecma(__s.__current_, __s.__last_,
+// __m, __s.__flags_);
+// if (__matched != __invert_)
+// {
+// __s.__do_ = __state::__accept_but_not_consume;
+// __s.__node_ = this->first();
+// }
+// else
+// {
+// __s.__do_ = __state::__reject;
+// __s.__node_ = nullptr;
+// }
+}
template <class _CharT, class _Traits = regex_traits<_CharT> >
class basic_regex
@@ -2521,15 +2677,32 @@
__parse_atom(_ForwardIterator __first, _ForwardIterator __last);
template <class _ForwardIterator>
_ForwardIterator
- __parse_quantifier(_ForwardIterator __first, _ForwardIterator __last) {} // temp!
+ __parse_atom_escape(_ForwardIterator __first, _ForwardIterator __last);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_decimal_escape(_ForwardIterator __first, _ForwardIterator __last);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_character_class_escape(_ForwardIterator __first, _ForwardIterator __last);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_character_escape(_ForwardIterator __first, _ForwardIterator __last);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_pattern_character(_ForwardIterator __first, _ForwardIterator __last);
void __push_l_anchor() {__left_anchor_ = true;}
void __push_r_anchor();
void __push_match_any();
+ void __push_match_any_but_newline();
void __push_greedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s,
unsigned __mexp_begin = 0, unsigned __mexp_end = 0)
{__push_loop(__min, numeric_limits<size_t>::max(), __s,
__mexp_begin, __mexp_end);}
+ void __push_nongreedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s,
+ unsigned __mexp_begin = 0, unsigned __mexp_end = 0)
+ {__push_loop(__min, numeric_limits<size_t>::max(), __s,
+ __mexp_begin, __mexp_end, false);}
void __push_loop(size_t __min, size_t __max, __owns_one_state<_CharT>* __s,
size_t __mexp_begin = 0, size_t __mexp_end = 0,
bool __greedy = true);
@@ -2541,11 +2714,8 @@
void __push_begin_marked_subexpression();
void __push_end_marked_subexpression(unsigned);
void __push_empty();
- void __push_word_boundary(bool) {}
- void __push_start_pos_lookahead() {}
- void __push_end_pos_lookahead() {}
- void __push_start_neg_lookahead() {}
- void __push_end_neg_lookahead() {}
+ void __push_word_boundary(bool);
+ void __push_lookahead(bool) {}
template <class _Allocator>
bool
@@ -2559,10 +2729,10 @@
match_results<const _CharT*, _Allocator>& __m,
vector<size_t>& __lc,
regex_constants::match_flag_type __flags) const;
- template <class _BidirectionalIterator, class _Allocator>
+ template <class _Allocator>
bool
- __match_at_start_ecma(_BidirectionalIterator __first, _BidirectionalIterator __last,
- match_results<_BidirectionalIterator, _Allocator>& __m,
+ __match_at_start_ecma(const _CharT* __first, const _CharT* __last,
+ match_results<const _CharT*, _Allocator>& __m,
regex_constants::match_flag_type __flags) const;
template <class _Allocator>
bool
@@ -3185,16 +3355,34 @@
switch (*__first)
{
case '*':
- __push_greedy_inf_repeat(0, __s, __mexp_begin, __mexp_end);
++__first;
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_nongreedy_inf_repeat(0, __s, __mexp_begin, __mexp_end);
+ }
+ else
+ __push_greedy_inf_repeat(0, __s, __mexp_begin, __mexp_end);
break;
case '+':
- __push_greedy_inf_repeat(1, __s, __mexp_begin, __mexp_end);
++__first;
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_nongreedy_inf_repeat(1, __s, __mexp_begin, __mexp_end);
+ }
+ else
+ __push_greedy_inf_repeat(1, __s, __mexp_begin, __mexp_end);
break;
case '?':
- __push_loop(0, 1, __s, __mexp_begin, __mexp_end);
++__first;
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_loop(0, 1, __s, __mexp_begin, __mexp_end, false);
+ }
+ else
+ __push_loop(0, 1, __s, __mexp_begin, __mexp_end);
break;
case '{':
{
@@ -3208,16 +3396,28 @@
switch (*__first)
{
case '}':
- __push_loop(__min, __min, __s, __mexp_begin, __mexp_end);
++__first;
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_loop(__min, __min, __s, __mexp_begin, __mexp_end, false);
+ }
+ else
+ __push_loop(__min, __min, __s, __mexp_begin, __mexp_end);
break;
case ',':
if (++__first == __last)
throw regex_error(regex_constants::error_badbrace);
if (*__first == '}')
{
- __push_greedy_inf_repeat(__min, __s, __mexp_begin, __mexp_end);
++__first;
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_nongreedy_inf_repeat(__min, __s, __mexp_begin, __mexp_end);
+ }
+ else
+ __push_greedy_inf_repeat(__min, __s, __mexp_begin, __mexp_end);
}
else
{
@@ -3231,7 +3431,13 @@
++__first;
if (__max < __min)
throw regex_error(regex_constants::error_badbrace);
- __push_loop(__min, __max, __s, __mexp_begin, __mexp_end);
+ if ((__flags_ & ECMAScript) && __first != __last && *__first == '?')
+ {
+ ++__first;
+ __push_loop(__min, __max, __s, __mexp_begin, __mexp_end, false);
+ }
+ else
+ __push_loop(__min, __max, __s, __mexp_begin, __mexp_end);
}
break;
default:
@@ -3536,10 +3742,15 @@
_ForwardIterator __temp = __parse_assertion(__first, __last);
if (__temp == __first)
{
+ __owns_one_state<_CharT>* __e = __end_;
+ unsigned __mexp_begin = __marked_count_;
__temp = __parse_atom(__first, __last);
if (__temp != __first)
- __first = __parse_quantifier(__temp, __last);
+ __first = __parse_ERE_dupl_symbol(__temp, __last, __e,
+ __mexp_begin+1, __marked_count_+1);
}
+ else
+ __first = __temp;
return __first;
}
@@ -3568,12 +3779,12 @@
{
if (*__temp == 'b')
{
- __push_word_boundary(true);
+ __push_word_boundary(false);
__first = ++__temp;
}
else if (*__temp == 'B')
{
- __push_word_boundary(false);
+ __push_word_boundary(true);
__first = ++__temp;
}
}
@@ -3589,19 +3800,17 @@
switch (*__temp)
{
case '=':
- __push_start_pos_lookahead();
+ __push_lookahead(false);
__temp = __parse_ecma_exp(++__temp, __last);
if (__temp == __last || *__temp != ')')
throw regex_error(regex_constants::error_paren);
- __push_end_pos_lookahead();
__first = ++__temp;
break;
case '!':
- __push_start_neg_lookahead();
+ __push_lookahead(true);
__temp = __parse_ecma_exp(++__temp, __last);
if (__temp == __last || *__temp != ')')
throw regex_error(regex_constants::error_paren);
- __push_end_neg_lookahead();
__first = ++__temp;
break;
}
@@ -3620,7 +3829,275 @@
basic_regex<_CharT, _Traits>::__parse_atom(_ForwardIterator __first,
_ForwardIterator __last)
{
- return __first; // temp!
+ if (__first != __last)
+ {
+ switch (*__first)
+ {
+ case '.':
+ __push_match_any_but_newline();
+ ++__first;
+ break;
+ case '\\':
+ __first = __parse_atom_escape(__first, __last);
+ break;
+ case '[':
+ __first = __parse_bracket_expression(__first, __last);
+ break;
+ case '(':
+ {
+ if (++__first == __last)
+ throw regex_error(regex_constants::error_paren);
+ _ForwardIterator __temp = _STD::next(__first);
+ if (__temp != __last && *__first == '?' && *__temp == ':')
+ {
+ ++__open_count_;
+ __first = __parse_ecma_exp(++__temp, __last);
+ if (__first == __last || *__first != ')')
+ throw regex_error(regex_constants::error_paren);
+ --__open_count_;
+ ++__first;
+ }
+ else
+ {
+ __push_begin_marked_subexpression();
+ unsigned __temp_count = __marked_count_;
+ ++__open_count_;
+ __first = __parse_ecma_exp(__first, __last);
+ if (__first == __last || *__first != ')')
+ throw regex_error(regex_constants::error_paren);
+ __push_end_marked_subexpression(__temp_count);
+ --__open_count_;
+ ++__first;
+ }
+ }
+ break;
+ default:
+ __first = __parse_pattern_character(__first, __last);
+ break;
+ }
+ }
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_atom_escape(_ForwardIterator __first,
+ _ForwardIterator __last)
+{
+ if (__first != __last && *__first == '\\')
+ {
+ _ForwardIterator __t1 = _STD::next(__first);
+ _ForwardIterator __t2 = __parse_decimal_escape(__t1, __last);
+ if (__t2 != __t1)
+ __first = __t2;
+ else
+ {
+ __t2 = __parse_character_class_escape(__t1, __last);
+ if (__t2 != __t1)
+ __first = __t2;
+ else
+ {
+ __t2 = __parse_character_escape(__t1, __last);
+ if (__t2 != __t1)
+ __first = __t2;
+ }
+ }
+ }
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_decimal_escape(_ForwardIterator __first,
+ _ForwardIterator __last)
+{
+ if (__first != __last)
+ {
+ if (*__first == '0')
+ {
+ __push_char(_CharT());
+ ++__first;
+ }
+ else if ('1' <= *__first && *__first <= '9')
+ {
+ unsigned __v = *__first - '0';
+ for (++__first; '0' <= *__first && *__first <= '9'; ++__first)
+ __v = 10 * __v + *__first - '0';
+ if (__v > mark_count())
+ throw regex_error(regex_constants::error_backref);
+ __push_back_ref(__v);
+ }
+ }
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_character_class_escape(_ForwardIterator __first,
+ _ForwardIterator __last)
+{
+ if (__first != __last)
+ {
+ __bracket_expression<_CharT, _Traits>* __ml;
+ switch (*__first)
+ {
+ case 'd':
+ __ml = __start_matching_list(false);
+ __ml->__add_class(ctype_base::digit);
+ ++__first;
+ break;
+ case 'D':
+ __ml = __start_matching_list(true);
+ __ml->__add_class(ctype_base::digit);
+ ++__first;
+ break;
+ case 's':
+ __ml = __start_matching_list(false);
+ __ml->__add_class(ctype_base::space);
+ ++__first;
+ break;
+ case 'S':
+ __ml = __start_matching_list(true);
+ __ml->__add_class(ctype_base::space);
+ ++__first;
+ break;
+ case 'w':
+ __ml = __start_matching_list(false);
+ __ml->__add_class(ctype_base::alnum);
+ __ml->__add_char('_');
+ ++__first;
+ break;
+ case 'W':
+ __ml = __start_matching_list(true);
+ __ml->__add_class(ctype_base::alnum);
+ __ml->__add_char('_');
+ ++__first;
+ break;
+ }
+ }
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
+ _ForwardIterator __last)
+{
+ if (__first != __last)
+ {
+ _ForwardIterator __t;
+ unsigned __sum = 0;
+ int __hd;
+ switch (*__first)
+ {
+ case 'f':
+ __push_char(_CharT(0xC));
+ ++__first;
+ break;
+ case 'n':
+ __push_char(_CharT(0xA));
+ ++__first;
+ break;
+ case 'r':
+ __push_char(_CharT(0xD));
+ ++__first;
+ break;
+ case 't':
+ __push_char(_CharT(0x9));
+ ++__first;
+ break;
+ case 'v':
+ __push_char(_CharT(0xB));
+ ++__first;
+ break;
+ case 'c':
+ if ((__t = _STD::next(__first)) != __last)
+ {
+ if ('A' <= *__t <= 'Z' || 'a' <= *__t <= 'z')
+ {
+ __push_char(_CharT(*__t % 32));
+ __first = ++__t;
+ }
+ }
+ break;
+ case 'u':
+ if (++__first == __last)
+ throw regex_error(regex_constants::error_escape);
+ __hd = __traits_.value(*__first, 16);
+ if (__hd == -1)
+ throw regex_error(regex_constants::error_escape);
+ __sum = 16 * __sum + __hd;
+ if (++__first == __last)
+ throw regex_error(regex_constants::error_escape);
+ __hd = __traits_.value(*__first, 16);
+ if (__hd == -1)
+ throw regex_error(regex_constants::error_escape);
+ __sum = 16 * __sum + __hd;
+ // drop through
+ case 'x':
+ if (++__first == __last)
+ throw regex_error(regex_constants::error_escape);
+ __hd = __traits_.value(*__first, 16);
+ if (__hd == -1)
+ throw regex_error(regex_constants::error_escape);
+ __sum = 16 * __sum + __hd;
+ if (++__first == __last)
+ throw regex_error(regex_constants::error_escape);
+ __hd = __traits_.value(*__first, 16);
+ if (__hd == -1)
+ throw regex_error(regex_constants::error_escape);
+ __sum = 16 * __sum + __hd;
+ __push_char(_CharT(__sum));
+ ++__first;
+ break;
+ default:
+ if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum))
+ {
+ __push_char(*__first);
+ ++__first;
+ }
+ break;
+ }
+ }
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_pattern_character(_ForwardIterator __first,
+ _ForwardIterator __last)
+{
+ if (__first != __last)
+ {
+ switch (*__first)
+ {
+ case '^':
+ case '$':
+ case '\\':
+ case '.':
+ case '*':
+ case '+':
+ case '?':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ break;
+ default:
+ __push_char(*__first);
+ ++__first;
+ break;
+ }
+ }
+ return __first;
}
template <class _CharT, class _Traits>
@@ -3700,6 +4177,14 @@
template <class _CharT, class _Traits>
void
+basic_regex<_CharT, _Traits>::__push_match_any_but_newline()
+{
+ __end_->first() = new __match_any_but_newline<_CharT>(__end_->first());
+ __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
+}
+
+template <class _CharT, class _Traits>
+void
basic_regex<_CharT, _Traits>::__push_empty()
{
__end_->first() = new __empty_state<_CharT>(__end_->first());
@@ -3708,6 +4193,15 @@
template <class _CharT, class _Traits>
void
+basic_regex<_CharT, _Traits>::__push_word_boundary(bool __invert)
+{
+ __end_->first() = new __word_boundary<_CharT, _Traits>(__traits_, __invert,
+ __end_->first());
+ __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
+}
+
+template <class _CharT, class _Traits>
+void
basic_regex<_CharT, _Traits>::__push_back_ref(int __i)
{
if (flags() & icase)
@@ -4168,8 +4662,7 @@
return __os << __m.str();
}
-template <class _BidirectionalIterator,
- class _Allocator = allocator<sub_match<_BidirectionalIterator> > >
+template <class _BidirectionalIterator, class _Allocator>
class match_results
{
public:
@@ -4333,13 +4826,64 @@
// regex_search
template <class _CharT, class _Traits>
-template <class _BidirectionalIterator, class _Allocator>
+template <class _Allocator>
bool
basic_regex<_CharT, _Traits>::__match_at_start_ecma(
- _BidirectionalIterator __first, _BidirectionalIterator __last,
- match_results<_BidirectionalIterator, _Allocator>& __m,
+ const _CharT* __first, const _CharT* __last,
+ match_results<const _CharT*, _Allocator>& __m,
regex_constants::match_flag_type __flags) const
{
+ vector<__state> __states;
+ ptrdiff_t __j = 0;
+ ptrdiff_t _N = _STD::distance(__first, __last);
+ __node* __st = __start_.get();
+ if (__st)
+ {
+ __states.push_back(__state());
+ __states.back().__do_ = 0;
+ __states.back().__first_ = __first;
+ __states.back().__current_ = __first;
+ __states.back().__last_ = __last;
+ __states.back().__sub_matches_.resize(mark_count());
+ __states.back().__loop_data_.resize(__loop_count());
+ __states.back().__node_ = __st;
+ __states.back().__flags_ = __flags;
+ bool __matched = false;
+ do
+ {
+ __state& __s = __states.back();
+ if (__s.__node_)
+ __s.__node_->__exec(__s);
+ switch (__s.__do_)
+ {
+ case __state::__end_state:
+ __m.__matches_[0].first = __first;
+ __m.__matches_[0].second = _STD::next(__first, __s.__current_ - __first);
+ __m.__matches_[0].matched = true;
+ for (unsigned __i = 0; __i < __s.__sub_matches_.size(); ++__i)
+ __m.__matches_[__i+1] = __s.__sub_matches_[__i];
+ return true;
+ case __state::__accept_and_consume:
+ case __state::__repeat:
+ case __state::__accept_but_not_consume:
+ break;
+ case __state::__split:
+ {
+ __state __snext = __s;
+ __s.__node_->__exec_split(true, __s);
+ __snext.__node_->__exec_split(false, __snext);
+ __states.push_back(_STD::move(__snext));
+ }
+ break;
+ case __state::__reject:
+ __states.pop_back();
+ break;
+ default:
+ throw regex_error(regex_constants::error_temp);
+ break;
+ }
+ } while (!__states.empty());
+ }
return false;
}
@@ -4431,8 +4975,6 @@
regex_constants::match_flag_type __flags) const
{
vector<__state> __states;
- vector<const _CharT*> __current_stack;
- vector<sub_match<const _CharT*> > __saved_matches;
__state __best_state;
ptrdiff_t __j = 0;
ptrdiff_t __highest_j = 0;