Fixed some bugs in the ecma bracket expression regarding escaped characters, and got the awk grammar going.
git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@109599 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/regex b/include/regex
index 14a0e4b..0bafbcf 100644
--- a/include/regex
+++ b/include/regex
@@ -2194,10 +2194,12 @@
_Traits __traits_;
vector<_CharT> __chars_;
+ vector<_CharT> __neg_chars_;
vector<pair<string_type, string_type> > __ranges_;
vector<pair<_CharT, _CharT> > __digraphs_;
vector<string_type> __equivalences_;
ctype_base::mask __mask_;
+ ctype_base::mask __neg_mask_;
bool __negate_;
bool __icase_;
bool __collate_;
@@ -2210,12 +2212,14 @@
__bracket_expression(const _Traits& __traits, __node<_CharT>* __s,
bool __negate, bool __icase, bool __collate)
- : base(__s), __traits_(__traits), __mask_(), __negate_(__negate),
- __icase_(__icase), __collate_(__collate),
+ : base(__s), __traits_(__traits), __mask_(), __neg_mask_(),
+ __negate_(__negate), __icase_(__icase), __collate_(__collate),
__might_have_digraph_(__traits_.getloc().name() != "C") {}
virtual void __exec(__state&) const;
+ bool __negated() const {return __negate_;} // read-only access to the __negate_ flag this node was constructed with
+
void __add_char(_CharT __c)
{
if (__icase_)
@@ -2225,6 +2229,15 @@
else
__chars_.push_back(__c);
}
+ void __add_neg_char(_CharT __c) // appends __c to __neg_chars_, normalized according to this node's flags
+ {
+ if (__icase_)
+ __neg_chars_.push_back(__traits_.translate_nocase(__c)); // __icase_ set: store the translate_nocase() form
+ else if (__collate_)
+ __neg_chars_.push_back(__traits_.translate(__c)); // only __collate_ set: store the translate() form
+ else
+ __neg_chars_.push_back(__c); // neither flag set: store the character unchanged
+ }
void __add_range(string_type __b, string_type __e)
{
if (__collate_)
@@ -2274,6 +2287,8 @@
{__equivalences_.push_back(__s);}
void __add_class(ctype_base::mask __mask)
{__mask_ |= __mask;}
+ void __add_neg_class(ctype_base::mask __mask) // ORs __mask into __neg_mask_; earlier bits are kept
+ {__neg_mask_ |= __mask;}
virtual string speak() const
{
@@ -2353,6 +2368,12 @@
__found = true;
goto __exit;
}
+ if (!__traits_.isctype(__ch2.first, __neg_mask_) &&
+ !__traits_.isctype(__ch2.second, __neg_mask_))
+ {
+ __found = true;
+ goto __exit;
+ }
goto __exit;
}
}
@@ -2371,6 +2392,17 @@
goto __exit;
}
}
+ if (!__neg_chars_.empty())
+ {
+ for (size_t __i = 0; __i < __neg_chars_.size(); ++__i)
+ {
+ if (__ch == __neg_chars_[__i])
+ goto __is_neg_char;
+ }
+ __found = true;
+ goto __exit;
+ }
+__is_neg_char:
if (!__ranges_.empty())
{
string_type __s2 = __collate_ ?
@@ -2398,7 +2430,15 @@
}
}
if (__traits_.isctype(__ch, __mask_))
+ {
__found = true;
+ goto __exit;
+ }
+ if (__neg_mask_ && !__traits_.isctype(__ch, __neg_mask_))
+ {
+ __found = true;
+ goto __exit;
+ }
}
else
__found = __negate_; // force reject
@@ -2644,7 +2684,8 @@
__parse_character_class_escape(_ForwardIterator __first, _ForwardIterator __last);
template <class _ForwardIterator>
_ForwardIterator
- __parse_character_escape(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_character_escape(_ForwardIterator __first, _ForwardIterator __last,
+ basic_string<_CharT>* __str = nullptr);
template <class _ForwardIterator>
_ForwardIterator
__parse_pattern_character(_ForwardIterator __first, _ForwardIterator __last);
@@ -2654,6 +2695,15 @@
template <class _ForwardIterator>
_ForwardIterator
__parse_egrep(_ForwardIterator __first, _ForwardIterator __last);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_class_escape(_ForwardIterator __first, _ForwardIterator __last,
+ basic_string<_CharT>& __str,
+ __bracket_expression<_CharT, _Traits>* __ml);
+ template <class _ForwardIterator>
+ _ForwardIterator
+ __parse_awk_escape(_ForwardIterator __first, _ForwardIterator __last,
+ basic_string<_CharT>* __str = nullptr);
void __push_l_anchor() {__left_anchor_ = true;}
void __push_r_anchor();
@@ -2834,9 +2884,8 @@
__first = __parse_basic_reg_exp(__first, __last);
break;
case extended:
- __first = __parse_extended_reg_exp(__first, __last);
- break;
case awk:
+ __first = __parse_extended_reg_exp(__first, __last);
break;
case grep:
__first = __parse_grep(__first, __last);
@@ -3289,6 +3338,10 @@
__push_char(*__temp);
__first = ++__temp;
break;
+ default:
+ if ((__flags_ & 0x1F0) == awk)
+ __first = __parse_awk_escape(++__first, __last);
+ break;
}
}
}
@@ -3488,7 +3541,7 @@
// __ml owned by *this
if (__first == __last)
throw regex_error(regex_constants::error_brack);
- if (*__first == ']')
+ if ((__flags_ & 0x1F0) != ECMAScript && *__first == ']')
{
__ml->__add_char(']');
++__first;
@@ -3538,7 +3591,6 @@
{
if (__first != __last && *__first != ']')
{
- bool __parsed_one = false;
_ForwardIterator __temp = next(__first);
basic_string<_CharT> __start_range;
if (__temp != __last && *__first == '[')
@@ -3548,15 +3600,23 @@
else if (*__temp == ':')
return __parse_character_class(++__temp, __last, __ml);
else if (*__temp == '.')
- {
__first = __parse_collating_symbol(++__temp, __last, __start_range);
- __parsed_one = true;
- }
}
- if (!__parsed_one)
+ unsigned __grammar = __flags_ & 0x1F0;
+ if (__start_range.empty())
{
- __start_range = *__first;
- ++__first;
+ if ((__grammar == ECMAScript || __grammar == awk) && *__first == '\\')
+ {
+ if (__grammar == ECMAScript)
+ __first = __parse_class_escape(++__first, __last, __start_range, __ml);
+ else
+ __first = __parse_awk_escape(++__first, __last, &__start_range);
+ }
+ else
+ {
+ __start_range = *__first;
+ ++__first;
+ }
}
if (__first != __last && *__first != ']')
{
@@ -3571,8 +3631,20 @@
__first = __parse_collating_symbol(++__temp, __last, __end_range);
else
{
- __end_range = *__first;
- ++__first;
+ if ((__grammar == ECMAScript || __grammar == awk) && *__first == '\\')
+ {
+ if (__grammar == ECMAScript)
+ __first = __parse_class_escape(++__first, __last,
+ __end_range, __ml);
+ else
+ __first = __parse_awk_escape(++__first, __last,
+ &__end_range);
+ }
+ else
+ {
+ __end_range = *__first;
+ ++__first;
+ }
}
__ml->__add_range(_STD::move(__start_range), _STD::move(__end_range));
}
@@ -3598,6 +3670,130 @@
template <class _CharT, class _Traits>
template <class _ForwardIterator>
_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_class_escape(_ForwardIterator __first, // first character after the backslash
+ _ForwardIterator __last,
+ basic_string<_CharT>& __str, // receives the character when the escape denotes a single character
+ __bracket_expression<_CharT, _Traits>* __ml) // receives the class when the escape denotes a character class
+{ // Parses one backslash escape inside a bracket expression; returns the iterator past the consumed text.
+ if (__first == __last)
+ throw regex_error(regex_constants::error_escape); // nothing follows the backslash
+ switch (*__first)
+ {
+ case 0: // a character with value 0 is stored as-is
+ __str = *__first;
+ return ++__first;
+ case 'b': // \b yields the character with value 8
+ __str = _CharT(8);
+ return ++__first;
+ case 'd': // \d adds the digit class; __str is not modified
+ __ml->__add_class(ctype_base::digit);
+ return ++__first;
+ case 'D': // \D adds the digit class to the negated mask
+ __ml->__add_neg_class(ctype_base::digit);
+ return ++__first;
+ case 's': // \s adds the space class
+ __ml->__add_class(ctype_base::space);
+ return ++__first;
+ case 'S': // \S adds the space class to the negated mask
+ __ml->__add_neg_class(ctype_base::space);
+ return ++__first;
+ case 'w': // \w adds the alnum class plus '_'
+ __ml->__add_class(ctype_base::alnum);
+ __ml->__add_char('_');
+ return ++__first;
+ case 'W': // \W adds alnum to the negated mask and '_' to the negated characters
+ __ml->__add_neg_class(ctype_base::alnum);
+ __ml->__add_neg_char('_');
+ return ++__first;
+ }
+ __first = __parse_character_escape(__first, __last, &__str); // any other escape: delegate, passing &__str as the output string
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
+basic_regex<_CharT, _Traits>::__parse_awk_escape(_ForwardIterator __first, // first character after the backslash
+ _ForwardIterator __last,
+ basic_string<_CharT>* __str) // non-null: assign the character to *__str; null: emit it via __push_char
+{ // Parses one awk escape (\\ \" \/ \a \b \f \n \r \t \v or 1-3 octal digits); returns the iterator past the consumed text.
+ if (__first == __last)
+ throw regex_error(regex_constants::error_escape); // nothing follows the backslash
+ switch (*__first)
+ {
+ case '\\': // these three escapes stand for the character itself
+ case '"':
+ case '/':
+ if (__str)
+ *__str = *__first;
+ else
+ __push_char(*__first);
+ return ++__first;
+ case 'a': // character value 7
+ if (__str)
+ *__str = _CharT(7);
+ else
+ __push_char(_CharT(7));
+ return ++__first;
+ case 'b': // character value 8
+ if (__str)
+ *__str = _CharT(8);
+ else
+ __push_char(_CharT(8));
+ return ++__first;
+ case 'f': // character value 0xC
+ if (__str)
+ *__str = _CharT(0xC);
+ else
+ __push_char(_CharT(0xC));
+ return ++__first;
+ case 'n': // character value 0xA
+ if (__str)
+ *__str = _CharT(0xA);
+ else
+ __push_char(_CharT(0xA));
+ return ++__first;
+ case 'r': // character value 0xD
+ if (__str)
+ *__str = _CharT(0xD);
+ else
+ __push_char(_CharT(0xD));
+ return ++__first;
+ case 't': // character value 0x9
+ if (__str)
+ *__str = _CharT(0x9);
+ else
+ __push_char(_CharT(0x9));
+ return ++__first;
+ case 'v': // character value 0xB
+ if (__str)
+ *__str = _CharT(0xB);
+ else
+ __push_char(_CharT(0xB));
+ return ++__first;
+ }
+ if ('0' <= *__first && *__first <= '7') // octal escape: up to three digits are folded into __val
+ {
+ unsigned __val = *__first - '0';
+ if (++__first != __last && ('0' <= *__first && *__first <= '7')) // ++ steps past the first digit either way
+ {
+ __val = 8 * __val + *__first - '0';
+ if (++__first != __last && ('0' <= *__first && *__first <= '7')) // ++ steps past the second digit either way
+ __val = 8 * __val + *__first++ - '0'; // consume the third digit too, so it is not re-parsed as a literal
+ }
+ if (__str)
+ *__str = _CharT(__val);
+ else
+ __push_char(_CharT(__val));
+ }
+ else
+ throw regex_error(regex_constants::error_escape); // not a recognized awk escape
+ return __first;
+}
+
+template <class _CharT, class _Traits>
+template <class _ForwardIterator>
+_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_equivalence_class(_ForwardIterator __first,
_ForwardIterator __last,
__bracket_expression<_CharT, _Traits>* __ml)
@@ -4013,7 +4209,8 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ basic_string<_CharT>* __str)
{
if (__first != __last)
{
@@ -4023,23 +4220,38 @@
switch (*__first)
{
case 'f':
- __push_char(_CharT(0xC));
+ if (__str)
+ *__str = _CharT(0xC);
+ else
+ __push_char(_CharT(0xC));
++__first;
break;
case 'n':
- __push_char(_CharT(0xA));
+ if (__str)
+ *__str = _CharT(0xA);
+ else
+ __push_char(_CharT(0xA));
++__first;
break;
case 'r':
- __push_char(_CharT(0xD));
+ if (__str)
+ *__str = _CharT(0xD);
+ else
+ __push_char(_CharT(0xD));
++__first;
break;
case 't':
- __push_char(_CharT(0x9));
+ if (__str)
+ *__str = _CharT(0x9);
+ else
+ __push_char(_CharT(0x9));
++__first;
break;
case 'v':
- __push_char(_CharT(0xB));
+ if (__str)
+ *__str = _CharT(0xB);
+ else
+ __push_char(_CharT(0xB));
++__first;
break;
case 'c':
@@ -4047,7 +4259,10 @@
{
if ('A' <= *__t <= 'Z' || 'a' <= *__t <= 'z')
{
- __push_char(_CharT(*__t % 32));
+ if (__str)
+ *__str = _CharT(*__t % 32);
+ else
+ __push_char(_CharT(*__t % 32));
__first = ++__t;
}
}
@@ -4079,15 +4294,23 @@
if (__hd == -1)
throw regex_error(regex_constants::error_escape);
__sum = 16 * __sum + __hd;
- __push_char(_CharT(__sum));
+ if (__str)
+ *__str = _CharT(__sum);
+ else
+ __push_char(_CharT(__sum));
++__first;
break;
default:
if (*__first != '_' && !__traits_.isctype(*__first, ctype_base::alnum))
{
- __push_char(*__first);
+ if (__str)
+ *__str = *__first;
+ else
+ __push_char(*__first);
++__first;
}
+ else if (__str)
+ throw regex_error(regex_constants::error_escape);
break;
}
}