Merge V8 5.3.332.45. DO NOT MERGE
Test: Manual
FPIIM-449
Change-Id: Id3254828b068abdea3cb10442e0172a8c9a98e03
(cherry picked from commit 13e2dadd00298019ed862f2b2fc5068bba730bcf)
diff --git a/src/regexp/arm/regexp-macro-assembler-arm.cc b/src/regexp/arm/regexp-macro-assembler-arm.cc
index f8dfc97..bf762b5 100644
--- a/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -9,7 +9,6 @@
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
diff --git a/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index e8bdad8..96d0c25 100644
--- a/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -9,7 +9,6 @@
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
diff --git a/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index 9c55af6..6b4ea24 100644
--- a/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -8,7 +8,6 @@
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 6c50f4e..c3b670b 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -397,6 +397,7 @@
Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
data->set(JSRegExp::code_index(is_one_byte), result.code);
+ SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(*data, result.num_registers);
@@ -416,6 +417,14 @@
re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
}
+void RegExpImpl::SetIrregexpCaptureNameMap(FixedArray* re,
+ Handle<FixedArray> value) {
+ if (value.is_null()) {
+ re->set(JSRegExp::kIrregexpCaptureNameMapIndex, Smi::FromInt(0));
+ } else {
+ re->set(JSRegExp::kIrregexpCaptureNameMapIndex, *value);
+ }
+}
int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
diff --git a/src/regexp/jsregexp.h b/src/regexp/jsregexp.h
index e55d650..dc8aee1 100644
--- a/src/regexp/jsregexp.h
+++ b/src/regexp/jsregexp.h
@@ -196,6 +196,8 @@
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray* re);
static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
+ static void SetIrregexpCaptureNameMap(FixedArray* re,
+ Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray* re);
static int IrregexpNumberOfRegisters(FixedArray* re);
static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte);
@@ -1530,6 +1532,7 @@
RegExpNode* node;
bool simple;
bool contains_anchor;
+ Handle<FixedArray> capture_name_map;
Handle<String> error;
int capture_count;
};
diff --git a/src/regexp/ppc/OWNERS b/src/regexp/ppc/OWNERS
index eb007cb..752e8e3 100644
--- a/src/regexp/ppc/OWNERS
+++ b/src/regexp/ppc/OWNERS
@@ -3,3 +3,4 @@
joransiu@ca.ibm.com
mbrandy@us.ibm.com
michael_dawson@ca.ibm.com
+bjaideep@ca.ibm.com
diff --git a/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index 70842f5..a7418dd 100644
--- a/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -10,7 +10,6 @@
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
diff --git a/src/regexp/regexp-ast.h b/src/regexp/regexp-ast.h
index 39c9cee..406bf84 100644
--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@@ -7,6 +7,7 @@
#include "src/objects.h"
#include "src/utils.h"
+#include "src/zone-containers.h"
#include "src/zone.h"
namespace v8 {
@@ -412,7 +413,8 @@
class RegExpCapture final : public RegExpTree {
public:
- explicit RegExpCapture(int index) : body_(NULL), index_(index) {}
+ explicit RegExpCapture(int index)
+ : body_(NULL), index_(index), name_(nullptr) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
static RegExpNode* ToNode(RegExpTree* body, int index,
@@ -427,12 +429,15 @@
RegExpTree* body() { return body_; }
void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; }
+ const ZoneVector<uc16>* name() const { return name_; }
+ void set_name(const ZoneVector<uc16>* name) { name_ = name; }
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
private:
RegExpTree* body_;
int index_;
+ const ZoneVector<uc16>* name_;
};
@@ -489,7 +494,9 @@
class RegExpBackReference final : public RegExpTree {
public:
- explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {}
+ RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
+ explicit RegExpBackReference(RegExpCapture* capture)
+ : capture_(capture), name_(nullptr) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpBackReference* AsBackReference() override;
@@ -500,9 +507,13 @@
int max_match() override { return kInfinity; }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
+ void set_capture(RegExpCapture* capture) { capture_ = capture; }
+ const ZoneVector<uc16>* name() const { return name_; }
+ void set_name(const ZoneVector<uc16>* name) { name_ = name; }
private:
RegExpCapture* capture_;
+ const ZoneVector<uc16>* name_;
};
diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc
index 7fed26e..19ecaed 100644
--- a/src/regexp/regexp-macro-assembler.cc
+++ b/src/regexp/regexp-macro-assembler.cc
@@ -177,7 +177,7 @@
return_value = RETRY;
} else {
Object* result = isolate->stack_guard()->HandleInterrupts();
- if (result->IsException()) return_value = EXCEPTION;
+ if (result->IsException(isolate)) return_value = EXCEPTION;
}
DisallowHeapAllocation no_gc;
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index abb644a..dba81ae 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -25,6 +25,8 @@
zone_(zone),
error_(error),
captures_(NULL),
+ named_captures_(NULL),
+ named_back_references_(NULL),
in_(in),
current_(kEndMarker),
ignore_case_(flags & JSRegExp::kIgnoreCase),
@@ -73,7 +75,8 @@
if (has_next()) {
StackLimitCheck check(isolate());
if (check.HasOverflowed()) {
- ReportError(CStrVector(Isolate::kStackOverflowMessage));
+ ReportError(CStrVector(
+ MessageTemplate::TemplateString(MessageTemplate::kStackOverflow)));
} else if (zone()->excess_allocation()) {
ReportError(CStrVector("Regular expression too large"));
} else {
@@ -149,6 +152,7 @@
// Disjunction
RegExpTree* RegExpParser::ParsePattern() {
RegExpTree* result = ParseDisjunction(CHECK_FAILED);
+ PatchNamedBackReferences(CHECK_FAILED);
DCHECK(!has_more());
// If the result of parsing is a literal string atom, and it has the
// same length as the input, then the atom is identical to the input.
@@ -172,7 +176,7 @@
RegExpTree* RegExpParser::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
- ignore_case(), unicode(), zone());
+ nullptr, ignore_case(), unicode(), zone());
RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
@@ -204,6 +208,10 @@
// Build result of subexpression.
if (group_type == CAPTURE) {
+ if (state->IsNamedCapture()) {
+ CreateNamedCaptureAtIndex(state->capture_name(),
+ capture_index CHECK_FAILED);
+ }
RegExpCapture* capture = GetCapture(capture_index);
capture->set_body(body);
body = capture;
@@ -268,47 +276,65 @@
case '(': {
SubexpressionType subexpr_type = CAPTURE;
RegExpLookaround::Type lookaround_type = state->lookaround_type();
+ bool is_named_capture = false;
Advance();
if (current() == '?') {
switch (Next()) {
case ':':
subexpr_type = GROUPING;
+ Advance(2);
break;
case '=':
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = POSITIVE_LOOKAROUND;
+ Advance(2);
break;
case '!':
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = NEGATIVE_LOOKAROUND;
+ Advance(2);
break;
case '<':
+ Advance();
if (FLAG_harmony_regexp_lookbehind) {
- Advance();
- lookaround_type = RegExpLookaround::LOOKBEHIND;
if (Next() == '=') {
subexpr_type = POSITIVE_LOOKAROUND;
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ Advance(2);
break;
} else if (Next() == '!') {
subexpr_type = NEGATIVE_LOOKAROUND;
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ Advance(2);
break;
}
}
+ if (FLAG_harmony_regexp_named_captures && unicode()) {
+ is_named_capture = true;
+ Advance();
+ break;
+ }
// Fall through.
default:
return ReportError(CStrVector("Invalid group"));
}
- Advance(2);
- } else {
+ }
+
+ const ZoneVector<uc16>* capture_name = nullptr;
+ if (subexpr_type == CAPTURE) {
if (captures_started_ >= kMaxCaptures) {
return ReportError(CStrVector("Too many captures"));
}
captures_started_++;
+
+ if (is_named_capture) {
+ capture_name = ParseCaptureGroupName(CHECK_FAILED);
+ }
}
// Store current state and begin new disjunction parsing.
state = new (zone()) RegExpParserState(
state, subexpr_type, lookaround_type, captures_started_,
- ignore_case(), unicode(), zone());
+ capture_name, ignore_case(), unicode(), zone());
builder = state->builder();
continue;
}
@@ -362,11 +388,11 @@
if (FLAG_harmony_regexp_property) {
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
- if (!ParsePropertyClass(ranges)) {
+ if (!ParsePropertyClass(ranges, p == 'P')) {
return ReportError(CStrVector("Invalid property name"));
}
RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, p == 'P');
+ new (zone()) RegExpCharacterClass(ranges, false);
builder->AddCharacterClass(cc);
} else {
// With /u, no identity escapes except for syntax characters
@@ -416,7 +442,7 @@
break;
}
}
- // FALLTHROUGH
+ // Fall through.
case '0': {
Advance();
if (unicode() && Next() >= '0' && Next() <= '9') {
@@ -497,6 +523,13 @@
}
break;
}
+ case 'k':
+ if (FLAG_harmony_regexp_named_captures && unicode()) {
+ Advance(2);
+ ParseNamedBackReference(builder, state CHECK_FAILED);
+ break;
+ }
+ // Fall through.
default:
Advance();
// With /u, no identity escapes except for syntax characters
@@ -514,14 +547,14 @@
int dummy;
bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
if (parsed) return ReportError(CStrVector("Nothing to repeat"));
- // fallthrough
+ // Fall through.
}
case '}':
case ']':
if (unicode()) {
return ReportError(CStrVector("Lone quantifier brackets"));
}
- // fallthrough
+ // Fall through.
default:
builder->AddUnicodeCharacter(current());
Advance();
@@ -675,6 +708,148 @@
return true;
}
+static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {
+ if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ v->push_back(code_unit);
+ } else {
+ v->push_back(unibrow::Utf16::LeadSurrogate(code_unit));
+ v->push_back(unibrow::Utf16::TrailSurrogate(code_unit));
+ }
+}
+
+const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
+ DCHECK(FLAG_harmony_regexp_named_captures);
+ DCHECK(unicode());
+
+ ZoneVector<uc16>* name =
+ new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
+
+ bool at_start = true;
+ while (true) {
+ uc32 c = current();
+ Advance();
+
+ // Convert unicode escapes.
+ if (c == '\\' && current() == 'u') {
+ Advance();
+ if (!ParseUnicodeEscape(&c)) {
+ ReportError(CStrVector("Invalid Unicode escape sequence"));
+ return nullptr;
+ }
+ }
+
+ if (at_start) {
+ if (!IdentifierStart::Is(c)) {
+ ReportError(CStrVector("Invalid capture group name"));
+ return nullptr;
+ }
+ push_code_unit(name, c);
+ at_start = false;
+ } else {
+ if (c == '>') {
+ break;
+ } else if (IdentifierPart::Is(c)) {
+ push_code_unit(name, c);
+ } else {
+ ReportError(CStrVector("Invalid capture group name"));
+ return nullptr;
+ }
+ }
+ }
+
+ return name;
+}
+
+bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
+ int index) {
+ DCHECK(FLAG_harmony_regexp_named_captures);
+ DCHECK(unicode());
+ DCHECK(0 < index && index <= captures_started_);
+ DCHECK_NOT_NULL(name);
+
+ if (named_captures_ == nullptr) {
+ named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
+ } else {
+ // Check for duplicates and bail if we find any.
+ for (const auto& named_capture : *named_captures_) {
+ if (*named_capture->name() == *name) {
+ ReportError(CStrVector("Duplicate capture group name"));
+ return false;
+ }
+ }
+ }
+
+ RegExpCapture* capture = GetCapture(index);
+ DCHECK(capture->name() == nullptr);
+
+ capture->set_name(name);
+ named_captures_->Add(capture, zone());
+
+ return true;
+}
+
+bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
+ RegExpParserState* state) {
+ // The parser is assumed to be on the '<' in \k<name>.
+ if (current() != '<') {
+ ReportError(CStrVector("Invalid named reference"));
+ return false;
+ }
+
+ Advance();
+ const ZoneVector<uc16>* name = ParseCaptureGroupName();
+ if (name == nullptr) {
+ return false;
+ }
+
+ if (state->IsInsideCaptureGroup(name)) {
+ builder->AddEmpty();
+ } else {
+ RegExpBackReference* atom = new (zone()) RegExpBackReference();
+ atom->set_name(name);
+
+ builder->AddAtom(atom);
+
+ if (named_back_references_ == nullptr) {
+ named_back_references_ =
+ new (zone()) ZoneList<RegExpBackReference*>(1, zone());
+ }
+ named_back_references_->Add(atom, zone());
+ }
+
+ return true;
+}
+
+void RegExpParser::PatchNamedBackReferences() {
+ if (named_back_references_ == nullptr) return;
+
+ if (named_captures_ == nullptr) {
+ ReportError(CStrVector("Invalid named capture referenced"));
+ return;
+ }
+
+ // Look up and patch the actual capture for each named back reference.
+ // TODO(jgruber): O(n^2), optimize if necessary.
+
+ for (int i = 0; i < named_back_references_->length(); i++) {
+ RegExpBackReference* ref = named_back_references_->at(i);
+
+ int index = -1;
+ for (const auto& capture : *named_captures_) {
+ if (*capture->name() == *ref->name()) {
+ index = capture->index();
+ break;
+ }
+ }
+
+ if (index == -1) {
+ ReportError(CStrVector("Invalid named capture referenced"));
+ return;
+ }
+
+ ref->set_capture(GetCapture(index));
+ }
+}
RegExpCapture* RegExpParser::GetCapture(int index) {
// The index for the capture groups are one-based. Its index in the list is
@@ -691,6 +866,24 @@
return captures_->at(index - 1);
}
+Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
+ if (named_captures_ == nullptr || named_captures_->is_empty())
+ return Handle<FixedArray>();
+
+ Factory* factory = isolate()->factory();
+
+ int len = named_captures_->length() * 2;
+ Handle<FixedArray> array = factory->NewFixedArray(len);
+
+ for (int i = 0; i < named_captures_->length(); i++) {
+ RegExpCapture* capture = named_captures_->at(i);
+ MaybeHandle<String> name = factory->NewStringFromTwoByte(capture->name());
+ array->set(i * 2, *name.ToHandleChecked());
+ array->set(i * 2 + 1, Smi::FromInt(capture->index()));
+ }
+
+ return array;
+}
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
@@ -703,6 +896,15 @@
return false;
}
+bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(
+ const ZoneVector<uc16>* name) {
+ DCHECK_NOT_NULL(name);
+ for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ if (s->capture_name() == nullptr) continue;
+ if (*s->capture_name() == *name) return true;
+ }
+ return false;
+}
// QuantifierPrefix ::
// { DecimalDigits }
@@ -845,6 +1047,9 @@
}
#ifdef V8_I18N_SUPPORT
+
+namespace {
+
bool IsExactPropertyAlias(const char* property_name, UProperty property) {
const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
@@ -875,7 +1080,7 @@
}
bool LookupPropertyValueName(UProperty property,
- const char* property_value_name,
+ const char* property_value_name, bool negate,
ZoneList<CharacterRange>* result, Zone* zone) {
int32_t property_value =
u_getPropertyValueEnum(property, property_value_name);
@@ -895,6 +1100,7 @@
if (success) {
uset_removeAllStrings(set);
+ if (negate) uset_complement(set);
int item_count = uset_getItemCount(set);
int item_result = 0;
for (int i = 0; i < item_count; i++) {
@@ -910,9 +1116,34 @@
return success;
}
-bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
+template <size_t N>
+inline bool NameEquals(const char* name, const char (&literal)[N]) {
+ return strncmp(name, literal, N + 1) == 0;
+}
+
+bool LookupSpecialPropertyValueName(const char* name,
+ ZoneList<CharacterRange>* result,
+ bool negate, Zone* zone) {
+ if (NameEquals(name, "Any")) {
+ if (!negate) result->Add(CharacterRange::Everything(), zone);
+ } else if (NameEquals(name, "ASCII")) {
+ result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
+ : CharacterRange::Range(0x0, 0x7f),
+ zone);
+ } else if (NameEquals(name, "Assigned")) {
+ return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
+ !negate, result, zone);
+ } else {
+ return false;
+ }
+ return true;
+}
+
+} // anonymous namespace
+
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
+ bool negate) {
// Parse the property class as follows:
- // - \pN with a single-character N is equivalent to \p{N}
// - In \p{name}, 'name' is interpreted
// - either as a general category property value name.
// - or as a binary property name.
@@ -935,9 +1166,6 @@
}
second_part.Add(0); // null-terminate string.
}
- } else if (current() != kEndMarker) {
- // Parse \pN, where N is a single-character property name value.
- first_part.Add(static_cast<char>(current()));
} else {
return false;
}
@@ -947,8 +1175,12 @@
if (second_part.is_empty()) {
// First attempt to interpret as general category property value name.
const char* name = first_part.ToConstVector().start();
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result,
- zone())) {
+ if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
+ result, zone())) {
+ return true;
+ }
+ // Interpret "Any", "ASCII", and "Assigned".
+ if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
return true;
}
// Then attempt to interpret as binary property name with value name 'Y'.
@@ -956,7 +1188,8 @@
if (property < UCHAR_BINARY_START) return false;
if (property >= UCHAR_BINARY_LIMIT) return false;
if (!IsExactPropertyAlias(name, property)) return false;
- return LookupPropertyValueName(property, "Y", result, zone());
+ return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
+ zone());
} else {
// Both property name and value name are specified. Attempt to interpret
// the property name as enumerated property.
@@ -966,13 +1199,15 @@
if (property < UCHAR_INT_START) return false;
if (property >= UCHAR_INT_LIMIT) return false;
if (!IsExactPropertyAlias(property_name, property)) return false;
- return LookupPropertyValueName(property, value_name, result, zone());
+ return LookupPropertyValueName(property, value_name, negate, result,
+ zone());
}
}
#else // V8_I18N_SUPPORT
-bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
+ bool negate) {
return false;
}
@@ -1139,7 +1374,6 @@
return CharacterRange::Singleton(first);
}
-
static const uc16 kNoCharClass = 0;
// Adds range or pre-defined character class to character ranges.
@@ -1163,19 +1397,10 @@
bool parse_success = false;
if (next == 'p') {
Advance(2);
- parse_success = ParsePropertyClass(ranges);
+ parse_success = ParsePropertyClass(ranges, false);
} else if (next == 'P') {
Advance(2);
- ZoneList<CharacterRange>* property_class =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- parse_success = ParsePropertyClass(property_class);
- if (parse_success) {
- ZoneList<CharacterRange>* negated =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- CharacterRange::Negate(property_class, negated, zone());
- const Vector<CharacterRange> negated_vector = negated->ToVector();
- ranges->AddAll(negated_vector, zone());
- }
+ parse_success = ParsePropertyClass(ranges, true);
} else {
return false;
}
@@ -1272,6 +1497,7 @@
int capture_count = parser.captures_started();
result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
result->contains_anchor = parser.contains_anchor();
+ result->capture_name_map = parser.CreateCaptureNameMap();
result->capture_count = capture_count;
}
return !parser.failed();
diff --git a/src/regexp/regexp-parser.h b/src/regexp/regexp-parser.h
index 6142a9e..a0b975d 100644
--- a/src/regexp/regexp-parser.h
+++ b/src/regexp/regexp-parser.h
@@ -174,7 +174,7 @@
bool ParseHexEscape(int length, uc32* value);
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
- bool ParsePropertyClass(ZoneList<CharacterRange>* result);
+ bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate);
uc32 ParseOctalLiteral();
@@ -222,13 +222,15 @@
RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type,
RegExpLookaround::Type lookaround_type,
- int disjunction_capture_index, bool ignore_case,
+ int disjunction_capture_index,
+ const ZoneVector<uc16>* capture_name, bool ignore_case,
bool unicode, Zone* zone)
: previous_state_(previous_state),
builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
group_type_(group_type),
lookaround_type_(lookaround_type),
- disjunction_capture_index_(disjunction_capture_index) {}
+ disjunction_capture_index_(disjunction_capture_index),
+ capture_name_(capture_name) {}
// Parser state of containing expression, if any.
RegExpParserState* previous_state() { return previous_state_; }
bool IsSubexpression() { return previous_state_ != NULL; }
@@ -242,9 +244,16 @@
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
+ // The name of the current sub-expression, if group_type is CAPTURE. Only
+ // used for named captures.
+ const ZoneVector<uc16>* capture_name() { return capture_name_; }
+
+ bool IsNamedCapture() const { return capture_name_ != nullptr; }
// Check whether the parser is inside a capture group with the given index.
bool IsInsideCaptureGroup(int index);
+ // Check whether the parser is inside a capture group with the given name.
+ bool IsInsideCaptureGroup(const ZoneVector<uc16>* name);
private:
// Linked list implementation of stack of states.
@@ -257,11 +266,32 @@
RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
+ // Stored capture name (if any).
+ const ZoneVector<uc16>* capture_name_;
};
// Return the 1-indexed RegExpCapture object, allocate if necessary.
RegExpCapture* GetCapture(int index);
+ // Creates a new named capture at the specified index. Must be called exactly
+ // once for each named capture. Fails if a capture with the same name is
+ // encountered.
+ bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index);
+
+ // Parses the name of a capture group (?<name>pattern). The name must adhere
+ // to IdentifierName in the ECMAScript standard.
+ const ZoneVector<uc16>* ParseCaptureGroupName();
+
+ bool ParseNamedBackReference(RegExpBuilder* builder,
+ RegExpParserState* state);
+
+ // After the initial parsing pass, patch corresponding RegExpCapture objects
+ // into all RegExpBackReferences. This is done after initial parsing in order
+ // to avoid complicating cases in which references come before the capture.
+ void PatchNamedBackReferences();
+
+ Handle<FixedArray> CreateCaptureNameMap();
+
Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; }
@@ -278,6 +308,8 @@
Zone* zone_;
Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_;
+ ZoneList<RegExpCapture*>* named_captures_;
+ ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
bool ignore_case_;
diff --git a/src/regexp/s390/OWNERS b/src/regexp/s390/OWNERS
index eb007cb..752e8e3 100644
--- a/src/regexp/s390/OWNERS
+++ b/src/regexp/s390/OWNERS
@@ -3,3 +3,4 @@
joransiu@ca.ibm.com
mbrandy@us.ibm.com
michael_dawson@ca.ibm.com
+bjaideep@ca.ibm.com
diff --git a/src/regexp/s390/regexp-macro-assembler-s390.cc b/src/regexp/s390/regexp-macro-assembler-s390.cc
index 9dac534..d9ca1df 100644
--- a/src/regexp/s390/regexp-macro-assembler-s390.cc
+++ b/src/regexp/s390/regexp-macro-assembler-s390.cc
@@ -10,7 +10,6 @@
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/regexp/s390/regexp-macro-assembler-s390.h"
diff --git a/src/regexp/x64/regexp-macro-assembler-x64.cc b/src/regexp/x64/regexp-macro-assembler-x64.cc
index 5d73b43..aafc840 100644
--- a/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -8,7 +8,6 @@
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
diff --git a/src/regexp/x87/regexp-macro-assembler-x87.cc b/src/regexp/x87/regexp-macro-assembler-x87.cc
index 9f15b1c..4a1c3a8 100644
--- a/src/regexp/x87/regexp-macro-assembler-x87.cc
+++ b/src/regexp/x87/regexp-macro-assembler-x87.cc
@@ -8,7 +8,6 @@
#include "src/log.h"
#include "src/macro-assembler.h"
-#include "src/profiler/cpu-profiler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"