Merge V8 5.2.361.47 DO NOT MERGE
https://chromium.googlesource.com/v8/v8/+/5.2.361.47
FPIIM-449
Change-Id: Ibec421b85a9b88cb3a432ada642e469fe7e78346
(cherry picked from commit bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8)
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index ddb4a16..6c50f4e 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -5159,8 +5159,10 @@
ranges = negated;
}
if (ranges->length() == 0) {
- // No matches possible.
- return new (zone) EndNode(EndNode::BACKTRACK, zone);
+ ranges->Add(CharacterRange::Everything(), zone);
+ RegExpCharacterClass* fail =
+ new (zone) RegExpCharacterClass(ranges, true);
+ return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
return UnanchoredAdvance(compiler, on_success);
@@ -5879,6 +5881,7 @@
void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
ZoneList<CharacterRange>* ranges,
bool is_one_byte) {
+ CharacterRange::Canonicalize(ranges);
int range_count = ranges->length();
for (int i = 0; i < range_count; i++) {
CharacterRange range = ranges->at(i);
@@ -6762,7 +6765,7 @@
Heap* heap = pattern->GetHeap();
bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
- heap->isolate()->memory_allocator()->SizeExecutable() >
+ heap->memory_allocator()->SizeExecutable() >
RegExpImpl::kRegExpExecutableMemoryLimit) {
too_much = true;
}
diff --git a/src/regexp/regexp-ast.h b/src/regexp/regexp-ast.h
index 0e718d3..39c9cee 100644
--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@@ -296,7 +296,10 @@
bool IsCharacterClass() override;
bool IsTextElement() override { return true; }
int min_match() override { return 1; }
- int max_match() override { return 1; }
+ // The character class may match two code units for unicode regexps.
+ // TODO(yangguo): we should split this class for usage in TextElement, and
+ // make max_match() dependent on the character class content.
+ int max_match() override { return 2; }
void AppendToText(RegExpText* text, Zone* zone) override;
CharacterSet character_set() { return set_; }
// TODO(lrn): Remove need for complex version if is_standard that
diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc
index 9bb5073..7fed26e 100644
--- a/src/regexp/regexp-macro-assembler.cc
+++ b/src/regexp/regexp-macro-assembler.cc
@@ -100,6 +100,15 @@
Bind(&ok);
}
+void RegExpMacroAssembler::CheckPosition(int cp_offset,
+ Label* on_outside_input) {  // Default body; the header drops the "= 0".
+ LoadCurrentCharacter(cp_offset, on_outside_input, true);  // NOTE(review): 'true' is presumably the bounds-check flag -- confirm.
+}
+
+bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
+ Label* on_no_match) {  // Default body; both arguments are unused here.
+ return false;  // Reports "no custom support" for every class type.
+}
#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
diff --git a/src/regexp/regexp-macro-assembler.h b/src/regexp/regexp-macro-assembler.h
index 2aa439e..76efdf9 100644
--- a/src/regexp/regexp-macro-assembler.h
+++ b/src/regexp/regexp-macro-assembler.h
@@ -113,12 +113,12 @@
// Checks whether the given offset from the current position is before
// the end of the string. May overwrite the current character.
- virtual void CheckPosition(int cp_offset, Label* on_outside_input) = 0;
+ virtual void CheckPosition(int cp_offset, Label* on_outside_input);
// Check whether a standard/default character class matches the current
// character. Returns false if the type of special character class does
// not have custom support.
// May clobber the current loaded character.
- virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) = 0;
+ virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
virtual void Fail() = 0;
virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index d433fc8..abb644a 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -130,6 +130,7 @@
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
+ if (failed_) return NULL; // Do not overwrite any existing error.
failed_ = true;
*error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
// Zip to the end to make sure the no more input is read.
@@ -511,9 +512,8 @@
break;
case '{': {
int dummy;
- if (ParseIntervalQuantifier(&dummy, &dummy)) {
- return ReportError(CStrVector("Nothing to repeat"));
- }
+ bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
+ if (parsed) return ReportError(CStrVector("Nothing to repeat"));
// fallthrough
}
case '}':
@@ -845,29 +845,46 @@
}
#ifdef V8_I18N_SUPPORT
-bool IsExactPropertyValueAlias(const char* property_name, UProperty property,
- int32_t property_value) {
- const char* short_name =
- u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
+bool IsExactPropertyAlias(const char* property_name, UProperty property) {
+ const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
for (int i = 0;; i++) {
- const char* long_name = u_getPropertyValueName(
- property, property_value,
- static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
+ const char* long_name = u_getPropertyName(
+ property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));  // Next name choice; loop ends at the first NULL.
if (long_name == NULL) break;
if (strcmp(property_name, long_name) == 0) return true;
}
return false;
}
-bool LookupPropertyClass(UProperty property, const char* property_name,
- ZoneList<CharacterRange>* result, Zone* zone) {
- int32_t property_value = u_getPropertyValueEnum(property, property_name);
+bool IsExactPropertyValueAlias(const char* property_value_name,
+ UProperty property, int32_t property_value) {
+ const char* short_name =
+ u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
+ if (short_name != NULL && strcmp(property_value_name, short_name) == 0) {
+ return true;  // Exact (strcmp) match with the short name.
+ }
+ for (int i = 0;; i++) {  // No loop bound; exits via the NULL check below.
+ const char* long_name = u_getPropertyValueName(
+ property, property_value,
+ static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
+ if (long_name == NULL) break;  // No name for choice U_LONG_PROPERTY_NAME + i.
+ if (strcmp(property_value_name, long_name) == 0) return true;
+ }
+ return false;  // Neither the short name nor any long name compared equal.
+}
+
+bool LookupPropertyValueName(UProperty property,
+ const char* property_value_name,
+ ZoneList<CharacterRange>* result, Zone* zone) {
+ int32_t property_value =
+ u_getPropertyValueEnum(property, property_value_name);
if (property_value == UCHAR_INVALID_CODE) return false;
// We require the property name to match exactly to one of the property value
// aliases. However, u_getPropertyValueEnum uses loose matching.
- if (!IsExactPropertyValueAlias(property_name, property, property_value)) {
+ if (!IsExactPropertyValueAlias(property_value_name, property,
+ property_value)) {
return false;
}
@@ -892,49 +909,75 @@
uset_close(set);
return success;
}
-#endif // V8_I18N_SUPPORT
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
-#ifdef V8_I18N_SUPPORT
- List<char> property_name_list;
+ // Parse the property class as follows:
+ // - \pN with a single-character N is equivalent to \p{N}
+ // - In \p{name}, 'name' is interpreted
+ // - either as a general category property value name.
+ // - or as a binary property name.
+ // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
+ // and 'value' is interpreted as one of the available property value names.
+ // - Aliases in PropertyAliases.txt and PropertyValueAliases.txt can be used.
+ // - Loose matching is not applied.
+ List<char> first_part;
+ List<char> second_part;
if (current() == '{') {
- for (Advance(); current() != '}'; Advance()) {
+ // Parse \p{[PropertyName=]PropertyNameValue}
+ for (Advance(); current() != '}' && current() != '='; Advance()) {
if (!has_next()) return false;
- property_name_list.Add(static_cast<char>(current()));
+ first_part.Add(static_cast<char>(current()));
+ }
+ if (current() == '=') {  // An explicit "=value" part follows the name.
+ for (Advance(); current() != '}'; Advance()) {
+ if (!has_next()) return false;
+ second_part.Add(static_cast<char>(current()));
+ }
+ second_part.Add(0); // null-terminate string.
}
} else if (current() != kEndMarker) {
- property_name_list.Add(static_cast<char>(current()));
+ // Parse \pN, where N is a single-character property name value.
+ first_part.Add(static_cast<char>(current()));
} else {
return false;
}
Advance();
- property_name_list.Add(0); // null-terminate string.
+ first_part.Add(0); // null-terminate string.
- const char* property_name = property_name_list.ToConstVector().start();
-
-#define PROPERTY_NAME_LOOKUP(PROPERTY) \
- do { \
- if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \
- return true; \
- } \
- } while (false)
-
- // General_Category (gc) found in PropertyValueAliases.txt
- PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);
- // Script (sc) found in Scripts.txt
- PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
- // To disambiguate from script names, block names have an "In"-prefix.
- if (property_name_list.length() > 3 && property_name[0] == 'I' &&
- property_name[1] == 'n') {
- // Block (blk) found in Blocks.txt
- property_name += 2;
- PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
+ if (second_part.is_empty()) {  // No '=' was seen: \pN or \p{name}.
+ // First attempt to interpret as general category property value name.
+ const char* name = first_part.ToConstVector().start();
+ if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result,
+ zone())) {
+ return true;
+ }
+ // Then attempt to interpret as binary property name with value name 'Y'.
+ UProperty property = u_getPropertyEnum(name);
+ if (property < UCHAR_BINARY_START) return false;  // Below the binary range.
+ if (property >= UCHAR_BINARY_LIMIT) return false;  // At or past its limit.
+ if (!IsExactPropertyAlias(name, property)) return false;  // Exact alias only.
+ return LookupPropertyValueName(property, "Y", result, zone());
+ } else {
+ // Both property name and value name are specified. Attempt to interpret
+ // the property name as enumerated property.
+ const char* property_name = first_part.ToConstVector().start();
+ const char* value_name = second_part.ToConstVector().start();
+ UProperty property = u_getPropertyEnum(property_name);
+ if (property < UCHAR_INT_START) return false;  // Below the int range.
+ if (property >= UCHAR_INT_LIMIT) return false;  // At or past its limit.
+ if (!IsExactPropertyAlias(property_name, property)) return false;
+ return LookupPropertyValueName(property, value_name, result, zone());
}
-#undef PROPERTY_NAME_LOOKUP
-#endif // V8_I18N_SUPPORT
+}
+
+#else // V8_I18N_SUPPORT
+
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
return false;
}
+#endif // V8_I18N_SUPPORT
+
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
uc32 x = 0;
int d = HexValue(current());