Merge v8 bleeding_edge revision 3689 to trunk. This fixes issue with
character ranges in regexp engine.
Review URL: http://codereview.chromium.org/548138
git-svn-id: http://v8.googlecode.com/svn/trunk@3690 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 8af472d..505cf03 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -4462,10 +4462,13 @@
while (i1 < n1 || i2 < n2) {
CharacterRange next_range;
int range_source;
- if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {
+ if (i2 == n2 ||
+ (i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {
+ // Next smallest element is in first set.
next_range = first_set->at(i1++);
range_source = kInsideFirst;
} else {
+ // Next smallest element is in second set.
next_range = second_set->at(i2++);
range_source = kInsideSecond;
}
diff --git a/src/version.cc b/src/version.cc
index 5309318..daaba7b 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -35,7 +35,7 @@
#define MAJOR_VERSION 2
#define MINOR_VERSION 0
#define BUILD_NUMBER 6
-#define PATCH_LEVEL 3
+#define PATCH_LEVEL 4
#define CANDIDATE_VERSION false
// Define SONAME to have the SCons build the put a specific SONAME into the
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index c72c4d1..ad68a45 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -1650,6 +1650,163 @@
ASSERT_EQ(30, list->at(0).to());
}
+// Checks whether a character is in the set represented by a list of ranges.
+static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) {
+ for (int i = 0; i < set->length(); i++) {
+ CharacterRange range = set->at(i);
+ if (range.from() <= value && value <= range.to()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+TEST(CharacterRangeMerge) {
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ ZoneList<CharacterRange> l1(4);
+ ZoneList<CharacterRange> l2(4);
+ // Create all combinations of intersections of ranges, both singletons and
+ // longer.
+
+ int offset = 0;
+
+ // The five kinds of singleton intersections:
+ // X
+ // Y - outside before
+ // Y - outside touching start
+ // Y - overlap
+ // Y - outside touching end
+ // Y - outside after
+
+ for (int i = 0; i < 5; i++) {
+ l1.Add(CharacterRange::Singleton(offset + 2));
+ l2.Add(CharacterRange::Singleton(offset + i));
+ offset += 6;
+ }
+
+ // The seven kinds of singleton/non-singleton intersections:
+ // XXX
+ // Y - outside before
+ // Y - outside touching start
+ // Y - inside touching start
+ // Y - entirely inside
+ // Y - inside touching end
+ // Y - outside touching end
+ // Y - disjoint after
+
+ for (int i = 0; i < 7; i++) {
+ l1.Add(CharacterRange::Range(offset + 2, offset + 4));
+ l2.Add(CharacterRange::Singleton(offset + i));
+ offset += 8;
+ }
+
+ // The eleven kinds of non-singleton intersections:
+ //
+ // XXXXXXXX
+ // YYYY - outside before.
+ // YYYY - outside touching start.
+ // YYYY - overlapping start
+ // YYYY - inside touching start
+ // YYYY - entirely inside
+ // YYYY - inside touching end
+ // YYYY - overlapping end
+ // YYYY - outside touching end
+ // YYYY - outside after
+ // YYYYYYYY - identical
+ // YYYYYYYYYYYY - containing entirely.
+
+ for (int i = 0; i < 9; i++) {
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15)); // Length 8.
+ l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3));
+ offset += 22;
+ }
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+ l2.Add(CharacterRange::Range(offset + 6, offset + 15));
+ offset += 22;
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15));
+ l2.Add(CharacterRange::Range(offset + 4, offset + 17));
+ offset += 22;
+
+ // Different kinds of multi-range overlap:
+ // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
+ // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
+
+ l1.Add(CharacterRange::Range(offset, offset + 21));
+ l1.Add(CharacterRange::Range(offset + 31, offset + 52));
+ for (int i = 0; i < 6; i++) {
+ l2.Add(CharacterRange::Range(offset + 2, offset + 5));
+ l2.Add(CharacterRange::Singleton(offset + 8));
+ offset += 9;
+ }
+
+ ASSERT(CharacterRange::IsCanonical(&l1));
+ ASSERT(CharacterRange::IsCanonical(&l2));
+
+ ZoneList<CharacterRange> first_only(4);
+ ZoneList<CharacterRange> second_only(4);
+ ZoneList<CharacterRange> both(4);
+
+ // Merge one direction.
+ CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both);
+
+ CHECK(CharacterRange::IsCanonical(&first_only));
+ CHECK(CharacterRange::IsCanonical(&second_only));
+ CHECK(CharacterRange::IsCanonical(&both));
+
+ for (uc16 i = 0; i < offset; i++) {
+ bool in_first = CharacterInSet(&l1, i);
+ bool in_second = CharacterInSet(&l2, i);
+ CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+ CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+ CHECK((in_first && in_second) == CharacterInSet(&both, i));
+ }
+
+ first_only.Clear();
+ second_only.Clear();
+ both.Clear();
+
+ // Merge other direction.
+ CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both);
+
+ CHECK(CharacterRange::IsCanonical(&first_only));
+ CHECK(CharacterRange::IsCanonical(&second_only));
+ CHECK(CharacterRange::IsCanonical(&both));
+
+ for (uc16 i = 0; i < offset; i++) {
+ bool in_first = CharacterInSet(&l1, i);
+ bool in_second = CharacterInSet(&l2, i);
+ CHECK((in_first && !in_second) == CharacterInSet(&first_only, i));
+ CHECK((!in_first && in_second) == CharacterInSet(&second_only, i));
+ CHECK((in_first && in_second) == CharacterInSet(&both, i));
+ }
+
+ first_only.Clear();
+ second_only.Clear();
+ both.Clear();
+
+ // Merge but don't record all combinations.
+ CharacterRange::Merge(&l1, &l2, NULL, NULL, &both);
+
+ CHECK(CharacterRange::IsCanonical(&both));
+
+ for (uc16 i = 0; i < offset; i++) {
+ bool in_first = CharacterInSet(&l1, i);
+ bool in_second = CharacterInSet(&l2, i);
+ CHECK((in_first && in_second) == CharacterInSet(&both, i));
+ }
+
+ // Merge into same set.
+ ZoneList<CharacterRange> all(4);
+ CharacterRange::Merge(&l1, &l2, &all, &all, &all);
+
+ CHECK(CharacterRange::IsCanonical(&all));
+
+ for (uc16 i = 0; i < offset; i++) {
+ bool in_first = CharacterInSet(&l1, i);
+ bool in_second = CharacterInSet(&l2, i);
+ CHECK((in_first || in_second) == CharacterInSet(&all, i));
+ }
+}
TEST(Graph) {