Version 3.4.5 Fixed issues 794, 1097, 1215(partial), 1417, 1435, 1472, 1473, 1476, and 1477. Improved code generation for !0 and !1. Reduced memory usage for regular expressions with nested qualifiers. (issue 1472) Fixed V8 to count line terminators in multi-line comments. (Chromium issue 86431) Fixed disassembler=on option for release-mode builds. (issue 1473) Performance improvements on all platforms. git-svn-id: http://v8.googlecode.com/svn/trunk@8337 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

commit: 7b26015ac58e54e88f4214e248f772ad4f055477 [log] [tgz]
author: whesse@chromium.org <whesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00> Mon Jun 20 15:33:18 2011 +0000
committer: whesse@chromium.org <whesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00> Mon Jun 20 15:33:18 2011 +0000
tree: 9ee8fa2ae804bbb24150a9e0e056913a709eb72a
parent: 6d786c9805481bd13ecb29c3155540f2f32950e1 [diff] [blame]
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index ec150c6..73dbdb0 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc

@@ -810,6 +810,11 @@
   inline bool ignore_case() { return ignore_case_; }
   inline bool ascii() { return ascii_; }
 
+  int current_expansion_factor() { return current_expansion_factor_; }
+  void set_current_expansion_factor(int value) {
+    current_expansion_factor_ = value;
+  }
+
   static const int kNoRegister = -1;
 
  private:
@@ -821,6 +826,7 @@
   bool ignore_case_;
   bool ascii_;
   bool reg_exp_too_big_;
+  int current_expansion_factor_;
 };
 
 
@@ -848,7 +854,8 @@
       recursion_depth_(0),
       ignore_case_(ignore_case),
       ascii_(ascii),
-      reg_exp_too_big_(false) {
+      reg_exp_too_big_(false),
+      current_expansion_factor_(1) {
   accept_ = new EndNode(EndNode::ACCEPT);
   ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
 }
@@ -3730,6 +3737,44 @@
 }
 
 
+// Scoped object to keep track of how much we unroll quantifier loops in the
+// regexp graph generator.
+class RegExpExpansionLimiter {
+ public:
+  static const int kMaxExpansionFactor = 6;
+  RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
+      : compiler_(compiler),
+        saved_expansion_factor_(compiler->current_expansion_factor()),
+        ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
+    ASSERT(factor > 0);
+    if (ok_to_expand_) {
+      if (factor > kMaxExpansionFactor) {
+        // Avoid integer overflow of the current expansion factor.
+        ok_to_expand_ = false;
+        compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
+      } else {
+        int new_factor = saved_expansion_factor_ * factor;
+        ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
+        compiler->set_current_expansion_factor(new_factor);
+      }
+    }
+  }
+
+  ~RegExpExpansionLimiter() {
+    compiler_->set_current_expansion_factor(saved_expansion_factor_);
+  }
+
+  bool ok_to_expand() { return ok_to_expand_; }
+
+ private:
+  RegExpCompiler* compiler_;
+  int saved_expansion_factor_;
+  bool ok_to_expand_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
+};
+
+
 RegExpNode* RegExpQuantifier::ToNode(int min,
                                      int max,
                                      bool is_greedy,
@@ -3769,38 +3814,46 @@
   } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
     // Only unroll if there are no captures and the body can't be
     // empty.
-    if (min > 0 && min <= kMaxUnrolledMinMatches) {
-      int new_max = (max == kInfinity) ? max : max - min;
-      // Recurse once to get the loop or optional matches after the fixed ones.
-      RegExpNode* answer = ToNode(
-          0, new_max, is_greedy, body, compiler, on_success, true);
-      // Unroll the forced matches from 0 to min.  This can cause chains of
-      // TextNodes (which the parser does not generate).  These should be
-      // combined if it turns out they hinder good code generation.
-      for (int i = 0; i < min; i++) {
-        answer = body->ToNode(compiler, answer);
-      }
-      return answer;
-    }
-    if (max <= kMaxUnrolledMaxMatches) {
-      ASSERT(min == 0);
-      // Unroll the optional matches up to max.
-      RegExpNode* answer = on_success;
-      for (int i = 0; i < max; i++) {
-        ChoiceNode* alternation = new ChoiceNode(2);
-        if (is_greedy) {
-          alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
-                                                                      answer)));
-          alternation->AddAlternative(GuardedAlternative(on_success));
-        } else {
-          alternation->AddAlternative(GuardedAlternative(on_success));
-          alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
-                                                                      answer)));
+    {
+      RegExpExpansionLimiter limiter(
+          compiler, min + ((max != min) ? 1 : 0));
+      if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
+        int new_max = (max == kInfinity) ? max : max - min;
+        // Recurse once to get the loop or optional matches after the fixed
+        // ones.
+        RegExpNode* answer = ToNode(
+            0, new_max, is_greedy, body, compiler, on_success, true);
+        // Unroll the forced matches from 0 to min.  This can cause chains of
+        // TextNodes (which the parser does not generate).  These should be
+        // combined if it turns out they hinder good code generation.
+        for (int i = 0; i < min; i++) {
+          answer = body->ToNode(compiler, answer);
         }
-        answer = alternation;
-        if (not_at_start) alternation->set_not_at_start();
+        return answer;
       }
-      return answer;
+    }
+    if (max <= kMaxUnrolledMaxMatches && min == 0) {
+      ASSERT(max > 0);  // Due to the 'if' above.
+      RegExpExpansionLimiter limiter(compiler, max);
+      if (limiter.ok_to_expand()) {
+        // Unroll the optional matches up to max.
+        RegExpNode* answer = on_success;
+        for (int i = 0; i < max; i++) {
+          ChoiceNode* alternation = new ChoiceNode(2);
+          if (is_greedy) {
+            alternation->AddAlternative(
+                GuardedAlternative(body->ToNode(compiler, answer)));
+            alternation->AddAlternative(GuardedAlternative(on_success));
+          } else {
+            alternation->AddAlternative(GuardedAlternative(on_success));
+            alternation->AddAlternative(
+                GuardedAlternative(body->ToNode(compiler, answer)));
+          }
+          answer = alternation;
+          if (not_at_start) alternation->set_not_at_start();
+        }
+        return answer;
+      }
     }
   }
   bool has_min = min > 0;
@@ -4126,12 +4179,6 @@
 }
 
 
-static void AddUncanonicals(Isolate* isolate,
-                            ZoneList<CharacterRange>* ranges,
-                            int bottom,
-                            int top);
-
-
 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
                                         bool is_ascii) {
   Isolate* isolate = Isolate::Current();
@@ -4292,101 +4339,6 @@
 }
 
 
-static void AddUncanonicals(Isolate* isolate,
-                            ZoneList<CharacterRange>* ranges,
-                            int bottom,
-                            int top) {
-  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  // Zones with no case mappings.  There is a DEBUG-mode loop to assert that
-  // this table is correct.
-  // 0x0600 - 0x0fff
-  // 0x1100 - 0x1cff
-  // 0x2000 - 0x20ff
-  // 0x2200 - 0x23ff
-  // 0x2500 - 0x2bff
-  // 0x2e00 - 0xa5ff
-  // 0xa800 - 0xfaff
-  // 0xfc00 - 0xfeff
-  const int boundary_count = 18;
-  int boundaries[] = {
-      0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
-      0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
-
-  // Special ASCII rule from spec can save us some work here.
-  if (bottom == 0x80 && top == 0xffff) return;
-
-  if (top <= boundaries[0]) {
-    CharacterRange range(bottom, top);
-    range.AddCaseEquivalents(ranges, false);
-    return;
-  }
-
-  // Split up very large ranges.  This helps remove ranges where there are no
-  // case mappings.
-  for (int i = 0; i < boundary_count; i++) {
-    if (bottom < boundaries[i] && top >= boundaries[i]) {
-      AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
-      AddUncanonicals(isolate, ranges, boundaries[i], top);
-      return;
-    }
-  }
-
-  // If we are completely in a zone with no case mappings then we are done.
-  for (int i = 0; i < boundary_count; i += 2) {
-    if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
-#ifdef DEBUG
-      for (int j = bottom; j <= top; j++) {
-        unsigned current_char = j;
-        int length = isolate->jsregexp_uncanonicalize()->get(current_char,
-                                                             '\0', chars);
-        for (int k = 0; k < length; k++) {
-          ASSERT(chars[k] == current_char);
-        }
-      }
-#endif
-      return;
-    }
-  }
-
-  // Step through the range finding equivalent characters.
-  ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
-  for (int i = bottom; i <= top; i++) {
-    int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
-    for (int j = 0; j < length; j++) {
-      uc32 chr = chars[j];
-      if (chr != i && (chr < bottom || chr > top)) {
-        characters->Add(chr);
-      }
-    }
-  }
-
-  // Step through the equivalent characters finding simple ranges and
-  // adding ranges to the character class.
-  if (characters->length() > 0) {
-    int new_from = characters->at(0);
-    int new_to = new_from;
-    for (int i = 1; i < characters->length(); i++) {
-      int chr = characters->at(i);
-      if (chr == new_to + 1) {
-        new_to++;
-      } else {
-        if (new_to == new_from) {
-          ranges->Add(CharacterRange::Singleton(new_from));
-        } else {
-          ranges->Add(CharacterRange(new_from, new_to));
-        }
-        new_from = new_to = chr;
-      }
-    }
-    if (new_to == new_from) {
-      ranges->Add(CharacterRange::Singleton(new_from));
-    } else {
-      ranges->Add(CharacterRange(new_from, new_to));
-    }
-  }
-}
-
-
 ZoneList<CharacterRange>* CharacterSet::ranges() {
   if (ranges_ == NULL) {
     ranges_ = new ZoneList<CharacterRange>(2);
commit	7b26015ac58e54e88f4214e248f772ad4f055477	[log] [tgz]
author	whesse@chromium.org <whesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>	Mon Jun 20 15:33:18 2011 +0000
committer	whesse@chromium.org <whesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>	Mon Jun 20 15:33:18 2011 +0000
tree	9ee8fa2ae804bbb24150a9e0e056913a709eb72a
parent	6d786c9805481bd13ecb29c3155540f2f32950e1 [diff] [blame]