src/jsregexp.cc - fp2-dev/platform/external/chromium_org/v8 - Gitiles

 // Copyright 2006-2008 the V8 project authors. All rights reserved.
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 //       notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 //       copyright notice, this list of conditions and the following
 //       disclaimer in the documentation and/or other materials provided
 //       with the distribution.
 //     * Neither the name of Google Inc. nor the names of its
 //       contributors may be used to endorse or promote products derived
 //       from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "v8.h"

 #include "ast.h"
 #include "execution.h"
 #include "factory.h"
 #include "jsregexp-inl.h"
 #include "platform.h"
 #include "runtime.h"
 #include "top.h"
 #include "compilation-cache.h"
 #include "string-stream.h"
 #include "parser.h"
 #include "regexp-macro-assembler.h"
 #include "regexp-macro-assembler-tracer.h"
 #include "regexp-macro-assembler-irregexp.h"

 #ifdef ARM
 #include "regexp-macro-assembler-arm.h"
 #else  // IA32
 #include "macro-assembler-ia32.h"
 #include "regexp-macro-assembler-ia32.h"
 #endif

 #include "interpreter-irregexp.h"

 // Including pcre.h undefines DEBUG to avoid getting debug output from
 // the JSCRE implementation. Make sure to redefine it in debug mode
 // after having included the header file.
 #ifdef DEBUG
 #include "third_party/jscre/pcre.h"
 #define DEBUG
 #else
 #include "third_party/jscre/pcre.h"
 #endif


 namespace v8 { namespace internal {


 static Failure* malloc_failure;

 static void* JSREMalloc(size_t size) {
   Object* obj = Heap::AllocateByteArray(size);

   // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
   // will return NULL to the caller, performs GC there.
   // Also pass failure information to the caller.
   if (obj->IsFailure()) {
     malloc_failure = Failure::cast(obj);
     return NULL;
   }

   // Note: object is unrooted, the caller of jsRegExpCompile must
   // create a handle for the return value before doing heap allocation.
   return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
 }


 static void JSREFree(void* p) {
   USE(p);  // Do nothing, memory is garbage collected.
 }


 String* RegExpImpl::last_ascii_string_ = NULL;
 String* RegExpImpl::two_byte_cached_string_ = NULL;


 void RegExpImpl::NewSpaceCollectionPrologue() {
   // The two byte string is always in the old space.  The Ascii string may be
   // in either place.  If it is in the old space we don't need to do anything.
   if (Heap::InNewSpace(last_ascii_string_)) {
     // Invalidate the cache.
     last_ascii_string_ = NULL;
     two_byte_cached_string_ = NULL;
   }
 }


 void RegExpImpl::OldSpaceCollectionPrologue() {
   last_ascii_string_ = NULL;
   two_byte_cached_string_ = NULL;
 }


 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
                                                Handle<String> pattern,
                                                Handle<String> flags,
                                                bool* has_pending_exception) {
   // Ensure that the constructor function has been loaded.
   if (!constructor->IsLoaded()) {
     LoadLazy(constructor, has_pending_exception);
     if (*has_pending_exception) return Handle<Object>();
   }
   // Call the construct code with 2 arguments.
   Object** argv[2] = { Handle<Object>::cast(pattern).location(),
                        Handle<Object>::cast(flags).location() };
   return Execution::New(constructor, 2, argv, has_pending_exception);
 }


 // Converts a source string to a 16 bit flat string or a SlicedString containing
 // a 16 bit flat string).
 Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
   if (*subject == last_ascii_string_) {
     ASSERT(two_byte_cached_string_ != NULL);
     return Handle<String>(String::cast(two_byte_cached_string_));
   }
   Handle<String> two_byte_string = StringToTwoByte(subject);
   last_ascii_string_ = *subject;
   two_byte_cached_string_ = *two_byte_string;
   return two_byte_string;
 }


 // Converts a source string to a 16 bit flat string or a SlicedString containing
 // a 16 bit flat string).
 Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
   StringShape shape(*pattern);
   if (!pattern->IsFlat(shape)) {
     FlattenString(pattern);
     shape = StringShape(*pattern);
   }
   Handle<String> flat_string(shape.IsCons() ?
     String::cast(ConsString::cast(*pattern)->first()) :
     *pattern);
   ASSERT(flat_string->IsString());
   StringShape flat_shape(*flat_string);
   ASSERT(!flat_shape.IsCons());
   ASSERT(flat_shape.IsSequential() ||
          flat_shape.IsSliced() ||
          flat_shape.IsExternal());
   if (!flat_shape.IsAsciiRepresentation()) {
     return flat_string;
   }

   int len = flat_string->length(flat_shape);
   Handle<String> two_byte_string =
     Factory::NewRawTwoByteString(len, TENURED);
   uc16* dest = SeqTwoByteString::cast(*two_byte_string)->GetChars();
   String::WriteToFlat(*flat_string, flat_shape, dest, 0, len);
   return two_byte_string;
 }


 static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
   int flags = JSRegExp::NONE;
   StringShape shape(*str);
   for (int i = 0; i < str->length(shape); i++) {
     switch (str->Get(shape, i)) {
       case 'i':
         flags |= JSRegExp::IGNORE_CASE;
         break;
       case 'g':
         flags |= JSRegExp::GLOBAL;
         break;
       case 'm':
         flags |= JSRegExp::MULTILINE;
         break;
     }
   }
   return JSRegExp::Flags(flags);
 }


 static inline void ThrowRegExpException(Handle<JSRegExp> re,
                                         Handle<String> pattern,
                                         Handle<String> error_text,
                                         const char* message) {
   Handle<JSArray> array = Factory::NewJSArray(2);
   SetElement(array, 0, pattern);
   SetElement(array, 1, error_text);
   Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
   Top::Throw(*regexp_err);
 }


 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                                    Handle<String> pattern,
                                    Handle<String> flag_str) {
   JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
   Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
   bool in_cache = !cached.is_null();
   LOG(RegExpCompileEvent(re, in_cache));

   Handle<Object> result;
   if (in_cache) {
     re->set_data(*cached);
     result = re;
   } else {
     FlattenString(pattern);
     ZoneScope zone_scope(DELETE_ON_EXIT);
     RegExpParseResult parse_result;
     FlatStringReader reader(pattern);
     if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
       // Throw an exception if we fail to parse the pattern.
       ThrowRegExpException(re,
                            pattern,
                            parse_result.error,
                            "malformed_regexp");
       return Handle<Object>();
     }
     RegExpAtom* atom = parse_result.tree->AsAtom();
     if (atom != NULL && !flags.is_ignore_case()) {
       if (parse_result.has_character_escapes) {
         Vector<const uc16> atom_pattern = atom->data();
         Handle<String> atom_string =
             Factory::NewStringFromTwoByte(atom_pattern);
         result = AtomCompile(re, pattern, flags, atom_string);
       } else {
         result = AtomCompile(re, pattern, flags, pattern);
       }
     } else {
       RegExpNode* node = NULL;
       Handle<FixedArray> irregexp_data =
           RegExpEngine::Compile(&parse_result,
                                 &node,
                                 flags.is_ignore_case(),
                                 flags.is_multiline());
       if (irregexp_data.is_null()) {
         if (FLAG_disable_jscre) {
           UNIMPLEMENTED();
         }
         result = JscrePrepare(re, pattern, flags);
       } else {
         result = IrregexpPrepare(re, pattern, flags, irregexp_data);
       }
     }
     Object* data = re->data();
     if (data->IsFixedArray()) {
       // If compilation succeeded then the data is set on the regexp
       // and we can store it in the cache.
       Handle<FixedArray> data(FixedArray::cast(re->data()));
       CompilationCache::PutRegExp(pattern, flags, data);
     }
   }

   return result;
 }


 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
                                 Handle<String> subject,
                                 Handle<Object> index) {
   switch (regexp->TypeTag()) {
     case JSRegExp::JSCRE:
       if (FLAG_disable_jscre) {
         UNIMPLEMENTED();
       }
       return JscreExec(regexp, subject, index);
     case JSRegExp::ATOM:
       return AtomExec(regexp, subject, index);
     case JSRegExp::IRREGEXP:
       return IrregexpExec(regexp, subject, index);
     default:
       UNREACHABLE();
       return Handle<Object>();
   }
 }


 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                 Handle<String> subject) {
   switch (regexp->TypeTag()) {
     case JSRegExp::JSCRE:
       if (FLAG_disable_jscre) {
         UNIMPLEMENTED();
       }
       return JscreExecGlobal(regexp, subject);
     case JSRegExp::ATOM:
       return AtomExecGlobal(regexp, subject);
     case JSRegExp::IRREGEXP:
       return IrregexpExecGlobal(regexp, subject);
     default:
       UNREACHABLE();
       return Handle<Object>();
   }
 }


 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
                                        Handle<String> pattern,
                                        JSRegExp::Flags flags,
                                        Handle<String> match_pattern) {
   Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
   return re;
 }


 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
                                     Handle<String> subject,
                                     Handle<Object> index) {
   Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));

   uint32_t start_index;
   if (!Array::IndexFromObject(*index, &start_index)) {
     return Handle<Smi>(Smi::FromInt(-1));
   }

   LOG(RegExpExecEvent(re, start_index, subject));
   int value = Runtime::StringMatch(subject, needle, start_index);
   if (value == -1) return Factory::null_value();

   Handle<FixedArray> array = Factory::NewFixedArray(2);
   array->set(0, Smi::FromInt(value));
   array->set(1, Smi::FromInt(value + needle->length()));
   return Factory::NewJSArrayWithElements(array);
 }


 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
                                           Handle<String> subject) {
   Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
   Handle<JSArray> result = Factory::NewJSArray(1);
   int index = 0;
   int match_count = 0;
   int subject_length = subject->length();
   int needle_length = needle->length();
   while (true) {
     LOG(RegExpExecEvent(re, index, subject));
     int value = -1;
     if (index + needle_length <= subject_length) {
       value = Runtime::StringMatch(subject, needle, index);
     }
     if (value == -1) break;
     HandleScope scope;
     int end = value + needle_length;

     Handle<FixedArray> array = Factory::NewFixedArray(2);
     array->set(0, Smi::FromInt(value));
     array->set(1, Smi::FromInt(end));
     Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
     SetElement(result, match_count, pair);
     match_count++;
     index = end;
     if (needle_length == 0) index++;
   }
   return result;
 }


 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
                                        Handle<String> pattern,
                                        JSRegExp::Flags flags) {
   Handle<Object> value(Heap::undefined_value());
   Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
   return re;
 }


 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
                                           Handle<String> pattern,
                                           JSRegExp::Flags flags,
                                           Handle<FixedArray> irregexp_data) {
   Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data);
   return re;
 }


 static inline Object* DoCompile(String* pattern,
                                 JSRegExp::Flags flags,
                                 unsigned* number_of_captures,
                                 const char** error_message,
                                 v8::jscre::JscreRegExp** code) {
   v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
     ? v8::jscre::JSRegExpIgnoreCase
     : v8::jscre::JSRegExpDoNotIgnoreCase;
   v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline()
     ? v8::jscre::JSRegExpMultiline
     : v8::jscre::JSRegExpSingleLine;
   *error_message = NULL;
   malloc_failure = Failure::Exception();
   *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(),
                                      pattern->length(),
                                      case_option,
                                      multiline_option,
                                      number_of_captures,
                                      error_message,
                                      &JSREMalloc,
                                      &JSREFree);
   if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||
                        malloc_failure->IsOutOfMemoryFailure())) {
     return malloc_failure;
   } else {
     // It doesn't matter which object we return here, we just need to return
     // a non-failure to indicate to the GC-retry code that there was no
     // allocation failure.
     return pattern;
   }
 }


 void CompileWithRetryAfterGC(Handle<String> pattern,
                              JSRegExp::Flags flags,
                              unsigned* number_of_captures,
                              const char** error_message,
                              v8::jscre::JscreRegExp** code) {
   CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
                                     flags,
                                     number_of_captures,
                                     error_message,
                                     code));
 }


 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
   ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
   ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());

   Handle<String> pattern(re->Pattern());
   JSRegExp::Flags flags = re->GetFlags();

   Handle<String> two_byte_pattern = StringToTwoByte(pattern);

   unsigned number_of_captures;
   const char* error_message = NULL;

   v8::jscre::JscreRegExp* code = NULL;
   FlattenString(pattern);

   CompileWithRetryAfterGC(two_byte_pattern,
                           flags,
                           &number_of_captures,
                           &error_message,
                           &code);

   if (code == NULL) {
     // Throw an exception.
     Handle<JSArray> array = Factory::NewJSArray(2);
     SetElement(array, 0, pattern);
     SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
         (error_message == NULL) ? "Unknown regexp error" : error_message)));
     Handle<Object> regexp_err =
         Factory::NewSyntaxError("malformed_regexp", array);
     Top::Throw(*regexp_err);
     return Handle<Object>();
   }

   // Convert the return address to a ByteArray pointer.
   Handle<ByteArray> internal(
       ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));

   Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
   value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
   value->set(kJscreInternalIndex, *internal);
   Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);

   return re;
 }


 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
                                             int num_captures,
                                             Handle<String> two_byte_subject,
                                             int previous_index,
                                             int* offsets_vector,
                                             int offsets_vector_length) {
 #ifdef DEBUG
   if (FLAG_trace_regexp_bytecodes) {
     String* pattern = regexp->Pattern();
     PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
     PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
   }
 #endif
   ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
   ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
   bool rc;

   for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
     offsets_vector[i] = -1;
   }

   LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));

   FixedArray* irregexp =
       FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
   int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();

   switch (tag) {
     case RegExpMacroAssembler::kIA32Implementation: {
 #ifndef ARM
       Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
       Address start_addr =
           Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
       int string_offset =
           start_addr - reinterpret_cast<Address>(*two_byte_subject);
       int start_offset = string_offset + previous_index * sizeof(uc16);
       int end_offset =
           string_offset + two_byte_subject->length() * sizeof(uc16);
       rc = RegExpMacroAssemblerIA32::Execute(code,
                                              two_byte_subject.location(),
                                              start_offset,
                                              end_offset,
                                              offsets_vector,
                                              previous_index == 0);
       if (rc) {
         // Capture values are relative to start_offset only.
         for (int i = 0; i < offsets_vector_length; i++) {
           if (offsets_vector[i] >= 0) {
             offsets_vector[i] += previous_index;
           }
         }
       }
       break;
 #else
       UNIMPLEMENTED();
       rc = false;
       break;
 #endif
     }
     case RegExpMacroAssembler::kBytecodeImplementation: {
       Handle<ByteArray> byte_codes = IrregexpCode(regexp);

       rc = IrregexpInterpreter::Match(byte_codes,
                                       two_byte_subject,
                                       offsets_vector,
                                       previous_index);
       break;
     }
     case RegExpMacroAssembler::kARMImplementation:
     default:
       UNREACHABLE();
       rc = false;
       break;
   }

   if (!rc) {
     return Factory::null_value();
   }

   Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
   // The captures come in (start, end+1) pairs.
   for (int i = 0; i < 2 * (num_captures+1); i += 2) {
     array->set(i, Smi::FromInt(offsets_vector[i]));
     array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
   }
   return Factory::NewJSArrayWithElements(array);
 }


 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
                                          int num_captures,
                                          Handle<String> subject,
                                          int previous_index,
                                          const uc16* two_byte_subject,
                                          int* offsets_vector,
                                          int offsets_vector_length) {
   int rc;
   {
     AssertNoAllocation a;
     ByteArray* internal = JscreInternal(regexp);
     const v8::jscre::JscreRegExp* js_regexp =
         reinterpret_cast<v8::jscre::JscreRegExp*>(
             internal->GetDataStartAddress());

     LOG(RegExpExecEvent(regexp, previous_index, subject));

     rc = v8::jscre::jsRegExpExecute(js_regexp,
                                     two_byte_subject,
                                     subject->length(),
                                     previous_index,
                                     offsets_vector,
                                     offsets_vector_length);
   }

   // The KJS JavaScript engine returns null (ie, a failed match) when
   // JSRE's internal match limit is exceeded.  We duplicate that behavior here.
   if (rc == v8::jscre::JSRegExpErrorNoMatch
       || rc == v8::jscre::JSRegExpErrorHitLimit) {
     return Factory::null_value();
   }

   // Other JSRE errors:
   if (rc < 0) {
     // Throw an exception.
     Handle<Object> code(Smi::FromInt(rc));
     Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
     Handle<Object> regexp_err(
         Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
     return Handle<Object>(Top::Throw(*regexp_err));
   }

   Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
   // The captures come in (start, end+1) pairs.
   for (int i = 0; i < 2 * (num_captures+1); i += 2) {
     array->set(i, Smi::FromInt(offsets_vector[i]));
     array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
   }
   return Factory::NewJSArrayWithElements(array);
 }


 class OffsetsVector {
  public:
   inline OffsetsVector(int num_registers)
       : offsets_vector_length_(num_registers) {
     if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
       vector_ = NewArray<int>(offsets_vector_length_);
     } else {
       vector_ = static_offsets_vector_;
     }
   }


   inline ~OffsetsVector() {
     if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
       DeleteArray(vector_);
       vector_ = NULL;
     }
   }


   inline int* vector() {
     return vector_;
   }


   inline int length() {
     return offsets_vector_length_;
   }

  private:
   int* vector_;
   int offsets_vector_length_;
   static const int kStaticOffsetsVectorSize = 50;
   static int static_offsets_vector_[kStaticOffsetsVectorSize];
 };


 int OffsetsVector::static_offsets_vector_[
     OffsetsVector::kStaticOffsetsVectorSize];


 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
                                         Handle<String> subject,
                                         Handle<Object> index) {
   ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
   ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());

   // Prepare space for the return values.
   int number_of_registers = IrregexpNumberOfRegisters(regexp);
   OffsetsVector offsets(number_of_registers);

   int num_captures = IrregexpNumberOfCaptures(regexp);

   int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

   Handle<String> subject16 = CachedStringToTwoByte(subject);

   Handle<Object> result(IrregexpExecOnce(regexp,
                                          num_captures,
                                          subject16,
                                          previous_index,
                                          offsets.vector(),
                                          offsets.length()));
   return result;
 }


 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      Handle<Object> index) {
   ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
   if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
     Handle<Object> compile_result = JscreCompile(regexp);
     if (compile_result.is_null()) return compile_result;
   }
   ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

   int num_captures = JscreNumberOfCaptures(regexp);

   OffsetsVector offsets((num_captures + 1) * 3);

   int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

   Handle<String> subject16 = CachedStringToTwoByte(subject);

   Handle<Object> result(JscreExecOnce(regexp,
                                       num_captures,
                                       subject,
                                       previous_index,
                                       subject16->GetTwoByteData(),
                                       offsets.vector(),
                                       offsets.length()));

   return result;
 }


 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
                                               Handle<String> subject) {
   ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
   ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());

   // Prepare space for the return values.
   int number_of_registers = IrregexpNumberOfRegisters(regexp);
   OffsetsVector offsets(number_of_registers);

   int previous_index = 0;

   Handle<JSArray> result = Factory::NewJSArray(0);
   int i = 0;
   Handle<Object> matches;

   Handle<String> subject16 = CachedStringToTwoByte(subject);

   do {
     if (previous_index > subject->length() || previous_index < 0) {
       // Per ECMA-262 15.10.6.2, if the previous index is greater than the
       // string length, there is no match.
       matches = Factory::null_value();
     } else {
       matches = IrregexpExecOnce(regexp,
                                  IrregexpNumberOfCaptures(regexp),
                                  subject16,
                                  previous_index,
                                  offsets.vector(),
                                  offsets.length());

       if (matches->IsJSArray()) {
         SetElement(result, i, matches);
         i++;
         previous_index = offsets.vector()[1];
         if (offsets.vector()[0] == offsets.vector()[1]) {
           previous_index++;
         }
       }
     }
   } while (matches->IsJSArray());

   // If we exited the loop with an exception, throw it.
   if (matches->IsNull()) {
     // Exited loop normally.
     return result;
   } else {
     // Exited loop with the exception in matches.
     return matches;
   }
 }


 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
                                            Handle<String> subject) {
   ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
   if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
     Handle<Object> compile_result = JscreCompile(regexp);
     if (compile_result.is_null()) return compile_result;
   }
   ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

   // Prepare space for the return values.
   int num_captures = JscreNumberOfCaptures(regexp);

   OffsetsVector offsets((num_captures + 1) * 3);

   int previous_index = 0;

   Handle<JSArray> result = Factory::NewJSArray(0);
   int i = 0;
   Handle<Object> matches;

   Handle<String> subject16 = CachedStringToTwoByte(subject);

   do {
     if (previous_index > subject->length() || previous_index < 0) {
       // Per ECMA-262 15.10.6.2, if the previous index is greater than the
       // string length, there is no match.
       matches = Factory::null_value();
     } else {
       matches = JscreExecOnce(regexp,
                               num_captures,
                               subject,
                               previous_index,
                               subject16->GetTwoByteData(),
                               offsets.vector(),
                               offsets.length());

       if (matches->IsJSArray()) {
         SetElement(result, i, matches);
         i++;
         previous_index = offsets.vector()[1];
         if (offsets.vector()[0] == offsets.vector()[1]) {
           previous_index++;
         }
       }
     }
   } while (matches->IsJSArray());

   // If we exited the loop with an exception, throw it.
   if (matches->IsNull()) {
     // Exited loop normally.
     return result;
   } else {
     // Exited loop with the exception in matches.
     return matches;
   }
 }


 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
   FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
   return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
 }


 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
   FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
   return ByteArray::cast(value->get(kJscreInternalIndex));
 }


 int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
   FixedArray* value =
       FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
   return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
 }


 int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
   FixedArray* value =
       FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
   return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
 }


 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
   FixedArray* value =
       FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
   return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
 }


 // -------------------------------------------------------------------
 // Implmentation of the Irregexp regular expression engine.


 void RegExpTree::AppendToText(RegExpText* text) {
   UNREACHABLE();
 }


 void RegExpAtom::AppendToText(RegExpText* text) {
   text->AddElement(TextElement::Atom(this));
 }


 void RegExpCharacterClass::AppendToText(RegExpText* text) {
   text->AddElement(TextElement::CharClass(this));
 }


 void RegExpText::AppendToText(RegExpText* text) {
   for (int i = 0; i < elements()->length(); i++)
     text->AddElement(elements()->at(i));
 }


 TextElement TextElement::Atom(RegExpAtom* atom) {
   TextElement result = TextElement(ATOM);
   result.data.u_atom = atom;
   return result;
 }


 TextElement TextElement::CharClass(
       RegExpCharacterClass* char_class) {
   TextElement result = TextElement(CHAR_CLASS);
   result.data.u_char_class = char_class;
   return result;
 }


 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
   if (table_ == NULL) {
     table_ = new DispatchTable();
     DispatchTableConstructor cons(table_, ignore_case);
     cons.BuildTable(this);
   }
   return table_;
 }


 class RegExpCompiler {
  public:
   RegExpCompiler(int capture_count, bool ignore_case);

   int AllocateRegister() { return next_register_++; }

   Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
                               RegExpNode* start,
                               int capture_count);

   inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

   static const int kImplementationOffset = 0;
   static const int kNumberOfRegistersOffset = 0;
   static const int kCodeOffset = 1;

   RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
   EndNode* accept() { return accept_; }
   EndNode* backtrack() { return backtrack_; }

   static const int kMaxRecursion = 100;
   inline int recursion_depth() { return recursion_depth_; }
   inline void IncrementRecursionDepth() { recursion_depth_++; }
   inline void DecrementRecursionDepth() { recursion_depth_--; }

   inline bool ignore_case() { return ignore_case_; }

  private:
   EndNode* accept_;
   EndNode* backtrack_;
   int next_register_;
   List<RegExpNode*>* work_list_;
   int recursion_depth_;
   RegExpMacroAssembler* macro_assembler_;
   bool ignore_case_;
 };


 // Attempts to compile the regexp using an Irregexp code generator.  Returns
 // a fixed array or a null handle depending on whether it succeeded.
 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
     : next_register_(2 * (capture_count + 1)),
       work_list_(NULL),
       recursion_depth_(0),
       ignore_case_(ignore_case) {
   accept_ = new EndNode(EndNode::ACCEPT);
   backtrack_ = new EndNode(EndNode::BACKTRACK);
 }


 Handle<FixedArray> RegExpCompiler::Assemble(
     RegExpMacroAssembler* macro_assembler,
     RegExpNode* start,
     int capture_count) {
 #ifdef DEBUG
   if (FLAG_trace_regexp_assembler)
     macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
   else
 #endif
     macro_assembler_ = macro_assembler;
   List <RegExpNode*> work_list(0);
   work_list_ = &work_list;
   Label fail;
   macro_assembler_->PushBacktrack(&fail);
   if (!start->GoTo(this)) {
     fail.Unuse();
     return Handle<FixedArray>::null();
   }
   while (!work_list.is_empty()) {
     if (!work_list.RemoveLast()->GoTo(this)) {
       fail.Unuse();
       return Handle<FixedArray>::null();
     }
   }
   macro_assembler_->Bind(&fail);
   macro_assembler_->Fail();
   Handle<FixedArray> array =
       Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
   array->set(RegExpImpl::kIrregexpImplementationIndex,
              Smi::FromInt(macro_assembler_->Implementation()));
   array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
              Smi::FromInt(next_register_));
   array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
              Smi::FromInt(capture_count));
   Handle<Object> code = macro_assembler_->GetCode();
   array->set(RegExpImpl::kIrregexpCodeIndex, *code);
   work_list_ = NULL;
 #ifdef DEBUG
   if (FLAG_trace_regexp_assembler) {
     delete macro_assembler_;
   }
 #endif
   return array;
 }


 bool RegExpNode::GoTo(RegExpCompiler* compiler) {
   // TODO(erikcorry): Implement support.
   if (info_.follows_word_interest ||
       info_.follows_newline_interest ||
       info_.follows_start_interest) {
     return false;
   }
   if (label_.is_bound()) {
     compiler->macro_assembler()->GoTo(&label_);
     return true;
   } else {
     if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) {
       compiler->macro_assembler()->GoTo(&label_);
       compiler->AddWork(this);
       return true;
     } else {
       compiler->IncrementRecursionDepth();
       bool how_it_went = Emit(compiler);
       compiler->DecrementRecursionDepth();
       return how_it_went;
     }
   }
 }


 // EndNodes are special.  Because they can be very common and they are very
 // short we normally inline them.  That is, if we are asked to emit a GoTo
 // we just emit the entire node.  Since they don't have successors this
 // works.
 bool EndNode::GoTo(RegExpCompiler* compiler) {
   if (info()->follows_word_interest ||
       info()->follows_newline_interest ||
       info()->follows_start_interest) {
     return false;
   }
   return Emit(compiler);
 }


 Label* RegExpNode::label() {
   return &label_;
 }


 bool EndNode::Emit(RegExpCompiler* compiler) {
   RegExpMacroAssembler* macro = compiler->macro_assembler();
   switch (action_) {
     case ACCEPT:
       if (!label()->is_bound()) Bind(macro);
       if (info()->at_end) {
         Label succeed;
         // LoadCurrentCharacter will go to the label if we are at the end of the
         // input string.
         macro->LoadCurrentCharacter(0, &succeed);
         macro->Backtrack();
         macro->Bind(&succeed);
       }
       macro->Succeed();
       return true;
     case BACKTRACK:
       if (!label()->is_bound()) Bind(macro);
       ASSERT(!info()->at_end);
       macro->Backtrack();
       return true;
   }
   return false;
 }


 void GuardedAlternative::AddGuard(Guard* guard) {
   if (guards_ == NULL)
     guards_ = new ZoneList<Guard*>(1);
   guards_->Add(guard);
 }


 ActionNode* ActionNode::StoreRegister(int reg,
                                       int val,
                                       RegExpNode* on_success) {
   ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
   result->data_.u_store_register.reg = reg;
   result->data_.u_store_register.value = val;
   return result;
 }


 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
   ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
   result->data_.u_increment_register.reg = reg;
   return result;
 }


 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
   ActionNode* result = new ActionNode(STORE_POSITION, on_success);
   result->data_.u_position_register.reg = reg;
   return result;
 }


 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
   ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
   result->data_.u_position_register.reg = reg;
   return result;
 }


 ActionNode* ActionNode::BeginSubmatch(int stack_reg,
                                       int position_reg,
                                       RegExpNode* on_success) {
   ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
   result->data_.u_submatch.stack_pointer_register = stack_reg;
   result->data_.u_submatch.current_position_register = position_reg;
   return result;
 }


 ActionNode* ActionNode::EscapeSubmatch(int stack_reg,
                                        bool restore_position,
                                        int position_reg,
                                        RegExpNode* on_success) {
   ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
   result->data_.u_submatch.stack_pointer_register = stack_reg;
   if (restore_position) {
     result->data_.u_submatch.current_position_register = position_reg;
   } else {
     result->data_.u_submatch.current_position_register = -1;
   }
   return result;
 }


 #define DEFINE_ACCEPT(Type)                                          \
   void Type##Node::Accept(NodeVisitor* visitor) {                    \
     visitor->Visit##Type(this);                                      \
   }
 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
 #undef DEFINE_ACCEPT


 // -------------------------------------------------------------------
 // Emit code.


 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
                                Guard* guard,
                                Label* on_failure) {
   switch (guard->op()) {
     case Guard::LT:
       macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure);
       break;
     case Guard::GEQ:
       macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure);
       break;
   }
 }


 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;


 static inline void EmitAtomNonLetters(
     RegExpMacroAssembler* macro_assembler,
     TextElement elm,
     Vector<const uc16> quarks,
     Label* on_failure,
     int cp_offset) {
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   for (int i = quarks.length() - 1; i >= 0; i--) {
     uc16 c = quarks[i];
     int length = uncanonicalize.get(c, '\0', chars);
     if (length <= 1) {
       macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
       macro_assembler->CheckNotCharacter(c, on_failure);
     }
   }
 }


 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
                                       uc16 c1,
                                       uc16 c2,
                                       Label* on_failure) {
   uc16 exor = c1 ^ c2;
   // Check whether exor has only one bit set.
   if (((exor - 1) & exor) == 0) {
     // If c1 and c2 differ only by one bit.
     // Ecma262UnCanonicalize always gives the highest number last.
     ASSERT(c2 > c1);
     macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
     return true;
   }
   ASSERT(c2 > c1);
   uc16 diff = c2 - c1;
   if (((diff - 1) & diff) == 0 && c1 >= diff) {
     // If the characters differ by 2^n but don't differ by one bit then
     // subtract the difference from the found character, then do the or
     // trick.  We avoid the theoretical case where negative numbers are
     // involved in order to simplify code generation.
     macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
                                                    diff,
                                                    on_failure);
     return true;
   }
   return false;
 }


 static inline void EmitAtomLetters(
     RegExpMacroAssembler* macro_assembler,
     TextElement elm,
     Vector<const uc16> quarks,
     Label* on_failure,
     int cp_offset) {
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   for (int i = quarks.length() - 1; i >= 0; i--) {
     uc16 c = quarks[i];
     int length = uncanonicalize.get(c, '\0', chars);
     if (length <= 1) continue;
     macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
     Label ok;
     ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
     switch (length) {
       case 2: {
         if (ShortCutEmitCharacterPair(macro_assembler,
                                       chars[0],
                                       chars[1],
                                       on_failure)) {
           ok.Unuse();
         } else {
           macro_assembler->CheckCharacter(chars[0], &ok);
           macro_assembler->CheckNotCharacter(chars[1], on_failure);
           macro_assembler->Bind(&ok);
         }
         break;
       }
       case 4:
         macro_assembler->CheckCharacter(chars[3], &ok);
         // Fall through!
       case 3:
         macro_assembler->CheckCharacter(chars[0], &ok);
         macro_assembler->CheckCharacter(chars[1], &ok);
         macro_assembler->CheckNotCharacter(chars[2], on_failure);
         macro_assembler->Bind(&ok);
         break;
       default:
         UNREACHABLE();
         break;
     }
   }
 }


 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
                           RegExpCharacterClass* cc,
                           int cp_offset,
                           Label* on_failure) {
   macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
   cp_offset++;

   ZoneList<CharacterRange>* ranges = cc->ranges();

   Label success;

   Label* char_is_in_class =
       cc->is_negated() ? on_failure : &success;

   int range_count = ranges->length();

   if (range_count == 0) {
     if (!cc->is_negated()) {
       macro_assembler->GoTo(on_failure);
     }
     return;
   }

   for (int i = 0; i < range_count - 1; i++) {
     CharacterRange& range = ranges->at(i);
     Label next_range;
     uc16 from = range.from();
     uc16 to = range.to();
     if (to == from) {
       macro_assembler->CheckCharacter(to, char_is_in_class);
     } else {
       if (from != 0) {
         macro_assembler->CheckCharacterLT(from, &next_range);
       }
       if (to != 0xffff) {
         macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
       } else {
         macro_assembler->GoTo(char_is_in_class);
       }
     }
     macro_assembler->Bind(&next_range);
   }

   CharacterRange& range = ranges->at(range_count - 1);
   uc16 from = range.from();
   uc16 to = range.to();

   if (to == from) {
     if (cc->is_negated()) {
       macro_assembler->CheckCharacter(to, on_failure);
     } else {
       macro_assembler->CheckNotCharacter(to, on_failure);
     }
   } else {
     if (from != 0) {
       if (cc->is_negated()) {
         macro_assembler->CheckCharacterLT(from, &success);
       } else {
         macro_assembler->CheckCharacterLT(from, on_failure);
       }
     }
     if (to != 0xffff) {
       if (cc->is_negated()) {
         macro_assembler->CheckCharacterLT(to + 1, on_failure);
       } else {
         macro_assembler->CheckCharacterGT(to, on_failure);
       }
     } else {
       if (cc->is_negated()) {
         macro_assembler->GoTo(on_failure);
       }
     }
   }
   macro_assembler->Bind(&success);
 }


 bool TextNode::Emit(RegExpCompiler* compiler) {
   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
   Bind(macro_assembler);
   int element_count = elms_->length();
   ASSERT(element_count != 0);
   int cp_offset = 0;
   if (info()->at_end) {
     macro_assembler->Backtrack();
     return true;
   }
   // First, handle straight character matches.
   for (int i = 0; i < element_count; i++) {
     TextElement elm = elms_->at(i);
     if (elm.type == TextElement::ATOM) {
       Vector<const uc16> quarks = elm.data.u_atom->data();
       if (compiler->ignore_case()) {
         EmitAtomNonLetters(macro_assembler,
                            elm,
                            quarks,
                            on_failure_->label(),
                            cp_offset);
       } else {
         macro_assembler->CheckCharacters(quarks,
                                          cp_offset,
                                          on_failure_->label());
       }
       cp_offset += quarks.length();
     } else {
       ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
       cp_offset++;
     }
   }
   // Second, handle case independent letter matches if any.
   if (compiler->ignore_case()) {
     cp_offset = 0;
     for (int i = 0; i < element_count; i++) {
       TextElement elm = elms_->at(i);
       if (elm.type == TextElement::ATOM) {
         Vector<const uc16> quarks = elm.data.u_atom->data();
         EmitAtomLetters(macro_assembler,
                         elm,
                         quarks,
                         on_failure_->label(),
                         cp_offset);
         cp_offset += quarks.length();
       } else {
         cp_offset++;
       }
     }
   }
   // If the fast character matches passed then do the character classes.
   cp_offset = 0;
   for (int i = 0; i < element_count; i++) {
     TextElement elm = elms_->at(i);
     if (elm.type == TextElement::CHAR_CLASS) {
       RegExpCharacterClass* cc = elm.data.u_char_class;
       EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());
       cp_offset++;
     } else {
       cp_offset += elm.data.u_atom->data().length();
     }
   }

   compiler->AddWork(on_failure_);
   macro_assembler->AdvanceCurrentPosition(cp_offset);
   return on_success()->GoTo(compiler);
 }


 void TextNode::MakeCaseIndependent() {
   int element_count = elms_->length();
   for (int i = 0; i < element_count; i++) {
     TextElement elm = elms_->at(i);
     if (elm.type == TextElement::CHAR_CLASS) {
       RegExpCharacterClass* cc = elm.data.u_char_class;
       ZoneList<CharacterRange>* ranges = cc->ranges();
       int range_count = ranges->length();
       for (int i = 0; i < range_count; i++) {
         ranges->at(i).AddCaseEquivalents(ranges);
       }
     }
   }
 }


 bool ChoiceNode::Emit(RegExpCompiler* compiler) {
   int choice_count = alternatives_->length();
   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
   Bind(macro_assembler);
   // For now we just call all choices one after the other.  The idea ultimately
   // is to use the Dispatch table to try only the relevant ones.
   for (int i = 0; i < choice_count - 1; i++) {
     GuardedAlternative alternative = alternatives_->at(i);
     Label after;
     Label after_no_pop_cp;
     ZoneList<Guard*>* guards = alternative.guards();
     if (guards != NULL) {
       int guard_count = guards->length();
       for (int j = 0; j < guard_count; j++) {
         GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);
       }
     }
     macro_assembler->PushCurrentPosition();
     macro_assembler->PushBacktrack(&after);
     if (!alternative.node()->GoTo(compiler)) {
       after.Unuse();
       after_no_pop_cp.Unuse();
       return false;
     }
     macro_assembler->Bind(&after);
     macro_assembler->PopCurrentPosition();
     macro_assembler->Bind(&after_no_pop_cp);
   }
   GuardedAlternative alternative = alternatives_->at(choice_count - 1);
   ZoneList<Guard*>* guards = alternative.guards();
   if (guards != NULL) {
     int guard_count = guards->length();
     for (int j = 0; j < guard_count; j++) {
       GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());
     }
   }
   if (!on_failure_->IsBacktrack()) {
     ASSERT_NOT_NULL(on_failure_ -> label());
     macro_assembler->PushBacktrack(on_failure_->label());
     compiler->AddWork(on_failure_);
   }
   if (!alternative.node()->GoTo(compiler)) {
     return false;
   }
   return true;
 }


 bool ActionNode::Emit(RegExpCompiler* compiler) {
   RegExpMacroAssembler* macro = compiler->macro_assembler();
   Bind(macro);
   switch (type_) {
     case STORE_REGISTER:
       macro->SetRegister(data_.u_store_register.reg,
                          data_.u_store_register.value);
       break;
     case INCREMENT_REGISTER: {
       Label undo;
       macro->PushBacktrack(&undo);
       macro->AdvanceRegister(data_.u_increment_register.reg, 1);
       bool ok = on_success()->GoTo(compiler);
       if (!ok) {
         undo.Unuse();
         return false;
       }
       macro->Bind(&undo);
       macro->AdvanceRegister(data_.u_increment_register.reg, -1);
       macro->Backtrack();
       break;
     }
     case STORE_POSITION: {
       Label undo;
       macro->PushRegister(data_.u_position_register.reg);
       macro->PushBacktrack(&undo);
       macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);
       bool ok = on_success()->GoTo(compiler);
       if (!ok) {
         undo.Unuse();
         return false;
       }
       macro->Bind(&undo);
       macro->PopRegister(data_.u_position_register.reg);
       macro->Backtrack();
       break;
     }
     case RESTORE_POSITION:
       macro->ReadCurrentPositionFromRegister(
           data_.u_position_register.reg);
       break;
     case BEGIN_SUBMATCH:
       macro->WriteCurrentPositionToRegister(
           data_.u_submatch.current_position_register);
       macro->WriteStackPointerToRegister(
           data_.u_submatch.stack_pointer_register);
       break;
     case ESCAPE_SUBMATCH:
       if (info()->at_end) {
         Label at_end;
         // Load current character jumps to the label if we are beyond the string
         // end.
         macro->LoadCurrentCharacter(0, &at_end);
         macro->Backtrack();
         macro->Bind(&at_end);
       }
       if (data_.u_submatch.current_position_register != -1) {
         macro->ReadCurrentPositionFromRegister(
             data_.u_submatch.current_position_register);
       }
       macro->ReadStackPointerFromRegister(
           data_.u_submatch.stack_pointer_register);
       break;
     default:
       UNREACHABLE();
       return false;
   }
   return on_success()->GoTo(compiler);
 }


 bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
   RegExpMacroAssembler* macro = compiler->macro_assembler();
   Bind(macro);
   ASSERT_EQ(start_reg_ + 1, end_reg_);
   if (info()->at_end) {
     // If we are constrained to match at the end of the input then succeed
     // iff the back reference is empty.
     macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());
   } else {
     if (compiler->ignore_case()) {
       macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
     } else {
       macro->CheckNotBackReference(start_reg_, on_failure_->label());
     }
   }
   return on_success()->GoTo(compiler);
 }


 // -------------------------------------------------------------------
 // Dot/dotty output


 #ifdef DEBUG


 class DotPrinter: public NodeVisitor {
  public:
   explicit DotPrinter(bool ignore_case)
       : ignore_case_(ignore_case),
         stream_(&alloc_) { }
   void PrintNode(const char* label, RegExpNode* node);
   void Visit(RegExpNode* node);
   void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);
   void PrintAttributes(RegExpNode* from);
   StringStream* stream() { return &stream_; }
 #define DECLARE_VISIT(Type)                                          \
   virtual void Visit##Type(Type##Node* that);
 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
 #undef DECLARE_VISIT
  private:
   bool ignore_case_;
   HeapStringAllocator alloc_;
   StringStream stream_;
 };


 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
   stream()->Add("digraph G {\n  graph [label=\"");
   for (int i = 0; label[i]; i++) {
     switch (label[i]) {
       case '\\':
         stream()->Add("\\\\");
         break;
       case '"':
         stream()->Add("\"");
         break;
       default:
         stream()->Put(label[i]);
         break;
     }
   }
   stream()->Add("\"];\n");
   Visit(node);
   stream()->Add("}\n");
   printf("%s", *(stream()->ToCString()));
 }


 void DotPrinter::Visit(RegExpNode* node) {
   if (node->info()->visited) return;
   node->info()->visited = true;
   node->Accept(this);
 }


 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
   if (on_failure->IsBacktrack()) return;
   stream()->Add("  n%p -> n%p [style=dotted];\n", from, on_failure);
   Visit(on_failure);
 }


 class TableEntryBodyPrinter {
  public:
   TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
       : stream_(stream), choice_(choice) { }
   void Call(uc16 from, DispatchTable::Entry entry) {
     OutSet* out_set = entry.out_set();
     for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
       if (out_set->Get(i)) {
         stream()->Add("    n%p:s%io%i -> n%p;\n",
                       choice(),
                       from,
                       i,
                       choice()->alternatives()->at(i).node());
       }
     }
   }
  private:
   StringStream* stream() { return stream_; }
   ChoiceNode* choice() { return choice_; }
   StringStream* stream_;
   ChoiceNode* choice_;
 };


 class TableEntryHeaderPrinter {
  public:
   explicit TableEntryHeaderPrinter(StringStream* stream)
       : first_(true), stream_(stream) { }
   void Call(uc16 from, DispatchTable::Entry entry) {
     if (first_) {
       first_ = false;
     } else {
       stream()->Add("|");
     }
     stream()->Add("{\\%k-\\%k|{", from, entry.to());
     OutSet* out_set = entry.out_set();
     int priority = 0;
     for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
       if (out_set->Get(i)) {
         if (priority > 0) stream()->Add("|");
         stream()->Add("<s%io%i> %i", from, i, priority);
         priority++;
       }
     }
     stream()->Add("}}");
   }
  private:
   bool first_;
   StringStream* stream() { return stream_; }
   StringStream* stream_;
 };


 class AttributePrinter {
  public:
   explicit AttributePrinter(DotPrinter* out)
       : out_(out), first_(true) { }
   void PrintSeparator() {
     if (first_) {
       first_ = false;
     } else {
       out_->stream()->Add("|");
     }
   }
   void PrintBit(const char* name, bool value) {
     if (!value) return;
     PrintSeparator();
     out_->stream()->Add("{%s}", name);
   }
   void PrintPositive(const char* name, int value) {
     if (value < 0) return;
     PrintSeparator();
     out_->stream()->Add("{%s|%x}", name, value);
   }
  private:
   DotPrinter* out_;
   bool first_;
 };


 void DotPrinter::PrintAttributes(RegExpNode* that) {
   stream()->Add("  a%p [shape=Mrecord, color=grey, fontcolor=grey, "
                 "margin=0.1, fontsize=10, label=\"{",
                 that);
   AttributePrinter printer(this);
   NodeInfo* info = that->info();
   printer.PrintBit("NI", info->follows_newline_interest);
   printer.PrintBit("WI", info->follows_word_interest);
   printer.PrintBit("SI", info->follows_start_interest);
   printer.PrintBit("DN", info->determine_newline);
   printer.PrintBit("DW", info->determine_word);
   printer.PrintBit("DS", info->determine_start);
   printer.PrintBit("DDN", info->does_determine_newline);
   printer.PrintBit("DDW", info->does_determine_word);
   printer.PrintBit("DDS", info->does_determine_start);
   printer.PrintPositive("IW", info->is_word);
   printer.PrintPositive("IN", info->is_newline);
   printer.PrintPositive("FN", info->follows_newline);
   printer.PrintPositive("FW", info->follows_word);
   printer.PrintPositive("FS", info->follows_start);
   Label* label = that->label();
   if (label->is_bound())
     printer.PrintPositive("@", label->pos());
   stream()->Add("}\"];\n");
   stream()->Add("  a%p -> n%p [style=dashed, color=grey, "
                 "arrowhead=none];\n", that, that);
 }


 static const bool kPrintDispatchTable = false;
 void DotPrinter::VisitChoice(ChoiceNode* that) {
   if (kPrintDispatchTable) {
     stream()->Add("  n%p [shape=Mrecord, label=\"", that);
     TableEntryHeaderPrinter header_printer(stream());
     that->GetTable(ignore_case_)->ForEach(&header_printer);
     stream()->Add("\"]\n", that);
     PrintAttributes(that);
     TableEntryBodyPrinter body_printer(stream(), that);
     that->GetTable(ignore_case_)->ForEach(&body_printer);
     PrintOnFailure(that, that->on_failure());
   } else {
     stream()->Add("  n%p [shape=Mrecord, label=\"?\"];\n", that);
     for (int i = 0; i < that->alternatives()->length(); i++) {
       GuardedAlternative alt = that->alternatives()->at(i);
       stream()->Add("  n%p -> n%p;\n", that, alt.node());
     }
   }
   for (int i = 0; i < that->alternatives()->length(); i++) {
     GuardedAlternative alt = that->alternatives()->at(i);
     alt.node()->Accept(this);
   }
 }


 void DotPrinter::VisitText(TextNode* that) {
   stream()->Add("  n%p [label=\"", that);
   for (int i = 0; i < that->elements()->length(); i++) {
     if (i > 0) stream()->Add(" ");
     TextElement elm = that->elements()->at(i);
     switch (elm.type) {
       case TextElement::ATOM: {
         stream()->Add("'%w'", elm.data.u_atom->data());
         break;
       }
       case TextElement::CHAR_CLASS: {
         RegExpCharacterClass* node = elm.data.u_char_class;
         stream()->Add("[");
         if (node->is_negated())
           stream()->Add("^");
         for (int j = 0; j < node->ranges()->length(); j++) {
           CharacterRange range = node->ranges()->at(j);
           stream()->Add("%k-%k", range.from(), range.to());
         }
         stream()->Add("]");
         break;
       }
       default:
         UNREACHABLE();
     }
   }
   stream()->Add("\", shape=box, peripheries=2];\n");
   PrintAttributes(that);
   stream()->Add("  n%p -> n%p;\n", that, that->on_success());
   Visit(that->on_success());
   PrintOnFailure(that, that->on_failure());
 }


 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
   stream()->Add("  n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
                 that,
                 that->start_register(),
                 that->end_register());
   PrintAttributes(that);
   stream()->Add("  n%p -> n%p;\n", that, that->on_success());
   Visit(that->on_success());
   PrintOnFailure(that, that->on_failure());
 }


 void DotPrinter::VisitEnd(EndNode* that) {
   stream()->Add("  n%p [style=bold, shape=point];\n", that);
   PrintAttributes(that);
 }


 void DotPrinter::VisitAction(ActionNode* that) {
   stream()->Add("  n%p [", that);
   switch (that->type_) {
     case ActionNode::STORE_REGISTER:
       stream()->Add("label=\"$%i:=%i\", shape=octagon",
                     that->data_.u_store_register.reg,
                     that->data_.u_store_register.value);
       break;
     case ActionNode::INCREMENT_REGISTER:
       stream()->Add("label=\"$%i++\", shape=octagon",
                     that->data_.u_increment_register.reg);
       break;
     case ActionNode::STORE_POSITION:
       stream()->Add("label=\"$%i:=$pos\", shape=octagon",
                     that->data_.u_position_register.reg);
       break;
     case ActionNode::RESTORE_POSITION:
       stream()->Add("label=\"$pos:=$%i\", shape=octagon",
                     that->data_.u_position_register.reg);
       break;
     case ActionNode::BEGIN_SUBMATCH:
       stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
                     that->data_.u_submatch.current_position_register);
       break;
     case ActionNode::ESCAPE_SUBMATCH:
       stream()->Add("label=\"escape\", shape=septagon");
       break;
   }
   stream()->Add("];\n");
   PrintAttributes(that);
   stream()->Add("  n%p -> n%p;\n", that, that->on_success());
   Visit(that->on_success());
 }


 class DispatchTableDumper {
  public:
   explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
   void Call(uc16 key, DispatchTable::Entry entry);
   StringStream* stream() { return stream_; }
  private:
   StringStream* stream_;
 };


 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
   stream()->Add("[%k-%k]: {", key, entry.to());
   OutSet* set = entry.out_set();
   bool first = true;
   for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
     if (set->Get(i)) {
       if (first) {
         first = false;
       } else {
         stream()->Add(", ");
       }
       stream()->Add("%i", i);
     }
   }
   stream()->Add("}\n");
 }


 void DispatchTable::Dump() {
   HeapStringAllocator alloc;
   StringStream stream(&alloc);
   DispatchTableDumper dumper(&stream);
   tree()->ForEach(&dumper);
   OS::PrintError("%s", *stream.ToCString());
 }


 void RegExpEngine::DotPrint(const char* label,
                             RegExpNode* node,
                             bool ignore_case) {
   DotPrinter printer(ignore_case);
   printer.PrintNode(label, node);
 }


 #endif  // DEBUG


 // -------------------------------------------------------------------
 // Tree to graph conversion


 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
                                RegExpNode* on_success,
                                RegExpNode* on_failure) {
   ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
   elms->Add(TextElement::Atom(this));
   return new TextNode(elms, on_success, on_failure);
 }


 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
                                RegExpNode* on_success,
                                RegExpNode* on_failure) {
   return new TextNode(elements(), on_success, on_failure);
 }


 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
                                          RegExpNode* on_success,
                                          RegExpNode* on_failure) {
   ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
   elms->Add(TextElement::CharClass(this));
   return new TextNode(elms, on_success, on_failure);
 }


 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
                                       RegExpNode* on_success,
                                       RegExpNode* on_failure) {
   ZoneList<RegExpTree*>* alternatives = this->alternatives();
   int length = alternatives->length();
   ChoiceNode* result = new ChoiceNode(length, on_failure);
   for (int i = 0; i < length; i++) {
     GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
                                                                on_success,
                                                                on_failure));
     result->AddAlternative(alternative);
   }
   return result;
 }


 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
                                      RegExpNode* on_success,
                                      RegExpNode* on_failure) {
   return ToNode(min(),
                 max(),
                 is_greedy(),
                 body(),
                 compiler,
                 on_success,
                 on_failure);
 }


 RegExpNode* RegExpQuantifier::ToNode(int min,
                                      int max,
                                      bool is_greedy,
                                      RegExpTree* body,
                                      RegExpCompiler* compiler,
                                      RegExpNode* on_success,
                                      RegExpNode* on_failure) {
   // x{f, t} becomes this:
   //
   //             (r++)<-.
   //               |     `
   //               |     (x)
   //               v     ^
   //      (r=0)-->(?)---/ [if r < t]
   //               |
   //   [if r >= f] \----> ...
   //
   //
   // TODO(someone): clear captures on repetition and handle empty
   //   matches.
   bool has_min = min > 0;
   bool has_max = max < RegExpQuantifier::kInfinity;
   bool needs_counter = has_min || has_max;
   int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
   ChoiceNode* center = new ChoiceNode(2, on_failure);
   RegExpNode* loop_return = needs_counter
       ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
       : static_cast<RegExpNode*>(center);
   RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);
   GuardedAlternative body_alt(body_node);
   if (has_max) {
     Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
     body_alt.AddGuard(body_guard);
   }
   GuardedAlternative rest_alt(on_success);
   if (has_min) {
     Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
     rest_alt.AddGuard(rest_guard);
   }
   if (is_greedy) {
     center->AddAlternative(body_alt);
     center->AddAlternative(rest_alt);
   } else {
     center->AddAlternative(rest_alt);
     center->AddAlternative(body_alt);
   }
   if (needs_counter) {
     return ActionNode::StoreRegister(reg_ctr, 0, center);
   } else {
     return center;
   }
 }


 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
                                     RegExpNode* on_success,
                                     RegExpNode* on_failure) {
   NodeInfo info;
   switch (type()) {
     case START_OF_LINE:
       info.follows_newline_interest = true;
       break;
     case START_OF_INPUT:
       info.follows_start_interest = true;
       break;
     case BOUNDARY: case NON_BOUNDARY:
       info.follows_word_interest = true;
       break;
     case END_OF_INPUT:
       info.at_end = true;
       break;
     case END_OF_LINE:
       // This is wrong but has the effect of making the compiler abort.
       info.at_end = true;
   }
   return on_success->PropagateForward(&info);
 }


 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
                                         RegExpNode* on_success,
                                         RegExpNode* on_failure) {
   return new BackReferenceNode(RegExpCapture::StartRegister(index()),
                                RegExpCapture::EndRegister(index()),
                                on_success,
                                on_failure);
 }


 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
                                 RegExpNode* on_success,
                                 RegExpNode* on_failure) {
   return on_success;
 }


 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
                                     RegExpNode* on_success,
                                     RegExpNode* on_failure) {
   int stack_pointer_register = compiler->AllocateRegister();
   int position_register = compiler->AllocateRegister();
   if (is_positive()) {
     // begin submatch scope
     // $reg = $pos
     // if [body]
     // then
     //   $pos = $reg
     //   escape submatch scope (drop all backtracks created in scope)
     //   succeed
     // else
     //   end submatch scope (nothing to clean up, just exit the scope)
     //   fail
     return ActionNode::BeginSubmatch(
         stack_pointer_register,
         position_register,
         body()->ToNode(
             compiler,
             ActionNode::EscapeSubmatch(
                 stack_pointer_register,
                 true,                    // Also restore input position.
                 position_register,
                 on_success),
             on_failure));
   } else {
     // begin submatch scope
     // try
     // first if (body)
     //       then
     //         escape submatch scope
     //         fail
     //       else
     //         backtrack
     // second
     //       end submatch scope
     //       restore current position
     //       succeed
     ChoiceNode* try_node =
         new ChoiceNode(1, ActionNode::RestorePosition(position_register,
                                                       on_success));
     RegExpNode* body_node = body()->ToNode(
         compiler,
         ActionNode::EscapeSubmatch(stack_pointer_register,
                                    false,        // Don't also restore position
                                    0,            // Unused arguments.
                                    on_failure),
         compiler->backtrack());
     GuardedAlternative body_alt(body_node);
     try_node->AddAlternative(body_alt);
     return ActionNode::BeginSubmatch(stack_pointer_register,
                                      position_register,
                                      try_node);
   }
 }


 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
                                   RegExpNode* on_success,
                                   RegExpNode* on_failure) {
   return ToNode(body(), index(), compiler, on_success, on_failure);
 }


 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
                                   int index,
                                   RegExpCompiler* compiler,
                                   RegExpNode* on_success,
                                   RegExpNode* on_failure) {
   int start_reg = RegExpCapture::StartRegister(index);
   int end_reg = RegExpCapture::EndRegister(index);
   RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
   RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);
   return ActionNode::StorePosition(start_reg, body_node);
 }


 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
                                       RegExpNode* on_success,
                                       RegExpNode* on_failure) {
   ZoneList<RegExpTree*>* children = nodes();
   RegExpNode* current = on_success;
   for (int i = children->length() - 1; i >= 0; i--) {
     current = children->at(i)->ToNode(compiler, current, on_failure);
   }
   return current;
 }


 static const int kSpaceRangeCount = 20;
 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
   0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
   0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
   0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
 };


 static const int kWordRangeCount = 8;
 static const uc16 kWordRanges[kWordRangeCount] = {
   '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
 };


 static const int kDigitRangeCount = 2;
 static const uc16 kDigitRanges[kDigitRangeCount] = {
   '0', '9'
 };


 static const int kLineTerminatorRangeCount = 6;
 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
   0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
 };


 static void AddClass(const uc16* elmv,
                      int elmc,
                      ZoneList<CharacterRange>* ranges) {
   for (int i = 0; i < elmc; i += 2) {
     ASSERT(elmv[i] <= elmv[i + 1]);
     ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));
   }
 }


 static void AddClassNegated(const uc16 *elmv,
                             int elmc,
                             ZoneList<CharacterRange>* ranges) {
   ASSERT(elmv[0] != 0x0000);
   ASSERT(elmv[elmc-1] != 0xFFFF);
   uc16 last = 0x0000;
   for (int i = 0; i < elmc; i += 2) {
     ASSERT(last <= elmv[i] - 1);
     ASSERT(elmv[i] <= elmv[i + 1]);
     ranges->Add(CharacterRange(last, elmv[i] - 1));
     last = elmv[i + 1] + 1;
   }
   ranges->Add(CharacterRange(last, 0xFFFF));
 }


 void CharacterRange::AddClassEscape(uc16 type,
                                     ZoneList<CharacterRange>* ranges) {
   switch (type) {
     case 's':
       AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
       break;
     case 'S':
       AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
       break;
     case 'w':
       AddClass(kWordRanges, kWordRangeCount, ranges);
       break;
     case 'W':
       AddClassNegated(kWordRanges, kWordRangeCount, ranges);
       break;
     case 'd':
       AddClass(kDigitRanges, kDigitRangeCount, ranges);
       break;
     case 'D':
       AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
       break;
     case '.':
       AddClassNegated(kLineTerminatorRanges,
                       kLineTerminatorRangeCount,
                       ranges);
       break;
     // This is not a character range as defined by the spec but a
     // convenient shorthand for a character class that matches any
     // character.
     case '*':
       ranges->Add(CharacterRange::Everything());
       break;
     default:
       UNREACHABLE();
   }
 }


 Vector<const uc16> CharacterRange::GetWordBounds() {
   return Vector<const uc16>(kWordRanges, kWordRangeCount);
 }


 class CharacterRangeSplitter {
  public:
   CharacterRangeSplitter(ZoneList<CharacterRange>** included,
                           ZoneList<CharacterRange>** excluded)
       : included_(included),
         excluded_(excluded) { }
   void Call(uc16 from, DispatchTable::Entry entry);

   static const int kInBase = 0;
   static const int kInOverlay = 1;

  private:
   ZoneList<CharacterRange>** included_;
   ZoneList<CharacterRange>** excluded_;
 };


 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
   if (!entry.out_set()->Get(kInBase)) return;
   ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
     ? included_
     : excluded_;
   if (*target == NULL) *target = new ZoneList<CharacterRange>(2);
   (*target)->Add(CharacterRange(entry.from(), entry.to()));
 }


 void CharacterRange::Split(ZoneList<CharacterRange>* base,
                            Vector<const uc16> overlay,
                            ZoneList<CharacterRange>** included,
                            ZoneList<CharacterRange>** excluded) {
   ASSERT_EQ(NULL, *included);
   ASSERT_EQ(NULL, *excluded);
   DispatchTable table;
   for (int i = 0; i < base->length(); i++)
     table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
   for (int i = 0; i < overlay.length(); i += 2) {
     table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
                    CharacterRangeSplitter::kInOverlay);
   }
   CharacterRangeSplitter callback(included, excluded);
   table.ForEach(&callback);
 }


 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   if (IsSingleton()) {
     // If this is a singleton we just expand the one character.
     int length = uncanonicalize.get(from(), '\0', chars);
     for (int i = 0; i < length; i++) {
       uc32 chr = chars[i];
       if (chr != from()) {
         ranges->Add(CharacterRange::Singleton(chars[i]));
       }
     }
   } else if (from() <= kRangeCanonicalizeMax &&
              to() <= kRangeCanonicalizeMax) {
     // If this is a range we expand the characters block by block,
     // expanding contiguous subranges (blocks) one at a time.
     // The approach is as follows.  For a given start character we
     // look up the block that contains it, for instance 'a' if the
     // start character is 'c'.  A block is characterized by the property
     // that all characters uncanonicalize in the same way as the first
     // element, except that each entry in the result is incremented
     // by the distance from the first element.  So a-z is a block
     // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter
     // uncanonicalizes to ['a' + k, 'A' + k].
     // Once we've found the start point we look up its uncanonicalization
     // and produce a range for each element.  For instance for [c-f]
     // we look up ['a', 'A'] and produce [c-f] and [C-F].  We then only
     // add a range if it is not already contained in the input, so [c-f]
     // will be skipped but [C-F] will be added.  If this range is not
     // completely contained in a block we do this for all the blocks
     // covered by the range.
     unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
     // First, look up the block that contains the 'from' character.
     int length = canonrange.get(from(), '\0', range);
     if (length == 0) {
       range[0] = from();
     } else {
       ASSERT_EQ(1, length);
     }
     int pos = from();
     // The start of the current block.  Note that except for the first
     // iteration 'start' is always equal to 'pos'.
     int start;
     // If it is not the start point of a block the entry contains the
     // offset of the character from the start point.
     if ((range[0] & kStartMarker) == 0) {
       start = pos - range[0];
     } else {
       start = pos;
     }
     // Then we add the ranges on at a time, incrementing the current
     // position to be after the last block each time.  The position
     // always points to the start of a block.
     while (pos < to()) {
       length = canonrange.get(start, '\0', range);
       if (length == 0) {
         range[0] = start;
       } else {
         ASSERT_EQ(1, length);
       }
       ASSERT((range[0] & kStartMarker) != 0);
       // The start point of a block contains the distance to the end
       // of the range.
       int block_end = start + (range[0] & kPayloadMask) - 1;
       int end = (block_end > to()) ? to() : block_end;
       length = uncanonicalize.get(start, '\0', range);
       for (int i = 0; i < length; i++) {
         uc32 c = range[i];
         uc16 range_from = c + (pos - start);
         uc16 range_to = c + (end - start);
         if (!(from() <= range_from && range_to <= to())) {
           ranges->Add(CharacterRange(range_from, range_to));
         }
       }
       start = pos = block_end + 1;
     }
   } else {
     // TODO(plesner) when we've fixed the 2^11 bug in unibrow.
   }
 }


 // -------------------------------------------------------------------
 // Interest propagation


 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
   for (int i = 0; i < siblings_.length(); i++) {
     RegExpNode* sibling = siblings_.Get(i);
     if (sibling->info()->Matches(info))
       return sibling;
   }
   return NULL;
 }


 RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
   ASSERT_EQ(false, *cloned);
   ASSERT(!info->HasAssertions());
   siblings_.Ensure(this);
   RegExpNode* result = TryGetSibling(info);
   if (result != NULL) return result;
   result = this->Clone();
   NodeInfo* new_info = result->info();
   new_info->ResetCompilationState();
   new_info->AddFromPreceding(info);
   AddSibling(result);
   *cloned = true;
   return result;
 }


 template <class C>
 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
   NodeInfo full_info(*node->info());
   full_info.AddFromPreceding(info);
   bool cloned = false;
   return RegExpNode::EnsureSibling(node, &full_info, &cloned);
 }


 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
   NodeInfo full_info(*this->info());
   full_info.AddFromPreceding(info);
   bool cloned = false;
   ActionNode* action = EnsureSibling(this, &full_info, &cloned);
   if (cloned && type_ != ESCAPE_SUBMATCH) {
     action->set_on_success(action->on_success()->PropagateForward(info));
   }
   return action;
 }


 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
   NodeInfo full_info(*this->info());
   full_info.AddFromPreceding(info);
   bool cloned = false;
   ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
   if (cloned) {
     ZoneList<GuardedAlternative>* old_alternatives = alternatives();
     int count = old_alternatives->length();
     choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
     for (int i = 0; i < count; i++) {
       GuardedAlternative alternative = old_alternatives->at(i);
       alternative.set_node(alternative.node()->PropagateForward(info));
       choice->alternatives()->Add(alternative);
     }
     if (!choice->on_failure_->IsBacktrack()) {
       choice->on_failure_ = choice->on_failure_->PropagateForward(info);
     }
   }
   return choice;
 }


 RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
   return PropagateToEndpoint(this, info);
 }


 RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
   NodeInfo full_info(*this->info());
   full_info.AddFromPreceding(info);
   bool cloned = false;
   BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
   if (cloned) {
     // TODO(erikcorry): A back reference has to have two successors (by default
     // the same node).  The first is used if the back reference matches a non-
     // empty back reference, the second if it matches an empty one.  This
     // doesn't matter for at_end, which is the only one implemented right now,
     // but it will matter for other pieces of info.
     back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
   }
   return back_ref;
 }


 RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
   return PropagateToEndpoint(this, info);
 }


 // -------------------------------------------------------------------
 // Splay tree


 OutSet* OutSet::Extend(unsigned value) {
   if (Get(value))
     return this;
   if (successors() != NULL) {
     for (int i = 0; i < successors()->length(); i++) {
       OutSet* successor = successors()->at(i);
       if (successor->Get(value))
         return successor;
     }
   } else {
     successors_ = new ZoneList<OutSet*>(2);
   }
   OutSet* result = new OutSet(first_, remaining_);
   result->Set(value);
   successors()->Add(result);
   return result;
 }


 void OutSet::Set(unsigned value) {
   if (value < kFirstLimit) {
     first_ |= (1 << value);
   } else {
     if (remaining_ == NULL)
       remaining_ = new ZoneList<unsigned>(1);
     if (remaining_->is_empty() || !remaining_->Contains(value))
       remaining_->Add(value);
   }
 }


 bool OutSet::Get(unsigned value) {
   if (value < kFirstLimit) {
     return (first_ & (1 << value)) != 0;
   } else if (remaining_ == NULL) {
     return false;
   } else {
     return remaining_->Contains(value);
   }
 }


 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
 const DispatchTable::Entry DispatchTable::Config::kNoValue;


 void DispatchTable::AddRange(CharacterRange full_range, int value) {
   CharacterRange current = full_range;
   if (tree()->is_empty()) {
     // If this is the first range we just insert into the table.
     ZoneSplayTree<Config>::Locator loc;
     ASSERT_RESULT(tree()->Insert(current.from(), &loc));
     loc.set_value(Entry(current.from(), current.to(), empty()->Extend(value)));
     return;
   }
   // First see if there is a range to the left of this one that
   // overlaps.
   ZoneSplayTree<Config>::Locator loc;
   if (tree()->FindGreatestLessThan(current.from(), &loc)) {
     Entry* entry = &loc.value();
     // If we've found a range that overlaps with this one, and it
     // starts strictly to the left of this one, we have to fix it
     // because the following code only handles ranges that start on
     // or after the start point of the range we're adding.
     if (entry->from() < current.from() && entry->to() >= current.from()) {
       // Snap the overlapping range in half around the start point of
       // the range we're adding.
       CharacterRange left(entry->from(), current.from() - 1);
       CharacterRange right(current.from(), entry->to());
       // The left part of the overlapping range doesn't overlap.
       // Truncate the whole entry to be just the left part.
       entry->set_to(left.to());
       // The right part is the one that overlaps.  We add this part
       // to the map and let the next step deal with merging it with
       // the range we're adding.
       ZoneSplayTree<Config>::Locator loc;
       ASSERT_RESULT(tree()->Insert(right.from(), &loc));
       loc.set_value(Entry(right.from(),
                           right.to(),
                           entry->out_set()));
     }
   }
   while (current.is_valid()) {
     if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
         (loc.value().from() <= current.to()) &&
         (loc.value().to() >= current.from())) {
       Entry* entry = &loc.value();
       // We have overlap.  If there is space between the start point of
       // the range we're adding and where the overlapping range starts
       // then we have to add a range covering just that space.
       if (current.from() < entry->from()) {
         ZoneSplayTree<Config>::Locator ins;
         ASSERT_RESULT(tree()->Insert(current.from(), &ins));
         ins.set_value(Entry(current.from(),
                             entry->from() - 1,
                             empty()->Extend(value)));
         current.set_from(entry->from());
       }
       ASSERT_EQ(current.from(), entry->from());
       // If the overlapping range extends beyond the one we want to add
       // we have to snap the right part off and add it separately.
       if (entry->to() > current.to()) {
         ZoneSplayTree<Config>::Locator ins;
         ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
         ins.set_value(Entry(current.to() + 1,
                             entry->to(),
                             entry->out_set()));
         entry->set_to(current.to());
       }
       ASSERT(entry->to() <= current.to());
       // The overlapping range is now completely contained by the range
       // we're adding so we can just update it and move the start point
       // of the range we're adding just past it.
       entry->AddValue(value);
       // Bail out if the last interval ended at 0xFFFF since otherwise
       // adding 1 will wrap around to 0.
       if (entry->to() == 0xFFFF)
         break;
       ASSERT(entry->to() + 1 > current.from());
       current.set_from(entry->to() + 1);
     } else {
       // There is no overlap so we can just add the range
       ZoneSplayTree<Config>::Locator ins;
       ASSERT_RESULT(tree()->Insert(current.from(), &ins));
       ins.set_value(Entry(current.from(),
                           current.to(),
                           empty()->Extend(value)));
       break;
     }
   }
 }


 OutSet* DispatchTable::Get(uc16 value) {
   ZoneSplayTree<Config>::Locator loc;
   if (!tree()->FindGreatestLessThan(value, &loc))
     return empty();
   Entry* entry = &loc.value();
   if (value <= entry->to())
     return entry->out_set();
   else
     return empty();
 }


 // -------------------------------------------------------------------
 // Analysis


 void Analysis::EnsureAnalyzed(RegExpNode* that) {
   if (that->info()->been_analyzed || that->info()->being_analyzed)
     return;
   that->info()->being_analyzed = true;
   that->Accept(this);
   that->info()->being_analyzed = false;
   that->info()->been_analyzed = true;
 }


 void Analysis::VisitEnd(EndNode* that) {
   // nothing to do
 }


 void Analysis::VisitText(TextNode* that) {
   if (ignore_case_) {
     that->MakeCaseIndependent();
   }
   EnsureAnalyzed(that->on_success());
   EnsureAnalyzed(that->on_failure());
   NodeInfo* info = that->info();
   NodeInfo* next_info = that->on_success()->info();
   // If the following node is interested in what it follows then this
   // node must determine it.
   info->determine_newline = next_info->follows_newline_interest;
   info->determine_word = next_info->follows_word_interest;
   info->determine_start = next_info->follows_start_interest;
 }


 void Analysis::VisitAction(ActionNode* that) {
   EnsureAnalyzed(that->on_success());
   // If the next node is interested in what it follows then this node
   // has to be interested too so it can pass the information on.
   that->info()->AddFromFollowing(that->on_success()->info());
 }


 void Analysis::VisitChoice(ChoiceNode* that) {
   NodeInfo* info = that->info();
   for (int i = 0; i < that->alternatives()->length(); i++) {
     RegExpNode* node = that->alternatives()->at(i).node();
     EnsureAnalyzed(node);
     // Anything the following nodes need to know has to be known by
     // this node also, so it can pass it on.
     info->AddFromFollowing(node->info());
   }
   EnsureAnalyzed(that->on_failure());
 }


 void Analysis::VisitBackReference(BackReferenceNode* that) {
   EnsureAnalyzed(that->on_success());
   EnsureAnalyzed(that->on_failure());
 }


 // -------------------------------------------------------------------
 // Assumption expansion


 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
   siblings_.Ensure(this);
   NodeInfo new_info = *this->info();
   if (new_info.follows_word_interest)
     new_info.follows_word = info->follows_word;
   if (new_info.follows_newline_interest)
     new_info.follows_newline = info->follows_newline;
   // If the following node should determine something we need to get
   // a sibling that determines it.
   new_info.does_determine_newline = new_info.determine_newline;
   new_info.does_determine_word = new_info.determine_word;
   new_info.does_determine_start = new_info.determine_start;
   RegExpNode* sibling = TryGetSibling(&new_info);
   if (sibling == NULL) {
     sibling = ExpandLocal(&new_info);
     siblings_.Add(sibling);
     sibling->info()->being_expanded = true;
     sibling->ExpandChildren();
     sibling->info()->being_expanded = false;
     sibling->info()->been_expanded = true;
   } else {
     NodeInfo* sib_info = sibling->info();
     if (!sib_info->been_expanded && !sib_info->being_expanded) {
       sibling->info()->being_expanded = true;
       sibling->ExpandChildren();
       sibling->info()->being_expanded = false;
       sibling->info()->been_expanded = true;
     }
   }
   return sibling;
 }


 RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) {
   ChoiceNode* clone = this->Clone();
   clone->info()->ResetCompilationState();
   clone->info()->AddAssumptions(info);
   return clone;
 }


 void ChoiceNode::ExpandChildren() {
   ZoneList<GuardedAlternative>* alts = alternatives();
   ZoneList<GuardedAlternative>* new_alts
       = new ZoneList<GuardedAlternative>(alts->length());
   for (int i = 0; i < alts->length(); i++) {
     GuardedAlternative next = alts->at(i);
     next.set_node(next.node()->EnsureExpanded(info()));
     new_alts->Add(next);
   }
   alternatives_ = new_alts;
 }


 RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
   TextElement last = elements()->last();
   if (last.type == TextElement::CHAR_CLASS) {
     RegExpCharacterClass* char_class = last.data.u_char_class;
     if (info->does_determine_word) {
       ZoneList<CharacterRange>* word = NULL;
       ZoneList<CharacterRange>* non_word = NULL;
       CharacterRange::Split(char_class->ranges(),
                             CharacterRange::GetWordBounds(),
                             &word,
                             &non_word);
       if (non_word == NULL) {
         // This node contains no non-word characters so it must be
         // all word.
         this->info()->is_word = NodeInfo::TRUE;
       } else if (word == NULL) {
         // Vice versa.
         this->info()->is_word = NodeInfo::FALSE;
       } else {
         // If this character class contains both word and non-word
         // characters we need to split it into two.
         ChoiceNode* result = new ChoiceNode(2, on_failure());
         // Welcome to the family, son!
         result->set_siblings(this->siblings());
         *result->info() = *this->info();
         result->info()->ResetCompilationState();
         result->info()->AddAssumptions(info);
         RegExpNode* word_node
             = new TextNode(new RegExpCharacterClass(word, false),
                            on_success(),
                            on_failure());
         word_node->info()->determine_word = true;
         word_node->info()->does_determine_word = true;
         word_node->info()->is_word = NodeInfo::TRUE;
         result->alternatives()->Add(GuardedAlternative(word_node));
         RegExpNode* non_word_node
             = new TextNode(new RegExpCharacterClass(non_word, false),
                            on_success(),
                            on_failure());
         non_word_node->info()->determine_word = true;
         non_word_node->info()->does_determine_word = true;
         non_word_node->info()->is_word = NodeInfo::FALSE;
         result->alternatives()->Add(GuardedAlternative(non_word_node));
         return result;
       }
     }
   }
   TextNode* clone = this->Clone();
   clone->info()->ResetCompilationState();
   clone->info()->AddAssumptions(info);
   return clone;
 }


 void TextNode::ExpandAtomChildren(RegExpAtom* that) {
   NodeInfo new_info = *info();
   uc16 last = that->data()[that->data().length() - 1];
   if (info()->determine_word) {
     new_info.follows_word = IsRegExpWord(last)
       ? NodeInfo::TRUE : NodeInfo::FALSE;
   } else {
     new_info.follows_word = NodeInfo::UNKNOWN;
   }
   if (info()->determine_newline) {
     new_info.follows_newline = IsRegExpNewline(last)
       ? NodeInfo::TRUE : NodeInfo::FALSE;
   } else {
     new_info.follows_newline = NodeInfo::UNKNOWN;
   }
   if (info()->determine_start) {
     new_info.follows_start = NodeInfo::FALSE;
   } else {
     new_info.follows_start = NodeInfo::UNKNOWN;
   }
   set_on_success(on_success()->EnsureExpanded(&new_info));
 }


 void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) {
   if (info()->does_determine_word) {
     // ASSERT(info()->is_word != NodeInfo::UNKNOWN);
     NodeInfo next_info = *on_success()->info();
     next_info.follows_word = info()->is_word;
     set_on_success(on_success()->EnsureExpanded(&next_info));
   } else {
     set_on_success(on_success()->EnsureExpanded(info()));
   }
 }


 void TextNode::ExpandChildren() {
   TextElement last = elements()->last();
   switch (last.type) {
     case TextElement::ATOM:
       ExpandAtomChildren(last.data.u_atom);
       break;
     case TextElement::CHAR_CLASS:
       ExpandCharClassChildren(last.data.u_char_class);
       break;
     default:
       UNREACHABLE();
   }
 }


 RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) {
   ActionNode* clone = this->Clone();
   clone->info()->ResetCompilationState();
   clone->info()->AddAssumptions(info);
   return clone;
 }


 void ActionNode::ExpandChildren() {
   set_on_success(on_success()->EnsureExpanded(info()));
 }


 RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) {
   BackReferenceNode* clone = this->Clone();
   clone->info()->ResetCompilationState();
   clone->info()->AddAssumptions(info);
   return clone;
 }


 void BackReferenceNode::ExpandChildren() {
   set_on_success(on_success()->EnsureExpanded(info()));
 }


 RegExpNode* EndNode::ExpandLocal(NodeInfo* info) {
   EndNode* clone = this->Clone();
   clone->info()->ResetCompilationState();
   clone->info()->AddAssumptions(info);
   return clone;
 }


 void EndNode::ExpandChildren() {
   // nothing to do
 }


 // -------------------------------------------------------------------
 // Dispatch table construction


 void DispatchTableConstructor::VisitEnd(EndNode* that) {
   AddRange(CharacterRange::Everything());
 }


 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
   node->set_being_calculated(true);
   ZoneList<GuardedAlternative>* alternatives = node->alternatives();
   for (int i = 0; i < alternatives->length(); i++) {
     set_choice_index(i);
     alternatives->at(i).node()->Accept(this);
   }
   node->set_being_calculated(false);
 }


 class AddDispatchRange {
  public:
   explicit AddDispatchRange(DispatchTableConstructor* constructor)
     : constructor_(constructor) { }
   void Call(uc32 from, DispatchTable::Entry entry);
  private:
   DispatchTableConstructor* constructor_;
 };


 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
   CharacterRange range(from, entry.to());
   constructor_->AddRange(range);
 }


 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
   if (node->being_calculated())
     return;
   DispatchTable* table = node->GetTable(ignore_case_);
   AddDispatchRange adder(this);
   table->ForEach(&adder);
 }


 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
   // TODO(160): Find the node that we refer back to and propagate its start
   // set back to here.  For now we just accept anything.
   AddRange(CharacterRange::Everything());
 }


 static int CompareRangeByFrom(const CharacterRange* a,
                               const CharacterRange* b) {
   return Compare<uc16>(a->from(), b->from());
 }


 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
   ranges->Sort(CompareRangeByFrom);
   uc16 last = 0;
   for (int i = 0; i < ranges->length(); i++) {
     CharacterRange range = ranges->at(i);
     if (last < range.from())
       AddRange(CharacterRange(last, range.from() - 1));
     if (range.to() >= last) {
       if (range.to() == 0xFFFF) {
         return;
       } else {
         last = range.to() + 1;
       }
     }
   }
   AddRange(CharacterRange(last, 0xFFFF));
 }


 void DispatchTableConstructor::VisitText(TextNode* that) {
   TextElement elm = that->elements()->at(0);
   switch (elm.type) {
     case TextElement::ATOM: {
       uc16 c = elm.data.u_atom->data()[0];
       AddRange(CharacterRange(c, c));
       break;
     }
     case TextElement::CHAR_CLASS: {
       RegExpCharacterClass* tree = elm.data.u_char_class;
       ZoneList<CharacterRange>* ranges = tree->ranges();
       if (tree->is_negated()) {
         AddInverse(ranges);
       } else {
         for (int i = 0; i < ranges->length(); i++)
           AddRange(ranges->at(i));
       }
       break;
     }
     default: {
       UNIMPLEMENTED();
     }
   }
 }


 void DispatchTableConstructor::VisitAction(ActionNode* that) {
   that->on_success()->Accept(this);
 }


 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
                                          RegExpNode** node_return,
                                          bool ignore_case,
                                          bool is_multiline) {
   RegExpCompiler compiler(input->capture_count, ignore_case);
   // Wrap the body of the regexp in capture #0.
   RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
                                                     0,
                                                     &compiler,
                                                     compiler.accept(),
                                                     compiler.backtrack());
   // Add a .*? at the beginning, outside the body capture.
   // Note: We could choose to not add this if the regexp is anchored at
   //   the start of the input but I'm not sure how best to do that and
   //   since we don't even handle ^ yet I'm saving that optimization for
   //   later.
   RegExpNode* node = RegExpQuantifier::ToNode(0,
                                               RegExpQuantifier::kInfinity,
                                               false,
                                               new RegExpCharacterClass('*'),
                                               &compiler,
                                               captured_body,
                                               compiler.backtrack());
   if (node_return != NULL) *node_return = node;
   Analysis analysis(ignore_case);
   analysis.EnsureAnalyzed(node);

   NodeInfo info = *node->info();
   node = node->EnsureExpanded(&info);

   if (!FLAG_irregexp) {
     return Handle<FixedArray>::null();
   }

   if (is_multiline && !FLAG_attempt_multiline_irregexp) {
     return Handle<FixedArray>::null();
   }

   if (FLAG_irregexp_native) {
 #ifdef ARM
     // Unimplemented, fall-through to bytecode implementation.
 #else  // IA32
     RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
                                              (input->capture_count + 1) * 2);
     return compiler.Assemble(&macro_assembler,
                              node,
                              input->capture_count);
 #endif
   }
   EmbeddedVector<byte, 1024> codes;
   RegExpMacroAssemblerIrregexp macro_assembler(codes);
   return compiler.Assemble(&macro_assembler,
                            node,
                            input->capture_count);
 }


 }}  // namespace v8::internal