| // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following |
| // disclaimer in the documentation and/or other materials provided |
| // with the distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived |
| // from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "v8.h" |
| |
| #include "execution.h" |
| #include "factory.h" |
| #include "jsregexp.h" |
| #include "platform.h" |
| #include "runtime.h" |
| #include "top.h" |
| #include "compilation-cache.h" |
| |
| // Including pcre.h undefines DEBUG to avoid getting debug output from |
| // the JSCRE implementation. Make sure to redefine it in debug mode |
| // after having included the header file. |
| #ifdef DEBUG |
| #include "third_party/jscre/pcre.h" |
| #define DEBUG |
| #else |
| #include "third_party/jscre/pcre.h" |
| #endif |
| |
| namespace v8 { namespace internal { |
| |
| |
| #define CAPTURE_INDEX 0 |
| #define INTERNAL_INDEX 1 |
| |
| static Failure* malloc_failure; |
| |
| static void* JSREMalloc(size_t size) { |
| Object* obj = Heap::AllocateByteArray(size); |
| |
| // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile |
| // will return NULL to the caller, performs GC there. |
| // Also pass failure information to the caller. |
| if (obj->IsFailure()) { |
| malloc_failure = Failure::cast(obj); |
| return NULL; |
| } |
| |
| // Note: object is unrooted, the caller of jsRegExpCompile must |
| // create a handle for the return value before doing heap allocation. |
| return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress()); |
| } |
| |
| |
| static void JSREFree(void* p) { |
| USE(p); // Do nothing, memory is garbage collected. |
| } |
| |
| |
| String* RegExpImpl::last_ascii_string_ = NULL; |
| String* RegExpImpl::two_byte_cached_string_ = NULL; |
| |
| |
| void RegExpImpl::NewSpaceCollectionPrologue() { |
| // The two byte string is always in the old space. The Ascii string may be |
| // in either place. If it is in the old space we don't need to do anything. |
| if (Heap::InNewSpace(last_ascii_string_)) { |
| // Invalidate the cache. |
| last_ascii_string_ = NULL; |
| two_byte_cached_string_ = NULL; |
| } |
| } |
| |
| |
| void RegExpImpl::OldSpaceCollectionPrologue() { |
| last_ascii_string_ = NULL; |
| two_byte_cached_string_ = NULL; |
| } |
| |
| |
| Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, |
| Handle<String> pattern, |
| Handle<String> flags, |
| bool* has_pending_exception) { |
| // Ensure that the constructor function has been loaded. |
| if (!constructor->IsLoaded()) { |
| LoadLazy(constructor, has_pending_exception); |
| if (*has_pending_exception) return Handle<Object>(Failure::Exception()); |
| } |
| // Call the construct code with 2 arguments. |
| Object** argv[2] = { Handle<Object>::cast(pattern).location(), |
| Handle<Object>::cast(flags).location() }; |
| return Execution::New(constructor, 2, argv, has_pending_exception); |
| } |
| |
| |
| // Converts a source string to a 16 bit flat string or a SlicedString containing |
| // a 16 bit flat string). |
| Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) { |
| if (*subject == last_ascii_string_) { |
| ASSERT(two_byte_cached_string_ != NULL); |
| return Handle<String>(String::cast(two_byte_cached_string_)); |
| } |
| Handle<String> two_byte_string = StringToTwoByte(subject); |
| last_ascii_string_ = *subject; |
| two_byte_cached_string_ = *two_byte_string; |
| return two_byte_string; |
| } |
| |
| |
| // Converts a source string to a 16 bit flat string or a SlicedString containing |
| // a 16 bit flat string). |
| Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) { |
| StringShape shape(*pattern); |
| if (!pattern->IsFlat(shape)) { |
| FlattenString(pattern); |
| shape = StringShape(*pattern); |
| } |
| Handle<String> flat_string(shape.IsCons() ? |
| String::cast(ConsString::cast(*pattern)->first()) : |
| *pattern); |
| ASSERT(flat_string->IsString()); |
| StringShape flat_shape(*flat_string); |
| ASSERT(!flat_shape.IsCons()); |
| ASSERT(flat_shape.IsSequential() || |
| flat_shape.IsSliced() || |
| flat_shape.IsExternal()); |
| if (!flat_shape.IsAsciiRepresentation()) { |
| return flat_string; |
| } |
| |
| int len = flat_string->length(flat_shape); |
| Handle<String> two_byte_string = |
| Factory::NewRawTwoByteString(len, TENURED); |
| uc16* dest = SeqTwoByteString::cast(*two_byte_string)->GetChars(); |
| String::WriteToFlat(*flat_string, flat_shape, dest, 0, len); |
| return two_byte_string; |
| } |
| |
| |
| static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) { |
| int flags = JSRegExp::NONE; |
| StringShape shape(*str); |
| for (int i = 0; i < str->length(shape); i++) { |
| switch (str->Get(shape, i)) { |
| case 'i': |
| flags |= JSRegExp::IGNORE_CASE; |
| break; |
| case 'g': |
| flags |= JSRegExp::GLOBAL; |
| break; |
| case 'm': |
| flags |= JSRegExp::MULTILINE; |
| break; |
| } |
| } |
| return JSRegExp::Flags(flags); |
| } |
| |
| |
| unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char; |
| |
| |
| Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| Handle<String> pattern, |
| Handle<String> flag_str) { |
| JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
| bool in_cache = !cached.is_null(); |
| Handle<Object> result; |
| StringShape shape(*pattern); |
| if (in_cache) { |
| re->set_data(*cached); |
| result = re; |
| } else { |
| bool is_atom = !flags.is_ignore_case(); |
| for (int i = 0; is_atom && i < pattern->length(shape); i++) { |
| if (is_reg_exp_special_char.get(pattern->Get(shape, i))) |
| is_atom = false; |
| } |
| if (is_atom) { |
| result = AtomCompile(re, pattern, flags); |
| } else { |
| result = JsreCompile(re, pattern, flags); |
| } |
| Object* data = re->data(); |
| if (data->IsFixedArray()) { |
| // If compilation succeeded then the data is set on the regexp |
| // and we can store it in the cache. |
| Handle<FixedArray> data(FixedArray::cast(re->data())); |
| CompilationCache::PutRegExp(pattern, flags, data); |
| } |
| } |
| |
| LOG(RegExpCompileEvent(re, in_cache)); |
| return result; |
| } |
| |
| |
| Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| Handle<Object> index) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::JSCRE: |
| return JsreExec(regexp, subject, index); |
| case JSRegExp::ATOM: |
| return AtomExec(regexp, subject, index); |
| default: |
| UNREACHABLE(); |
| return Handle<Object>(); |
| } |
| } |
| |
| |
| Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| Handle<String> subject) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::JSCRE: |
| return JsreExecGlobal(regexp, subject); |
| case JSRegExp::ATOM: |
| return AtomExecGlobal(regexp, subject); |
| default: |
| UNREACHABLE(); |
| return Handle<Object>(); |
| } |
| } |
| |
| |
| Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| Handle<String> pattern, |
| JSRegExp::Flags flags) { |
| Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern); |
| return re; |
| } |
| |
| |
| Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| Handle<String> subject, |
| Handle<Object> index) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| |
| uint32_t start_index; |
| if (!Array::IndexFromObject(*index, &start_index)) { |
| return Handle<Smi>(Smi::FromInt(-1)); |
| } |
| |
| LOG(RegExpExecEvent(re, start_index, subject)); |
| int value = Runtime::StringMatch(subject, needle, start_index); |
| if (value == -1) return Factory::null_value(); |
| |
| Handle<FixedArray> array = Factory::NewFixedArray(2); |
| array->set(0, |
| Smi::FromInt(value), |
| SKIP_WRITE_BARRIER); |
| array->set(1, |
| Smi::FromInt(value + needle->length()), |
| SKIP_WRITE_BARRIER); |
| return Factory::NewJSArrayWithElements(array); |
| } |
| |
| |
| Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
| Handle<String> subject) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| Handle<JSArray> result = Factory::NewJSArray(1); |
| int index = 0; |
| int match_count = 0; |
| int subject_length = subject->length(); |
| int needle_length = needle->length(); |
| while (true) { |
| LOG(RegExpExecEvent(re, index, subject)); |
| int value = -1; |
| if (index + needle_length <= subject_length) { |
| value = Runtime::StringMatch(subject, needle, index); |
| } |
| if (value == -1) break; |
| HandleScope scope; |
| int end = value + needle_length; |
| |
| Handle<FixedArray> array = Factory::NewFixedArray(2); |
| array->set(0, |
| Smi::FromInt(value), |
| SKIP_WRITE_BARRIER); |
| array->set(1, |
| Smi::FromInt(end), |
| SKIP_WRITE_BARRIER); |
| Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| SetElement(result, match_count, pair); |
| match_count++; |
| index = end; |
| if (needle_length == 0) index++; |
| } |
| return result; |
| } |
| |
| |
| static inline Object* DoCompile(String* pattern, |
| JSRegExp::Flags flags, |
| unsigned* number_of_captures, |
| const char** error_message, |
| JscreRegExp** code) { |
| JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() |
| ? JSRegExpIgnoreCase |
| : JSRegExpDoNotIgnoreCase; |
| JSRegExpMultilineOption multiline_option = flags.is_multiline() |
| ? JSRegExpMultiline |
| : JSRegExpSingleLine; |
| *error_message = NULL; |
| malloc_failure = Failure::Exception(); |
| *code = jsRegExpCompile(pattern->GetTwoByteData(), |
| pattern->length(), |
| case_option, |
| multiline_option, |
| number_of_captures, |
| error_message, |
| &JSREMalloc, |
| &JSREFree); |
| if (*code == NULL && (malloc_failure->IsRetryAfterGC() || |
| malloc_failure->IsOutOfMemoryFailure())) { |
| return malloc_failure; |
| } else { |
| // It doesn't matter which object we return here, we just need to return |
| // a non-failure to indicate to the GC-retry code that there was no |
| // allocation failure. |
| return pattern; |
| } |
| } |
| |
| |
| void CompileWithRetryAfterGC(Handle<String> pattern, |
| JSRegExp::Flags flags, |
| unsigned* number_of_captures, |
| const char** error_message, |
| JscreRegExp** code) { |
| CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern, |
| flags, |
| number_of_captures, |
| error_message, |
| code)); |
| } |
| |
| |
| Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, |
| Handle<String> pattern, |
| JSRegExp::Flags flags) { |
| Handle<String> two_byte_pattern = StringToTwoByte(pattern); |
| |
| unsigned number_of_captures; |
| const char* error_message = NULL; |
| |
| JscreRegExp* code = NULL; |
| FlattenString(pattern); |
| |
| CompileWithRetryAfterGC(two_byte_pattern, |
| flags, |
| &number_of_captures, |
| &error_message, |
| &code); |
| |
| if (code == NULL) { |
| // Throw an exception. |
| Handle<JSArray> array = Factory::NewJSArray(2); |
| SetElement(array, 0, pattern); |
| SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( |
| (error_message == NULL) ? "Unknown regexp error" : error_message))); |
| Handle<Object> regexp_err = |
| Factory::NewSyntaxError("malformed_regexp", array); |
| Top::Throw(*regexp_err); |
| return Handle<Object>(); |
| } |
| |
| // Convert the return address to a ByteArray pointer. |
| Handle<ByteArray> internal( |
| ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); |
| |
| Handle<FixedArray> value = Factory::NewFixedArray(2); |
| value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures)); |
| value->set(INTERNAL_INDEX, *internal); |
| Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); |
| |
| return re; |
| } |
| |
| |
| Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp, |
| int num_captures, |
| Handle<String> subject, |
| int previous_index, |
| const uc16* two_byte_subject, |
| int* offsets_vector, |
| int offsets_vector_length) { |
| int rc; |
| { |
| AssertNoAllocation a; |
| ByteArray* internal = JsreInternal(regexp); |
| const JscreRegExp* js_regexp = |
| reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress()); |
| |
| LOG(RegExpExecEvent(regexp, previous_index, subject)); |
| |
| rc = jsRegExpExecute(js_regexp, |
| two_byte_subject, |
| subject->length(), |
| previous_index, |
| offsets_vector, |
| offsets_vector_length); |
| } |
| |
| // The KJS JavaScript engine returns null (ie, a failed match) when |
| // JSRE's internal match limit is exceeded. We duplicate that behavior here. |
| if (rc == JSRegExpErrorNoMatch |
| || rc == JSRegExpErrorHitLimit) { |
| return Factory::null_value(); |
| } |
| |
| // Other JSRE errors: |
| if (rc < 0) { |
| // Throw an exception. |
| Handle<Object> code(Smi::FromInt(rc)); |
| Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code }; |
| Handle<Object> regexp_err( |
| Factory::NewTypeError("jsre_error", HandleVector(args, 2))); |
| return Handle<Object>(Top::Throw(*regexp_err)); |
| } |
| |
| Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| // The captures come in (start, end+1) pairs. |
| for (int i = 0; i < 2 * (num_captures+1); i += 2) { |
| array->set(i, |
| Smi::FromInt(offsets_vector[i]), |
| SKIP_WRITE_BARRIER); |
| array->set(i+1, |
| Smi::FromInt(offsets_vector[i+1]), |
| SKIP_WRITE_BARRIER); |
| } |
| return Factory::NewJSArrayWithElements(array); |
| } |
| |
| |
| class OffsetsVector { |
| public: |
| inline OffsetsVector(int num_captures) { |
| offsets_vector_length_ = (num_captures + 1) * 3; |
| if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
| vector_ = NewArray<int>(offsets_vector_length_); |
| } else { |
| vector_ = static_offsets_vector_; |
| } |
| } |
| |
| |
| inline ~OffsetsVector() { |
| if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
| DeleteArray(vector_); |
| vector_ = NULL; |
| } |
| } |
| |
| |
| inline int* vector() { |
| return vector_; |
| } |
| |
| |
| inline int length() { |
| return offsets_vector_length_; |
| } |
| |
| private: |
| int* vector_; |
| int offsets_vector_length_; |
| static const int kStaticOffsetsVectorSize = 30; |
| static int static_offsets_vector_[kStaticOffsetsVectorSize]; |
| }; |
| |
| |
| int OffsetsVector::static_offsets_vector_[ |
| OffsetsVector::kStaticOffsetsVectorSize]; |
| |
| |
| Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| Handle<Object> index) { |
| // Prepare space for the return values. |
| int num_captures = JsreCapture(regexp); |
| |
| OffsetsVector offsets(num_captures); |
| |
| int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| |
| Handle<String> subject16 = CachedStringToTwoByte(subject); |
| |
| Handle<Object> result(JsreExecOnce(regexp, num_captures, subject, |
| previous_index, |
| subject16->GetTwoByteData(), |
| offsets.vector(), offsets.length())); |
| |
| return result; |
| } |
| |
| |
| Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp, |
| Handle<String> subject) { |
| // Prepare space for the return values. |
| int num_captures = JsreCapture(regexp); |
| |
| OffsetsVector offsets(num_captures); |
| |
| int previous_index = 0; |
| |
| Handle<JSArray> result = Factory::NewJSArray(0); |
| int i = 0; |
| Handle<Object> matches; |
| |
| Handle<String> subject16 = CachedStringToTwoByte(subject); |
| |
| do { |
| if (previous_index > subject->length() || previous_index < 0) { |
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| // string length, there is no match. |
| matches = Factory::null_value(); |
| } else { |
| matches = JsreExecOnce(regexp, num_captures, subject, previous_index, |
| subject16->GetTwoByteData(), |
| offsets.vector(), offsets.length()); |
| |
| if (matches->IsJSArray()) { |
| SetElement(result, i, matches); |
| i++; |
| previous_index = offsets.vector()[1]; |
| if (offsets.vector()[0] == offsets.vector()[1]) { |
| previous_index++; |
| } |
| } |
| } |
| } while (matches->IsJSArray()); |
| |
| // If we exited the loop with an exception, throw it. |
| if (matches->IsNull()) { // Exited loop normally. |
| return result; |
| } else { // Exited loop with the exception in matches. |
| return matches; |
| } |
| } |
| |
| |
| int RegExpImpl::JsreCapture(Handle<JSRegExp> re) { |
| FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); |
| return Smi::cast(value->get(CAPTURE_INDEX))->value(); |
| } |
| |
| |
| ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { |
| FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); |
| return ByteArray::cast(value->get(INTERNAL_INDEX)); |
| } |
| |
| }} // namespace v8::internal |