blob: 08568de9ef3174d7c429bbc03116db0696f00ef6 [file] [log] [blame]
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +00001// Copyright 2012 the V8 project authors. All rights reserved.
ager@chromium.orga74f0da2008-12-03 16:05:52 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +000038RegExpMacroAssembler::RegExpMacroAssembler()
39 : slow_safe_compiler_(false),
40 global_(false) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000041}
42
43
44RegExpMacroAssembler::~RegExpMacroAssembler() {
45}
46
47
ager@chromium.org18ad94b2009-09-02 08:22:29 +000048bool RegExpMacroAssembler::CanReadUnaligned() {
49#ifdef V8_HOST_CAN_READ_UNALIGNED
50 return true;
51#else
52 return false;
53#endif
54}
55
56
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000057#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000058
karlklose@chromium.org83a47282011-05-11 11:54:09 +000059NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
60 : RegExpMacroAssembler() {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000061}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000062
63
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000064NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
65}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000066
ager@chromium.org18ad94b2009-09-02 08:22:29 +000067
68bool NativeRegExpMacroAssembler::CanReadUnaligned() {
69#ifdef V8_TARGET_CAN_READ_UNALIGNED
karlklose@chromium.org83a47282011-05-11 11:54:09 +000070 return !slow_safe();
ager@chromium.org18ad94b2009-09-02 08:22:29 +000071#else
72 return false;
73#endif
74}
75
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000076const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
77 String* subject,
78 int start_index) {
79 // Not just flat, but ultra flat.
80 ASSERT(subject->IsExternalString() || subject->IsSeqString());
81 ASSERT(start_index >= 0);
82 ASSERT(start_index <= subject->length());
83 if (subject->IsAsciiRepresentation()) {
84 const byte* address;
85 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000086 const char* data = ExternalAsciiString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000087 address = reinterpret_cast<const byte*>(data);
88 } else {
89 ASSERT(subject->IsSeqAsciiString());
90 char* data = SeqAsciiString::cast(subject)->GetChars();
91 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000092 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000093 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000094 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000095 const uc16* data;
96 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000097 data = ExternalTwoByteString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000098 } else {
99 ASSERT(subject->IsSeqTwoByteString());
100 data = SeqTwoByteString::cast(subject)->GetChars();
101 }
102 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000103}
104
ager@chromium.org32912102009-01-16 10:38:43 +0000105
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000106NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
107 Handle<Code> regexp_code,
108 Handle<String> subject,
109 int* offsets_vector,
110 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000111 int previous_index,
112 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000113
114 ASSERT(subject->IsFlat());
115 ASSERT(previous_index >= 0);
116 ASSERT(previous_index <= subject->length());
117
118 // No allocations before calling the regexp, but we can't use
119 // AssertNoAllocation, since regexps might be preempted, and another thread
120 // might do allocation anyway.
121
122 String* subject_ptr = *subject;
123 // Character offsets into string.
124 int start_offset = previous_index;
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000125 int char_length = subject_ptr->length() - start_offset;
126 int slice_offset = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000127
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000128 // The string has been flattened, so if it is a cons string it contains the
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000129 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000130 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000131 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000132 subject_ptr = ConsString::cast(subject_ptr)->first();
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000133 } else if (StringShape(subject_ptr).IsSliced()) {
134 SlicedString* slice = SlicedString::cast(subject_ptr);
135 subject_ptr = slice->parent();
136 slice_offset = slice->offset();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000137 }
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000138 // Ensure that an underlying string has the same ASCII-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000139 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000140 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
141 // String is now either Sequential or External
142 int char_size_shift = is_ascii ? 0 : 1;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000143
144 const byte* input_start =
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000145 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000146 int byte_length = char_length << char_size_shift;
147 const byte* input_end = input_start + byte_length;
148 Result res = Execute(*regexp_code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000149 *subject,
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000150 start_offset,
151 input_start,
152 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000153 offsets_vector,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000154 offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000155 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000156 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000157}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000158
159
160NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
161 Code* code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000162 String* input, // This needs to be the unpacked (sliced, cons) string.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000163 int start_offset,
164 const byte* input_start,
165 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000166 int* output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000167 int output_size,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000168 Isolate* isolate) {
169 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000170 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000171 RegExpStackScope stack_scope(isolate);
172 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000173
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000174 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000175 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000176 input,
177 start_offset,
178 input_start,
179 input_end,
180 output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000181 output_size,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000182 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000183 direct_call,
184 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000185 ASSERT(result >= RETRY);
186
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000187 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000188 // We detected a stack overflow (on the backtrack stack) in RegExp code,
189 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000190 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000191 }
192 return static_cast<Result>(result);
193}
194
195
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000196const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201
202 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
203 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
205 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
206
207 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
208 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
209 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
210 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
211
212 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
213 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
214 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
215 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
216};
217
218
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000219int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
220 Address byte_offset1,
221 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000222 size_t byte_length,
223 Isolate* isolate) {
224 ASSERT(isolate == Isolate::Current());
225 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
226 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000227 // This function is not allowed to cause a garbage collection.
228 // A GC might move the calling generated code and invalidate the
229 // return address on the stack.
230 ASSERT(byte_length % 2 == 0);
231 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
232 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
233 size_t length = byte_length >> 1;
234
235 for (size_t i = 0; i < length; i++) {
236 unibrow::uchar c1 = substring1[i];
237 unibrow::uchar c2 = substring2[i];
238 if (c1 != c2) {
239 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000240 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000241 if (s1[0] != c2) {
242 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000243 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000244 if (s1[0] != s2[0]) {
245 return 0;
246 }
247 }
248 }
249 }
250 return 1;
251}
252
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000253
254Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000255 Address* stack_base,
256 Isolate* isolate) {
257 ASSERT(isolate == Isolate::Current());
258 RegExpStack* regexp_stack = isolate->regexp_stack();
259 size_t size = regexp_stack->stack_capacity();
260 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000261 ASSERT(old_stack_base == *stack_base);
262 ASSERT(stack_pointer <= old_stack_base);
263 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000264 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000265 if (new_stack_base == NULL) {
266 return NULL;
267 }
268 *stack_base = new_stack_base;
269 intptr_t stack_content_size = old_stack_base - stack_pointer;
270 return new_stack_base - stack_content_size;
271}
272
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000273#endif // V8_INTERPRETED_REGEXP
274
ager@chromium.org32912102009-01-16 10:38:43 +0000275} } // namespace v8::internal