blob: 3ebf5a8e00bb17a746b60d08d8a1e4139ee8e65e [file] [log] [blame]
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
mmassi@chromium.org7028c052012-06-13 11:51:58 +000038RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +000039 : slow_safe_compiler_(false),
mmassi@chromium.org7028c052012-06-13 11:51:58 +000040 global_mode_(NOT_GLOBAL),
41 zone_(zone) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000042}
43
44
45RegExpMacroAssembler::~RegExpMacroAssembler() {
46}
47
48
ager@chromium.org18ad94b2009-09-02 08:22:29 +000049bool RegExpMacroAssembler::CanReadUnaligned() {
50#ifdef V8_HOST_CAN_READ_UNALIGNED
51 return true;
52#else
53 return false;
54#endif
55}
56
57
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000058#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000059
mmassi@chromium.org7028c052012-06-13 11:51:58 +000060NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
61 : RegExpMacroAssembler(zone) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000062}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063
64
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000065NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
66}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000067
ager@chromium.org18ad94b2009-09-02 08:22:29 +000068
69bool NativeRegExpMacroAssembler::CanReadUnaligned() {
rossberg@chromium.org89e18f52012-10-22 13:09:53 +000070 return FLAG_enable_unaligned_accesses && !slow_safe();
ager@chromium.org18ad94b2009-09-02 08:22:29 +000071}
72
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
ulan@chromium.org8e8d8822012-11-23 14:36:46 +000080 if (subject->IsOneByteRepresentation()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000081 const byte* address;
82 if (StringShape(subject).IsExternal()) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000083 const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000084 address = reinterpret_cast<const byte*>(data);
85 } else {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +000086 ASSERT(subject->IsSeqOneByteString());
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000087 const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000088 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000090 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000091 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000092 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000094 data = ExternalTwoByteString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000095 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000100}
101
ager@chromium.org32912102009-01-16 10:38:43 +0000102
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000108 int previous_index,
109 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000110
111 ASSERT(subject->IsFlat());
112 ASSERT(previous_index >= 0);
113 ASSERT(previous_index <= subject->length());
114
115 // No allocations before calling the regexp, but we can't use
116 // AssertNoAllocation, since regexps might be preempted, and another thread
117 // might do allocation anyway.
118
119 String* subject_ptr = *subject;
120 // Character offsets into string.
121 int start_offset = previous_index;
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000122 int char_length = subject_ptr->length() - start_offset;
123 int slice_offset = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000124
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000125 // The string has been flattened, so if it is a cons string it contains the
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000126 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000127 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000128 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000129 subject_ptr = ConsString::cast(subject_ptr)->first();
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000130 } else if (StringShape(subject_ptr).IsSliced()) {
131 SlicedString* slice = SlicedString::cast(subject_ptr);
132 subject_ptr = slice->parent();
133 slice_offset = slice->offset();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000134 }
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000135 // Ensure that an underlying string has the same ASCII-ness.
ulan@chromium.org8e8d8822012-11-23 14:36:46 +0000136 bool is_ascii = subject_ptr->IsOneByteRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000137 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
138 // String is now either Sequential or External
139 int char_size_shift = is_ascii ? 0 : 1;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000140
141 const byte* input_start =
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000142 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000143 int byte_length = char_length << char_size_shift;
144 const byte* input_end = input_start + byte_length;
145 Result res = Execute(*regexp_code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000146 *subject,
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000147 start_offset,
148 input_start,
149 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000150 offsets_vector,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000151 offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000152 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000153 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000154}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000155
156
157NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
158 Code* code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000159 String* input, // This needs to be the unpacked (sliced, cons) string.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000160 int start_offset,
161 const byte* input_start,
162 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000163 int* output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000164 int output_size,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000165 Isolate* isolate) {
166 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000167 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000168 RegExpStackScope stack_scope(isolate);
169 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000170
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000171 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000172 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000173 input,
174 start_offset,
175 input_start,
176 input_end,
177 output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000178 output_size,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000179 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000180 direct_call,
181 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000182 ASSERT(result >= RETRY);
183
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000184 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000185 // We detected a stack overflow (on the backtrack stack) in RegExp code,
186 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000187 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000188 }
189 return static_cast<Result>(result);
190}
191
192
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000193const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
202 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
203
204 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
206 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
207 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
208
209 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
210 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
211 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
212 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
mvstanton@chromium.org6bec0092013-01-23 13:46:53 +0000213 // Latin-1 range
214 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
215 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
216 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
217 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
218
219 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
220 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
221 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
222 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
223
224 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
225 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
226 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
227 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
228
229 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
230 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
231 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
232 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000233};
234
235
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000236int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
237 Address byte_offset1,
238 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000239 size_t byte_length,
240 Isolate* isolate) {
241 ASSERT(isolate == Isolate::Current());
242 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
243 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000244 // This function is not allowed to cause a garbage collection.
245 // A GC might move the calling generated code and invalidate the
246 // return address on the stack.
247 ASSERT(byte_length % 2 == 0);
248 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
249 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
250 size_t length = byte_length >> 1;
251
252 for (size_t i = 0; i < length; i++) {
253 unibrow::uchar c1 = substring1[i];
254 unibrow::uchar c2 = substring2[i];
255 if (c1 != c2) {
256 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000257 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000258 if (s1[0] != c2) {
259 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000260 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000261 if (s1[0] != s2[0]) {
262 return 0;
263 }
264 }
265 }
266 }
267 return 1;
268}
269
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000270
271Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000272 Address* stack_base,
273 Isolate* isolate) {
274 ASSERT(isolate == Isolate::Current());
275 RegExpStack* regexp_stack = isolate->regexp_stack();
276 size_t size = regexp_stack->stack_capacity();
277 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000278 ASSERT(old_stack_base == *stack_base);
279 ASSERT(stack_pointer <= old_stack_base);
280 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000281 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000282 if (new_stack_base == NULL) {
283 return NULL;
284 }
285 *stack_base = new_stack_base;
286 intptr_t stack_content_size = old_stack_base - stack_pointer;
287 return new_stack_base - stack_content_size;
288}
289
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000290#endif // V8_INTERPRETED_REGEXP
291
ager@chromium.org32912102009-01-16 10:38:43 +0000292} } // namespace v8::internal