blob: a4719b53fc0439814bb63ac377b1d5a0532d35fa [file] [log] [blame]
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
mmassi@chromium.org7028c052012-06-13 11:51:58 +000038RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +000039 : slow_safe_compiler_(false),
mmassi@chromium.org7028c052012-06-13 11:51:58 +000040 global_mode_(NOT_GLOBAL),
41 zone_(zone) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000042}
43
44
45RegExpMacroAssembler::~RegExpMacroAssembler() {
46}
47
48
ager@chromium.org18ad94b2009-09-02 08:22:29 +000049bool RegExpMacroAssembler::CanReadUnaligned() {
50#ifdef V8_HOST_CAN_READ_UNALIGNED
51 return true;
52#else
53 return false;
54#endif
55}
56
57
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000058#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000059
mmassi@chromium.org7028c052012-06-13 11:51:58 +000060NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
61 : RegExpMacroAssembler(zone) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000062}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063
64
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000065NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
66}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000067
ager@chromium.org18ad94b2009-09-02 08:22:29 +000068
69bool NativeRegExpMacroAssembler::CanReadUnaligned() {
70#ifdef V8_TARGET_CAN_READ_UNALIGNED
karlklose@chromium.org83a47282011-05-11 11:54:09 +000071 return !slow_safe();
ager@chromium.org18ad94b2009-09-02 08:22:29 +000072#else
73 return false;
74#endif
75}
76
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000077const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
78 String* subject,
79 int start_index) {
80 // Not just flat, but ultra flat.
81 ASSERT(subject->IsExternalString() || subject->IsSeqString());
82 ASSERT(start_index >= 0);
83 ASSERT(start_index <= subject->length());
84 if (subject->IsAsciiRepresentation()) {
85 const byte* address;
86 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000087 const char* data = ExternalAsciiString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000088 address = reinterpret_cast<const byte*>(data);
89 } else {
90 ASSERT(subject->IsSeqAsciiString());
91 char* data = SeqAsciiString::cast(subject)->GetChars();
92 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000093 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000094 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000095 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000096 const uc16* data;
97 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000098 data = ExternalTwoByteString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000099 } else {
100 ASSERT(subject->IsSeqTwoByteString());
101 data = SeqTwoByteString::cast(subject)->GetChars();
102 }
103 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000104}
105
ager@chromium.org32912102009-01-16 10:38:43 +0000106
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000107NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
108 Handle<Code> regexp_code,
109 Handle<String> subject,
110 int* offsets_vector,
111 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000112 int previous_index,
113 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000114
115 ASSERT(subject->IsFlat());
116 ASSERT(previous_index >= 0);
117 ASSERT(previous_index <= subject->length());
118
119 // No allocations before calling the regexp, but we can't use
120 // AssertNoAllocation, since regexps might be preempted, and another thread
121 // might do allocation anyway.
122
123 String* subject_ptr = *subject;
124 // Character offsets into string.
125 int start_offset = previous_index;
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000126 int char_length = subject_ptr->length() - start_offset;
127 int slice_offset = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000128
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000129 // The string has been flattened, so if it is a cons string it contains the
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000130 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000131 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000132 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000133 subject_ptr = ConsString::cast(subject_ptr)->first();
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000134 } else if (StringShape(subject_ptr).IsSliced()) {
135 SlicedString* slice = SlicedString::cast(subject_ptr);
136 subject_ptr = slice->parent();
137 slice_offset = slice->offset();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000138 }
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000139 // Ensure that an underlying string has the same ASCII-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000140 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000141 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
142 // String is now either Sequential or External
143 int char_size_shift = is_ascii ? 0 : 1;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000144
145 const byte* input_start =
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000146 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000147 int byte_length = char_length << char_size_shift;
148 const byte* input_end = input_start + byte_length;
149 Result res = Execute(*regexp_code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000150 *subject,
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000151 start_offset,
152 input_start,
153 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000154 offsets_vector,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000155 offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000156 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000157 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000158}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000159
160
161NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
162 Code* code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000163 String* input, // This needs to be the unpacked (sliced, cons) string.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000164 int start_offset,
165 const byte* input_start,
166 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000167 int* output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000168 int output_size,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000169 Isolate* isolate) {
170 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000171 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000172 RegExpStackScope stack_scope(isolate);
173 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000174
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000175 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000176 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000177 input,
178 start_offset,
179 input_start,
180 input_end,
181 output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000182 output_size,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000183 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000184 direct_call,
185 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000186 ASSERT(result >= RETRY);
187
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000188 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000189 // We detected a stack overflow (on the backtrack stack) in RegExp code,
190 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000191 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000192 }
193 return static_cast<Result>(result);
194}
195
196
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000197const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
202
203 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
206 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
207
208 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
209 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
210 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
211 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
212
213 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
214 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
215 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
216 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
217};
218
219
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000220int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
221 Address byte_offset1,
222 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000223 size_t byte_length,
224 Isolate* isolate) {
225 ASSERT(isolate == Isolate::Current());
226 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
227 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000228 // This function is not allowed to cause a garbage collection.
229 // A GC might move the calling generated code and invalidate the
230 // return address on the stack.
231 ASSERT(byte_length % 2 == 0);
232 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
233 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
234 size_t length = byte_length >> 1;
235
236 for (size_t i = 0; i < length; i++) {
237 unibrow::uchar c1 = substring1[i];
238 unibrow::uchar c2 = substring2[i];
239 if (c1 != c2) {
240 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000241 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000242 if (s1[0] != c2) {
243 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000244 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000245 if (s1[0] != s2[0]) {
246 return 0;
247 }
248 }
249 }
250 }
251 return 1;
252}
253
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000254
255Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000256 Address* stack_base,
257 Isolate* isolate) {
258 ASSERT(isolate == Isolate::Current());
259 RegExpStack* regexp_stack = isolate->regexp_stack();
260 size_t size = regexp_stack->stack_capacity();
261 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000262 ASSERT(old_stack_base == *stack_base);
263 ASSERT(stack_pointer <= old_stack_base);
264 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000265 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000266 if (new_stack_base == NULL) {
267 return NULL;
268 }
269 *stack_base = new_stack_base;
270 intptr_t stack_content_size = old_stack_base - stack_pointer;
271 return new_stack_base - stack_content_size;
272}
273
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000274#endif // V8_INTERPRETED_REGEXP
275
ager@chromium.org32912102009-01-16 10:38:43 +0000276} } // namespace v8::internal