blob: 55782431bec353820f868b71b94b4d4cb0a661e6 [file] [log] [blame]
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
karlklose@chromium.org83a47282011-05-11 11:54:09 +000038RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000039}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
ager@chromium.org18ad94b2009-09-02 08:22:29 +000046bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000055#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000056
karlklose@chromium.org83a47282011-05-11 11:54:09 +000057NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
58 : RegExpMacroAssembler() {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000059}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000060
61
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000062NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
63}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000064
ager@chromium.org18ad94b2009-09-02 08:22:29 +000065
66bool NativeRegExpMacroAssembler::CanReadUnaligned() {
67#ifdef V8_TARGET_CAN_READ_UNALIGNED
karlklose@chromium.org83a47282011-05-11 11:54:09 +000068 return !slow_safe();
ager@chromium.org18ad94b2009-09-02 08:22:29 +000069#else
70 return false;
71#endif
72}
73
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000074const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
75 String* subject,
76 int start_index) {
77 // Not just flat, but ultra flat.
78 ASSERT(subject->IsExternalString() || subject->IsSeqString());
79 ASSERT(start_index >= 0);
80 ASSERT(start_index <= subject->length());
81 if (subject->IsAsciiRepresentation()) {
82 const byte* address;
83 if (StringShape(subject).IsExternal()) {
84 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
85 address = reinterpret_cast<const byte*>(data);
86 } else {
87 ASSERT(subject->IsSeqAsciiString());
88 char* data = SeqAsciiString::cast(subject)->GetChars();
89 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000090 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000091 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000092 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000093 const uc16* data;
94 if (StringShape(subject).IsExternal()) {
95 data = ExternalTwoByteString::cast(subject)->resource()->data();
96 } else {
97 ASSERT(subject->IsSeqTwoByteString());
98 data = SeqTwoByteString::cast(subject)->GetChars();
99 }
100 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000101}
102
ager@chromium.org32912102009-01-16 10:38:43 +0000103
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000104NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
105 Handle<Code> regexp_code,
106 Handle<String> subject,
107 int* offsets_vector,
108 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000109 int previous_index,
110 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000111
112 ASSERT(subject->IsFlat());
113 ASSERT(previous_index >= 0);
114 ASSERT(previous_index <= subject->length());
115
116 // No allocations before calling the regexp, but we can't use
117 // AssertNoAllocation, since regexps might be preempted, and another thread
118 // might do allocation anyway.
119
120 String* subject_ptr = *subject;
121 // Character offsets into string.
122 int start_offset = previous_index;
123 int end_offset = subject_ptr->length();
124
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000125 // The string has been flattened, so it it is a cons string it contains the
126 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000127 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000128 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000129 subject_ptr = ConsString::cast(subject_ptr)->first();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000130 }
131 // Ensure that an underlying string has the same ascii-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000132 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000133 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
134 // String is now either Sequential or External
135 int char_size_shift = is_ascii ? 0 : 1;
136 int char_length = end_offset - start_offset;
137
138 const byte* input_start =
139 StringCharacterPosition(subject_ptr, start_offset);
140 int byte_length = char_length << char_size_shift;
141 const byte* input_end = input_start + byte_length;
142 Result res = Execute(*regexp_code,
143 subject_ptr,
144 start_offset,
145 input_start,
146 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000147 offsets_vector,
148 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000149 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000150}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000151
152
153NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
154 Code* code,
155 String* input,
156 int start_offset,
157 const byte* input_start,
158 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000159 int* output,
160 Isolate* isolate) {
161 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000162 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000163 RegExpStackScope stack_scope(isolate);
164 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000165
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000166 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000167 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000168 input,
169 start_offset,
170 input_start,
171 input_end,
172 output,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000173 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000174 direct_call,
175 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000176 ASSERT(result <= SUCCESS);
177 ASSERT(result >= RETRY);
178
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000179 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000180 // We detected a stack overflow (on the backtrack stack) in RegExp code,
181 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000182 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000183 }
184 return static_cast<Result>(result);
185}
186
187
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000188const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000189 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
197 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
198
199 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
200 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
201 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
202 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
203
204 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
206 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
207 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
208};
209
210
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000211int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
212 Address byte_offset1,
213 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000214 size_t byte_length,
215 Isolate* isolate) {
216 ASSERT(isolate == Isolate::Current());
217 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
218 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000219 // This function is not allowed to cause a garbage collection.
220 // A GC might move the calling generated code and invalidate the
221 // return address on the stack.
222 ASSERT(byte_length % 2 == 0);
223 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
224 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
225 size_t length = byte_length >> 1;
226
227 for (size_t i = 0; i < length; i++) {
228 unibrow::uchar c1 = substring1[i];
229 unibrow::uchar c2 = substring2[i];
230 if (c1 != c2) {
231 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000232 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000233 if (s1[0] != c2) {
234 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000235 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000236 if (s1[0] != s2[0]) {
237 return 0;
238 }
239 }
240 }
241 }
242 return 1;
243}
244
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000245
246Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000247 Address* stack_base,
248 Isolate* isolate) {
249 ASSERT(isolate == Isolate::Current());
250 RegExpStack* regexp_stack = isolate->regexp_stack();
251 size_t size = regexp_stack->stack_capacity();
252 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000253 ASSERT(old_stack_base == *stack_base);
254 ASSERT(stack_pointer <= old_stack_base);
255 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000256 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000257 if (new_stack_base == NULL) {
258 return NULL;
259 }
260 *stack_base = new_stack_base;
261 intptr_t stack_content_size = old_stack_base - stack_pointer;
262 return new_stack_base - stack_content_size;
263}
264
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000265#endif // V8_INTERPRETED_REGEXP
266
ager@chromium.org32912102009-01-16 10:38:43 +0000267} } // namespace v8::internal