blob: 99f3a37f4c8deaf7de4b1fb916ff85c1f89163f9 [file] [log] [blame]
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
karlklose@chromium.org83a47282011-05-11 11:54:09 +000038RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000039}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
ager@chromium.org18ad94b2009-09-02 08:22:29 +000046bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000055#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000056
karlklose@chromium.org83a47282011-05-11 11:54:09 +000057NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
58 : RegExpMacroAssembler() {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000059}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000060
61
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000062NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
63}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000064
ager@chromium.org18ad94b2009-09-02 08:22:29 +000065
66bool NativeRegExpMacroAssembler::CanReadUnaligned() {
67#ifdef V8_TARGET_CAN_READ_UNALIGNED
karlklose@chromium.org83a47282011-05-11 11:54:09 +000068 return !slow_safe();
ager@chromium.org18ad94b2009-09-02 08:22:29 +000069#else
70 return false;
71#endif
72}
73
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000074const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
75 String* subject,
76 int start_index) {
77 // Not just flat, but ultra flat.
78 ASSERT(subject->IsExternalString() || subject->IsSeqString());
79 ASSERT(start_index >= 0);
80 ASSERT(start_index <= subject->length());
81 if (subject->IsAsciiRepresentation()) {
82 const byte* address;
83 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000084 const char* data = ExternalAsciiString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000085 address = reinterpret_cast<const byte*>(data);
86 } else {
87 ASSERT(subject->IsSeqAsciiString());
88 char* data = SeqAsciiString::cast(subject)->GetChars();
89 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000090 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000091 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000092 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000093 const uc16* data;
94 if (StringShape(subject).IsExternal()) {
erikcorry0ad885c2011-11-21 13:51:57 +000095 data = ExternalTwoByteString::cast(subject)->GetChars();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000096 } else {
97 ASSERT(subject->IsSeqTwoByteString());
98 data = SeqTwoByteString::cast(subject)->GetChars();
99 }
100 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000101}
102
ager@chromium.org32912102009-01-16 10:38:43 +0000103
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000104NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
105 Handle<Code> regexp_code,
106 Handle<String> subject,
107 int* offsets_vector,
108 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000109 int previous_index,
110 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000111
112 ASSERT(subject->IsFlat());
113 ASSERT(previous_index >= 0);
114 ASSERT(previous_index <= subject->length());
115
116 // No allocations before calling the regexp, but we can't use
117 // AssertNoAllocation, since regexps might be preempted, and another thread
118 // might do allocation anyway.
119
120 String* subject_ptr = *subject;
121 // Character offsets into string.
122 int start_offset = previous_index;
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000123 int char_length = subject_ptr->length() - start_offset;
124 int slice_offset = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000125
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000126 // The string has been flattened, so if it is a cons string it contains the
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000127 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000128 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000129 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000130 subject_ptr = ConsString::cast(subject_ptr)->first();
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000131 } else if (StringShape(subject_ptr).IsSliced()) {
132 SlicedString* slice = SlicedString::cast(subject_ptr);
133 subject_ptr = slice->parent();
134 slice_offset = slice->offset();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000135 }
136 // Ensure that an underlying string has the same ascii-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000137 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000138 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
139 // String is now either Sequential or External
140 int char_size_shift = is_ascii ? 0 : 1;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000141
142 const byte* input_start =
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000143 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000144 int byte_length = char_length << char_size_shift;
145 const byte* input_end = input_start + byte_length;
146 Result res = Execute(*regexp_code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000147 *subject,
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000148 start_offset,
149 input_start,
150 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000151 offsets_vector,
152 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000153 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000154}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000155
156
157NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
158 Code* code,
ricow@chromium.org4668a2c2011-08-29 10:41:00 +0000159 String* input, // This needs to be the unpacked (sliced, cons) string.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000160 int start_offset,
161 const byte* input_start,
162 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000163 int* output,
164 Isolate* isolate) {
165 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000166 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000167 RegExpStackScope stack_scope(isolate);
168 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000169
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000170 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000171 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000172 input,
173 start_offset,
174 input_start,
175 input_end,
176 output,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000177 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000178 direct_call,
179 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000180 ASSERT(result <= SUCCESS);
181 ASSERT(result >= RETRY);
182
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000183 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000184 // We detected a stack overflow (on the backtrack stack) in RegExp code,
185 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000186 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000187 }
188 return static_cast<Result>(result);
189}
190
191
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000192const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197
198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
201 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
202
203 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
206 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
207
208 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
209 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
210 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
211 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
212};
213
214
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000215int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
216 Address byte_offset1,
217 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000218 size_t byte_length,
219 Isolate* isolate) {
220 ASSERT(isolate == Isolate::Current());
221 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
222 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000223 // This function is not allowed to cause a garbage collection.
224 // A GC might move the calling generated code and invalidate the
225 // return address on the stack.
226 ASSERT(byte_length % 2 == 0);
227 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
228 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
229 size_t length = byte_length >> 1;
230
231 for (size_t i = 0; i < length; i++) {
232 unibrow::uchar c1 = substring1[i];
233 unibrow::uchar c2 = substring2[i];
234 if (c1 != c2) {
235 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000236 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000237 if (s1[0] != c2) {
238 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000239 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000240 if (s1[0] != s2[0]) {
241 return 0;
242 }
243 }
244 }
245 }
246 return 1;
247}
248
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000249
250Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000251 Address* stack_base,
252 Isolate* isolate) {
253 ASSERT(isolate == Isolate::Current());
254 RegExpStack* regexp_stack = isolate->regexp_stack();
255 size_t size = regexp_stack->stack_capacity();
256 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000257 ASSERT(old_stack_base == *stack_base);
258 ASSERT(stack_pointer <= old_stack_base);
259 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000260 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000261 if (new_stack_base == NULL) {
262 return NULL;
263 }
264 *stack_base = new_stack_base;
265 intptr_t stack_content_size = old_stack_base - stack_pointer;
266 return new_stack_base - stack_content_size;
267}
268
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000269#endif // V8_INTERPRETED_REGEXP
270
ager@chromium.org32912102009-01-16 10:38:43 +0000271} } // namespace v8::internal