blob: ea41db63e21e96dd82ef7ab48985e874ceab4527 [file] [log] [blame]
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
ager@chromium.org18ad94b2009-09-02 08:22:29 +000046bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000055#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000056
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000059
60
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000061NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063
ager@chromium.org18ad94b2009-09-02 08:22:29 +000064
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68#else
69 return false;
70#endif
71}
72
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000090 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000091 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000092 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000100}
101
ager@chromium.org32912102009-01-16 10:38:43 +0000102
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000108 int previous_index,
109 Isolate* isolate) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000110
111 ASSERT(subject->IsFlat());
112 ASSERT(previous_index >= 0);
113 ASSERT(previous_index <= subject->length());
114
115 // No allocations before calling the regexp, but we can't use
116 // AssertNoAllocation, since regexps might be preempted, and another thread
117 // might do allocation anyway.
118
119 String* subject_ptr = *subject;
120 // Character offsets into string.
121 int start_offset = previous_index;
122 int end_offset = subject_ptr->length();
123
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000124 // The string has been flattened, so it it is a cons string it contains the
125 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000126 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000127 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000128 subject_ptr = ConsString::cast(subject_ptr)->first();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000129 }
130 // Ensure that an underlying string has the same ascii-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000131 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000132 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
133 // String is now either Sequential or External
134 int char_size_shift = is_ascii ? 0 : 1;
135 int char_length = end_offset - start_offset;
136
137 const byte* input_start =
138 StringCharacterPosition(subject_ptr, start_offset);
139 int byte_length = char_length << char_size_shift;
140 const byte* input_end = input_start + byte_length;
141 Result res = Execute(*regexp_code,
142 subject_ptr,
143 start_offset,
144 input_start,
145 input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000146 offsets_vector,
147 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000148 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000149}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000150
151
152NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
153 Code* code,
154 String* input,
155 int start_offset,
156 const byte* input_start,
157 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000158 int* output,
159 Isolate* isolate) {
160 ASSERT(isolate == Isolate::Current());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000161 // Ensure that the minimum stack has been allocated.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000162 RegExpStackScope stack_scope(isolate);
163 Address stack_base = stack_scope.stack()->stack_base();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000164
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000165 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000166 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000167 input,
168 start_offset,
169 input_start,
170 input_end,
171 output,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000172 stack_base,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000173 direct_call,
174 isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000175 ASSERT(result <= SUCCESS);
176 ASSERT(result >= RETRY);
177
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000178 if (result == EXCEPTION && !isolate->has_pending_exception()) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000179 // We detected a stack overflow (on the backtrack stack) in RegExp code,
180 // but haven't created the exception yet.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000181 isolate->StackOverflow();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000182 }
183 return static_cast<Result>(result);
184}
185
186
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000187const byte NativeRegExpMacroAssembler::word_character_map[] = {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000188 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192
193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
196 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
197
198 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
199 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
200 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
201 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
202
203 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
206 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
207};
208
209
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000210int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
211 Address byte_offset1,
212 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000213 size_t byte_length,
214 Isolate* isolate) {
215 ASSERT(isolate == Isolate::Current());
216 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
217 isolate->regexp_macro_assembler_canonicalize();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000218 // This function is not allowed to cause a garbage collection.
219 // A GC might move the calling generated code and invalidate the
220 // return address on the stack.
221 ASSERT(byte_length % 2 == 0);
222 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
223 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
224 size_t length = byte_length >> 1;
225
226 for (size_t i = 0; i < length; i++) {
227 unibrow::uchar c1 = substring1[i];
228 unibrow::uchar c2 = substring2[i];
229 if (c1 != c2) {
230 unibrow::uchar s1[1] = { c1 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000231 canonicalize->get(c1, '\0', s1);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000232 if (s1[0] != c2) {
233 unibrow::uchar s2[1] = { c2 };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000234 canonicalize->get(c2, '\0', s2);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000235 if (s1[0] != s2[0]) {
236 return 0;
237 }
238 }
239 }
240 }
241 return 1;
242}
243
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000244
245Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000246 Address* stack_base,
247 Isolate* isolate) {
248 ASSERT(isolate == Isolate::Current());
249 RegExpStack* regexp_stack = isolate->regexp_stack();
250 size_t size = regexp_stack->stack_capacity();
251 Address old_stack_base = regexp_stack->stack_base();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000252 ASSERT(old_stack_base == *stack_base);
253 ASSERT(stack_pointer <= old_stack_base);
254 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000255 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000256 if (new_stack_base == NULL) {
257 return NULL;
258 }
259 *stack_base = new_stack_base;
260 intptr_t stack_content_size = old_stack_base - stack_pointer;
261 return new_stack_base - stack_content_size;
262}
263
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000264#endif // V8_INTERPRETED_REGEXP
265
ager@chromium.org32912102009-01-16 10:38:43 +0000266} } // namespace v8::internal