// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
31#include "regexp-stack.h"
32#include "regexp-macro-assembler.h"
Steve Blockd0582a62009-12-15 09:54:21 +000033#include "simulator.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000034
35namespace v8 {
36namespace internal {
37
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
46bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
55#ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
56
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
59
60
61NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
63
64
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68#else
69 return false;
70#endif
71}
72
73const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
89 }
90 return address + start_index;
91 }
92 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
100}
101
102
103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
108 int previous_index) {
109
110 ASSERT(subject->IsFlat());
111 ASSERT(previous_index >= 0);
112 ASSERT(previous_index <= subject->length());
113
114 // No allocations before calling the regexp, but we can't use
115 // AssertNoAllocation, since regexps might be preempted, and another thread
116 // might do allocation anyway.
117
118 String* subject_ptr = *subject;
119 // Character offsets into string.
120 int start_offset = previous_index;
121 int end_offset = subject_ptr->length();
122
123 bool is_ascii = subject->IsAsciiRepresentation();
124
125 if (StringShape(subject_ptr).IsCons()) {
126 subject_ptr = ConsString::cast(subject_ptr)->first();
Steve Blocka7e24c12009-10-30 11:49:00 +0000127 }
128 // Ensure that an underlying string has the same ascii-ness.
129 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
130 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
131 // String is now either Sequential or External
132 int char_size_shift = is_ascii ? 0 : 1;
133 int char_length = end_offset - start_offset;
134
135 const byte* input_start =
136 StringCharacterPosition(subject_ptr, start_offset);
137 int byte_length = char_length << char_size_shift;
138 const byte* input_end = input_start + byte_length;
139 Result res = Execute(*regexp_code,
140 subject_ptr,
141 start_offset,
142 input_start,
143 input_end,
144 offsets_vector,
145 previous_index == 0);
Steve Blocka7e24c12009-10-30 11:49:00 +0000146 return res;
147}
148
149
150NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
151 Code* code,
152 String* input,
153 int start_offset,
154 const byte* input_start,
155 const byte* input_end,
156 int* output,
157 bool at_start) {
158 typedef int (*matcher)(String*, int, const byte*,
Leon Clarkee46be812010-01-19 14:06:41 +0000159 const byte*, int*, int, Address, int);
Steve Blocka7e24c12009-10-30 11:49:00 +0000160 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
161
162 int at_start_val = at_start ? 1 : 0;
163
164 // Ensure that the minimum stack has been allocated.
165 RegExpStack stack;
166 Address stack_base = RegExpStack::stack_base();
167
Leon Clarkee46be812010-01-19 14:06:41 +0000168 int direct_call = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +0000169 int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
170 input,
171 start_offset,
172 input_start,
173 input_end,
174 output,
175 at_start_val,
Leon Clarkee46be812010-01-19 14:06:41 +0000176 stack_base,
177 direct_call);
Steve Blocka7e24c12009-10-30 11:49:00 +0000178 ASSERT(result <= SUCCESS);
179 ASSERT(result >= RETRY);
180
181 if (result == EXCEPTION && !Top::has_pending_exception()) {
182 // We detected a stack overflow (on the backtrack stack) in RegExp code,
183 // but haven't created the exception yet.
184 Top::StackOverflow();
185 }
186 return static_cast<Result>(result);
187}
188
189
190static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
191
Leon Clarkee46be812010-01-19 14:06:41 +0000192
193byte NativeRegExpMacroAssembler::word_character_map[] = {
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
202 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
203
204 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
206 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
207 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
208
209 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
210 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
211 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
212 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
213};
214
215
Steve Blocka7e24c12009-10-30 11:49:00 +0000216int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
217 Address byte_offset1,
218 Address byte_offset2,
219 size_t byte_length) {
220 // This function is not allowed to cause a garbage collection.
221 // A GC might move the calling generated code and invalidate the
222 // return address on the stack.
223 ASSERT(byte_length % 2 == 0);
224 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
225 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
226 size_t length = byte_length >> 1;
227
228 for (size_t i = 0; i < length; i++) {
229 unibrow::uchar c1 = substring1[i];
230 unibrow::uchar c2 = substring2[i];
231 if (c1 != c2) {
232 unibrow::uchar s1[1] = { c1 };
233 canonicalize.get(c1, '\0', s1);
234 if (s1[0] != c2) {
235 unibrow::uchar s2[1] = { c2 };
236 canonicalize.get(c2, '\0', s2);
237 if (s1[0] != s2[0]) {
238 return 0;
239 }
240 }
241 }
242 }
243 return 1;
244}
245
246
247Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
248 Address* stack_base) {
249 size_t size = RegExpStack::stack_capacity();
250 Address old_stack_base = RegExpStack::stack_base();
251 ASSERT(old_stack_base == *stack_base);
252 ASSERT(stack_pointer <= old_stack_base);
253 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
254 Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
255 if (new_stack_base == NULL) {
256 return NULL;
257 }
258 *stack_base = new_stack_base;
259 intptr_t stack_content_size = old_stack_base - stack_pointer;
260 return new_stack_base - stack_content_size;
261}
262
263#endif // V8_NATIVE_REGEXP
264} } // namespace v8::internal