blob: 09797ca2a4f80165c008b950c1cc5ab5a0bc83a9 [file] [log] [blame]
Steve Blocka7e24c12009-10-30 11:49:00 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
31#include "regexp-stack.h"
32#include "regexp-macro-assembler.h"
Steve Blockd0582a62009-12-15 09:54:21 +000033#include "simulator.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000034
35namespace v8 {
36namespace internal {
37
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
46bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
Steve Block6ded16b2010-05-10 14:33:55 +010055#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
Steve Blocka7e24c12009-10-30 11:49:00 +000056
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
59
60
61NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
63
64
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68#else
69 return false;
70#endif
71}
72
73const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
89 }
90 return address + start_index;
91 }
92 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
100}
101
102
103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
108 int previous_index) {
109
110 ASSERT(subject->IsFlat());
111 ASSERT(previous_index >= 0);
112 ASSERT(previous_index <= subject->length());
113
114 // No allocations before calling the regexp, but we can't use
115 // AssertNoAllocation, since regexps might be preempted, and another thread
116 // might do allocation anyway.
117
118 String* subject_ptr = *subject;
119 // Character offsets into string.
120 int start_offset = previous_index;
121 int end_offset = subject_ptr->length();
122
Leon Clarked91b9f72010-01-27 17:25:45 +0000123 // The string has been flattened, so it it is a cons string it contains the
124 // full string in the first part.
Steve Blocka7e24c12009-10-30 11:49:00 +0000125 if (StringShape(subject_ptr).IsCons()) {
Leon Clarked91b9f72010-01-27 17:25:45 +0000126 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
Steve Blocka7e24c12009-10-30 11:49:00 +0000127 subject_ptr = ConsString::cast(subject_ptr)->first();
Steve Blocka7e24c12009-10-30 11:49:00 +0000128 }
129 // Ensure that an underlying string has the same ascii-ness.
Steve Block8defd9f2010-07-08 12:39:36 +0100130 bool is_ascii = subject_ptr->IsAsciiRepresentation();
Steve Blocka7e24c12009-10-30 11:49:00 +0000131 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
132 // String is now either Sequential or External
133 int char_size_shift = is_ascii ? 0 : 1;
134 int char_length = end_offset - start_offset;
135
136 const byte* input_start =
137 StringCharacterPosition(subject_ptr, start_offset);
138 int byte_length = char_length << char_size_shift;
139 const byte* input_end = input_start + byte_length;
140 Result res = Execute(*regexp_code,
141 subject_ptr,
142 start_offset,
143 input_start,
144 input_end,
Leon Clarked91b9f72010-01-27 17:25:45 +0000145 offsets_vector);
Steve Blocka7e24c12009-10-30 11:49:00 +0000146 return res;
147}
148
149
150NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
151 Code* code,
152 String* input,
153 int start_offset,
154 const byte* input_start,
155 const byte* input_end,
Leon Clarked91b9f72010-01-27 17:25:45 +0000156 int* output) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000157 typedef int (*matcher)(String*, int, const byte*,
Leon Clarked91b9f72010-01-27 17:25:45 +0000158 const byte*, int*, Address, int);
Steve Blocka7e24c12009-10-30 11:49:00 +0000159 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
160
Steve Blocka7e24c12009-10-30 11:49:00 +0000161 // Ensure that the minimum stack has been allocated.
162 RegExpStack stack;
163 Address stack_base = RegExpStack::stack_base();
164
Leon Clarkee46be812010-01-19 14:06:41 +0000165 int direct_call = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +0000166 int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
167 input,
168 start_offset,
169 input_start,
170 input_end,
171 output,
Leon Clarkee46be812010-01-19 14:06:41 +0000172 stack_base,
173 direct_call);
Steve Blocka7e24c12009-10-30 11:49:00 +0000174 ASSERT(result <= SUCCESS);
175 ASSERT(result >= RETRY);
176
177 if (result == EXCEPTION && !Top::has_pending_exception()) {
178 // We detected a stack overflow (on the backtrack stack) in RegExp code,
179 // but haven't created the exception yet.
180 Top::StackOverflow();
181 }
182 return static_cast<Result>(result);
183}
184
185
186static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
187
Leon Clarkee46be812010-01-19 14:06:41 +0000188
189byte NativeRegExpMacroAssembler::word_character_map[] = {
190 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
198 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
199
200 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
201 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
202 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
203 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
204
205 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
206 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
207 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
208 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
209};
210
211
Steve Blocka7e24c12009-10-30 11:49:00 +0000212int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
213 Address byte_offset1,
214 Address byte_offset2,
215 size_t byte_length) {
216 // This function is not allowed to cause a garbage collection.
217 // A GC might move the calling generated code and invalidate the
218 // return address on the stack.
219 ASSERT(byte_length % 2 == 0);
220 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
221 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
222 size_t length = byte_length >> 1;
223
224 for (size_t i = 0; i < length; i++) {
225 unibrow::uchar c1 = substring1[i];
226 unibrow::uchar c2 = substring2[i];
227 if (c1 != c2) {
228 unibrow::uchar s1[1] = { c1 };
229 canonicalize.get(c1, '\0', s1);
230 if (s1[0] != c2) {
231 unibrow::uchar s2[1] = { c2 };
232 canonicalize.get(c2, '\0', s2);
233 if (s1[0] != s2[0]) {
234 return 0;
235 }
236 }
237 }
238 }
239 return 1;
240}
241
242
243Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
244 Address* stack_base) {
245 size_t size = RegExpStack::stack_capacity();
246 Address old_stack_base = RegExpStack::stack_base();
247 ASSERT(old_stack_base == *stack_base);
248 ASSERT(stack_pointer <= old_stack_base);
249 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
250 Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
251 if (new_stack_base == NULL) {
252 return NULL;
253 }
254 *stack_base = new_stack_base;
255 intptr_t stack_content_size = old_stack_base - stack_pointer;
256 return new_stack_base - stack_content_size;
257}
258
Steve Block6ded16b2010-05-10 14:33:55 +0100259#endif // V8_INTERPRETED_REGEXP
260
Steve Blocka7e24c12009-10-30 11:49:00 +0000261} } // namespace v8::internal