blob: 51f4015f6cf35da4daf2f1a344a139cea385ab45 [file] [log] [blame]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
ager@chromium.org18ad94b2009-09-02 08:22:29 +000046bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000055#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000056
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000059
60
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000061NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063
ager@chromium.org18ad94b2009-09-02 08:22:29 +000064
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68#else
69 return false;
70#endif
71}
72
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000090 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000091 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000092 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000100}
101
ager@chromium.org32912102009-01-16 10:38:43 +0000102
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
108 int previous_index) {
109
110 ASSERT(subject->IsFlat());
111 ASSERT(previous_index >= 0);
112 ASSERT(previous_index <= subject->length());
113
114 // No allocations before calling the regexp, but we can't use
115 // AssertNoAllocation, since regexps might be preempted, and another thread
116 // might do allocation anyway.
117
118 String* subject_ptr = *subject;
119 // Character offsets into string.
120 int start_offset = previous_index;
121 int end_offset = subject_ptr->length();
122
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000123 // The string has been flattened, so it it is a cons string it contains the
124 // full string in the first part.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000125 if (StringShape(subject_ptr).IsCons()) {
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000126 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000127 subject_ptr = ConsString::cast(subject_ptr)->first();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000128 }
129 // Ensure that an underlying string has the same ascii-ness.
lrn@chromium.org32d961d2010-06-30 09:09:34 +0000130 bool is_ascii = subject_ptr->IsAsciiRepresentation();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000131 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
132 // String is now either Sequential or External
133 int char_size_shift = is_ascii ? 0 : 1;
134 int char_length = end_offset - start_offset;
135
136 const byte* input_start =
137 StringCharacterPosition(subject_ptr, start_offset);
138 int byte_length = char_length << char_size_shift;
139 const byte* input_end = input_start + byte_length;
140 Result res = Execute(*regexp_code,
141 subject_ptr,
142 start_offset,
143 input_start,
144 input_end,
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000145 offsets_vector);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000146 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000147}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000148
149
150NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
151 Code* code,
152 String* input,
153 int start_offset,
154 const byte* input_start,
155 const byte* input_end,
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000156 int* output) {
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000157 // Ensure that the minimum stack has been allocated.
158 RegExpStack stack;
159 Address stack_base = RegExpStack::stack_base();
160
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000161 int direct_call = 0;
kmillikin@chromium.org49edbdf2011-02-16 12:32:18 +0000162 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000163 input,
164 start_offset,
165 input_start,
166 input_end,
167 output,
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000168 stack_base,
169 direct_call);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000170 ASSERT(result <= SUCCESS);
171 ASSERT(result >= RETRY);
172
173 if (result == EXCEPTION && !Top::has_pending_exception()) {
174 // We detected a stack overflow (on the backtrack stack) in RegExp code,
175 // but haven't created the exception yet.
176 Top::StackOverflow();
177 }
178 return static_cast<Result>(result);
179}
180
181
182static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
183
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000184
185byte NativeRegExpMacroAssembler::word_character_map[] = {
186 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
187 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
188 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
194 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
195
196 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
197 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
198 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
199 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
200
201 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
202 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
203 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
204 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
205};
206
207
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000208int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
209 Address byte_offset1,
210 Address byte_offset2,
211 size_t byte_length) {
212 // This function is not allowed to cause a garbage collection.
213 // A GC might move the calling generated code and invalidate the
214 // return address on the stack.
215 ASSERT(byte_length % 2 == 0);
216 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
217 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
218 size_t length = byte_length >> 1;
219
220 for (size_t i = 0; i < length; i++) {
221 unibrow::uchar c1 = substring1[i];
222 unibrow::uchar c2 = substring2[i];
223 if (c1 != c2) {
224 unibrow::uchar s1[1] = { c1 };
225 canonicalize.get(c1, '\0', s1);
226 if (s1[0] != c2) {
227 unibrow::uchar s2[1] = { c2 };
228 canonicalize.get(c2, '\0', s2);
229 if (s1[0] != s2[0]) {
230 return 0;
231 }
232 }
233 }
234 }
235 return 1;
236}
237
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000238
239Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
240 Address* stack_base) {
241 size_t size = RegExpStack::stack_capacity();
242 Address old_stack_base = RegExpStack::stack_base();
243 ASSERT(old_stack_base == *stack_base);
244 ASSERT(stack_pointer <= old_stack_base);
245 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
246 Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
247 if (new_stack_base == NULL) {
248 return NULL;
249 }
250 *stack_base = new_stack_base;
251 intptr_t stack_content_size = old_stack_base - stack_pointer;
252 return new_stack_base - stack_content_size;
253}
254
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000255#endif // V8_INTERPRETED_REGEXP
256
ager@chromium.org32912102009-01-16 10:38:43 +0000257} } // namespace v8::internal