blob: 9ae19d723e6d09769ad817d3ac9e900d9ceeb6c2 [file] [log] [blame]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
ager@chromium.orgc4c92722009-11-18 14:12:51 +000033#include "simulator.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000037
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
ager@chromium.org18ad94b2009-09-02 08:22:29 +000046bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49#else
50 return false;
51#endif
52}
53
54
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000055#ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
56
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000059
60
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000061NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063
ager@chromium.org18ad94b2009-09-02 08:22:29 +000064
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68#else
69 return false;
70#endif
71}
72
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000090 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000091 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000092 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000100}
101
ager@chromium.org32912102009-01-16 10:38:43 +0000102
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
108 int previous_index) {
109
110 ASSERT(subject->IsFlat());
111 ASSERT(previous_index >= 0);
112 ASSERT(previous_index <= subject->length());
113
114 // No allocations before calling the regexp, but we can't use
115 // AssertNoAllocation, since regexps might be preempted, and another thread
116 // might do allocation anyway.
117
118 String* subject_ptr = *subject;
119 // Character offsets into string.
120 int start_offset = previous_index;
121 int end_offset = subject_ptr->length();
122
123 bool is_ascii = subject->IsAsciiRepresentation();
124
125 if (StringShape(subject_ptr).IsCons()) {
126 subject_ptr = ConsString::cast(subject_ptr)->first();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000127 }
128 // Ensure that an underlying string has the same ascii-ness.
129 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
130 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
131 // String is now either Sequential or External
132 int char_size_shift = is_ascii ? 0 : 1;
133 int char_length = end_offset - start_offset;
134
135 const byte* input_start =
136 StringCharacterPosition(subject_ptr, start_offset);
137 int byte_length = char_length << char_size_shift;
138 const byte* input_end = input_start + byte_length;
139 Result res = Execute(*regexp_code,
140 subject_ptr,
141 start_offset,
142 input_start,
143 input_end,
144 offsets_vector,
145 previous_index == 0);
146
147 if (res == SUCCESS) {
148 // Capture values are relative to start_offset only.
149 // Convert them to be relative to start of string.
150 for (int i = 0; i < offsets_vector_length; i++) {
151 if (offsets_vector[i] >= 0) {
152 offsets_vector[i] += previous_index;
153 }
154 }
155 }
156
157 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000158}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000159
160
161NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
162 Code* code,
163 String* input,
164 int start_offset,
165 const byte* input_start,
166 const byte* input_end,
167 int* output,
168 bool at_start) {
169 typedef int (*matcher)(String*, int, const byte*,
170 const byte*, int*, int, Address);
171 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
172
173 int at_start_val = at_start ? 1 : 0;
174
175 // Ensure that the minimum stack has been allocated.
176 RegExpStack stack;
177 Address stack_base = RegExpStack::stack_base();
178
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000179 int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
180 input,
181 start_offset,
182 input_start,
183 input_end,
184 output,
185 at_start_val,
186 stack_base);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000187 ASSERT(result <= SUCCESS);
188 ASSERT(result >= RETRY);
189
190 if (result == EXCEPTION && !Top::has_pending_exception()) {
191 // We detected a stack overflow (on the backtrack stack) in RegExp code,
192 // but haven't created the exception yet.
193 Top::StackOverflow();
194 }
195 return static_cast<Result>(result);
196}
197
198
199static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
200
201int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
202 Address byte_offset1,
203 Address byte_offset2,
204 size_t byte_length) {
205 // This function is not allowed to cause a garbage collection.
206 // A GC might move the calling generated code and invalidate the
207 // return address on the stack.
208 ASSERT(byte_length % 2 == 0);
209 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
210 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
211 size_t length = byte_length >> 1;
212
213 for (size_t i = 0; i < length; i++) {
214 unibrow::uchar c1 = substring1[i];
215 unibrow::uchar c2 = substring2[i];
216 if (c1 != c2) {
217 unibrow::uchar s1[1] = { c1 };
218 canonicalize.get(c1, '\0', s1);
219 if (s1[0] != c2) {
220 unibrow::uchar s2[1] = { c2 };
221 canonicalize.get(c2, '\0', s2);
222 if (s1[0] != s2[0]) {
223 return 0;
224 }
225 }
226 }
227 }
228 return 1;
229}
230
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000231
232Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
233 Address* stack_base) {
234 size_t size = RegExpStack::stack_capacity();
235 Address old_stack_base = RegExpStack::stack_base();
236 ASSERT(old_stack_base == *stack_base);
237 ASSERT(stack_pointer <= old_stack_base);
238 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
239 Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
240 if (new_stack_base == NULL) {
241 return NULL;
242 }
243 *stack_base = new_stack_base;
244 intptr_t stack_content_size = old_stack_base - stack_pointer;
245 return new_stack_base - stack_content_size;
246}
247
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000248#endif // V8_NATIVE_REGEXP
ager@chromium.org32912102009-01-16 10:38:43 +0000249} } // namespace v8::internal