blob: 7f830fe4855ec909e1975edb1ad732cdbdd05174 [file] [log] [blame]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
ager@chromium.orga74f0da2008-12-03 16:05:52 +000028#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000031#include "regexp-stack.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000032#include "regexp-macro-assembler.h"
33
kasperl@chromium.org71affb52009-05-26 05:44:31 +000034namespace v8 {
35namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000036
37RegExpMacroAssembler::RegExpMacroAssembler() {
38}
39
40
41RegExpMacroAssembler::~RegExpMacroAssembler() {
42}
43
44
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000045#ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
46
47NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
48}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000049
50
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000051NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
52}
ager@chromium.orga74f0da2008-12-03 16:05:52 +000053
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000054const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
55 String* subject,
56 int start_index) {
57 // Not just flat, but ultra flat.
58 ASSERT(subject->IsExternalString() || subject->IsSeqString());
59 ASSERT(start_index >= 0);
60 ASSERT(start_index <= subject->length());
61 if (subject->IsAsciiRepresentation()) {
62 const byte* address;
63 if (StringShape(subject).IsExternal()) {
64 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
65 address = reinterpret_cast<const byte*>(data);
66 } else {
67 ASSERT(subject->IsSeqAsciiString());
68 char* data = SeqAsciiString::cast(subject)->GetChars();
69 address = reinterpret_cast<const byte*>(data);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000070 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000071 return address + start_index;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000072 }
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073 const uc16* data;
74 if (StringShape(subject).IsExternal()) {
75 data = ExternalTwoByteString::cast(subject)->resource()->data();
76 } else {
77 ASSERT(subject->IsSeqTwoByteString());
78 data = SeqTwoByteString::cast(subject)->GetChars();
79 }
80 return reinterpret_cast<const byte*>(data + start_index);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000081}
82
ager@chromium.org32912102009-01-16 10:38:43 +000083
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000084NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
85 Handle<Code> regexp_code,
86 Handle<String> subject,
87 int* offsets_vector,
88 int offsets_vector_length,
89 int previous_index) {
90
91 ASSERT(subject->IsFlat());
92 ASSERT(previous_index >= 0);
93 ASSERT(previous_index <= subject->length());
94
95 // No allocations before calling the regexp, but we can't use
96 // AssertNoAllocation, since regexps might be preempted, and another thread
97 // might do allocation anyway.
98
99 String* subject_ptr = *subject;
100 // Character offsets into string.
101 int start_offset = previous_index;
102 int end_offset = subject_ptr->length();
103
104 bool is_ascii = subject->IsAsciiRepresentation();
105
106 if (StringShape(subject_ptr).IsCons()) {
107 subject_ptr = ConsString::cast(subject_ptr)->first();
108 } else if (StringShape(subject_ptr).IsSliced()) {
109 SlicedString* slice = SlicedString::cast(subject_ptr);
110 start_offset += slice->start();
111 end_offset += slice->start();
112 subject_ptr = slice->buffer();
113 }
114 // Ensure that an underlying string has the same ascii-ness.
115 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
116 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
117 // String is now either Sequential or External
118 int char_size_shift = is_ascii ? 0 : 1;
119 int char_length = end_offset - start_offset;
120
121 const byte* input_start =
122 StringCharacterPosition(subject_ptr, start_offset);
123 int byte_length = char_length << char_size_shift;
124 const byte* input_end = input_start + byte_length;
125 Result res = Execute(*regexp_code,
126 subject_ptr,
127 start_offset,
128 input_start,
129 input_end,
130 offsets_vector,
131 previous_index == 0);
132
133 if (res == SUCCESS) {
134 // Capture values are relative to start_offset only.
135 // Convert them to be relative to start of string.
136 for (int i = 0; i < offsets_vector_length; i++) {
137 if (offsets_vector[i] >= 0) {
138 offsets_vector[i] += previous_index;
139 }
140 }
141 }
142
143 return res;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000144}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000145
146
147NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
148 Code* code,
149 String* input,
150 int start_offset,
151 const byte* input_start,
152 const byte* input_end,
153 int* output,
154 bool at_start) {
155 typedef int (*matcher)(String*, int, const byte*,
156 const byte*, int*, int, Address);
157 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
158
159 int at_start_val = at_start ? 1 : 0;
160
161 // Ensure that the minimum stack has been allocated.
162 RegExpStack stack;
163 Address stack_base = RegExpStack::stack_base();
164
165 int result = matcher_func(input,
166 start_offset,
167 input_start,
168 input_end,
169 output,
170 at_start_val,
171 stack_base);
172 ASSERT(result <= SUCCESS);
173 ASSERT(result >= RETRY);
174
175 if (result == EXCEPTION && !Top::has_pending_exception()) {
176 // We detected a stack overflow (on the backtrack stack) in RegExp code,
177 // but haven't created the exception yet.
178 Top::StackOverflow();
179 }
180 return static_cast<Result>(result);
181}
182
183
184static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
185
186int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
187 Address byte_offset1,
188 Address byte_offset2,
189 size_t byte_length) {
190 // This function is not allowed to cause a garbage collection.
191 // A GC might move the calling generated code and invalidate the
192 // return address on the stack.
193 ASSERT(byte_length % 2 == 0);
194 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
195 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
196 size_t length = byte_length >> 1;
197
198 for (size_t i = 0; i < length; i++) {
199 unibrow::uchar c1 = substring1[i];
200 unibrow::uchar c2 = substring2[i];
201 if (c1 != c2) {
202 unibrow::uchar s1[1] = { c1 };
203 canonicalize.get(c1, '\0', s1);
204 if (s1[0] != c2) {
205 unibrow::uchar s2[1] = { c2 };
206 canonicalize.get(c2, '\0', s2);
207 if (s1[0] != s2[0]) {
208 return 0;
209 }
210 }
211 }
212 }
213 return 1;
214}
215
216#endif // V8_NATIVE_REGEXP
ager@chromium.org32912102009-01-16 10:38:43 +0000217} } // namespace v8::internal