blob: 2fc9fd302574357aa21dc9cbd8281fe9bd16ff09 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// A simple interpreter for the Irregexp byte code.

29
30
31#include "v8.h"
32#include "unicode.h"
33#include "utils.h"
34#include "ast.h"
35#include "bytecodes-irregexp.h"
36#include "interpreter-irregexp.h"
jkummerow@chromium.org1456e702012-03-30 08:38:13 +000037#include "jsregexp.h"
38#include "regexp-macro-assembler.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000039
kasperl@chromium.org71affb52009-05-26 05:44:31 +000040namespace v8 {
41namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000042
43
// Shorthand for the unibrow mapping instantiated with Ecma262Canonicalize.
// An instance of this mapping is handed to BackRefMatchesNoCase() so that two
// characters can be compared through their canonical forms.
typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000045
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000046static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
47 int from,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000048 int current,
49 int len,
50 Vector<const uc16> subject) {
51 for (int i = 0; i < len; i++) {
52 unibrow::uchar old_char = subject[from++];
53 unibrow::uchar new_char = subject[current++];
54 if (old_char == new_char) continue;
kasperl@chromium.orge959c182009-07-27 08:59:04 +000055 unibrow::uchar old_string[1] = { old_char };
56 unibrow::uchar new_string[1] = { new_char };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000057 interp_canonicalize->get(old_char, '\0', old_string);
58 interp_canonicalize->get(new_char, '\0', new_string);
kasperl@chromium.orge959c182009-07-27 08:59:04 +000059 if (old_string[0] != new_string[0]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000060 return false;
61 }
62 }
63 return true;
64}
65
66
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000067static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
68 int from,
ager@chromium.org8bb60582008-12-11 12:02:20 +000069 int current,
70 int len,
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000071 Vector<const uint8_t> subject) {
ager@chromium.org8bb60582008-12-11 12:02:20 +000072 for (int i = 0; i < len; i++) {
73 unsigned int old_char = subject[from++];
74 unsigned int new_char = subject[current++];
75 if (old_char == new_char) continue;
mstarzinger@chromium.orgf705b502013-04-04 11:38:09 +000076 // Convert both characters to lower case.
77 old_char |= 0x20;
78 new_char |= 0x20;
ager@chromium.org8bb60582008-12-11 12:02:20 +000079 if (old_char != new_char) return false;
mstarzinger@chromium.orgf705b502013-04-04 11:38:09 +000080 // Not letters in the ASCII range and Latin-1 range.
81 if (!(old_char - 'a' <= 'z' - 'a') &&
82 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
83 return false;
84 }
ager@chromium.org8bb60582008-12-11 12:02:20 +000085 }
86 return true;
87}
88
89
#ifdef DEBUG
// Prints one trace line for the bytecode about to be executed.  Does nothing
// unless FLAG_trace_regexp_bytecodes is set.
//
//   code_base         start of the bytecode array; used to print |pc| as an
//                     offset.
//   pc                address of the instruction being traced.
//   stack_depth       number of entries currently on the backtrack stack.
//   current_position  current index into the subject string.
//   current_char      contents of the current-character register.
//   bytecode_length   length of the instruction in bytes.
//   bytecode_name     printable name of the instruction.
//
// The line consists of the interpreter state, the raw instruction bytes in
// hex, and the operand bytes (everything after the first byte) as characters
// with non-printable bytes shown as '.'.
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             uint32_t current_char,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (FLAG_trace_regexp_bytecodes) {
    bool printable = (current_char < 127 && current_char >= 32);
    const char* format =
        printable ?
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
    // The pointer difference is a ptrdiff_t, which is wider than the
    // unsigned int that %x consumes on 64-bit targets; narrow it explicitly
    // so the variadic arguments line up with the format string.
    PrintF(format,
           static_cast<unsigned int>(pc - code_base),
           stack_depth,
           current_position,
           current_char,
           printable ? current_char : '.',
           bytecode_name);
    for (int i = 0; i < bytecode_length; i++) {
      printf(", %02x", pc[i]);
    }
    printf(" ");
    for (int i = 1; i < bytecode_length; i++) {
      unsigned char b = pc[i];
      if (b < 127 && b >= 32) {
        printf("%c", b);
      } else {
        printf(".");
      }
    }
    printf("\n");
  }
}


// Opens the switch case for bytecode |name|.  In debug builds the case also
// traces the instruction before executing it; the macro relies on the locals
// code_base, pc, backtrack_sp, backtrack_stack_base, current and current_char
// being in scope at the point of use.
#define BYTECODE(name)                                                      \
  case BC_##name:                                                           \
    TraceInterpreter(code_base,                                             \
                     pc,                                                    \
                     static_cast<int>(backtrack_sp - backtrack_stack_base), \
                     current,                                               \
                     current_char,                                          \
                     BC_##name##_LENGTH,                                    \
                     #name);
#else
// Opens the switch case for bytecode |name| (no tracing in release builds).
#define BYTECODE(name)                                                      \
  case BC_##name:
#endif
141
142
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000143static int32_t Load32Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000144 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000145 return *reinterpret_cast<const int32_t *>(pc);
146}
147
148
149static int32_t Load16Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000150 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000151 return *reinterpret_cast<const uint16_t *>(pc);
152}
153
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000154
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000155// A simple abstraction over the backtracking stack used by the interpreter.
156// This backtracking stack does not grow automatically, but it ensures that the
157// the memory held by the stack is released or remembered in a cache if the
158// matching terminates.
159class BacktrackStack {
160 public:
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000161 explicit BacktrackStack(Isolate* isolate) : isolate_(isolate) {
162 if (isolate->irregexp_interpreter_backtrack_stack_cache() != NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000163 // If the cache is not empty reuse the previously allocated stack.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000164 data_ = isolate->irregexp_interpreter_backtrack_stack_cache();
165 isolate->set_irregexp_interpreter_backtrack_stack_cache(NULL);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000166 } else {
167 // Cache was empty. Allocate a new backtrack stack.
168 data_ = NewArray<int>(kBacktrackStackSize);
169 }
170 }
171
172 ~BacktrackStack() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000173 if (isolate_->irregexp_interpreter_backtrack_stack_cache() == NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000174 // The cache is empty. Keep this backtrack stack around.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000175 isolate_->set_irregexp_interpreter_backtrack_stack_cache(data_);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000176 } else {
177 // A backtrack stack was already cached, just release this one.
178 DeleteArray(data_);
179 }
180 }
181
182 int* data() const { return data_; }
183
184 int max_size() const { return kBacktrackStackSize; }
185
186 private:
187 static const int kBacktrackStackSize = 10000;
188
189 int* data_;
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000190 Isolate* isolate_;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000191
192 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
193};
194
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000195
ager@chromium.org8bb60582008-12-11 12:02:20 +0000196template <typename Char>
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000197static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
198 const byte* code_base,
199 Vector<const Char> subject,
200 int* registers,
201 int current,
202 uint32_t current_char) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000203 const byte* pc = code_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000204 // BacktrackStack ensures that the memory allocated for the backtracking stack
205 // is returned to the system or cached if there is no stack being cached at
206 // the moment.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000207 BacktrackStack backtrack_stack(isolate);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000208 int* backtrack_stack_base = backtrack_stack.data();
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000209 int* backtrack_sp = backtrack_stack_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000210 int backtrack_stack_space = backtrack_stack.max_size();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000211#ifdef DEBUG
212 if (FLAG_trace_regexp_bytecodes) {
213 PrintF("\n\nStart bytecode interpreter\n\n");
214 }
215#endif
216 while (true) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000217 int32_t insn = Load32Aligned(pc);
218 switch (insn & BYTECODE_MASK) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000219 BYTECODE(BREAK)
220 UNREACHABLE();
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000221 return RegExpImpl::RE_FAILURE;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000222 BYTECODE(PUSH_CP)
223 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000224 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000225 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000226 *backtrack_sp++ = current;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000227 pc += BC_PUSH_CP_LENGTH;
228 break;
229 BYTECODE(PUSH_BT)
230 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000231 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000232 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000233 *backtrack_sp++ = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000234 pc += BC_PUSH_BT_LENGTH;
235 break;
236 BYTECODE(PUSH_REGISTER)
237 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000238 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000239 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000240 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000241 pc += BC_PUSH_REGISTER_LENGTH;
242 break;
243 BYTECODE(SET_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000244 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000245 pc += BC_SET_REGISTER_LENGTH;
246 break;
247 BYTECODE(ADVANCE_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000248 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000249 pc += BC_ADVANCE_REGISTER_LENGTH;
250 break;
251 BYTECODE(SET_REGISTER_TO_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000252 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000253 pc += BC_SET_REGISTER_TO_CP_LENGTH;
254 break;
255 BYTECODE(SET_CP_TO_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000256 current = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000257 pc += BC_SET_CP_TO_REGISTER_LENGTH;
258 break;
259 BYTECODE(SET_REGISTER_TO_SP)
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000260 registers[insn >> BYTECODE_SHIFT] =
261 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000262 pc += BC_SET_REGISTER_TO_SP_LENGTH;
263 break;
264 BYTECODE(SET_SP_TO_REGISTER)
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000265 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000266 backtrack_stack_space = backtrack_stack.max_size() -
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000267 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000268 pc += BC_SET_SP_TO_REGISTER_LENGTH;
269 break;
270 BYTECODE(POP_CP)
271 backtrack_stack_space++;
272 --backtrack_sp;
273 current = *backtrack_sp;
274 pc += BC_POP_CP_LENGTH;
275 break;
276 BYTECODE(POP_BT)
277 backtrack_stack_space++;
278 --backtrack_sp;
279 pc = code_base + *backtrack_sp;
280 break;
281 BYTECODE(POP_REGISTER)
282 backtrack_stack_space++;
283 --backtrack_sp;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000284 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000285 pc += BC_POP_REGISTER_LENGTH;
286 break;
287 BYTECODE(FAIL)
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000288 return RegExpImpl::RE_FAILURE;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000289 BYTECODE(SUCCEED)
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000290 return RegExpImpl::RE_SUCCESS;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000291 BYTECODE(ADVANCE_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000292 current += insn >> BYTECODE_SHIFT;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000293 pc += BC_ADVANCE_CP_LENGTH;
294 break;
295 BYTECODE(GOTO)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000296 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000297 break;
ager@chromium.org381abbb2009-02-25 13:23:22 +0000298 BYTECODE(ADVANCE_CP_AND_GOTO)
299 current += insn >> BYTECODE_SHIFT;
300 pc = code_base + Load32Aligned(pc + 4);
301 break;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000302 BYTECODE(CHECK_GREEDY)
303 if (current == backtrack_sp[-1]) {
304 backtrack_sp--;
305 backtrack_stack_space++;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000306 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000307 } else {
308 pc += BC_CHECK_GREEDY_LENGTH;
309 }
310 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000311 BYTECODE(LOAD_CURRENT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000312 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000313 if (pos >= subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000314 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000315 } else {
316 current_char = subject[pos];
317 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
318 }
319 break;
320 }
ager@chromium.org8bb60582008-12-11 12:02:20 +0000321 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000322 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000323 current_char = subject[pos];
324 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
325 break;
326 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000327 BYTECODE(LOAD_2_CURRENT_CHARS) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000328 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000329 if (pos + 2 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000330 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000331 } else {
332 Char next = subject[pos + 1];
333 current_char =
334 (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
335 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
336 }
337 break;
338 }
339 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000340 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000341 Char next = subject[pos + 1];
342 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
343 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
344 break;
345 }
346 BYTECODE(LOAD_4_CURRENT_CHARS) {
347 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000348 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000349 if (pos + 4 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000350 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000351 } else {
352 Char next1 = subject[pos + 1];
353 Char next2 = subject[pos + 2];
354 Char next3 = subject[pos + 3];
355 current_char = (subject[pos] |
356 (next1 << 8) |
357 (next2 << 16) |
358 (next3 << 24));
359 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
360 }
361 break;
362 }
363 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
364 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000365 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000366 Char next1 = subject[pos + 1];
367 Char next2 = subject[pos + 2];
368 Char next3 = subject[pos + 3];
369 current_char = (subject[pos] |
370 (next1 << 8) |
371 (next2 << 16) |
372 (next3 << 24));
373 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
374 break;
375 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000376 BYTECODE(CHECK_4_CHARS) {
377 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000378 if (c == current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000379 pc = code_base + Load32Aligned(pc + 8);
380 } else {
381 pc += BC_CHECK_4_CHARS_LENGTH;
382 }
383 break;
384 }
385 BYTECODE(CHECK_CHAR) {
386 uint32_t c = (insn >> BYTECODE_SHIFT);
387 if (c == current_char) {
388 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000389 } else {
390 pc += BC_CHECK_CHAR_LENGTH;
391 }
392 break;
393 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000394 BYTECODE(CHECK_NOT_4_CHARS) {
395 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000396 if (c != current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000397 pc = code_base + Load32Aligned(pc + 8);
398 } else {
399 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
400 }
401 break;
402 }
403 BYTECODE(CHECK_NOT_CHAR) {
404 uint32_t c = (insn >> BYTECODE_SHIFT);
405 if (c != current_char) {
406 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000407 } else {
408 pc += BC_CHECK_NOT_CHAR_LENGTH;
409 }
410 break;
411 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000412 BYTECODE(AND_CHECK_4_CHARS) {
413 uint32_t c = Load32Aligned(pc + 4);
414 if (c == (current_char & Load32Aligned(pc + 8))) {
415 pc = code_base + Load32Aligned(pc + 12);
416 } else {
417 pc += BC_AND_CHECK_4_CHARS_LENGTH;
418 }
419 break;
420 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000421 BYTECODE(AND_CHECK_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000422 uint32_t c = (insn >> BYTECODE_SHIFT);
423 if (c == (current_char & Load32Aligned(pc + 4))) {
424 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000425 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000426 pc += BC_AND_CHECK_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000427 }
428 break;
429 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000430 BYTECODE(AND_CHECK_NOT_4_CHARS) {
431 uint32_t c = Load32Aligned(pc + 4);
432 if (c != (current_char & Load32Aligned(pc + 8))) {
433 pc = code_base + Load32Aligned(pc + 12);
434 } else {
435 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
436 }
437 break;
438 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000439 BYTECODE(AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000440 uint32_t c = (insn >> BYTECODE_SHIFT);
441 if (c != (current_char & Load32Aligned(pc + 4))) {
442 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000443 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000444 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
445 }
446 break;
447 }
448 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000449 uint32_t c = (insn >> BYTECODE_SHIFT);
450 uint32_t minus = Load16Aligned(pc + 4);
451 uint32_t mask = Load16Aligned(pc + 6);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000452 if (c != ((current_char - minus) & mask)) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000453 pc = code_base + Load32Aligned(pc + 8);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000454 } else {
455 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000456 }
457 break;
458 }
jkummerow@chromium.org1456e702012-03-30 08:38:13 +0000459 BYTECODE(CHECK_CHAR_IN_RANGE) {
460 uint32_t from = Load16Aligned(pc + 4);
461 uint32_t to = Load16Aligned(pc + 6);
462 if (from <= current_char && current_char <= to) {
463 pc = code_base + Load32Aligned(pc + 8);
464 } else {
465 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
466 }
467 break;
468 }
469 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
470 uint32_t from = Load16Aligned(pc + 4);
471 uint32_t to = Load16Aligned(pc + 6);
472 if (from > current_char || current_char > to) {
473 pc = code_base + Load32Aligned(pc + 8);
474 } else {
475 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
476 }
477 break;
478 }
479 BYTECODE(CHECK_BIT_IN_TABLE) {
480 int mask = RegExpMacroAssembler::kTableMask;
481 byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
482 int bit = (current_char & (kBitsPerByte - 1));
483 if ((b & (1 << bit)) != 0) {
484 pc = code_base + Load32Aligned(pc + 4);
485 } else {
486 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
487 }
488 break;
489 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000490 BYTECODE(CHECK_LT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000491 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000492 if (current_char < limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000493 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000494 } else {
495 pc += BC_CHECK_LT_LENGTH;
496 }
497 break;
498 }
499 BYTECODE(CHECK_GT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000500 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000501 if (current_char > limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000502 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000503 } else {
504 pc += BC_CHECK_GT_LENGTH;
505 }
506 break;
507 }
508 BYTECODE(CHECK_REGISTER_LT)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000509 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
510 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000511 } else {
512 pc += BC_CHECK_REGISTER_LT_LENGTH;
513 }
514 break;
515 BYTECODE(CHECK_REGISTER_GE)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000516 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
517 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000518 } else {
519 pc += BC_CHECK_REGISTER_GE_LENGTH;
520 }
521 break;
ager@chromium.org32912102009-01-16 10:38:43 +0000522 BYTECODE(CHECK_REGISTER_EQ_POS)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000523 if (registers[insn >> BYTECODE_SHIFT] == current) {
524 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org32912102009-01-16 10:38:43 +0000525 } else {
526 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
527 }
528 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000529 BYTECODE(CHECK_NOT_REGS_EQUAL)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000530 if (registers[insn >> BYTECODE_SHIFT] ==
531 registers[Load32Aligned(pc + 4)]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000532 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
533 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000534 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000535 }
536 break;
537 BYTECODE(CHECK_NOT_BACK_REF) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000538 int from = registers[insn >> BYTECODE_SHIFT];
539 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000540 if (from < 0 || len <= 0) {
541 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
542 break;
543 }
544 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000545 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000546 break;
547 } else {
548 int i;
549 for (i = 0; i < len; i++) {
550 if (subject[from + i] != subject[current + i]) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000551 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000552 break;
553 }
554 }
555 if (i < len) break;
556 current += len;
557 }
558 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
559 break;
560 }
561 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000562 int from = registers[insn >> BYTECODE_SHIFT];
563 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000564 if (from < 0 || len <= 0) {
565 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
566 break;
567 }
568 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000569 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000570 break;
571 } else {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000572 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
573 from, current, len, subject)) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000574 current += len;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000575 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
576 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000577 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000578 }
579 }
580 break;
581 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000582 BYTECODE(CHECK_AT_START)
583 if (current == 0) {
584 pc = code_base + Load32Aligned(pc + 4);
585 } else {
586 pc += BC_CHECK_AT_START_LENGTH;
587 }
588 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000589 BYTECODE(CHECK_NOT_AT_START)
590 if (current == 0) {
591 pc += BC_CHECK_NOT_AT_START_LENGTH;
592 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000593 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000594 }
595 break;
whesse@chromium.org4a5224e2010-10-20 12:37:07 +0000596 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
597 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
598 if (subject.length() - current > by) {
599 current = subject.length() - by;
600 current_char = subject[current - 1];
601 }
602 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
603 break;
604 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000605 default:
606 UNREACHABLE();
607 break;
608 }
609 }
610}
611
612
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000613RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
614 Isolate* isolate,
615 Handle<ByteArray> code_array,
616 Handle<String> subject,
617 int* registers,
618 int start_position) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000619 ASSERT(subject->IsFlat());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000620
rossberg@chromium.org79e79022013-06-03 15:43:46 +0000621 DisallowHeapAllocation no_gc;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000622 const byte* code_base = code_array->GetDataStartAddress();
623 uc16 previous_char = '\n';
ricow@chromium.orgddd545c2011-08-24 12:02:41 +0000624 String::FlatContent subject_content = subject->GetFlatContent();
625 if (subject_content.IsAscii()) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000626 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
ager@chromium.org8bb60582008-12-11 12:02:20 +0000627 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000628 return RawMatch(isolate,
629 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000630 subject_vector,
631 registers,
632 start_position,
633 previous_char);
634 } else {
ricow@chromium.orgddd545c2011-08-24 12:02:41 +0000635 ASSERT(subject_content.IsTwoByte());
636 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
ager@chromium.org8bb60582008-12-11 12:02:20 +0000637 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000638 return RawMatch(isolate,
639 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000640 subject_vector,
641 registers,
642 start_position,
643 previous_char);
644 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000645}
646
647} } // namespace v8::internal