blob: 3a92b8455421fad65daeb83f59855f890644080b [file] [log] [blame]
ricow@chromium.orgddd545c2011-08-24 12:02:41 +00001// Copyright 2011 the V8 project authors. All rights reserved.
ager@chromium.orga74f0da2008-12-03 16:05:52 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// A simple interpreter for the Irregexp byte code.
29
30
31#include "v8.h"
32#include "unicode.h"
33#include "utils.h"
34#include "ast.h"
35#include "bytecodes-irregexp.h"
36#include "interpreter-irregexp.h"
jkummerow@chromium.org1456e702012-03-30 08:38:13 +000037#include "jsregexp.h"
38#include "regexp-macro-assembler.h"
ager@chromium.orga74f0da2008-12-03 16:05:52 +000039
kasperl@chromium.org71affb52009-05-26 05:44:31 +000040namespace v8 {
41namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000042
43
// Shorthand for the unibrow mapping instantiated with Ecma262Canonicalize.
// The uc16 overload of BackRefMatchesNoCase receives a pointer to one of
// these and calls get() on it to compare characters case-insensitively.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000044typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000045
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000046static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
47 int from,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000048 int current,
49 int len,
50 Vector<const uc16> subject) {
51 for (int i = 0; i < len; i++) {
52 unibrow::uchar old_char = subject[from++];
53 unibrow::uchar new_char = subject[current++];
54 if (old_char == new_char) continue;
kasperl@chromium.orge959c182009-07-27 08:59:04 +000055 unibrow::uchar old_string[1] = { old_char };
56 unibrow::uchar new_string[1] = { new_char };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000057 interp_canonicalize->get(old_char, '\0', old_string);
58 interp_canonicalize->get(new_char, '\0', new_string);
kasperl@chromium.orge959c182009-07-27 08:59:04 +000059 if (old_string[0] != new_string[0]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000060 return false;
61 }
62 }
63 return true;
64}
65
66
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000067static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
68 int from,
ager@chromium.org8bb60582008-12-11 12:02:20 +000069 int current,
70 int len,
71 Vector<const char> subject) {
72 for (int i = 0; i < len; i++) {
73 unsigned int old_char = subject[from++];
74 unsigned int new_char = subject[current++];
75 if (old_char == new_char) continue;
76 if (old_char - 'A' <= 'Z' - 'A') old_char |= 0x20;
77 if (new_char - 'A' <= 'Z' - 'A') new_char |= 0x20;
78 if (old_char != new_char) return false;
79 }
80 return true;
81}
82
83
#ifdef DEBUG
// Prints one trace line for the bytecode that is about to be executed.  Does
// nothing unless FLAG_trace_regexp_bytecodes is set.
//
// The line contains the bytecode offset (pc - code_base), the backtrack stack
// depth, the current subject position, the current character (as hex and, if
// it is printable ASCII, as a character), the bytecode name, a hex dump of the
// |bytecode_length| bytes at |pc|, and an ASCII rendering of those bytes
// (skipping the first one) with non-printable bytes shown as '.'.
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             uint32_t current_char,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (FLAG_trace_regexp_bytecodes) {
    bool printable = (current_char < 127 && current_char >= 32);
    const char* format =
        printable ?
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
    // pc - code_base is a ptrdiff_t, which is wider than the int-sized
    // argument %02x consumes on 64-bit targets; narrow it explicitly so the
    // remaining varargs are read correctly.
    PrintF(format,
           static_cast<int>(pc - code_base),
           stack_depth,
           current_position,
           current_char,
           printable ? current_char : '.',
           bytecode_name);
    // The rest of the line goes through PrintF as well so that the whole
    // trace line is emitted through the same output routine.
    for (int i = 0; i < bytecode_length; i++) {
      PrintF(", %02x", pc[i]);
    }
    PrintF(" ");
    for (int i = 1; i < bytecode_length; i++) {
      unsigned char b = pc[i];
      if (b < 127 && b >= 32) {
        PrintF("%c", b);
      } else {
        PrintF(".");
      }
    }
    PrintF("\n");
  }
}


// Debug flavour of BYTECODE: expands to the case label for BC_<name> followed
// by a TraceInterpreter call.  It relies on the locals code_base, pc,
// backtrack_sp, backtrack_stack_base, current and current_char being in scope
// at the expansion site.
#define BYTECODE(name)                                                      \
  case BC_##name:                                                           \
    TraceInterpreter(code_base,                                             \
                     pc,                                                    \
                     static_cast<int>(backtrack_sp - backtrack_stack_base), \
                     current,                                               \
                     current_char,                                          \
                     BC_##name##_LENGTH,                                    \
                     #name);
#else
// Non-debug flavour of BYTECODE: just the case label for BC_<name>.
#define BYTECODE(name)                                                      \
  case BC_##name:
#endif
135
136
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000137static int32_t Load32Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000138 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000139 return *reinterpret_cast<const int32_t *>(pc);
140}
141
142
143static int32_t Load16Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000144 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000145 return *reinterpret_cast<const uint16_t *>(pc);
146}
147
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000148
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000149// A simple abstraction over the backtracking stack used by the interpreter.
150// This backtracking stack does not grow automatically, but it ensures that the
151// the memory held by the stack is released or remembered in a cache if the
152// matching terminates.
153class BacktrackStack {
154 public:
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000155 explicit BacktrackStack(Isolate* isolate) : isolate_(isolate) {
156 if (isolate->irregexp_interpreter_backtrack_stack_cache() != NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000157 // If the cache is not empty reuse the previously allocated stack.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000158 data_ = isolate->irregexp_interpreter_backtrack_stack_cache();
159 isolate->set_irregexp_interpreter_backtrack_stack_cache(NULL);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000160 } else {
161 // Cache was empty. Allocate a new backtrack stack.
162 data_ = NewArray<int>(kBacktrackStackSize);
163 }
164 }
165
166 ~BacktrackStack() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000167 if (isolate_->irregexp_interpreter_backtrack_stack_cache() == NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000168 // The cache is empty. Keep this backtrack stack around.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000169 isolate_->set_irregexp_interpreter_backtrack_stack_cache(data_);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000170 } else {
171 // A backtrack stack was already cached, just release this one.
172 DeleteArray(data_);
173 }
174 }
175
176 int* data() const { return data_; }
177
178 int max_size() const { return kBacktrackStackSize; }
179
180 private:
181 static const int kBacktrackStackSize = 10000;
182
183 int* data_;
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000184 Isolate* isolate_;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000185
186 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
187};
188
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000189
// Runs the Irregexp bytecode that starts at code_base against subject.
//
// The main loop loads the 32-bit instruction word at pc, dispatches on
// (insn & BYTECODE_MASK), and each case then either advances pc by that
// bytecode's BC_*_LENGTH or jumps to code_base plus an offset read from the
// instruction stream (or popped from the backtrack stack).  A small operand is
// packed into the instruction word itself and extracted with
// (insn >> BYTECODE_SHIFT); further operands follow at pc + 4, pc + 8, ...
//
// Parameters:
//   isolate       - supplies the cached backtrack stack and, for
//                   CHECK_NOT_BACK_REF_NO_CASE, the canonicalize mapping.
//   code_base     - first byte of the bytecode; all jump targets are offsets
//                   from it.
//   subject       - the characters being matched (Char is char or uc16).
//   registers     - int array read and written by the register bytecodes.
//   current       - starting position in subject.
//   current_char  - initial value of the "current character" that the CHECK_*
//                   bytecodes compare against; replaced by the LOAD_* bytecodes.
//
// Returns RE_SUCCESS when a SUCCEED bytecode is reached, RE_FAILURE when a
// FAIL bytecode is reached, and RE_EXCEPTION when a push would exceed the
// capacity of the backtrack stack.
ager@chromium.org8bb60582008-12-11 12:02:20 +0000190template <typename Char>
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000191static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
192 const byte* code_base,
193 Vector<const Char> subject,
194 int* registers,
195 int current,
196 uint32_t current_char) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000197 const byte* pc = code_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000198 // BacktrackStack ensures that the memory allocated for the backtracking stack
199 // is returned to the system or cached if there is no stack being cached at
200 // the moment.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000201 BacktrackStack backtrack_stack(isolate);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000202 int* backtrack_stack_base = backtrack_stack.data();
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000203 int* backtrack_sp = backtrack_stack_base;
// Number of free slots left on the backtrack stack; kept in step with
// backtrack_sp by every push and pop below.
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000204 int backtrack_stack_space = backtrack_stack.max_size();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000205#ifdef DEBUG
206 if (FLAG_trace_regexp_bytecodes) {
207 PrintF("\n\nStart bytecode interpreter\n\n");
208 }
209#endif
210 while (true) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000211 int32_t insn = Load32Aligned(pc);
212 switch (insn & BYTECODE_MASK) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000213 BYTECODE(BREAK)
214 UNREACHABLE();
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000215 return RegExpImpl::RE_FAILURE;
// The three PUSH_* bytecodes first claim one free slot; if none is left the
// whole match is abandoned with RE_EXCEPTION.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000216 BYTECODE(PUSH_CP)
217 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000218 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000219 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000220 *backtrack_sp++ = current;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000221 pc += BC_PUSH_CP_LENGTH;
222 break;
223 BYTECODE(PUSH_BT)
224 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000225 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000226 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000227 *backtrack_sp++ = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000228 pc += BC_PUSH_BT_LENGTH;
229 break;
230 BYTECODE(PUSH_REGISTER)
231 if (--backtrack_stack_space < 0) {
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000232 return RegExpImpl::RE_EXCEPTION;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000233 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000234 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000235 pc += BC_PUSH_REGISTER_LENGTH;
236 break;
237 BYTECODE(SET_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000238 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000239 pc += BC_SET_REGISTER_LENGTH;
240 break;
241 BYTECODE(ADVANCE_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000242 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000243 pc += BC_ADVANCE_REGISTER_LENGTH;
244 break;
245 BYTECODE(SET_REGISTER_TO_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000246 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000247 pc += BC_SET_REGISTER_TO_CP_LENGTH;
248 break;
249 BYTECODE(SET_CP_TO_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000250 current = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000251 pc += BC_SET_CP_TO_REGISTER_LENGTH;
252 break;
// The stack pointer is stored in a register as an entry count relative to
// backtrack_stack_base, not as a raw pointer.
253 BYTECODE(SET_REGISTER_TO_SP)
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000254 registers[insn >> BYTECODE_SHIFT] =
255 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000256 pc += BC_SET_REGISTER_TO_SP_LENGTH;
257 break;
// Restores the stack pointer from a register and recomputes the free-slot
// count so that it matches the new stack depth.
258 BYTECODE(SET_SP_TO_REGISTER)
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000259 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000260 backtrack_stack_space = backtrack_stack.max_size() -
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000261 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000262 pc += BC_SET_SP_TO_REGISTER_LENGTH;
263 break;
// The POP_* bytecodes do not check for an empty stack.
264 BYTECODE(POP_CP)
265 backtrack_stack_space++;
266 --backtrack_sp;
267 current = *backtrack_sp;
268 pc += BC_POP_CP_LENGTH;
269 break;
// The popped value is an offset from code_base; execution continues there, so
// pc is not advanced by a bytecode length.
270 BYTECODE(POP_BT)
271 backtrack_stack_space++;
272 --backtrack_sp;
273 pc = code_base + *backtrack_sp;
274 break;
275 BYTECODE(POP_REGISTER)
276 backtrack_stack_space++;
277 --backtrack_sp;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000278 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000279 pc += BC_POP_REGISTER_LENGTH;
280 break;
281 BYTECODE(FAIL)
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000282 return RegExpImpl::RE_FAILURE;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000283 BYTECODE(SUCCEED)
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000284 return RegExpImpl::RE_SUCCESS;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000285 BYTECODE(ADVANCE_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000286 current += insn >> BYTECODE_SHIFT;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000287 pc += BC_ADVANCE_CP_LENGTH;
288 break;
289 BYTECODE(GOTO)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000290 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000291 break;
ager@chromium.org381abbb2009-02-25 13:23:22 +0000292 BYTECODE(ADVANCE_CP_AND_GOTO)
293 current += insn >> BYTECODE_SHIFT;
294 pc = code_base + Load32Aligned(pc + 4);
295 break;
// If the top stack entry equals the current position, drop that entry and
// branch; otherwise leave the stack alone and fall through to the next
// bytecode.
ager@chromium.org8bb60582008-12-11 12:02:20 +0000296 BYTECODE(CHECK_GREEDY)
297 if (current == backtrack_sp[-1]) {
298 backtrack_sp--;
299 backtrack_stack_space++;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000300 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000301 } else {
302 pc += BC_CHECK_GREEDY_LENGTH;
303 }
304 break;
// Checked load: branches to the target at pc + 4 when pos is at or past the
// end of subject.  Only the upper bound is tested here.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000305 BYTECODE(LOAD_CURRENT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000306 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000307 if (pos >= subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000308 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000309 } else {
310 current_char = subject[pos];
311 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
312 }
313 break;
314 }
// The *_UNCHECKED loads perform no bounds test of their own.
ager@chromium.org8bb60582008-12-11 12:02:20 +0000315 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000316 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000317 current_char = subject[pos];
318 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
319 break;
320 }
// Packs two characters into current_char: subject[pos] in the low bits and
// subject[pos + 1] shifted up by the bit width of Char.
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000321 BYTECODE(LOAD_2_CURRENT_CHARS) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000322 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000323 if (pos + 2 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000324 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000325 } else {
326 Char next = subject[pos + 1];
327 current_char =
328 (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
329 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
330 }
331 break;
332 }
333 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000334 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000335 Char next = subject[pos + 1];
336 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
337 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
338 break;
339 }
// Four-character loads are asserted to occur only for one-byte subjects; the
// four characters are packed into current_char with subject[pos] lowest.
340 BYTECODE(LOAD_4_CURRENT_CHARS) {
341 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000342 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000343 if (pos + 4 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000344 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000345 } else {
346 Char next1 = subject[pos + 1];
347 Char next2 = subject[pos + 2];
348 Char next3 = subject[pos + 3];
349 current_char = (subject[pos] |
350 (next1 << 8) |
351 (next2 << 16) |
352 (next3 << 24));
353 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
354 }
355 break;
356 }
357 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
358 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000359 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000360 Char next1 = subject[pos + 1];
361 Char next2 = subject[pos + 2];
362 Char next3 = subject[pos + 3];
363 current_char = (subject[pos] |
364 (next1 << 8) |
365 (next2 << 16) |
366 (next3 << 24));
367 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
368 break;
369 }
// The CHECK_* bytecodes below compare current_char against operands and
// branch to an offset from the instruction stream when the condition named by
// the bytecode holds; otherwise they fall through to the next bytecode.  The
// *_4_CHARS variants take the comparison value from a full 32-bit operand at
// pc + 4 instead of from the instruction word.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000370 BYTECODE(CHECK_4_CHARS) {
371 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000372 if (c == current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000373 pc = code_base + Load32Aligned(pc + 8);
374 } else {
375 pc += BC_CHECK_4_CHARS_LENGTH;
376 }
377 break;
378 }
379 BYTECODE(CHECK_CHAR) {
380 uint32_t c = (insn >> BYTECODE_SHIFT);
381 if (c == current_char) {
382 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000383 } else {
384 pc += BC_CHECK_CHAR_LENGTH;
385 }
386 break;
387 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000388 BYTECODE(CHECK_NOT_4_CHARS) {
389 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000390 if (c != current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000391 pc = code_base + Load32Aligned(pc + 8);
392 } else {
393 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
394 }
395 break;
396 }
397 BYTECODE(CHECK_NOT_CHAR) {
398 uint32_t c = (insn >> BYTECODE_SHIFT);
399 if (c != current_char) {
400 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000401 } else {
402 pc += BC_CHECK_NOT_CHAR_LENGTH;
403 }
404 break;
405 }
// The AND_* variants mask current_char with a 32-bit operand before comparing.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000406 BYTECODE(AND_CHECK_4_CHARS) {
407 uint32_t c = Load32Aligned(pc + 4);
408 if (c == (current_char & Load32Aligned(pc + 8))) {
409 pc = code_base + Load32Aligned(pc + 12);
410 } else {
411 pc += BC_AND_CHECK_4_CHARS_LENGTH;
412 }
413 break;
414 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000415 BYTECODE(AND_CHECK_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000416 uint32_t c = (insn >> BYTECODE_SHIFT);
417 if (c == (current_char & Load32Aligned(pc + 4))) {
418 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000419 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000420 pc += BC_AND_CHECK_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000421 }
422 break;
423 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000424 BYTECODE(AND_CHECK_NOT_4_CHARS) {
425 uint32_t c = Load32Aligned(pc + 4);
426 if (c != (current_char & Load32Aligned(pc + 8))) {
427 pc = code_base + Load32Aligned(pc + 12);
428 } else {
429 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
430 }
431 break;
432 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000433 BYTECODE(AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000434 uint32_t c = (insn >> BYTECODE_SHIFT);
435 if (c != (current_char & Load32Aligned(pc + 4))) {
436 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000437 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000438 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
439 }
440 break;
441 }
// Subtracts a 16-bit operand from current_char, masks the result with a second
// 16-bit operand, and branches if that differs from the packed operand.
442 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000443 uint32_t c = (insn >> BYTECODE_SHIFT);
444 uint32_t minus = Load16Aligned(pc + 4);
445 uint32_t mask = Load16Aligned(pc + 6);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000446 if (c != ((current_char - minus) & mask)) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000447 pc = code_base + Load32Aligned(pc + 8);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000448 } else {
449 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000450 }
451 break;
452 }
// Range checks: both bounds are 16-bit operands and the range is inclusive.
jkummerow@chromium.org1456e702012-03-30 08:38:13 +0000453 BYTECODE(CHECK_CHAR_IN_RANGE) {
454 uint32_t from = Load16Aligned(pc + 4);
455 uint32_t to = Load16Aligned(pc + 6);
456 if (from <= current_char && current_char <= to) {
457 pc = code_base + Load32Aligned(pc + 8);
458 } else {
459 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
460 }
461 break;
462 }
463 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
464 uint32_t from = Load16Aligned(pc + 4);
465 uint32_t to = Load16Aligned(pc + 6);
466 if (from > current_char || current_char > to) {
467 pc = code_base + Load32Aligned(pc + 8);
468 } else {
469 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
470 }
471 break;
472 }
// The bit table is embedded in the instruction stream starting at pc + 8.  The
// character masked with kTableMask selects a byte (index >> kBitsPerByteLog2)
// and the low bits of the character select the bit within that byte.
473 BYTECODE(CHECK_BIT_IN_TABLE) {
474 int mask = RegExpMacroAssembler::kTableMask;
475 byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
476 int bit = (current_char & (kBitsPerByte - 1));
477 if ((b & (1 << bit)) != 0) {
478 pc = code_base + Load32Aligned(pc + 4);
479 } else {
480 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
481 }
482 break;
483 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000484 BYTECODE(CHECK_LT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000485 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000486 if (current_char < limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000487 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000488 } else {
489 pc += BC_CHECK_LT_LENGTH;
490 }
491 break;
492 }
493 BYTECODE(CHECK_GT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000494 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000495 if (current_char > limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000496 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000497 } else {
498 pc += BC_CHECK_GT_LENGTH;
499 }
500 break;
501 }
// Register comparisons: the register index is the packed operand, the value
// compared against is the 32-bit operand at pc + 4.
502 BYTECODE(CHECK_REGISTER_LT)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000503 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
504 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000505 } else {
506 pc += BC_CHECK_REGISTER_LT_LENGTH;
507 }
508 break;
509 BYTECODE(CHECK_REGISTER_GE)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000510 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
511 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000512 } else {
513 pc += BC_CHECK_REGISTER_GE_LENGTH;
514 }
515 break;
ager@chromium.org32912102009-01-16 10:38:43 +0000516 BYTECODE(CHECK_REGISTER_EQ_POS)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000517 if (registers[insn >> BYTECODE_SHIFT] == current) {
518 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org32912102009-01-16 10:38:43 +0000519 } else {
520 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
521 }
522 break;
// Note the inverted shape: equal registers fall through, unequal ones branch.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000523 BYTECODE(CHECK_NOT_REGS_EQUAL)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000524 if (registers[insn >> BYTECODE_SHIFT] ==
525 registers[Load32Aligned(pc + 4)]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000526 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
527 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000528 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000529 }
530 break;
// Back reference: registers[n] and registers[n + 1] delimit the referenced
// text.  A negative start or a non-positive length falls through without
// consuming input.  Otherwise the referenced text must appear at the current
// position; on success current is advanced past it, on failure (including not
// enough subject left) execution branches to the target at pc + 4.
531 BYTECODE(CHECK_NOT_BACK_REF) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000532 int from = registers[insn >> BYTECODE_SHIFT];
533 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000534 if (from < 0 || len <= 0) {
535 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
536 break;
537 }
538 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000539 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000540 break;
541 } else {
542 int i;
543 for (i = 0; i < len; i++) {
544 if (subject[from + i] != subject[current + i]) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000545 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000546 break;
547 }
548 }
// i < len means the loop above stopped at a mismatch and has already
// redirected pc, so leave the switch without advancing.
549 if (i < len) break;
550 current += len;
551 }
552 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
553 break;
554 }
// Same as CHECK_NOT_BACK_REF, but the comparison is delegated to
// BackRefMatchesNoCase.
555 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000556 int from = registers[insn >> BYTECODE_SHIFT];
557 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000558 if (from < 0 || len <= 0) {
559 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
560 break;
561 }
562 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000563 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000564 break;
565 } else {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000566 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
567 from, current, len, subject)) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000568 current += len;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000569 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
570 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000571 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000572 }
573 }
574 break;
575 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000576 BYTECODE(CHECK_AT_START)
577 if (current == 0) {
578 pc = code_base + Load32Aligned(pc + 4);
579 } else {
580 pc += BC_CHECK_AT_START_LENGTH;
581 }
582 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000583 BYTECODE(CHECK_NOT_AT_START)
584 if (current == 0) {
585 pc += BC_CHECK_NOT_AT_START_LENGTH;
586 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000587 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000588 }
589 break;
// If more than |by| characters remain after the current position, jump to
// |by| characters before the end of subject and reload current_char with the
// character just before the new position.  The operand is extracted with an
// unsigned shift, unlike the other packed operands.
whesse@chromium.org4a5224e2010-10-20 12:37:07 +0000590 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
591 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
592 if (subject.length() - current > by) {
593 current = subject.length() - by;
594 current_char = subject[current - 1];
595 }
596 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
597 break;
598 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000599 default:
600 UNREACHABLE();
601 break;
602 }
603 }
604}
605
606
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +0000607RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
608 Isolate* isolate,
609 Handle<ByteArray> code_array,
610 Handle<String> subject,
611 int* registers,
612 int start_position) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000613 ASSERT(subject->IsFlat());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000614
615 AssertNoAllocation a;
616 const byte* code_base = code_array->GetDataStartAddress();
617 uc16 previous_char = '\n';
ricow@chromium.orgddd545c2011-08-24 12:02:41 +0000618 String::FlatContent subject_content = subject->GetFlatContent();
619 if (subject_content.IsAscii()) {
620 Vector<const char> subject_vector = subject_content.ToAsciiVector();
ager@chromium.org8bb60582008-12-11 12:02:20 +0000621 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000622 return RawMatch(isolate,
623 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000624 subject_vector,
625 registers,
626 start_position,
627 previous_char);
628 } else {
ricow@chromium.orgddd545c2011-08-24 12:02:41 +0000629 ASSERT(subject_content.IsTwoByte());
630 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
ager@chromium.org8bb60582008-12-11 12:02:20 +0000631 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000632 return RawMatch(isolate,
633 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000634 subject_vector,
635 registers,
636 start_position,
637 previous_char);
638 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000639}
640
641} } // namespace v8::internal