blob: 1c6c52ca89711049d13f8ff3c8285a1166b12ab1 [file] [log] [blame]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// A simple interpreter for the Irregexp byte code.
29
30
31#include "v8.h"
32#include "unicode.h"
33#include "utils.h"
34#include "ast.h"
35#include "bytecodes-irregexp.h"
36#include "interpreter-irregexp.h"
37
38
kasperl@chromium.org71affb52009-05-26 05:44:31 +000039namespace v8 {
40namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000041
42
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000043typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000044
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000045static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
46 int from,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000047 int current,
48 int len,
49 Vector<const uc16> subject) {
50 for (int i = 0; i < len; i++) {
51 unibrow::uchar old_char = subject[from++];
52 unibrow::uchar new_char = subject[current++];
53 if (old_char == new_char) continue;
kasperl@chromium.orge959c182009-07-27 08:59:04 +000054 unibrow::uchar old_string[1] = { old_char };
55 unibrow::uchar new_string[1] = { new_char };
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000056 interp_canonicalize->get(old_char, '\0', old_string);
57 interp_canonicalize->get(new_char, '\0', new_string);
kasperl@chromium.orge959c182009-07-27 08:59:04 +000058 if (old_string[0] != new_string[0]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000059 return false;
60 }
61 }
62 return true;
63}
64
65
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000066static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
67 int from,
ager@chromium.org8bb60582008-12-11 12:02:20 +000068 int current,
69 int len,
70 Vector<const char> subject) {
71 for (int i = 0; i < len; i++) {
72 unsigned int old_char = subject[from++];
73 unsigned int new_char = subject[current++];
74 if (old_char == new_char) continue;
75 if (old_char - 'A' <= 'Z' - 'A') old_char |= 0x20;
76 if (new_char - 'A' <= 'Z' - 'A') new_char |= 0x20;
77 if (old_char != new_char) return false;
78 }
79 return true;
80}
81
82
ager@chromium.orga74f0da2008-12-03 16:05:52 +000083#ifdef DEBUG
84static void TraceInterpreter(const byte* code_base,
85 const byte* pc,
86 int stack_depth,
87 int current_position,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000088 uint32_t current_char,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089 int bytecode_length,
90 const char* bytecode_name) {
91 if (FLAG_trace_regexp_bytecodes) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000092 bool printable = (current_char < 127 && current_char >= 32);
93 const char* format =
94 printable ?
95 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
96 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
97 PrintF(format,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000098 pc - code_base,
99 stack_depth,
100 current_position,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000101 current_char,
102 printable ? current_char : '.',
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000103 bytecode_name);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000104 for (int i = 0; i < bytecode_length; i++) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000105 printf(", %02x", pc[i]);
106 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000107 printf(" ");
108 for (int i = 1; i < bytecode_length; i++) {
109 unsigned char b = pc[i];
110 if (b < 127 && b >= 32) {
111 printf("%c", b);
112 } else {
113 printf(".");
114 }
115 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000116 printf("\n");
117 }
118}
119
120
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000121#define BYTECODE(name) \
122 case BC_##name: \
123 TraceInterpreter(code_base, \
124 pc, \
125 static_cast<int>(backtrack_sp - backtrack_stack_base), \
126 current, \
127 current_char, \
128 BC_##name##_LENGTH, \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000129 #name);
130#else
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000131#define BYTECODE(name) \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000132 case BC_##name:
133#endif
134
135
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000136static int32_t Load32Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000137 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000138 return *reinterpret_cast<const int32_t *>(pc);
139}
140
141
142static int32_t Load16Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000143 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000144 return *reinterpret_cast<const uint16_t *>(pc);
145}
146
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000147
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000148// A simple abstraction over the backtracking stack used by the interpreter.
149// This backtracking stack does not grow automatically, but it ensures that the
150// the memory held by the stack is released or remembered in a cache if the
151// matching terminates.
152class BacktrackStack {
153 public:
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000154 explicit BacktrackStack(Isolate* isolate) : isolate_(isolate) {
155 if (isolate->irregexp_interpreter_backtrack_stack_cache() != NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000156 // If the cache is not empty reuse the previously allocated stack.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000157 data_ = isolate->irregexp_interpreter_backtrack_stack_cache();
158 isolate->set_irregexp_interpreter_backtrack_stack_cache(NULL);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000159 } else {
160 // Cache was empty. Allocate a new backtrack stack.
161 data_ = NewArray<int>(kBacktrackStackSize);
162 }
163 }
164
165 ~BacktrackStack() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000166 if (isolate_->irregexp_interpreter_backtrack_stack_cache() == NULL) {
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000167 // The cache is empty. Keep this backtrack stack around.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000168 isolate_->set_irregexp_interpreter_backtrack_stack_cache(data_);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000169 } else {
170 // A backtrack stack was already cached, just release this one.
171 DeleteArray(data_);
172 }
173 }
174
175 int* data() const { return data_; }
176
177 int max_size() const { return kBacktrackStackSize; }
178
179 private:
180 static const int kBacktrackStackSize = 10000;
181
182 int* data_;
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000183 Isolate* isolate_;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000184
185 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
186};
187
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000188
ager@chromium.org8bb60582008-12-11 12:02:20 +0000189template <typename Char>
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000190static bool RawMatch(Isolate* isolate,
191 const byte* code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000192 Vector<const Char> subject,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000193 int* registers,
194 int current,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000195 uint32_t current_char) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000196 const byte* pc = code_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000197 // BacktrackStack ensures that the memory allocated for the backtracking stack
198 // is returned to the system or cached if there is no stack being cached at
199 // the moment.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000200 BacktrackStack backtrack_stack(isolate);
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000201 int* backtrack_stack_base = backtrack_stack.data();
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000202 int* backtrack_sp = backtrack_stack_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000203 int backtrack_stack_space = backtrack_stack.max_size();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000204#ifdef DEBUG
205 if (FLAG_trace_regexp_bytecodes) {
206 PrintF("\n\nStart bytecode interpreter\n\n");
207 }
208#endif
209 while (true) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000210 int32_t insn = Load32Aligned(pc);
211 switch (insn & BYTECODE_MASK) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000212 BYTECODE(BREAK)
213 UNREACHABLE();
214 return false;
215 BYTECODE(PUSH_CP)
216 if (--backtrack_stack_space < 0) {
217 return false; // No match on backtrack stack overflow.
218 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000219 *backtrack_sp++ = current;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000220 pc += BC_PUSH_CP_LENGTH;
221 break;
222 BYTECODE(PUSH_BT)
223 if (--backtrack_stack_space < 0) {
224 return false; // No match on backtrack stack overflow.
225 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000226 *backtrack_sp++ = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000227 pc += BC_PUSH_BT_LENGTH;
228 break;
229 BYTECODE(PUSH_REGISTER)
230 if (--backtrack_stack_space < 0) {
231 return false; // No match on backtrack stack overflow.
232 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000234 pc += BC_PUSH_REGISTER_LENGTH;
235 break;
236 BYTECODE(SET_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000238 pc += BC_SET_REGISTER_LENGTH;
239 break;
240 BYTECODE(ADVANCE_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000242 pc += BC_ADVANCE_REGISTER_LENGTH;
243 break;
244 BYTECODE(SET_REGISTER_TO_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000245 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000246 pc += BC_SET_REGISTER_TO_CP_LENGTH;
247 break;
248 BYTECODE(SET_CP_TO_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000249 current = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000250 pc += BC_SET_CP_TO_REGISTER_LENGTH;
251 break;
252 BYTECODE(SET_REGISTER_TO_SP)
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000253 registers[insn >> BYTECODE_SHIFT] =
254 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000255 pc += BC_SET_REGISTER_TO_SP_LENGTH;
256 break;
257 BYTECODE(SET_SP_TO_REGISTER)
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000258 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000259 backtrack_stack_space = backtrack_stack.max_size() -
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000260 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000261 pc += BC_SET_SP_TO_REGISTER_LENGTH;
262 break;
263 BYTECODE(POP_CP)
264 backtrack_stack_space++;
265 --backtrack_sp;
266 current = *backtrack_sp;
267 pc += BC_POP_CP_LENGTH;
268 break;
269 BYTECODE(POP_BT)
270 backtrack_stack_space++;
271 --backtrack_sp;
272 pc = code_base + *backtrack_sp;
273 break;
274 BYTECODE(POP_REGISTER)
275 backtrack_stack_space++;
276 --backtrack_sp;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000278 pc += BC_POP_REGISTER_LENGTH;
279 break;
280 BYTECODE(FAIL)
281 return false;
282 BYTECODE(SUCCEED)
283 return true;
284 BYTECODE(ADVANCE_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000285 current += insn >> BYTECODE_SHIFT;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000286 pc += BC_ADVANCE_CP_LENGTH;
287 break;
288 BYTECODE(GOTO)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000289 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000290 break;
ager@chromium.org381abbb2009-02-25 13:23:22 +0000291 BYTECODE(ADVANCE_CP_AND_GOTO)
292 current += insn >> BYTECODE_SHIFT;
293 pc = code_base + Load32Aligned(pc + 4);
294 break;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000295 BYTECODE(CHECK_GREEDY)
296 if (current == backtrack_sp[-1]) {
297 backtrack_sp--;
298 backtrack_stack_space++;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000299 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000300 } else {
301 pc += BC_CHECK_GREEDY_LENGTH;
302 }
303 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000304 BYTECODE(LOAD_CURRENT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000305 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000306 if (pos >= subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000307 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000308 } else {
309 current_char = subject[pos];
310 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
311 }
312 break;
313 }
ager@chromium.org8bb60582008-12-11 12:02:20 +0000314 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000315 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000316 current_char = subject[pos];
317 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
318 break;
319 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000320 BYTECODE(LOAD_2_CURRENT_CHARS) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000321 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000322 if (pos + 2 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000323 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000324 } else {
325 Char next = subject[pos + 1];
326 current_char =
327 (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
328 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
329 }
330 break;
331 }
332 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000333 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000334 Char next = subject[pos + 1];
335 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
336 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
337 break;
338 }
339 BYTECODE(LOAD_4_CURRENT_CHARS) {
340 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000341 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000342 if (pos + 4 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000343 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000344 } else {
345 Char next1 = subject[pos + 1];
346 Char next2 = subject[pos + 2];
347 Char next3 = subject[pos + 3];
348 current_char = (subject[pos] |
349 (next1 << 8) |
350 (next2 << 16) |
351 (next3 << 24));
352 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
353 }
354 break;
355 }
356 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
357 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000358 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000359 Char next1 = subject[pos + 1];
360 Char next2 = subject[pos + 2];
361 Char next3 = subject[pos + 3];
362 current_char = (subject[pos] |
363 (next1 << 8) |
364 (next2 << 16) |
365 (next3 << 24));
366 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
367 break;
368 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000369 BYTECODE(CHECK_4_CHARS) {
370 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000371 if (c == current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000372 pc = code_base + Load32Aligned(pc + 8);
373 } else {
374 pc += BC_CHECK_4_CHARS_LENGTH;
375 }
376 break;
377 }
378 BYTECODE(CHECK_CHAR) {
379 uint32_t c = (insn >> BYTECODE_SHIFT);
380 if (c == current_char) {
381 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000382 } else {
383 pc += BC_CHECK_CHAR_LENGTH;
384 }
385 break;
386 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000387 BYTECODE(CHECK_NOT_4_CHARS) {
388 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000389 if (c != current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000390 pc = code_base + Load32Aligned(pc + 8);
391 } else {
392 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
393 }
394 break;
395 }
396 BYTECODE(CHECK_NOT_CHAR) {
397 uint32_t c = (insn >> BYTECODE_SHIFT);
398 if (c != current_char) {
399 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000400 } else {
401 pc += BC_CHECK_NOT_CHAR_LENGTH;
402 }
403 break;
404 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000405 BYTECODE(AND_CHECK_4_CHARS) {
406 uint32_t c = Load32Aligned(pc + 4);
407 if (c == (current_char & Load32Aligned(pc + 8))) {
408 pc = code_base + Load32Aligned(pc + 12);
409 } else {
410 pc += BC_AND_CHECK_4_CHARS_LENGTH;
411 }
412 break;
413 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000414 BYTECODE(AND_CHECK_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000415 uint32_t c = (insn >> BYTECODE_SHIFT);
416 if (c == (current_char & Load32Aligned(pc + 4))) {
417 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000418 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000419 pc += BC_AND_CHECK_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000420 }
421 break;
422 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000423 BYTECODE(AND_CHECK_NOT_4_CHARS) {
424 uint32_t c = Load32Aligned(pc + 4);
425 if (c != (current_char & Load32Aligned(pc + 8))) {
426 pc = code_base + Load32Aligned(pc + 12);
427 } else {
428 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
429 }
430 break;
431 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000432 BYTECODE(AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000433 uint32_t c = (insn >> BYTECODE_SHIFT);
434 if (c != (current_char & Load32Aligned(pc + 4))) {
435 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000436 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000437 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
438 }
439 break;
440 }
441 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000442 uint32_t c = (insn >> BYTECODE_SHIFT);
443 uint32_t minus = Load16Aligned(pc + 4);
444 uint32_t mask = Load16Aligned(pc + 6);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000445 if (c != ((current_char - minus) & mask)) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000446 pc = code_base + Load32Aligned(pc + 8);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000447 } else {
448 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000449 }
450 break;
451 }
452 BYTECODE(CHECK_LT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000453 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000454 if (current_char < limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000455 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000456 } else {
457 pc += BC_CHECK_LT_LENGTH;
458 }
459 break;
460 }
461 BYTECODE(CHECK_GT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000462 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000463 if (current_char > limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000464 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000465 } else {
466 pc += BC_CHECK_GT_LENGTH;
467 }
468 break;
469 }
470 BYTECODE(CHECK_REGISTER_LT)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000471 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
472 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000473 } else {
474 pc += BC_CHECK_REGISTER_LT_LENGTH;
475 }
476 break;
477 BYTECODE(CHECK_REGISTER_GE)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000478 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
479 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000480 } else {
481 pc += BC_CHECK_REGISTER_GE_LENGTH;
482 }
483 break;
ager@chromium.org32912102009-01-16 10:38:43 +0000484 BYTECODE(CHECK_REGISTER_EQ_POS)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000485 if (registers[insn >> BYTECODE_SHIFT] == current) {
486 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org32912102009-01-16 10:38:43 +0000487 } else {
488 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
489 }
490 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000491 BYTECODE(LOOKUP_MAP1) {
492 // Look up character in a bitmap. If we find a 0, then jump to the
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000493 // location at pc + 8. Otherwise fall through!
494 int index = current_char - (insn >> BYTECODE_SHIFT);
495 byte map = code_base[Load32Aligned(pc + 4) + (index >> 3)];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000496 map = ((map >> (index & 7)) & 1);
497 if (map == 0) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000498 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000499 } else {
500 pc += BC_LOOKUP_MAP1_LENGTH;
501 }
502 break;
503 }
504 BYTECODE(LOOKUP_MAP2) {
505 // Look up character in a half-nibble map. If we find 00, then jump to
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000506 // the location at pc + 8. If we find 01 then jump to location at
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000507 // pc + 11, etc.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000508 int index = (current_char - (insn >> BYTECODE_SHIFT)) << 1;
509 byte map = code_base[Load32Aligned(pc + 3) + (index >> 3)];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000510 map = ((map >> (index & 7)) & 3);
511 if (map < 2) {
512 if (map == 0) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000513 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000514 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000515 pc = code_base + Load32Aligned(pc + 12);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000516 }
517 } else {
518 if (map == 2) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000519 pc = code_base + Load32Aligned(pc + 16);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000520 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000521 pc = code_base + Load32Aligned(pc + 20);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000522 }
523 }
524 break;
525 }
526 BYTECODE(LOOKUP_MAP8) {
527 // Look up character in a byte map. Use the byte as an index into a
528 // table that follows this instruction immediately.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000529 int index = current_char - (insn >> BYTECODE_SHIFT);
530 byte map = code_base[Load32Aligned(pc + 4) + index];
531 const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
532 pc = code_base + Load32Aligned(new_pc);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000533 break;
534 }
535 BYTECODE(LOOKUP_HI_MAP8) {
536 // Look up high byte of this character in a byte map. Use the byte as
537 // an index into a table that follows this instruction immediately.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000538 int index = (current_char >> 8) - (insn >> BYTECODE_SHIFT);
539 byte map = code_base[Load32Aligned(pc + 4) + index];
540 const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
541 pc = code_base + Load32Aligned(new_pc);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000542 break;
543 }
544 BYTECODE(CHECK_NOT_REGS_EQUAL)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000545 if (registers[insn >> BYTECODE_SHIFT] ==
546 registers[Load32Aligned(pc + 4)]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000547 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
548 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000549 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000550 }
551 break;
552 BYTECODE(CHECK_NOT_BACK_REF) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000553 int from = registers[insn >> BYTECODE_SHIFT];
554 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000555 if (from < 0 || len <= 0) {
556 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
557 break;
558 }
559 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000560 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000561 break;
562 } else {
563 int i;
564 for (i = 0; i < len; i++) {
565 if (subject[from + i] != subject[current + i]) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000566 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000567 break;
568 }
569 }
570 if (i < len) break;
571 current += len;
572 }
573 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
574 break;
575 }
576 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000577 int from = registers[insn >> BYTECODE_SHIFT];
578 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000579 if (from < 0 || len <= 0) {
580 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
581 break;
582 }
583 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000584 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000585 break;
586 } else {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000587 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
588 from, current, len, subject)) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000589 current += len;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000590 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
591 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000592 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000593 }
594 }
595 break;
596 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000597 BYTECODE(CHECK_AT_START)
598 if (current == 0) {
599 pc = code_base + Load32Aligned(pc + 4);
600 } else {
601 pc += BC_CHECK_AT_START_LENGTH;
602 }
603 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000604 BYTECODE(CHECK_NOT_AT_START)
605 if (current == 0) {
606 pc += BC_CHECK_NOT_AT_START_LENGTH;
607 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000608 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000609 }
610 break;
whesse@chromium.org4a5224e2010-10-20 12:37:07 +0000611 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
612 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
613 if (subject.length() - current > by) {
614 current = subject.length() - by;
615 current_char = subject[current - 1];
616 }
617 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
618 break;
619 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000620 default:
621 UNREACHABLE();
622 break;
623 }
624 }
625}
626
627
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000628bool IrregexpInterpreter::Match(Isolate* isolate,
629 Handle<ByteArray> code_array,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000630 Handle<String> subject,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000631 int* registers,
632 int start_position) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000633 ASSERT(subject->IsFlat());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000634
635 AssertNoAllocation a;
636 const byte* code_base = code_array->GetDataStartAddress();
637 uc16 previous_char = '\n';
ager@chromium.org5ec48922009-05-05 07:25:34 +0000638 if (subject->IsAsciiRepresentation()) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000639 Vector<const char> subject_vector = subject->ToAsciiVector();
640 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000641 return RawMatch(isolate,
642 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000643 subject_vector,
644 registers,
645 start_position,
646 previous_char);
647 } else {
648 Vector<const uc16> subject_vector = subject->ToUC16Vector();
649 if (start_position != 0) previous_char = subject_vector[start_position - 1];
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000650 return RawMatch(isolate,
651 code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000652 subject_vector,
653 registers,
654 start_position,
655 previous_char);
656 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000657}
658
659} } // namespace v8::internal