blob: c9c3cc4c0eb712837d3cd40893851e3c2751c2e1 [file] [log] [blame]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// A simple interpreter for the Irregexp byte code.
29
30
31#include "v8.h"
32#include "unicode.h"
33#include "utils.h"
34#include "ast.h"
35#include "bytecodes-irregexp.h"
36#include "interpreter-irregexp.h"
37
38
kasperl@chromium.org71affb52009-05-26 05:44:31 +000039namespace v8 {
40namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000041
42
kasperl@chromium.org7be3c992009-03-12 07:19:55 +000043static unibrow::Mapping<unibrow::Ecma262Canonicalize> interp_canonicalize;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000044
45
46static bool BackRefMatchesNoCase(int from,
47 int current,
48 int len,
49 Vector<const uc16> subject) {
50 for (int i = 0; i < len; i++) {
51 unibrow::uchar old_char = subject[from++];
52 unibrow::uchar new_char = subject[current++];
53 if (old_char == new_char) continue;
kasperl@chromium.orge959c182009-07-27 08:59:04 +000054 unibrow::uchar old_string[1] = { old_char };
55 unibrow::uchar new_string[1] = { new_char };
56 interp_canonicalize.get(old_char, '\0', old_string);
57 interp_canonicalize.get(new_char, '\0', new_string);
58 if (old_string[0] != new_string[0]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000059 return false;
60 }
61 }
62 return true;
63}
64
65
ager@chromium.org8bb60582008-12-11 12:02:20 +000066static bool BackRefMatchesNoCase(int from,
67 int current,
68 int len,
69 Vector<const char> subject) {
70 for (int i = 0; i < len; i++) {
71 unsigned int old_char = subject[from++];
72 unsigned int new_char = subject[current++];
73 if (old_char == new_char) continue;
74 if (old_char - 'A' <= 'Z' - 'A') old_char |= 0x20;
75 if (new_char - 'A' <= 'Z' - 'A') new_char |= 0x20;
76 if (old_char != new_char) return false;
77 }
78 return true;
79}
80
81
ager@chromium.orga74f0da2008-12-03 16:05:52 +000082#ifdef DEBUG
83static void TraceInterpreter(const byte* code_base,
84 const byte* pc,
85 int stack_depth,
86 int current_position,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000087 uint32_t current_char,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000088 int bytecode_length,
89 const char* bytecode_name) {
90 if (FLAG_trace_regexp_bytecodes) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000091 bool printable = (current_char < 127 && current_char >= 32);
92 const char* format =
93 printable ?
94 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
95 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
96 PrintF(format,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000097 pc - code_base,
98 stack_depth,
99 current_position,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000100 current_char,
101 printable ? current_char : '.',
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000102 bytecode_name);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000103 for (int i = 0; i < bytecode_length; i++) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000104 printf(", %02x", pc[i]);
105 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000106 printf(" ");
107 for (int i = 1; i < bytecode_length; i++) {
108 unsigned char b = pc[i];
109 if (b < 127 && b >= 32) {
110 printf("%c", b);
111 } else {
112 printf(".");
113 }
114 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000115 printf("\n");
116 }
117}
118
119
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000120#define BYTECODE(name) \
121 case BC_##name: \
122 TraceInterpreter(code_base, \
123 pc, \
124 static_cast<int>(backtrack_sp - backtrack_stack_base), \
125 current, \
126 current_char, \
127 BC_##name##_LENGTH, \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000128 #name);
129#else
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000130#define BYTECODE(name) \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000131 case BC_##name:
132#endif
133
134
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000135static int32_t Load32Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000136 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000137 return *reinterpret_cast<const int32_t *>(pc);
138}
139
140
141static int32_t Load16Aligned(const byte* pc) {
ager@chromium.org9085a012009-05-11 19:22:57 +0000142 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000143 return *reinterpret_cast<const uint16_t *>(pc);
144}
145
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000146
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000147// A simple abstraction over the backtracking stack used by the interpreter.
148// This backtracking stack does not grow automatically, but it ensures that the
149// the memory held by the stack is released or remembered in a cache if the
150// matching terminates.
151class BacktrackStack {
152 public:
153 explicit BacktrackStack() {
154 if (cache_ != NULL) {
155 // If the cache is not empty reuse the previously allocated stack.
156 data_ = cache_;
157 cache_ = NULL;
158 } else {
159 // Cache was empty. Allocate a new backtrack stack.
160 data_ = NewArray<int>(kBacktrackStackSize);
161 }
162 }
163
164 ~BacktrackStack() {
165 if (cache_ == NULL) {
166 // The cache is empty. Keep this backtrack stack around.
167 cache_ = data_;
168 } else {
169 // A backtrack stack was already cached, just release this one.
170 DeleteArray(data_);
171 }
172 }
173
174 int* data() const { return data_; }
175
176 int max_size() const { return kBacktrackStackSize; }
177
178 private:
179 static const int kBacktrackStackSize = 10000;
180
181 int* data_;
182 static int* cache_;
183
184 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
185};
186
187int* BacktrackStack::cache_ = NULL;
188
189
ager@chromium.org8bb60582008-12-11 12:02:20 +0000190template <typename Char>
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000191static bool RawMatch(const byte* code_base,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000192 Vector<const Char> subject,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000193 int* registers,
194 int current,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000195 uint32_t current_char) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000196 const byte* pc = code_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000197 // BacktrackStack ensures that the memory allocated for the backtracking stack
198 // is returned to the system or cached if there is no stack being cached at
199 // the moment.
200 BacktrackStack backtrack_stack;
201 int* backtrack_stack_base = backtrack_stack.data();
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000202 int* backtrack_sp = backtrack_stack_base;
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000203 int backtrack_stack_space = backtrack_stack.max_size();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000204#ifdef DEBUG
205 if (FLAG_trace_regexp_bytecodes) {
206 PrintF("\n\nStart bytecode interpreter\n\n");
207 }
208#endif
209 while (true) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000210 int32_t insn = Load32Aligned(pc);
211 switch (insn & BYTECODE_MASK) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000212 BYTECODE(BREAK)
213 UNREACHABLE();
214 return false;
215 BYTECODE(PUSH_CP)
216 if (--backtrack_stack_space < 0) {
217 return false; // No match on backtrack stack overflow.
218 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000219 *backtrack_sp++ = current;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000220 pc += BC_PUSH_CP_LENGTH;
221 break;
222 BYTECODE(PUSH_BT)
223 if (--backtrack_stack_space < 0) {
224 return false; // No match on backtrack stack overflow.
225 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000226 *backtrack_sp++ = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000227 pc += BC_PUSH_BT_LENGTH;
228 break;
229 BYTECODE(PUSH_REGISTER)
230 if (--backtrack_stack_space < 0) {
231 return false; // No match on backtrack stack overflow.
232 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000234 pc += BC_PUSH_REGISTER_LENGTH;
235 break;
236 BYTECODE(SET_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000238 pc += BC_SET_REGISTER_LENGTH;
239 break;
240 BYTECODE(ADVANCE_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000242 pc += BC_ADVANCE_REGISTER_LENGTH;
243 break;
244 BYTECODE(SET_REGISTER_TO_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000245 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000246 pc += BC_SET_REGISTER_TO_CP_LENGTH;
247 break;
248 BYTECODE(SET_CP_TO_REGISTER)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000249 current = registers[insn >> BYTECODE_SHIFT];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000250 pc += BC_SET_CP_TO_REGISTER_LENGTH;
251 break;
252 BYTECODE(SET_REGISTER_TO_SP)
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000253 registers[insn >> BYTECODE_SHIFT] =
254 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000255 pc += BC_SET_REGISTER_TO_SP_LENGTH;
256 break;
257 BYTECODE(SET_SP_TO_REGISTER)
kasperl@chromium.org2abc4502009-07-02 07:00:29 +0000258 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
kasperl@chromium.org86f77b72009-07-06 08:21:57 +0000259 backtrack_stack_space = backtrack_stack.max_size() -
ager@chromium.orgc4c92722009-11-18 14:12:51 +0000260 static_cast<int>(backtrack_sp - backtrack_stack_base);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000261 pc += BC_SET_SP_TO_REGISTER_LENGTH;
262 break;
263 BYTECODE(POP_CP)
264 backtrack_stack_space++;
265 --backtrack_sp;
266 current = *backtrack_sp;
267 pc += BC_POP_CP_LENGTH;
268 break;
269 BYTECODE(POP_BT)
270 backtrack_stack_space++;
271 --backtrack_sp;
272 pc = code_base + *backtrack_sp;
273 break;
274 BYTECODE(POP_REGISTER)
275 backtrack_stack_space++;
276 --backtrack_sp;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000278 pc += BC_POP_REGISTER_LENGTH;
279 break;
280 BYTECODE(FAIL)
281 return false;
282 BYTECODE(SUCCEED)
283 return true;
284 BYTECODE(ADVANCE_CP)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000285 current += insn >> BYTECODE_SHIFT;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000286 pc += BC_ADVANCE_CP_LENGTH;
287 break;
288 BYTECODE(GOTO)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000289 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000290 break;
ager@chromium.org381abbb2009-02-25 13:23:22 +0000291 BYTECODE(ADVANCE_CP_AND_GOTO)
292 current += insn >> BYTECODE_SHIFT;
293 pc = code_base + Load32Aligned(pc + 4);
294 break;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000295 BYTECODE(CHECK_GREEDY)
296 if (current == backtrack_sp[-1]) {
297 backtrack_sp--;
298 backtrack_stack_space++;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000299 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000300 } else {
301 pc += BC_CHECK_GREEDY_LENGTH;
302 }
303 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000304 BYTECODE(LOAD_CURRENT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000305 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000306 if (pos >= subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000307 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000308 } else {
309 current_char = subject[pos];
310 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
311 }
312 break;
313 }
ager@chromium.org8bb60582008-12-11 12:02:20 +0000314 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000315 int pos = current + (insn >> BYTECODE_SHIFT);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000316 current_char = subject[pos];
317 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
318 break;
319 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000320 BYTECODE(LOAD_2_CURRENT_CHARS) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000321 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000322 if (pos + 2 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000323 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000324 } else {
325 Char next = subject[pos + 1];
326 current_char =
327 (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
328 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
329 }
330 break;
331 }
332 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000333 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000334 Char next = subject[pos + 1];
335 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
336 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
337 break;
338 }
339 BYTECODE(LOAD_4_CURRENT_CHARS) {
340 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000341 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000342 if (pos + 4 > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000343 pc = code_base + Load32Aligned(pc + 4);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000344 } else {
345 Char next1 = subject[pos + 1];
346 Char next2 = subject[pos + 2];
347 Char next3 = subject[pos + 3];
348 current_char = (subject[pos] |
349 (next1 << 8) |
350 (next2 << 16) |
351 (next3 << 24));
352 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
353 }
354 break;
355 }
356 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
357 ASSERT(sizeof(Char) == 1);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000358 int pos = current + (insn >> BYTECODE_SHIFT);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000359 Char next1 = subject[pos + 1];
360 Char next2 = subject[pos + 2];
361 Char next3 = subject[pos + 3];
362 current_char = (subject[pos] |
363 (next1 << 8) |
364 (next2 << 16) |
365 (next3 << 24));
366 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
367 break;
368 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000369 BYTECODE(CHECK_4_CHARS) {
370 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000371 if (c == current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000372 pc = code_base + Load32Aligned(pc + 8);
373 } else {
374 pc += BC_CHECK_4_CHARS_LENGTH;
375 }
376 break;
377 }
378 BYTECODE(CHECK_CHAR) {
379 uint32_t c = (insn >> BYTECODE_SHIFT);
380 if (c == current_char) {
381 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000382 } else {
383 pc += BC_CHECK_CHAR_LENGTH;
384 }
385 break;
386 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000387 BYTECODE(CHECK_NOT_4_CHARS) {
388 uint32_t c = Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000389 if (c != current_char) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000390 pc = code_base + Load32Aligned(pc + 8);
391 } else {
392 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
393 }
394 break;
395 }
396 BYTECODE(CHECK_NOT_CHAR) {
397 uint32_t c = (insn >> BYTECODE_SHIFT);
398 if (c != current_char) {
399 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000400 } else {
401 pc += BC_CHECK_NOT_CHAR_LENGTH;
402 }
403 break;
404 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000405 BYTECODE(AND_CHECK_4_CHARS) {
406 uint32_t c = Load32Aligned(pc + 4);
407 if (c == (current_char & Load32Aligned(pc + 8))) {
408 pc = code_base + Load32Aligned(pc + 12);
409 } else {
410 pc += BC_AND_CHECK_4_CHARS_LENGTH;
411 }
412 break;
413 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000414 BYTECODE(AND_CHECK_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000415 uint32_t c = (insn >> BYTECODE_SHIFT);
416 if (c == (current_char & Load32Aligned(pc + 4))) {
417 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000418 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000419 pc += BC_AND_CHECK_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000420 }
421 break;
422 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000423 BYTECODE(AND_CHECK_NOT_4_CHARS) {
424 uint32_t c = Load32Aligned(pc + 4);
425 if (c != (current_char & Load32Aligned(pc + 8))) {
426 pc = code_base + Load32Aligned(pc + 12);
427 } else {
428 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
429 }
430 break;
431 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000432 BYTECODE(AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000433 uint32_t c = (insn >> BYTECODE_SHIFT);
434 if (c != (current_char & Load32Aligned(pc + 4))) {
435 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000436 } else {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000437 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
438 }
439 break;
440 }
441 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000442 uint32_t c = (insn >> BYTECODE_SHIFT);
443 uint32_t minus = Load16Aligned(pc + 4);
444 uint32_t mask = Load16Aligned(pc + 6);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000445 if (c != ((current_char - minus) & mask)) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000446 pc = code_base + Load32Aligned(pc + 8);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000447 } else {
448 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000449 }
450 break;
451 }
452 BYTECODE(CHECK_LT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000453 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000454 if (current_char < limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000455 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000456 } else {
457 pc += BC_CHECK_LT_LENGTH;
458 }
459 break;
460 }
461 BYTECODE(CHECK_GT) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000462 uint32_t limit = (insn >> BYTECODE_SHIFT);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000463 if (current_char > limit) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000464 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000465 } else {
466 pc += BC_CHECK_GT_LENGTH;
467 }
468 break;
469 }
470 BYTECODE(CHECK_REGISTER_LT)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000471 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
472 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000473 } else {
474 pc += BC_CHECK_REGISTER_LT_LENGTH;
475 }
476 break;
477 BYTECODE(CHECK_REGISTER_GE)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000478 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
479 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000480 } else {
481 pc += BC_CHECK_REGISTER_GE_LENGTH;
482 }
483 break;
ager@chromium.org32912102009-01-16 10:38:43 +0000484 BYTECODE(CHECK_REGISTER_EQ_POS)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000485 if (registers[insn >> BYTECODE_SHIFT] == current) {
486 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.org32912102009-01-16 10:38:43 +0000487 } else {
488 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
489 }
490 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000491 BYTECODE(LOOKUP_MAP1) {
492 // Look up character in a bitmap. If we find a 0, then jump to the
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000493 // location at pc + 8. Otherwise fall through!
494 int index = current_char - (insn >> BYTECODE_SHIFT);
495 byte map = code_base[Load32Aligned(pc + 4) + (index >> 3)];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000496 map = ((map >> (index & 7)) & 1);
497 if (map == 0) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000498 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000499 } else {
500 pc += BC_LOOKUP_MAP1_LENGTH;
501 }
502 break;
503 }
504 BYTECODE(LOOKUP_MAP2) {
505 // Look up character in a half-nibble map. If we find 00, then jump to
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000506 // the location at pc + 8. If we find 01 then jump to location at
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000507 // pc + 11, etc.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000508 int index = (current_char - (insn >> BYTECODE_SHIFT)) << 1;
509 byte map = code_base[Load32Aligned(pc + 3) + (index >> 3)];
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000510 map = ((map >> (index & 7)) & 3);
511 if (map < 2) {
512 if (map == 0) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000513 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000514 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000515 pc = code_base + Load32Aligned(pc + 12);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000516 }
517 } else {
518 if (map == 2) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000519 pc = code_base + Load32Aligned(pc + 16);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000520 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000521 pc = code_base + Load32Aligned(pc + 20);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000522 }
523 }
524 break;
525 }
526 BYTECODE(LOOKUP_MAP8) {
527 // Look up character in a byte map. Use the byte as an index into a
528 // table that follows this instruction immediately.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000529 int index = current_char - (insn >> BYTECODE_SHIFT);
530 byte map = code_base[Load32Aligned(pc + 4) + index];
531 const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
532 pc = code_base + Load32Aligned(new_pc);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000533 break;
534 }
535 BYTECODE(LOOKUP_HI_MAP8) {
536 // Look up high byte of this character in a byte map. Use the byte as
537 // an index into a table that follows this instruction immediately.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000538 int index = (current_char >> 8) - (insn >> BYTECODE_SHIFT);
539 byte map = code_base[Load32Aligned(pc + 4) + index];
540 const byte* new_pc = code_base + Load32Aligned(pc + 8) + (map << 2);
541 pc = code_base + Load32Aligned(new_pc);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000542 break;
543 }
544 BYTECODE(CHECK_NOT_REGS_EQUAL)
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000545 if (registers[insn >> BYTECODE_SHIFT] ==
546 registers[Load32Aligned(pc + 4)]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000547 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
548 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000549 pc = code_base + Load32Aligned(pc + 8);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000550 }
551 break;
552 BYTECODE(CHECK_NOT_BACK_REF) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000553 int from = registers[insn >> BYTECODE_SHIFT];
554 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000555 if (from < 0 || len <= 0) {
556 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
557 break;
558 }
559 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000560 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000561 break;
562 } else {
563 int i;
564 for (i = 0; i < len; i++) {
565 if (subject[from + i] != subject[current + i]) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000566 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000567 break;
568 }
569 }
570 if (i < len) break;
571 current += len;
572 }
573 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
574 break;
575 }
576 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000577 int from = registers[insn >> BYTECODE_SHIFT];
578 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000579 if (from < 0 || len <= 0) {
580 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
581 break;
582 }
583 if (current + len > subject.length()) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000584 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000585 break;
586 } else {
587 if (BackRefMatchesNoCase(from, current, len, subject)) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000588 current += len;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000589 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
590 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000591 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000592 }
593 }
594 break;
595 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000596 BYTECODE(CHECK_AT_START)
597 if (current == 0) {
598 pc = code_base + Load32Aligned(pc + 4);
599 } else {
600 pc += BC_CHECK_AT_START_LENGTH;
601 }
602 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000603 BYTECODE(CHECK_NOT_AT_START)
604 if (current == 0) {
605 pc += BC_CHECK_NOT_AT_START_LENGTH;
606 } else {
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000607 pc = code_base + Load32Aligned(pc + 4);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000608 }
609 break;
whesse@chromium.org4a5224e2010-10-20 12:37:07 +0000610 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
611 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
612 if (subject.length() - current > by) {
613 current = subject.length() - by;
614 current_char = subject[current - 1];
615 }
616 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
617 break;
618 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000619 default:
620 UNREACHABLE();
621 break;
622 }
623 }
624}
625
626
627bool IrregexpInterpreter::Match(Handle<ByteArray> code_array,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000628 Handle<String> subject,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000629 int* registers,
630 int start_position) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000631 ASSERT(subject->IsFlat());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000632
633 AssertNoAllocation a;
634 const byte* code_base = code_array->GetDataStartAddress();
635 uc16 previous_char = '\n';
ager@chromium.org5ec48922009-05-05 07:25:34 +0000636 if (subject->IsAsciiRepresentation()) {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000637 Vector<const char> subject_vector = subject->ToAsciiVector();
638 if (start_position != 0) previous_char = subject_vector[start_position - 1];
639 return RawMatch(code_base,
640 subject_vector,
641 registers,
642 start_position,
643 previous_char);
644 } else {
645 Vector<const uc16> subject_vector = subject->ToUC16Vector();
646 if (start_position != 0) previous_char = subject_vector[start_position - 1];
647 return RawMatch(code_base,
648 subject_vector,
649 registers,
650 start_position,
651 previous_char);
652 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000653}
654
655} } // namespace v8::internal