blob: 2e13d8aeed6e7ae768960bd667e084535d3f8a1c [file] [log] [blame]
Steve Blocka7e24c12009-10-30 11:49:00 +00001// Copyright 2008-2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "unicode.h"
30#include "log.h"
31#include "ast.h"
32#include "regexp-stack.h"
33#include "macro-assembler.h"
34#include "regexp-macro-assembler.h"
35#include "ia32/macro-assembler-ia32.h"
36#include "ia32/regexp-macro-assembler-ia32.h"
37
38namespace v8 {
39namespace internal {
40
41#ifdef V8_NATIVE_REGEXP
42/*
43 * This assembler uses the following register assignment convention
44 * - edx : current character. Must be loaded using LoadCurrentCharacter
45 * before using any of the dispatch methods.
46 * - edi : current position in input, as negative offset from end of string.
47 * Please notice that this is the byte offset, not the character offset!
48 * - esi : end of input (points to byte after last character in input).
49 * - ebp : frame pointer. Used to access arguments, local variables and
50 * RegExp registers.
51 * - esp : points to tip of C stack.
52 * - ecx : points to tip of backtrack stack
53 *
54 * The registers eax, ebx and ecx are free to use for computations.
55 *
56 * Each call to a public method should retain this convention.
57 * The stack will have the following structure:
58 * - stack_area_base (High end of the memory area to use as
59 * backtracking stack)
60 * - at_start (if 1, start at start of string, if 0, don't)
61 * - int* capture_array (int[num_saved_registers_], for output).
62 * - end of input (Address of end of string)
63 * - start of input (Address of first character in string)
64 * - void* input_string (location of a handle containing the string)
65 * --- frame alignment (if applicable) ---
66 * - return address
67 * ebp-> - old ebp
68 * - backup of caller esi
69 * - backup of caller edi
70 * - backup of caller ebx
71 * - Offset of location before start of input (effectively character
72 * position -1). Used to initialize capture registers to a non-position.
73 * - register 0 ebp[-4] (Only positions must be stored in the first
74 * - register 1 ebp[-8] num_saved_registers_ registers)
75 * - ...
76 *
77 * The first num_saved_registers_ registers are initialized to point to
78 * "character -1" in the string (i.e., char_size() bytes before the first
79 * character of the string). The remaining registers starts out as garbage.
80 *
81 * The data up to the return address must be placed there by the calling
82 * code, by calling the code entry as cast to a function with the signature:
83 * int (*match)(String* input_string,
84 * Address start,
85 * Address end,
86 * int* capture_output_array,
87 * bool at_start,
88 * byte* stack_area_base)
89 */
90
91#define __ ACCESS_MASM(masm_)
92
93RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(
94 Mode mode,
95 int registers_to_save)
96 : masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
97 mode_(mode),
98 num_registers_(registers_to_save),
99 num_saved_registers_(registers_to_save),
100 entry_label_(),
101 start_label_(),
102 success_label_(),
103 backtrack_label_(),
104 exit_label_() {
105 ASSERT_EQ(0, registers_to_save % 2);
106 __ jmp(&entry_label_); // We'll write the entry code later.
107 __ bind(&start_label_); // And then continue from here.
108}
109
110
111RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
112 delete masm_;
113 // Unuse labels in case we throw away the assembler without calling GetCode.
114 entry_label_.Unuse();
115 start_label_.Unuse();
116 success_label_.Unuse();
117 backtrack_label_.Unuse();
118 exit_label_.Unuse();
119 check_preempt_label_.Unuse();
120 stack_overflow_label_.Unuse();
121}
122
123
124int RegExpMacroAssemblerIA32::stack_limit_slack() {
125 return RegExpStack::kStackLimitSlack;
126}
127
128
129void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
130 if (by != 0) {
131 Label inside_string;
132 __ add(Operand(edi), Immediate(by * char_size()));
133 }
134}
135
136
137void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
138 ASSERT(reg >= 0);
139 ASSERT(reg < num_registers_);
140 if (by != 0) {
141 __ add(register_location(reg), Immediate(by));
142 }
143}
144
145
146void RegExpMacroAssemblerIA32::Backtrack() {
147 CheckPreemption();
148 // Pop Code* offset from backtrack stack, add Code* and jump to location.
149 Pop(ebx);
150 __ add(Operand(ebx), Immediate(masm_->CodeObject()));
151 __ jmp(Operand(ebx));
152}
153
154
155void RegExpMacroAssemblerIA32::Bind(Label* label) {
156 __ bind(label);
157}
158
159
160void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
161 __ cmp(current_character(), c);
162 BranchOrBacktrack(equal, on_equal);
163}
164
165
166void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
167 __ cmp(current_character(), limit);
168 BranchOrBacktrack(greater, on_greater);
169}
170
171
172void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
173 Label not_at_start;
174 // Did we start the match at the start of the string at all?
175 __ cmp(Operand(ebp, kAtStart), Immediate(0));
176 BranchOrBacktrack(equal, &not_at_start);
177 // If we did, are we still at the start of the input?
178 __ lea(eax, Operand(esi, edi, times_1, 0));
179 __ cmp(eax, Operand(ebp, kInputStart));
180 BranchOrBacktrack(equal, on_at_start);
181 __ bind(&not_at_start);
182}
183
184
185void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) {
186 // Did we start the match at the start of the string at all?
187 __ cmp(Operand(ebp, kAtStart), Immediate(0));
188 BranchOrBacktrack(equal, on_not_at_start);
189 // If we did, are we still at the start of the input?
190 __ lea(eax, Operand(esi, edi, times_1, 0));
191 __ cmp(eax, Operand(ebp, kInputStart));
192 BranchOrBacktrack(not_equal, on_not_at_start);
193}
194
195
196void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
197 __ cmp(current_character(), limit);
198 BranchOrBacktrack(less, on_less);
199}
200
201
202void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
203 int cp_offset,
204 Label* on_failure,
205 bool check_end_of_string) {
206 int byte_length = str.length() * char_size();
207 int byte_offset = cp_offset * char_size();
208 if (check_end_of_string) {
209 // Check that there are at least str.length() characters left in the input.
210 __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
211 BranchOrBacktrack(greater, on_failure);
212 }
213
214 if (on_failure == NULL) {
215 // Instead of inlining a backtrack, (re)use the global backtrack target.
216 on_failure = &backtrack_label_;
217 }
218
219 for (int i = 0; i < str.length(); i++) {
220 if (mode_ == ASCII) {
221 __ cmpb(Operand(esi, edi, times_1, byte_offset + i),
222 static_cast<int8_t>(str[i]));
223 } else {
224 ASSERT(mode_ == UC16);
225 __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)),
226 Immediate(str[i]));
227 }
228 BranchOrBacktrack(not_equal, on_failure);
229 }
230}
231
232
233void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
234 Label fallthrough;
235 __ cmp(edi, Operand(backtrack_stackpointer(), 0));
236 __ j(not_equal, &fallthrough);
237 __ add(Operand(backtrack_stackpointer()), Immediate(kPointerSize)); // Pop.
238 BranchOrBacktrack(no_condition, on_equal);
239 __ bind(&fallthrough);
240}
241
242
243void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
244 int start_reg,
245 Label* on_no_match) {
246 Label fallthrough;
247 __ mov(edx, register_location(start_reg)); // Index of start of capture
248 __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
249 __ sub(ebx, Operand(edx)); // Length of capture.
250
251 // The length of a capture should not be negative. This can only happen
252 // if the end of the capture is unrecorded, or at a point earlier than
253 // the start of the capture.
254 BranchOrBacktrack(less, on_no_match, not_taken);
255
256 // If length is zero, either the capture is empty or it is completely
257 // uncaptured. In either case succeed immediately.
258 __ j(equal, &fallthrough);
259
260 if (mode_ == ASCII) {
261 Label success;
262 Label fail;
263 Label loop_increment;
264 // Save register contents to make the registers available below.
265 __ push(edi);
266 __ push(backtrack_stackpointer());
267 // After this, the eax, ecx, and edi registers are available.
268
269 __ add(edx, Operand(esi)); // Start of capture
270 __ add(edi, Operand(esi)); // Start of text to match against capture.
271 __ add(ebx, Operand(edi)); // End of text to match against capture.
272
273 Label loop;
274 __ bind(&loop);
275 __ movzx_b(eax, Operand(edi, 0));
276 __ cmpb_al(Operand(edx, 0));
277 __ j(equal, &loop_increment);
278
279 // Mismatch, try case-insensitive match (converting letters to lower-case).
280 __ or_(eax, 0x20); // Convert match character to lower-case.
281 __ lea(ecx, Operand(eax, -'a'));
282 __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter?
283 __ j(above, &fail);
284 // Also convert capture character.
285 __ movzx_b(ecx, Operand(edx, 0));
286 __ or_(ecx, 0x20);
287
288 __ cmp(eax, Operand(ecx));
289 __ j(not_equal, &fail);
290
291 __ bind(&loop_increment);
292 // Increment pointers into match and capture strings.
293 __ add(Operand(edx), Immediate(1));
294 __ add(Operand(edi), Immediate(1));
295 // Compare to end of match, and loop if not done.
296 __ cmp(edi, Operand(ebx));
297 __ j(below, &loop, taken);
298 __ jmp(&success);
299
300 __ bind(&fail);
301 // Restore original values before failing.
302 __ pop(backtrack_stackpointer());
303 __ pop(edi);
304 BranchOrBacktrack(no_condition, on_no_match);
305
306 __ bind(&success);
307 // Restore original value before continuing.
308 __ pop(backtrack_stackpointer());
309 // Drop original value of character position.
310 __ add(Operand(esp), Immediate(kPointerSize));
311 // Compute new value of character position after the matched part.
312 __ sub(edi, Operand(esi));
313 } else {
314 ASSERT(mode_ == UC16);
315 // Save registers before calling C function.
316 __ push(esi);
317 __ push(edi);
318 __ push(backtrack_stackpointer());
319 __ push(ebx);
320
321 const int argument_count = 3;
322 FrameAlign(argument_count, ecx);
323 // Put arguments into allocated stack area, last argument highest on stack.
324 // Parameters are
325 // Address byte_offset1 - Address captured substring's start.
326 // Address byte_offset2 - Address of current character position.
327 // size_t byte_length - length of capture in bytes(!)
328
329 // Set byte_length.
330 __ mov(Operand(esp, 2 * kPointerSize), ebx);
331 // Set byte_offset2.
332 // Found by adding negative string-end offset of current position (edi)
333 // to end of string.
334 __ add(edi, Operand(esi));
335 __ mov(Operand(esp, 1 * kPointerSize), edi);
336 // Set byte_offset1.
337 // Start of capture, where edx already holds string-end negative offset.
338 __ add(edx, Operand(esi));
339 __ mov(Operand(esp, 0 * kPointerSize), edx);
340
341 ExternalReference compare =
342 ExternalReference::re_case_insensitive_compare_uc16();
343 CallCFunction(compare, argument_count);
344 // Pop original values before reacting on result value.
345 __ pop(ebx);
346 __ pop(backtrack_stackpointer());
347 __ pop(edi);
348 __ pop(esi);
349
350 // Check if function returned non-zero for success or zero for failure.
351 __ or_(eax, Operand(eax));
352 BranchOrBacktrack(zero, on_no_match);
353 // On success, increment position by length of capture.
354 __ add(edi, Operand(ebx));
355 }
356 __ bind(&fallthrough);
357}
358
359
360void RegExpMacroAssemblerIA32::CheckNotBackReference(
361 int start_reg,
362 Label* on_no_match) {
363 Label fallthrough;
364 Label success;
365 Label fail;
366
367 // Find length of back-referenced capture.
368 __ mov(edx, register_location(start_reg));
369 __ mov(eax, register_location(start_reg + 1));
370 __ sub(eax, Operand(edx)); // Length to check.
371 // Fail on partial or illegal capture (start of capture after end of capture).
372 BranchOrBacktrack(less, on_no_match);
373 // Succeed on empty capture (including no capture)
374 __ j(equal, &fallthrough);
375
376 // Check that there are sufficient characters left in the input.
377 __ mov(ebx, edi);
378 __ add(ebx, Operand(eax));
379 BranchOrBacktrack(greater, on_no_match);
380
381 // Save register to make it available below.
382 __ push(backtrack_stackpointer());
383
384 // Compute pointers to match string and capture string
385 __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
386 __ add(edx, Operand(esi)); // Start of capture.
387 __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
388
389 Label loop;
390 __ bind(&loop);
391 if (mode_ == ASCII) {
392 __ movzx_b(eax, Operand(edx, 0));
393 __ cmpb_al(Operand(ebx, 0));
394 } else {
395 ASSERT(mode_ == UC16);
396 __ movzx_w(eax, Operand(edx, 0));
397 __ cmpw_ax(Operand(ebx, 0));
398 }
399 __ j(not_equal, &fail);
400 // Increment pointers into capture and match string.
401 __ add(Operand(edx), Immediate(char_size()));
402 __ add(Operand(ebx), Immediate(char_size()));
403 // Check if we have reached end of match area.
404 __ cmp(ebx, Operand(ecx));
405 __ j(below, &loop);
406 __ jmp(&success);
407
408 __ bind(&fail);
409 // Restore backtrack stackpointer.
410 __ pop(backtrack_stackpointer());
411 BranchOrBacktrack(no_condition, on_no_match);
412
413 __ bind(&success);
414 // Move current character position to position after match.
415 __ mov(edi, ecx);
416 __ sub(Operand(edi), esi);
417 // Restore backtrack stackpointer.
418 __ pop(backtrack_stackpointer());
419
420 __ bind(&fallthrough);
421}
422
423
424void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1,
425 int reg2,
426 Label* on_not_equal) {
427 __ mov(eax, register_location(reg1));
428 __ cmp(eax, register_location(reg2));
429 BranchOrBacktrack(not_equal, on_not_equal);
430}
431
432
433void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
434 Label* on_not_equal) {
435 __ cmp(current_character(), c);
436 BranchOrBacktrack(not_equal, on_not_equal);
437}
438
439
440void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
441 uint32_t mask,
442 Label* on_equal) {
443 __ mov(eax, current_character());
444 __ and_(eax, mask);
445 __ cmp(eax, c);
446 BranchOrBacktrack(equal, on_equal);
447}
448
449
450void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
451 uint32_t mask,
452 Label* on_not_equal) {
453 __ mov(eax, current_character());
454 __ and_(eax, mask);
455 __ cmp(eax, c);
456 BranchOrBacktrack(not_equal, on_not_equal);
457}
458
459
460void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
461 uc16 c,
462 uc16 minus,
463 uc16 mask,
464 Label* on_not_equal) {
465 ASSERT(minus < String::kMaxUC16CharCode);
466 __ lea(eax, Operand(current_character(), -minus));
467 __ and_(eax, mask);
468 __ cmp(eax, c);
469 BranchOrBacktrack(not_equal, on_not_equal);
470}
471
472
473bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
474 int cp_offset,
475 bool check_offset,
476 Label* on_no_match) {
477 // Range checks (c in min..max) are generally implemented by an unsigned
478 // (c - min) <= (max - min) check
479 switch (type) {
480 case 's':
481 // Match space-characters
482 if (mode_ == ASCII) {
483 // ASCII space characters are '\t'..'\r' and ' '.
484 if (check_offset) {
485 LoadCurrentCharacter(cp_offset, on_no_match);
486 } else {
487 LoadCurrentCharacterUnchecked(cp_offset, 1);
488 }
489 Label success;
490 __ cmp(current_character(), ' ');
491 __ j(equal, &success);
492 // Check range 0x09..0x0d
493 __ sub(Operand(current_character()), Immediate('\t'));
494 __ cmp(current_character(), '\r' - '\t');
495 BranchOrBacktrack(above, on_no_match);
496 __ bind(&success);
497 return true;
498 }
499 return false;
500 case 'S':
501 // Match non-space characters.
502 if (check_offset) {
503 LoadCurrentCharacter(cp_offset, on_no_match, 1);
504 } else {
505 LoadCurrentCharacterUnchecked(cp_offset, 1);
506 }
507 if (mode_ == ASCII) {
508 // ASCII space characters are '\t'..'\r' and ' '.
509 __ cmp(current_character(), ' ');
510 BranchOrBacktrack(equal, on_no_match);
511 __ sub(Operand(current_character()), Immediate('\t'));
512 __ cmp(current_character(), '\r' - '\t');
513 BranchOrBacktrack(below_equal, on_no_match);
514 return true;
515 }
516 return false;
517 case 'd':
518 // Match ASCII digits ('0'..'9')
519 if (check_offset) {
520 LoadCurrentCharacter(cp_offset, on_no_match, 1);
521 } else {
522 LoadCurrentCharacterUnchecked(cp_offset, 1);
523 }
524 __ sub(Operand(current_character()), Immediate('0'));
525 __ cmp(current_character(), '9' - '0');
526 BranchOrBacktrack(above, on_no_match);
527 return true;
528 case 'D':
529 // Match non ASCII-digits
530 if (check_offset) {
531 LoadCurrentCharacter(cp_offset, on_no_match, 1);
532 } else {
533 LoadCurrentCharacterUnchecked(cp_offset, 1);
534 }
535 __ sub(Operand(current_character()), Immediate('0'));
536 __ cmp(current_character(), '9' - '0');
537 BranchOrBacktrack(below_equal, on_no_match);
538 return true;
539 case '.': {
540 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
541 if (check_offset) {
542 LoadCurrentCharacter(cp_offset, on_no_match, 1);
543 } else {
544 LoadCurrentCharacterUnchecked(cp_offset, 1);
545 }
546 __ xor_(Operand(current_character()), Immediate(0x01));
547 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
548 __ sub(Operand(current_character()), Immediate(0x0b));
549 __ cmp(current_character(), 0x0c - 0x0b);
550 BranchOrBacktrack(below_equal, on_no_match);
551 if (mode_ == UC16) {
552 // Compare original value to 0x2028 and 0x2029, using the already
553 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
554 // 0x201d (0x2028 - 0x0b) or 0x201e.
555 __ sub(Operand(current_character()), Immediate(0x2028 - 0x0b));
556 __ cmp(current_character(), 1);
557 BranchOrBacktrack(below_equal, on_no_match);
558 }
559 return true;
560 }
561 case '*':
562 // Match any character.
563 if (check_offset) {
564 CheckPosition(cp_offset, on_no_match);
565 }
566 return true;
567 // No custom implementation (yet): w, W, s(UC16), S(UC16).
568 default:
569 return false;
570 }
571}
572
573
574void RegExpMacroAssemblerIA32::Fail() {
575 ASSERT(FAILURE == 0); // Return value for failure is zero.
576 __ xor_(eax, Operand(eax)); // zero eax.
577 __ jmp(&exit_label_);
578}
579
580
581Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
582 // Finalize code - write the entry point code now we know how many
583 // registers we need.
584
585 // Entry code:
586 __ bind(&entry_label_);
587 // Start new stack frame.
588 __ push(ebp);
589 __ mov(ebp, esp);
590 // Save callee-save registers. Order here should correspond to order of
591 // kBackup_ebx etc.
592 __ push(esi);
593 __ push(edi);
594 __ push(ebx); // Callee-save on MacOS.
595 __ push(Immediate(0)); // Make room for "input start - 1" constant.
596
597 // Check if we have space on the stack for registers.
598 Label stack_limit_hit;
599 Label stack_ok;
600
Steve Blockd0582a62009-12-15 09:54:21 +0000601 ExternalReference stack_limit =
602 ExternalReference::address_of_stack_limit();
Steve Blocka7e24c12009-10-30 11:49:00 +0000603 __ mov(ecx, esp);
Steve Blockd0582a62009-12-15 09:54:21 +0000604 __ sub(ecx, Operand::StaticVariable(stack_limit));
Steve Blocka7e24c12009-10-30 11:49:00 +0000605 // Handle it if the stack pointer is already below the stack limit.
606 __ j(below_equal, &stack_limit_hit, not_taken);
607 // Check if there is room for the variable number of registers above
608 // the stack limit.
609 __ cmp(ecx, num_registers_ * kPointerSize);
610 __ j(above_equal, &stack_ok, taken);
611 // Exit with OutOfMemory exception. There is not enough space on the stack
612 // for our working registers.
613 __ mov(eax, EXCEPTION);
614 __ jmp(&exit_label_);
615
616 __ bind(&stack_limit_hit);
617 CallCheckStackGuardState(ebx);
618 __ or_(eax, Operand(eax));
619 // If returned value is non-zero, we exit with the returned value as result.
620 __ j(not_zero, &exit_label_);
621
622 __ bind(&stack_ok);
623
624 // Allocate space on stack for registers.
625 __ sub(Operand(esp), Immediate(num_registers_ * kPointerSize));
626 // Load string length.
627 __ mov(esi, Operand(ebp, kInputEnd));
628 // Load input position.
629 __ mov(edi, Operand(ebp, kInputStart));
630 // Set up edi to be negative offset from string end.
631 __ sub(edi, Operand(esi));
632 // Set eax to address of char before start of input
633 // (effectively string position -1).
634 __ lea(eax, Operand(edi, -char_size()));
635 // Store this value in a local variable, for use when clearing
636 // position registers.
637 __ mov(Operand(ebp, kInputStartMinusOne), eax);
638 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
639 // Fill saved registers with initial value = start offset - 1
640 // Fill in stack push order, to avoid accessing across an unwritten
641 // page (a problem on Windows).
642 __ mov(ecx, kRegisterZero);
643 Label init_loop;
644 __ bind(&init_loop);
645 __ mov(Operand(ebp, ecx, times_1, +0), eax);
646 __ sub(Operand(ecx), Immediate(kPointerSize));
647 __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
648 __ j(greater, &init_loop);
649 }
650 // Ensure that we have written to each stack page, in order. Skipping a page
651 // on Windows can cause segmentation faults. Assuming page size is 4k.
652 const int kPageSize = 4096;
653 const int kRegistersPerPage = kPageSize / kPointerSize;
654 for (int i = num_saved_registers_ + kRegistersPerPage - 1;
655 i < num_registers_;
656 i += kRegistersPerPage) {
657 __ mov(register_location(i), eax); // One write every page.
658 }
659
660
661 // Initialize backtrack stack pointer.
662 __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
663 // Load previous char as initial value of current-character.
664 Label at_start;
665 __ cmp(Operand(ebp, kAtStart), Immediate(0));
666 __ j(not_equal, &at_start);
667 LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
668 __ jmp(&start_label_);
669 __ bind(&at_start);
670 __ mov(current_character(), '\n');
671 __ jmp(&start_label_);
672
673
674 // Exit code:
675 if (success_label_.is_linked()) {
676 // Save captures when successful.
677 __ bind(&success_label_);
678 if (num_saved_registers_ > 0) {
679 // copy captures to output
680 __ mov(ebx, Operand(ebp, kRegisterOutput));
681 __ mov(ecx, Operand(ebp, kInputEnd));
682 __ sub(ecx, Operand(ebp, kInputStart));
683 for (int i = 0; i < num_saved_registers_; i++) {
684 __ mov(eax, register_location(i));
685 __ add(eax, Operand(ecx)); // Convert to index from start, not end.
686 if (mode_ == UC16) {
687 __ sar(eax, 1); // Convert byte index to character index.
688 }
689 __ mov(Operand(ebx, i * kPointerSize), eax);
690 }
691 }
692 __ mov(eax, Immediate(SUCCESS));
693 }
694 // Exit and return eax
695 __ bind(&exit_label_);
696 // Skip esp past regexp registers.
697 __ lea(esp, Operand(ebp, kBackup_ebx));
698 // Restore callee-save registers.
699 __ pop(ebx);
700 __ pop(edi);
701 __ pop(esi);
702 // Exit function frame, restore previous one.
703 __ pop(ebp);
704 __ ret(0);
705
706 // Backtrack code (branch target for conditional backtracks).
707 if (backtrack_label_.is_linked()) {
708 __ bind(&backtrack_label_);
709 Backtrack();
710 }
711
712 Label exit_with_exception;
713
714 // Preempt-code
715 if (check_preempt_label_.is_linked()) {
716 SafeCallTarget(&check_preempt_label_);
717
718 __ push(backtrack_stackpointer());
719 __ push(edi);
720
721 CallCheckStackGuardState(ebx);
722 __ or_(eax, Operand(eax));
723 // If returning non-zero, we should end execution with the given
724 // result as return value.
725 __ j(not_zero, &exit_label_);
726
727 __ pop(edi);
728 __ pop(backtrack_stackpointer());
729 // String might have moved: Reload esi from frame.
730 __ mov(esi, Operand(ebp, kInputEnd));
731 SafeReturn();
732 }
733
734 // Backtrack stack overflow code.
735 if (stack_overflow_label_.is_linked()) {
736 SafeCallTarget(&stack_overflow_label_);
737 // Reached if the backtrack-stack limit has been hit.
738
739 Label grow_failed;
740 // Save registers before calling C function
741 __ push(esi);
742 __ push(edi);
743
744 // Call GrowStack(backtrack_stackpointer())
745 int num_arguments = 2;
746 FrameAlign(num_arguments, ebx);
747 __ lea(eax, Operand(ebp, kStackHighEnd));
748 __ mov(Operand(esp, 1 * kPointerSize), eax);
749 __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer());
750 ExternalReference grow_stack = ExternalReference::re_grow_stack();
751 CallCFunction(grow_stack, num_arguments);
752 // If return NULL, we have failed to grow the stack, and
753 // must exit with a stack-overflow exception.
754 __ or_(eax, Operand(eax));
755 __ j(equal, &exit_with_exception);
756 // Otherwise use return value as new stack pointer.
757 __ mov(backtrack_stackpointer(), eax);
758 // Restore saved registers and continue.
759 __ pop(edi);
760 __ pop(esi);
761 SafeReturn();
762 }
763
764 if (exit_with_exception.is_linked()) {
765 // If any of the code above needed to exit with an exception.
766 __ bind(&exit_with_exception);
767 // Exit with Result EXCEPTION(-1) to signal thrown exception.
768 __ mov(eax, EXCEPTION);
769 __ jmp(&exit_label_);
770 }
771
772 CodeDesc code_desc;
773 masm_->GetCode(&code_desc);
774 Handle<Code> code = Factory::NewCode(code_desc,
775 NULL,
776 Code::ComputeFlags(Code::REGEXP),
777 masm_->CodeObject());
778 LOG(RegExpCodeCreateEvent(*code, *source));
779 return Handle<Object>::cast(code);
780}
781
782
783void RegExpMacroAssemblerIA32::GoTo(Label* to) {
784 BranchOrBacktrack(no_condition, to);
785}
786
787
788void RegExpMacroAssemblerIA32::IfRegisterGE(int reg,
789 int comparand,
790 Label* if_ge) {
791 __ cmp(register_location(reg), Immediate(comparand));
792 BranchOrBacktrack(greater_equal, if_ge);
793}
794
795
796void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
797 int comparand,
798 Label* if_lt) {
799 __ cmp(register_location(reg), Immediate(comparand));
800 BranchOrBacktrack(less, if_lt);
801}
802
803
804void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
805 Label* if_eq) {
806 __ cmp(edi, register_location(reg));
807 BranchOrBacktrack(equal, if_eq);
808}
809
810
811RegExpMacroAssembler::IrregexpImplementation
812 RegExpMacroAssemblerIA32::Implementation() {
813 return kIA32Implementation;
814}
815
816
817void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
818 Label* on_end_of_input,
819 bool check_bounds,
820 int characters) {
821 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
822 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
823 if (check_bounds) {
824 CheckPosition(cp_offset + characters - 1, on_end_of_input);
825 }
826 LoadCurrentCharacterUnchecked(cp_offset, characters);
827}
828
829
830void RegExpMacroAssemblerIA32::PopCurrentPosition() {
831 Pop(edi);
832}
833
834
835void RegExpMacroAssemblerIA32::PopRegister(int register_index) {
836 Pop(eax);
837 __ mov(register_location(register_index), eax);
838}
839
840
841void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
842 Push(Immediate::CodeRelativeOffset(label));
843 CheckStackLimit();
844}
845
846
847void RegExpMacroAssemblerIA32::PushCurrentPosition() {
848 Push(edi);
849}
850
851
852void RegExpMacroAssemblerIA32::PushRegister(int register_index,
853 StackCheckFlag check_stack_limit) {
854 __ mov(eax, register_location(register_index));
855 Push(eax);
856 if (check_stack_limit) CheckStackLimit();
857}
858
859
860void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) {
861 __ mov(edi, register_location(reg));
862}
863
864
865void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) {
866 __ mov(backtrack_stackpointer(), register_location(reg));
867 __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
868}
869
870
871void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
872 ASSERT(register_index >= num_saved_registers_); // Reserved for positions!
873 __ mov(register_location(register_index), Immediate(to));
874}
875
876
877void RegExpMacroAssemblerIA32::Succeed() {
878 __ jmp(&success_label_);
879}
880
881
882void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
883 int cp_offset) {
884 if (cp_offset == 0) {
885 __ mov(register_location(reg), edi);
886 } else {
887 __ lea(eax, Operand(edi, cp_offset * char_size()));
888 __ mov(register_location(reg), eax);
889 }
890}
891
892
893void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
894 ASSERT(reg_from <= reg_to);
895 __ mov(eax, Operand(ebp, kInputStartMinusOne));
896 for (int reg = reg_from; reg <= reg_to; reg++) {
897 __ mov(register_location(reg), eax);
898 }
899}
900
901
902void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
903 __ mov(eax, backtrack_stackpointer());
904 __ sub(eax, Operand(ebp, kStackHighEnd));
905 __ mov(register_location(reg), eax);
906}
907
908
909// Private methods:
910
911void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) {
912 int num_arguments = 3;
913 FrameAlign(num_arguments, scratch);
914 // RegExp code frame pointer.
915 __ mov(Operand(esp, 2 * kPointerSize), ebp);
916 // Code* of self.
917 __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject()));
918 // Next address on the stack (will be address of return address).
919 __ lea(eax, Operand(esp, -kPointerSize));
920 __ mov(Operand(esp, 0 * kPointerSize), eax);
921 ExternalReference check_stack_guard =
922 ExternalReference::re_check_stack_guard_state();
923 CallCFunction(check_stack_guard, num_arguments);
924}
925
926
927// Helper function for reading a value out of a stack frame.
928template <typename T>
929static T& frame_entry(Address re_frame, int frame_offset) {
930 return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
931}
932
933
934int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
935 Code* re_code,
936 Address re_frame) {
937 if (StackGuard::IsStackOverflow()) {
938 Top::StackOverflow();
939 return EXCEPTION;
940 }
941
942 // If not real stack overflow the stack guard was used to interrupt
943 // execution for another purpose.
944
945 // Prepare for possible GC.
946 HandleScope handles;
947 Handle<Code> code_handle(re_code);
948
949 Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
950 // Current string.
951 bool is_ascii = subject->IsAsciiRepresentation();
952
953 ASSERT(re_code->instruction_start() <= *return_address);
954 ASSERT(*return_address <=
955 re_code->instruction_start() + re_code->instruction_size());
956
957 Object* result = Execution::HandleStackGuardInterrupt();
958
959 if (*code_handle != re_code) { // Return address no longer valid
960 int delta = *code_handle - re_code;
961 // Overwrite the return address on the stack.
962 *return_address += delta;
963 }
964
965 if (result->IsException()) {
966 return EXCEPTION;
967 }
968
969 // String might have changed.
970 if (subject->IsAsciiRepresentation() != is_ascii) {
971 // If we changed between an ASCII and an UC16 string, the specialized
972 // code cannot be used, and we need to restart regexp matching from
973 // scratch (including, potentially, compiling a new version of the code).
974 return RETRY;
975 }
976
977 // Otherwise, the content of the string might have moved. It must still
978 // be a sequential or external string with the same content.
979 // Update the start and end pointers in the stack frame to the current
980 // location (whether it has actually moved or not).
981 ASSERT(StringShape(*subject).IsSequential() ||
982 StringShape(*subject).IsExternal());
983
984 // The original start address of the characters to match.
985 const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
986
987 // Find the current start address of the same character at the current string
988 // position.
989 int start_index = frame_entry<int>(re_frame, kStartIndex);
990 const byte* new_address = StringCharacterPosition(*subject, start_index);
991
992 if (start_address != new_address) {
993 // If there is a difference, update the object pointer and start and end
994 // addresses in the RegExp stack frame to match the new value.
995 const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
996 int byte_length = end_address - start_address;
997 frame_entry<const String*>(re_frame, kInputString) = *subject;
998 frame_entry<const byte*>(re_frame, kInputStart) = new_address;
999 frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
1000 }
1001
1002 return 0;
1003}
1004
1005
1006Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
1007 ASSERT(register_index < (1<<30));
1008 if (num_registers_ <= register_index) {
1009 num_registers_ = register_index + 1;
1010 }
1011 return Operand(ebp, kRegisterZero - register_index * kPointerSize);
1012}
1013
1014
1015void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
1016 Label* on_outside_input) {
1017 __ cmp(edi, -cp_offset * char_size());
1018 BranchOrBacktrack(greater_equal, on_outside_input);
1019}
1020
1021
1022void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
1023 Label* to,
1024 Hint hint) {
1025 if (condition < 0) { // No condition
1026 if (to == NULL) {
1027 Backtrack();
1028 return;
1029 }
1030 __ jmp(to);
1031 return;
1032 }
1033 if (to == NULL) {
1034 __ j(condition, &backtrack_label_, hint);
1035 return;
1036 }
1037 __ j(condition, to, hint);
1038}
1039
1040
1041void RegExpMacroAssemblerIA32::SafeCall(Label* to) {
1042 __ call(to);
1043}
1044
1045
1046void RegExpMacroAssemblerIA32::SafeReturn() {
1047 __ add(Operand(esp, 0), Immediate(masm_->CodeObject()));
1048 __ ret(0);
1049}
1050
1051
1052void RegExpMacroAssemblerIA32::SafeCallTarget(Label* name) {
1053 __ bind(name);
1054 __ sub(Operand(esp, 0), Immediate(masm_->CodeObject()));
1055}
1056
1057
1058void RegExpMacroAssemblerIA32::Push(Register source) {
1059 ASSERT(!source.is(backtrack_stackpointer()));
1060 // Notice: This updates flags, unlike normal Push.
1061 __ sub(Operand(backtrack_stackpointer()), Immediate(kPointerSize));
1062 __ mov(Operand(backtrack_stackpointer(), 0), source);
1063}
1064
1065
1066void RegExpMacroAssemblerIA32::Push(Immediate value) {
1067 // Notice: This updates flags, unlike normal Push.
1068 __ sub(Operand(backtrack_stackpointer()), Immediate(kPointerSize));
1069 __ mov(Operand(backtrack_stackpointer(), 0), value);
1070}
1071
1072
1073void RegExpMacroAssemblerIA32::Pop(Register target) {
1074 ASSERT(!target.is(backtrack_stackpointer()));
1075 __ mov(target, Operand(backtrack_stackpointer(), 0));
1076 // Notice: This updates flags, unlike normal Pop.
1077 __ add(Operand(backtrack_stackpointer()), Immediate(kPointerSize));
1078}
1079
1080
1081void RegExpMacroAssemblerIA32::CheckPreemption() {
1082 // Check for preemption.
1083 Label no_preempt;
Steve Blockd0582a62009-12-15 09:54:21 +00001084 ExternalReference stack_limit =
1085 ExternalReference::address_of_stack_limit();
1086 __ cmp(esp, Operand::StaticVariable(stack_limit));
Steve Blocka7e24c12009-10-30 11:49:00 +00001087 __ j(above, &no_preempt, taken);
1088
1089 SafeCall(&check_preempt_label_);
1090
1091 __ bind(&no_preempt);
1092}
1093
1094
1095void RegExpMacroAssemblerIA32::CheckStackLimit() {
Steve Blockd0582a62009-12-15 09:54:21 +00001096 Label no_stack_overflow;
1097 ExternalReference stack_limit =
1098 ExternalReference::address_of_regexp_stack_limit();
1099 __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit));
1100 __ j(above, &no_stack_overflow);
Steve Blocka7e24c12009-10-30 11:49:00 +00001101
Steve Blockd0582a62009-12-15 09:54:21 +00001102 SafeCall(&stack_overflow_label_);
Steve Blocka7e24c12009-10-30 11:49:00 +00001103
Steve Blockd0582a62009-12-15 09:54:21 +00001104 __ bind(&no_stack_overflow);
Steve Blocka7e24c12009-10-30 11:49:00 +00001105}
1106
1107
1108void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) {
1109 // TODO(lrn): Since we no longer use the system stack arbitrarily (but we do
1110 // use it, e.g., for SafeCall), we know the number of elements on the stack
1111 // since the last frame alignment. We might be able to do this simpler then.
1112 int frameAlignment = OS::ActivationFrameAlignment();
1113 if (frameAlignment != 0) {
1114 // Make stack end at alignment and make room for num_arguments words
1115 // and the original value of esp.
1116 __ mov(scratch, esp);
1117 __ sub(Operand(esp), Immediate((num_arguments + 1) * kPointerSize));
1118 ASSERT(IsPowerOf2(frameAlignment));
1119 __ and_(esp, -frameAlignment);
1120 __ mov(Operand(esp, num_arguments * kPointerSize), scratch);
1121 } else {
1122 __ sub(Operand(esp), Immediate(num_arguments * kPointerSize));
1123 }
1124}
1125
1126
1127void RegExpMacroAssemblerIA32::CallCFunction(ExternalReference function,
1128 int num_arguments) {
1129 __ mov(Operand(eax), Immediate(function));
1130 __ call(Operand(eax));
1131 if (OS::ActivationFrameAlignment() != 0) {
1132 __ mov(esp, Operand(esp, num_arguments * kPointerSize));
1133 } else {
1134 __ add(Operand(esp), Immediate(num_arguments * sizeof(int32_t)));
1135 }
1136}
1137
1138
1139void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
1140 int characters) {
1141 if (mode_ == ASCII) {
1142 if (characters == 4) {
1143 __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
1144 } else if (characters == 2) {
1145 __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
1146 } else {
1147 ASSERT(characters == 1);
1148 __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
1149 }
1150 } else {
1151 ASSERT(mode_ == UC16);
1152 if (characters == 2) {
1153 __ mov(current_character(),
1154 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1155 } else {
1156 ASSERT(characters == 1);
1157 __ movzx_w(current_character(),
1158 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1159 }
1160 }
1161}
1162
1163
Steve Blocka7e24c12009-10-30 11:49:00 +00001164#undef __
1165
1166#endif // V8_NATIVE_REGEXP
1167
1168}} // namespace v8::internal