Version 3.11.5
Performance and stability improvements on all platforms.
git-svn-id: http://v8.googlecode.com/svn/trunk@11634 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/ChangeLog b/ChangeLog
index 97dac40..27665a1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2012-05-23: Version 3.11.5
+
+ Performance and stability improvements on all platforms.
+
+
2012-05-22: Version 3.11.4
Some cleanup to common.gypi. This fixes some host/target combinations
diff --git a/src/api.cc b/src/api.cc
index 52a84ed..b43aaf3 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -6045,13 +6045,6 @@
}
-int HeapGraphNode::GetRetainedSize() const {
- i::Isolate* isolate = i::Isolate::Current();
- IsDeadCheck(isolate, "v8::HeapSnapshot::GetRetainedSize");
- return ToInternal(this)->retained_size();
-}
-
-
int HeapGraphNode::GetChildrenCount() const {
i::Isolate* isolate = i::Isolate::Current();
IsDeadCheck(isolate, "v8::HeapSnapshot::GetChildrenCount");
@@ -6067,28 +6060,6 @@
}
-int HeapGraphNode::GetRetainersCount() const {
- i::Isolate* isolate = i::Isolate::Current();
- IsDeadCheck(isolate, "v8::HeapSnapshot::GetRetainersCount");
- return ToInternal(this)->retainers().length();
-}
-
-
-const HeapGraphEdge* HeapGraphNode::GetRetainer(int index) const {
- i::Isolate* isolate = i::Isolate::Current();
- IsDeadCheck(isolate, "v8::HeapSnapshot::GetRetainer");
- return reinterpret_cast<const HeapGraphEdge*>(
- ToInternal(this)->retainers()[index]);
-}
-
-
-const HeapGraphNode* HeapGraphNode::GetDominatorNode() const {
- i::Isolate* isolate = i::Isolate::Current();
- IsDeadCheck(isolate, "v8::HeapSnapshot::GetDominatorNode");
- return reinterpret_cast<const HeapGraphNode*>(ToInternal(this)->dominator());
-}
-
-
v8::Handle<v8::Value> HeapGraphNode::GetHeapValue() const {
i::Isolate* isolate = i::Isolate::Current();
IsDeadCheck(isolate, "v8::HeapGraphNode::GetHeapValue");
diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc
index ad2ab7e..218c355 100644
--- a/src/arm/code-stubs-arm.cc
+++ b/src/arm/code-stubs-arm.cc
@@ -4824,27 +4824,32 @@
__ IncrementCounter(isolate->counters()->regexp_entry_native(), 1, r0, r2);
// Isolates: note we add an additional parameter here (isolate pointer).
- const int kRegExpExecuteArguments = 8;
+ const int kRegExpExecuteArguments = 9;
const int kParameterRegisters = 4;
__ EnterExitFrame(false, kRegExpExecuteArguments - kParameterRegisters);
// Stack pointer now points to cell where return address is to be written.
// Arguments are before that on the stack or in registers.
- // Argument 8 (sp[16]): Pass current isolate address.
+ // Argument 9 (sp[20]): Pass current isolate address.
__ mov(r0, Operand(ExternalReference::isolate_address()));
+ __ str(r0, MemOperand(sp, 5 * kPointerSize));
+
+ // Argument 8 (sp[16]): Indicate that this is a direct call from JavaScript.
+ __ mov(r0, Operand(1));
__ str(r0, MemOperand(sp, 4 * kPointerSize));
- // Argument 7 (sp[12]): Indicate that this is a direct call from JavaScript.
- __ mov(r0, Operand(1));
- __ str(r0, MemOperand(sp, 3 * kPointerSize));
-
- // Argument 6 (sp[8]): Start (high end) of backtracking stack memory area.
+ // Argument 7 (sp[12]): Start (high end) of backtracking stack memory area.
__ mov(r0, Operand(address_of_regexp_stack_memory_address));
__ ldr(r0, MemOperand(r0, 0));
__ mov(r2, Operand(address_of_regexp_stack_memory_size));
__ ldr(r2, MemOperand(r2, 0));
__ add(r0, r0, Operand(r2));
+ __ str(r0, MemOperand(sp, 3 * kPointerSize));
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ __ mov(r0, Operand(0));
__ str(r0, MemOperand(sp, 2 * kPointerSize));
// Argument 5 (sp[4]): static offsets vector buffer.
@@ -4893,7 +4898,9 @@
// Check the result.
Label success;
- __ cmp(r0, Operand(NativeRegExpMacroAssembler::SUCCESS));
+ __ cmp(r0, Operand(1));
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ b(eq, &success);
Label failure;
__ cmp(r0, Operand(NativeRegExpMacroAssembler::FAILURE));
diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc
index a833624..11790e5 100644
--- a/src/arm/regexp-macro-assembler-arm.cc
+++ b/src/arm/regexp-macro-assembler-arm.cc
@@ -1,4 +1,4 @@
-// Copyright 2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -43,45 +43,49 @@
#ifndef V8_INTERPRETED_REGEXP
/*
* This assembler uses the following register assignment convention
+ * - r4 : Temporarily stores the index of capture start after a matching pass
+ * for a global regexp.
* - r5 : Pointer to current code object (Code*) including heap object tag.
* - r6 : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character offset!
* - r7 : Currently loaded character. Must be loaded using
* LoadCurrentCharacter before using any of the dispatch methods.
- * - r8 : points to tip of backtrack stack
+ * - r8 : Points to tip of backtrack stack
* - r9 : Unused, might be used by C code and expected unchanged.
* - r10 : End of input (points to byte after last character in input).
* - r11 : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
* - r12 : IP register, used by assembler. Very volatile.
- * - r13/sp : points to tip of C stack.
+ * - r13/sp : Points to tip of C stack.
*
* The remaining registers are free for computations.
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[52] Isolate* isolate (Address of the current isolate)
- * - fp[48] direct_call (if 1, direct call from JavaScript code,
- * if 0, call through the runtime system).
- * - fp[44] stack_area_base (High end of the memory area to use as
- * backtracking stack).
+ * - fp[56] Isolate* isolate (address of the current isolate)
+ * - fp[52] direct_call (if 1, direct call from JavaScript code,
+ * if 0, call through the runtime system).
+ * - fp[48] stack_area_base (high end of the memory area to use as
+ * backtracking stack).
+ * - fp[44] capture array size (may fit multiple sets of matches)
* - fp[40] int* capture_array (int[num_saved_registers_], for output).
* - fp[36] secondary link/return address used by native call.
* --- sp when called ---
- * - fp[32] return address (lr).
- * - fp[28] old frame pointer (r11).
+ * - fp[32] return address (lr).
+ * - fp[28] old frame pointer (r11).
* - fp[0..24] backup of registers r4..r10.
* --- frame pointer ----
- * - fp[-4] end of input (Address of end of string).
- * - fp[-8] start of input (Address of first character in string).
+ * - fp[-4] end of input (address of end of string).
+ * - fp[-8] start of input (address of first character in string).
* - fp[-12] start index (character index of start).
* - fp[-16] void* input_string (location of a handle containing the string).
- * - fp[-20] Offset of location before start of input (effectively character
+ * - fp[-20] success counter (only for global regexps to count matches).
+ * - fp[-24] Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a
* non-position.
- * - fp[-24] At start (if 1, we are starting at the start of the
+ * - fp[-28] At start (if 1, we are starting at the start of the
* string, otherwise 0)
- * - fp[-28] register 0 (Only positions must be stored in the first
+ * - fp[-32] register 0 (Only positions must be stored in the first
* - register 1 num_saved_registers_ registers)
* - ...
* - register num_registers-1
@@ -197,9 +201,9 @@
void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) {
Label not_at_start;
// Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
+ __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
__ cmp(r0, Operand(0, RelocInfo::NONE));
- BranchOrBacktrack(eq, ¬_at_start);
+ BranchOrBacktrack(ne, ¬_at_start);
// If we did, are we still at the start of the input?
__ ldr(r1, MemOperand(frame_pointer(), kInputStart));
@@ -212,9 +216,9 @@
void RegExpMacroAssemblerARM::CheckNotAtStart(Label* on_not_at_start) {
// Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
+ __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
__ cmp(r0, Operand(0, RelocInfo::NONE));
- BranchOrBacktrack(eq, on_not_at_start);
+ BranchOrBacktrack(ne, on_not_at_start);
// If we did, are we still at the start of the input?
__ ldr(r1, MemOperand(frame_pointer(), kInputStart));
__ add(r0, end_of_input_address(), Operand(current_input_offset()));
@@ -655,6 +659,7 @@
Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
+ Label return_r0;
// Finalize code - write the entry point code now we know how many
// registers we need.
@@ -678,8 +683,9 @@
// Set frame pointer in space for it if this is not a direct call
// from generated code.
__ add(frame_pointer(), sp, Operand(4 * kPointerSize));
+ __ mov(r0, Operand(0, RelocInfo::NONE));
+ __ push(r0); // Make room for success counter and initialize it to 0.
__ push(r0); // Make room for "position - 1" constant (value is irrelevant).
- __ push(r0); // Make room for "at start" constant (value is irrelevant).
// Check if we have space on the stack for registers.
Label stack_limit_hit;
Label stack_ok;
@@ -698,13 +704,13 @@
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
__ bind(&stack_limit_hit);
CallCheckStackGuardState(r0);
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returned value is non-zero, we exit with the returned value as result.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
__ bind(&stack_ok);
@@ -725,41 +731,45 @@
// position registers.
__ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
- // Determine whether the start index is zero, that is at the start of the
- // string, and store that value in a local variable.
- __ cmp(r1, Operand(0));
- __ mov(r1, Operand(1), LeaveCC, eq);
- __ mov(r1, Operand(0, RelocInfo::NONE), LeaveCC, ne);
- __ str(r1, MemOperand(frame_pointer(), kAtStart));
+ // Initialize code pointer register
+ __ mov(code_pointer(), Operand(masm_->CodeObject()));
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmp(r1, Operand(0, RelocInfo::NONE));
+ __ b(ne, &load_char_start_regexp);
+ __ mov(current_character(), Operand('\n'), LeaveCC, eq);
+ __ jmp(&start_regexp);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1
-
- // Address of register 0.
- __ add(r1, frame_pointer(), Operand(kRegisterZero));
- __ mov(r2, Operand(num_saved_registers_));
- Label init_loop;
- __ bind(&init_loop);
- __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
- __ sub(r2, r2, Operand(1), SetCC);
- __ b(ne, &init_loop);
+ if (num_saved_registers_ > 8) {
+ // Address of register 0.
+ __ add(r1, frame_pointer(), Operand(kRegisterZero));
+ __ mov(r2, Operand(num_saved_registers_));
+ Label init_loop;
+ __ bind(&init_loop);
+ __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
+ __ sub(r2, r2, Operand(1), SetCC);
+ __ b(ne, &init_loop);
+ } else {
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ str(r0, register_location(i));
+ }
+ }
}
// Initialize backtrack stack pointer.
__ ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd));
- // Initialize code pointer register
- __ mov(code_pointer(), Operand(masm_->CodeObject()));
- // Load previous char as initial value of current character register.
- Label at_start;
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
- __ cmp(r0, Operand(0, RelocInfo::NONE));
- __ b(ne, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ mov(current_character(), Operand('\n'));
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -786,6 +796,10 @@
for (int i = 0; i < num_saved_registers_; i += 2) {
__ ldr(r2, register_location(i));
__ ldr(r3, register_location(i + 1));
+ if (global()) {
+ // Keep capture start in r4 for the zero-length check later.
+ __ mov(r4, r2);
+ }
if (mode_ == UC16) {
__ add(r2, r1, Operand(r2, ASR, 1));
__ add(r3, r1, Operand(r3, ASR, 1));
@@ -797,10 +811,54 @@
__ str(r3, MemOperand(r0, kPointerSize, PostIndex));
}
}
- __ mov(r0, Operand(SUCCESS));
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ ldr(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ __ ldr(r2, MemOperand(frame_pointer(), kRegisterOutput));
+ // Increment success counter.
+ __ add(r0, r0, Operand(1));
+ __ str(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ sub(r1, r1, Operand(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmp(r1, Operand(num_saved_registers_));
+ __ b(lt, &return_r0);
+
+ __ str(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ // Advance the location for output.
+ __ add(r2, r2, Operand(num_saved_registers_ * kPointerSize));
+ __ str(r2, MemOperand(frame_pointer(), kRegisterOutput));
+
+ // Prepare r0 to initialize registers with its value in the next run.
+ __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ // Special case for zero-length matches.
+ // r4: capture start index
+ __ cmp(current_input_offset(), r4);
+ // Not a zero-length match, restart.
+ __ b(ne, &load_char_start_regexp);
+ // Offset from the end is zero if we already reached the end.
+ __ cmp(current_input_offset(), Operand(0));
+ __ b(eq, &exit_label_);
+ // Advance current position after a zero-length match.
+ __ add(current_input_offset(),
+ current_input_offset(),
+ Operand((mode_ == UC16) ? 2 : 1));
+ __ b(&load_char_start_regexp);
+ } else {
+ __ mov(r0, Operand(SUCCESS));
+ }
}
+
// Exit and return r0
__ bind(&exit_label_);
+ if (global()) {
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ }
+
+ __ bind(&return_r0);
// Skip sp past regexp registers and local variables..
__ mov(sp, frame_pointer());
// Restore registers r4..r11 and return (restoring lr to pc).
@@ -822,7 +880,7 @@
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returning non-zero, we should end execution with the given
// result as return value.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
// String might have moved: Reload end of string from frame.
__ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
@@ -859,7 +917,7 @@
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
}
CodeDesc code_desc;
@@ -1014,8 +1072,9 @@
}
-void RegExpMacroAssemblerARM::Succeed() {
+bool RegExpMacroAssemblerARM::Succeed() {
__ jmp(&success_label_);
+ return global();
}
@@ -1307,8 +1366,9 @@
int characters) {
Register offset = current_input_offset();
if (cp_offset != 0) {
- __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
- offset = r0;
+ // r4 is not being used to store the capture start index at this point.
+ __ add(r4, current_input_offset(), Operand(cp_offset * char_size()));
+ offset = r4;
}
// The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
// and the operating system running on the target allow it.
diff --git a/src/arm/regexp-macro-assembler-arm.h b/src/arm/regexp-macro-assembler-arm.h
index 14f984f..f2d5c55 100644
--- a/src/arm/regexp-macro-assembler-arm.h
+++ b/src/arm/regexp-macro-assembler-arm.h
@@ -1,4 +1,4 @@
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -113,7 +113,7 @@
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
@@ -137,7 +137,8 @@
static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack parameters placed by caller.
static const int kRegisterOutput = kSecondaryReturnAddress + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
@@ -149,10 +150,10 @@
static const int kInputString = kStartIndex - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
- static const int kInputStartMinusOne = kInputString - kPointerSize;
- static const int kAtStart = kInputStartMinusOne - kPointerSize;
+ static const int kSuccessfulCaptures = kInputString - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kAtStart - kPointerSize;
+ static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h
index 585f1e0..d1cad15 100644
--- a/src/arm/simulator-arm.h
+++ b/src/arm/simulator-arm.h
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -49,16 +49,16 @@
(entry(p0, p1, p2, p3, p4))
typedef int (*arm_regexp_matcher)(String*, int, const byte*, const byte*,
- void*, int*, Address, int, Isolate*);
+ void*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address
// should act as a function matching the type arm_regexp_matcher.
// The fifth argument is a dummy that reserves the space used for
// the return address added by the ExitFrame in native calls.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
(FUNCTION_CAST<arm_regexp_matcher>(entry)( \
- p0, p1, p2, p3, NULL, p4, p5, p6, p7))
+ p0, p1, p2, p3, NULL, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
reinterpret_cast<TryCatch*>(try_catch_address)
@@ -401,9 +401,9 @@
reinterpret_cast<Object*>(Simulator::current(Isolate::Current())->Call( \
FUNCTION_ADDR(entry), 5, p0, p1, p2, p3, p4))
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
Simulator::current(Isolate::Current())->Call( \
- entry, 9, p0, p1, p2, p3, NULL, p4, p5, p6, p7)
+ entry, 10, p0, p1, p2, p3, NULL, p4, p5, p6, p7, p8)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
try_catch_address == NULL ? \
diff --git a/src/d8.cc b/src/d8.cc
index ddd4100..3f0b224 100644
--- a/src/d8.cc
+++ b/src/d8.cc
@@ -834,8 +834,6 @@
global_template->Set(String::New("print"), FunctionTemplate::New(Print));
global_template->Set(String::New("write"), FunctionTemplate::New(Write));
global_template->Set(String::New("read"), FunctionTemplate::New(Read));
- global_template->Set(String::New("readbinary"),
- FunctionTemplate::New(ReadBinary));
global_template->Set(String::New("readbuffer"),
FunctionTemplate::New(ReadBuffer));
global_template->Set(String::New("readline"),
@@ -1056,23 +1054,6 @@
}
-Handle<Value> Shell::ReadBinary(const Arguments& args) {
- String::Utf8Value filename(args[0]);
- int size;
- if (*filename == NULL) {
- return ThrowException(String::New("Error loading file"));
- }
- char* chars = ReadChars(*filename, &size);
- if (chars == NULL) {
- return ThrowException(String::New("Error reading file"));
- }
- // We skip checking the string for UTF8 characters and use it raw as
- // backing store for the external string with 8-bit characters.
- BinaryResource* resource = new BinaryResource(chars, size);
- return String::NewExternal(resource);
-}
-
-
Handle<Value> Shell::ReadBuffer(const Arguments& args) {
String::Utf8Value filename(args[0]);
int length;
diff --git a/src/d8.h b/src/d8.h
index 23fdebc..b315086 100644
--- a/src/d8.h
+++ b/src/d8.h
@@ -307,7 +307,6 @@
static Handle<Value> EnableProfiler(const Arguments& args);
static Handle<Value> DisableProfiler(const Arguments& args);
static Handle<Value> Read(const Arguments& args);
- static Handle<Value> ReadBinary(const Arguments& args);
static Handle<Value> ReadBuffer(const Arguments& args);
static Handle<String> ReadFromStdin();
static Handle<Value> ReadLine(const Arguments& args) {
diff --git a/src/heap-inl.h b/src/heap-inl.h
index e12895a..9d79db2 100644
--- a/src/heap-inl.h
+++ b/src/heap-inl.h
@@ -595,12 +595,24 @@
void ExternalStringTable::Verify() {
#ifdef DEBUG
for (int i = 0; i < new_space_strings_.length(); ++i) {
- ASSERT(heap_->InNewSpace(new_space_strings_[i]));
- ASSERT(new_space_strings_[i] != HEAP->raw_unchecked_the_hole_value());
+ Object* obj = Object::cast(new_space_strings_[i]);
+ // TODO(yangguo): check that the object is indeed an external string.
+ ASSERT(heap_->InNewSpace(obj));
+ ASSERT(obj != HEAP->raw_unchecked_the_hole_value());
+ if (obj->IsExternalAsciiString()) {
+ ExternalAsciiString* string = ExternalAsciiString::cast(obj);
+ ASSERT(String::IsAscii(string->GetChars(), string->length()));
+ }
}
for (int i = 0; i < old_space_strings_.length(); ++i) {
- ASSERT(!heap_->InNewSpace(old_space_strings_[i]));
- ASSERT(old_space_strings_[i] != HEAP->raw_unchecked_the_hole_value());
+ Object* obj = Object::cast(old_space_strings_[i]);
+ // TODO(yangguo): check that the object is indeed an external string.
+ ASSERT(!heap_->InNewSpace(obj));
+ ASSERT(obj != HEAP->raw_unchecked_the_hole_value());
+ if (obj->IsExternalAsciiString()) {
+ ExternalAsciiString* string = ExternalAsciiString::cast(obj);
+ ASSERT(String::IsAscii(string->GetChars(), string->length()));
+ }
}
#endif
}
diff --git a/src/heap.cc b/src/heap.cc
index d3c7f0a..47b259e 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -3326,6 +3326,8 @@
return Failure::OutOfMemoryException();
}
+ ASSERT(String::IsAscii(resource->data(), static_cast<int>(length)));
+
Map* map = external_ascii_string_map();
Object* result;
{ MaybeObject* maybe_result = Allocate(map, NEW_SPACE);
@@ -4490,6 +4492,16 @@
String::cast(result)->set_length(length);
String::cast(result)->set_hash_field(String::kEmptyHashField);
ASSERT_EQ(size, HeapObject::cast(result)->Size());
+
+#ifdef DEBUG
+ if (FLAG_verify_heap) {
+ // Initialize string's content to ensure ASCII-ness (character range 0-127)
+ // as required when verifying the heap.
+ char* dest = SeqAsciiString::cast(result)->GetChars();
+ memset(dest, 0x0F, length * kCharSize);
+ }
+#endif // DEBUG
+
return result;
}
diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc
index a1c6edd..a701d1b 100644
--- a/src/ia32/code-stubs-ia32.cc
+++ b/src/ia32/code-stubs-ia32.cc
@@ -3822,20 +3822,24 @@
__ IncrementCounter(counters->regexp_entry_native(), 1);
// Isolates: note we add an additional parameter here (isolate pointer).
- static const int kRegExpExecuteArguments = 8;
+ static const int kRegExpExecuteArguments = 9;
__ EnterApiExitFrame(kRegExpExecuteArguments);
- // Argument 8: Pass current isolate address.
- __ mov(Operand(esp, 7 * kPointerSize),
+ // Argument 9: Pass current isolate address.
+ __ mov(Operand(esp, 8 * kPointerSize),
Immediate(ExternalReference::isolate_address()));
- // Argument 7: Indicate that this is a direct call from JavaScript.
- __ mov(Operand(esp, 6 * kPointerSize), Immediate(1));
+ // Argument 8: Indicate that this is a direct call from JavaScript.
+ __ mov(Operand(esp, 7 * kPointerSize), Immediate(1));
- // Argument 6: Start (high end) of backtracking stack memory area.
+ // Argument 7: Start (high end) of backtracking stack memory area.
__ mov(esi, Operand::StaticVariable(address_of_regexp_stack_memory_address));
__ add(esi, Operand::StaticVariable(address_of_regexp_stack_memory_size));
- __ mov(Operand(esp, 5 * kPointerSize), esi);
+ __ mov(Operand(esp, 6 * kPointerSize), esi);
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ __ mov(Operand(esp, 5 * kPointerSize), Immediate(0));
// Argument 5: static offsets vector buffer.
__ mov(Operand(esp, 4 * kPointerSize),
@@ -3898,7 +3902,9 @@
// Check the result.
Label success;
- __ cmp(eax, NativeRegExpMacroAssembler::SUCCESS);
+ __ cmp(eax, 1);
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ j(equal, &success);
Label failure;
__ cmp(eax, NativeRegExpMacroAssembler::FAILURE);
diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc
index 0029f33..cba1660 100644
--- a/src/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/ia32/regexp-macro-assembler-ia32.cc
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -42,28 +42,30 @@
#ifndef V8_INTERPRETED_REGEXP
/*
* This assembler uses the following register assignment convention
- * - edx : current character. Must be loaded using LoadCurrentCharacter
- * before using any of the dispatch methods.
- * - edi : current position in input, as negative offset from end of string.
+ * - edx : Current character. Must be loaded using LoadCurrentCharacter
+ * before using any of the dispatch methods. Temporarily stores the
+ * index of capture start after a matching pass for a global regexp.
+ * - edi : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character offset!
* - esi : end of input (points to byte after last character in input).
- * - ebp : frame pointer. Used to access arguments, local variables and
+ * - ebp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
- * - esp : points to tip of C stack.
- * - ecx : points to tip of backtrack stack
+ * - esp : Points to tip of C stack.
+ * - ecx : Points to tip of backtrack stack
*
* The registers eax and ebx are free to use for computations.
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
- * - Isolate* isolate (Address of the current isolate)
+ * - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0
* call through the runtime system)
- * - stack_area_base (High end of the memory area to use as
+ * - stack_area_base (high end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
+ * - end of input (address of end of string)
+ * - start of input (address of first character in string)
* - start index (character index of start)
* - String* input_string (location of a handle containing the string)
* --- frame alignment (if applicable) ---
@@ -72,9 +74,10 @@
* - backup of caller esi
* - backup of caller edi
* - backup of caller ebx
+ * - success counter (only for global regexps to count matches).
* - Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a non-position.
- * - register 0 ebp[-4] (Only positions must be stored in the first
+ * - register 0 ebp[-4] (only positions must be stored in the first
* - register 1 ebp[-8] num_saved_registers_ registers)
* - ...
*
@@ -706,13 +709,16 @@
void RegExpMacroAssemblerIA32::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(eax, Immediate(0));
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(eax, Immediate(FAILURE));
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
+ Label return_eax;
// Finalize code - write the entry point code now we know how many
// registers we need.
@@ -731,6 +737,7 @@
__ push(esi);
__ push(edi);
__ push(ebx); // Callee-save on MacOS.
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
__ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers.
@@ -750,13 +757,13 @@
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ mov(eax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_eax);
__ bind(&stack_limit_hit);
CallCheckStackGuardState(ebx);
__ or_(eax, eax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_eax);
__ bind(&stack_ok);
// Load start index for later use.
@@ -783,19 +790,8 @@
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
- if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
- // Fill saved registers with initial value = start offset - 1
- // Fill in stack push order, to avoid accessing across an unwritten
- // page (a problem on Windows).
- __ mov(ecx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ mov(Operand(ebp, ecx, times_1, +0), eax);
- __ sub(ecx, Immediate(kPointerSize));
- __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
- __ j(greater, &init_loop);
- }
- // Ensure that we have written to each stack page, in order. Skipping a page
+#ifdef WIN32
+ // Ensure that we write to each stack page, in order. Skipping a page
// on Windows can cause segmentation faults. Assuming page size is 4k.
const int kPageSize = 4096;
const int kRegistersPerPage = kPageSize / kPointerSize;
@@ -804,20 +800,45 @@
i += kRegistersPerPage) {
__ mov(register_location(i), eax); // One write every page.
}
+#endif // WIN32
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmp(Operand(ebp, kStartIndex), Immediate(0));
+ __ j(not_equal, &load_char_start_regexp, Label::kNear);
+ __ mov(current_character(), '\n');
+ __ jmp(&start_regexp, Label::kNear);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
+ if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
+ // Fill saved registers with initial value = start offset - 1
+ // Fill in stack push order, to avoid accessing across an unwritten
+ // page (a problem on Windows).
+ if (num_saved_registers_ > 8) {
+ __ mov(ecx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ mov(Operand(ebp, ecx, times_1, 0), eax);
+ __ sub(ecx, Immediate(kPointerSize));
+ __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ mov(register_location(i), eax);
+ }
+ }
+ }
// Initialize backtrack stack pointer.
__ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ mov(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -836,6 +857,10 @@
}
for (int i = 0; i < num_saved_registers_; i++) {
__ mov(eax, register_location(i));
+ if (i == 0 && global()) {
+ // Keep capture start in edx for the zero-length check later.
+ __ mov(edx, eax);
+ }
// Convert to index from start of string, not end.
__ add(eax, ecx);
if (mode_ == UC16) {
@@ -844,10 +869,54 @@
__ mov(Operand(ebx, i * kPointerSize), eax);
}
}
- __ mov(eax, Immediate(SUCCESS));
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ inc(Operand(ebp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ mov(ecx, Operand(ebp, kNumOutputRegisters));
+ __ sub(ecx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmp(ecx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ mov(Operand(ebp, kNumOutputRegisters), ecx);
+ // Advance the location for output.
+ __ add(Operand(ebp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kPointerSize));
+
+ // Prepare eax to initialize registers with its value in the next run.
+ __ mov(eax, Operand(ebp, kInputStartMinusOne));
+
+ // Special case for zero-length matches.
+ // edx: capture start index
+ __ cmp(edi, edx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &load_char_start_regexp);
+ // edi (offset from the end) is zero if we already reached the end.
+ __ test(edi, edi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ add(edi, Immediate(2));
+ } else {
+ __ inc(edi);
+ }
+ __ jmp(&load_char_start_regexp);
+ } else {
+ __ mov(eax, Immediate(SUCCESS));
+ }
}
- // Exit and return eax
+
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ mov(eax, Operand(ebp, kSuccessfulCaptures));
+ }
+
+ __ bind(&return_eax);
// Skip esp past regexp registers.
__ lea(esp, Operand(ebp, kBackup_ebx));
// Restore callee-save registers.
@@ -877,7 +946,7 @@
__ or_(eax, eax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_eax);
__ pop(edi);
__ pop(backtrack_stackpointer());
@@ -924,7 +993,7 @@
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ mov(eax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_eax);
}
CodeDesc code_desc;
@@ -1043,8 +1112,9 @@
}
-void RegExpMacroAssemblerIA32::Succeed() {
+bool RegExpMacroAssemblerIA32::Succeed() {
__ jmp(&success_label_);
+ return global();
}
diff --git a/src/ia32/regexp-macro-assembler-ia32.h b/src/ia32/regexp-macro-assembler-ia32.h
index 78cd069..f631ffc 100644
--- a/src/ia32/regexp-macro-assembler-ia32.h
+++ b/src/ia32/regexp-macro-assembler-ia32.h
@@ -1,4 +1,4 @@
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -111,7 +111,7 @@
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
@@ -135,7 +135,11 @@
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value.
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
// Below the frame pointer - local stack variables.
@@ -144,7 +148,8 @@
static const int kBackup_esi = kFramePointer - kPointerSize;
static const int kBackup_edi = kBackup_esi - kPointerSize;
static const int kBackup_ebx = kBackup_edi - kPointerSize;
- static const int kInputStartMinusOne = kBackup_ebx - kPointerSize;
+ static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
diff --git a/src/ia32/simulator-ia32.h b/src/ia32/simulator-ia32.h
index 13ddf35..478d4ce 100644
--- a/src/ia32/simulator-ia32.h
+++ b/src/ia32/simulator-ia32.h
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -40,12 +40,12 @@
typedef int (*regexp_matcher)(String*, int, const byte*,
- const byte*, int*, Address, int, Isolate*);
+ const byte*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address should
// expect eight int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
- (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7))
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
+ (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
diff --git a/src/isolate.h b/src/isolate.h
index f1c9b3c..f51b4e1 100644
--- a/src/isolate.h
+++ b/src/isolate.h
@@ -965,7 +965,7 @@
// SerializerDeserializer state.
static const int kPartialSnapshotCacheCapacity = 1400;
- static const int kJSRegexpStaticOffsetsVectorSize = 50;
+ static const int kJSRegexpStaticOffsetsVectorSize = 128;
Address external_callback() {
return thread_local_top_.external_callback_;
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 3455abc..cbd0b26 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -429,6 +429,7 @@
RegExpEngine::CompilationResult result =
RegExpEngine::Compile(&compile_data,
flags.is_ignore_case(),
+ flags.is_global(),
flags.is_multiline(),
pattern,
sample_subject,
@@ -515,7 +516,23 @@
}
-RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
+int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+ int registers_per_match,
+ int* max_matches) {
+#ifdef V8_INTERPRETED_REGEXP
+ // Global loop in interpreted regexp is not implemented. Therefore we choose
+ // the size of the offsets vector so that it can only store one match.
+ *max_matches = 1;
+ return registers_per_match;
+#else // V8_INTERPRETED_REGEXP
+ int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
+ *max_matches = size / registers_per_match;
+ return size;
+#endif // V8_INTERPRETED_REGEXP
+}
+
+
+int RegExpImpl::IrregexpExecRaw(
Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
@@ -617,7 +634,7 @@
OffsetsVector registers(required_registers, isolate);
- IrregexpResult res = RegExpImpl::IrregexpExecOnce(
+ int res = RegExpImpl::IrregexpExecRaw(
jsregexp, subject, previous_index, Vector<int>(registers.vector(),
registers.length()));
if (res == RE_SUCCESS) {
@@ -5780,6 +5797,7 @@
RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpCompileData* data,
bool ignore_case,
+ bool is_global,
bool is_multiline,
Handle<String> pattern,
Handle<String> sample_subject,
@@ -5883,6 +5901,8 @@
macro_assembler.SetCurrentPositionFromEnd(max_length);
}
+ macro_assembler.set_global(is_global);
+
return compiler.Assemble(¯o_assembler,
node,
data->capture_count,
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 20313ca..2e90e9a 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -109,16 +109,22 @@
static int IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject);
- // Execute a regular expression once on the subject, starting from
- // character "index".
- // If successful, returns RE_SUCCESS and set the capture positions
- // in the first registers.
+ // Calculate the size of offsets vector for the case of global regexp
+ // and the number of matches this vector is able to store.
+ static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+ int registers_per_match,
+ int* max_matches);
+
+ // Execute a regular expression on the subject, starting from index.
+ // If matching succeeds, return the number of matches. This can be larger
+ // than one in the case of global regular expressions.
+ // The captures and subcaptures are stored into the registers vector.
// If matching fails, returns RE_FAILURE.
// If execution fails, sets a pending exception and returns RE_EXCEPTION.
- static IrregexpResult IrregexpExecOnce(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- Vector<int> registers);
+ static int IrregexpExecRaw(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ int index,
+ Vector<int> registers);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
@@ -1545,6 +1551,7 @@
static CompilationResult Compile(RegExpCompileData* input,
bool ignore_case,
+ bool global,
bool multiline,
Handle<String> pattern,
Handle<String> sample_subject,
@@ -1573,7 +1580,8 @@
inline int* vector() { return vector_; }
inline int length() { return offsets_vector_length_; }
- static const int kStaticOffsetsVectorSize = 50;
+ static const int kStaticOffsetsVectorSize =
+ Isolate::kJSRegexpStaticOffsetsVectorSize;
private:
static Address static_offsets_vector_address(Isolate* isolate) {
diff --git a/src/mark-compact.cc b/src/mark-compact.cc
index c455564..6954268 100644
--- a/src/mark-compact.cc
+++ b/src/mark-compact.cc
@@ -2738,7 +2738,9 @@
// We have to zap this pointer, because the store buffer may overflow later,
// and then we have to scan the entire heap and we don't want to find
// spurious newspace pointers in the old space.
- *p = reinterpret_cast<HeapObject*>(Smi::FromInt(0));
+ // TODO(mstarzinger): This was changed to a sentinel value to track down
+ // rare crashes, change it back to Smi::FromInt(0) later.
+ *p = reinterpret_cast<HeapObject*>(Smi::FromInt(0x0f100d00 >> 1)); // flood
}
}
diff --git a/src/objects-debug.cc b/src/objects-debug.cc
index 9006abd..89b25bd 100644
--- a/src/objects-debug.cc
+++ b/src/objects-debug.cc
@@ -458,10 +458,17 @@
ConsString::cast(this)->ConsStringVerify();
} else if (IsSlicedString()) {
SlicedString::cast(this)->SlicedStringVerify();
+ } else if (IsSeqAsciiString()) {
+ SeqAsciiString::cast(this)->SeqAsciiStringVerify();
}
}
+void SeqAsciiString::SeqAsciiStringVerify() {
+ CHECK(String::IsAscii(GetChars(), length()));
+}
+
+
void ConsString::ConsStringVerify() {
CHECK(this->first()->IsString());
CHECK(this->second() == GetHeap()->empty_string() ||
diff --git a/src/objects.h b/src/objects.h
index 4fd29ad..421a531 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -7242,6 +7242,10 @@
unsigned* offset,
unsigned chars);
+#ifdef DEBUG
+ void SeqAsciiStringVerify();
+#endif
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SeqAsciiString);
};
diff --git a/src/profile-generator-inl.h b/src/profile-generator-inl.h
index 9afc52f..6c64350 100644
--- a/src/profile-generator-inl.h
+++ b/src/profile-generator-inl.h
@@ -118,32 +118,12 @@
}
-int HeapEntry::set_retainers_index(int index) {
- retainers_index_ = index;
- int next_index = index + retainers_count_;
- retainers_count_ = 0;
- return next_index;
-}
-
-
HeapGraphEdge** HeapEntry::children_arr() {
ASSERT(children_index_ >= 0);
return &snapshot_->children()[children_index_];
}
-HeapGraphEdge** HeapEntry::retainers_arr() {
- ASSERT(retainers_index_ >= 0);
- return &snapshot_->retainers()[retainers_index_];
-}
-
-
-HeapEntry* HeapEntry::dominator() const {
- ASSERT(dominator_ >= 0);
- return &snapshot_->entries()[dominator_];
-}
-
-
SnapshotObjectId HeapObjectsMap::GetNthGcSubrootId(int delta) {
return kGcRootsFirstSubrootId + delta * kObjectIdStep;
}
diff --git a/src/profile-generator.cc b/src/profile-generator.cc
index da2a969..b670d4e 100644
--- a/src/profile-generator.cc
+++ b/src/profile-generator.cc
@@ -964,16 +964,10 @@
const char* name,
SnapshotObjectId id,
int self_size)
- : painted_(false),
- user_reachable_(false),
- dominator_(kNoEntry),
- type_(type),
- retainers_count_(0),
- retainers_index_(-1),
+ : type_(type),
children_count_(0),
children_index_(-1),
self_size_(self_size),
- retained_size_(0),
id_(id),
snapshot_(snapshot),
name_(name) { }
@@ -985,7 +979,6 @@
HeapGraphEdge edge(type, name, this->index(), entry->index());
snapshot_->edges().Add(edge);
++children_count_;
- ++entry->retainers_count_;
}
@@ -995,7 +988,6 @@
HeapGraphEdge edge(type, index, this->index(), entry->index());
snapshot_->edges().Add(edge);
++children_count_;
- ++entry->retainers_count_;
}
@@ -1007,9 +999,8 @@
void HeapEntry::Print(
const char* prefix, const char* edge_name, int max_depth, int indent) {
STATIC_CHECK(sizeof(unsigned) == sizeof(id()));
- OS::Print("%6d %7d @%6u %*c %s%s: ",
- self_size(), retained_size(), id(),
- indent, ' ', prefix, edge_name);
+ OS::Print("%6d @%6u %*c %s%s: ",
+ self_size(), id(), indent, ' ', prefix, edge_name);
if (type() != kString) {
OS::Print("%s %.40s\n", TypeAsString(), name_);
} else {
@@ -1091,13 +1082,13 @@
template <> struct SnapshotSizeConstants<4> {
static const int kExpectedHeapGraphEdgeSize = 12;
- static const int kExpectedHeapEntrySize = 40;
+ static const int kExpectedHeapEntrySize = 24;
static const size_t kMaxSerializableSnapshotRawSize = 256 * MB;
};
template <> struct SnapshotSizeConstants<8> {
static const int kExpectedHeapGraphEdgeSize = 24;
- static const int kExpectedHeapEntrySize = 48;
+ static const int kExpectedHeapEntrySize = 32;
static const uint64_t kMaxSerializableSnapshotRawSize =
static_cast<uint64_t>(6000) * MB;
};
@@ -1139,16 +1130,6 @@
}
-static void HeapEntryClearPaint(HeapEntry* entry_ptr) {
- entry_ptr->clear_paint();
-}
-
-
-void HeapSnapshot::ClearPaint() {
- entries_.Iterate(HeapEntryClearPaint);
-}
-
-
HeapEntry* HeapSnapshot::AddRootEntry() {
ASSERT(root_index_ == HeapEntry::kNoEntry);
ASSERT(entries_.is_empty()); // Root entry must be the first one.
@@ -1196,32 +1177,19 @@
}
-void HeapSnapshot::FillChildrenAndRetainers() {
+void HeapSnapshot::FillChildren() {
ASSERT(children().is_empty());
children().Allocate(edges().length());
- ASSERT(retainers().is_empty());
- retainers().Allocate(edges().length());
int children_index = 0;
- int retainers_index = 0;
for (int i = 0; i < entries().length(); ++i) {
HeapEntry* entry = &entries()[i];
children_index = entry->set_children_index(children_index);
- retainers_index = entry->set_retainers_index(retainers_index);
}
ASSERT(edges().length() == children_index);
- ASSERT(edges().length() == retainers_index);
for (int i = 0; i < edges().length(); ++i) {
HeapGraphEdge* edge = &edges()[i];
edge->ReplaceToIndexWithEntry(this);
edge->from()->add_child(edge);
- edge->to()->add_retainer(edge);
- }
-}
-
-
-void HeapSnapshot::SetDominatorsToSelf() {
- for (int i = 0; i < entries_.length(); ++i) {
- entries_[i].set_dominator(&entries_[i]);
}
}
@@ -1284,7 +1252,6 @@
GetMemoryUsedByList(entries_) +
GetMemoryUsedByList(edges_) +
GetMemoryUsedByList(children_) +
- GetMemoryUsedByList(retainers_) +
GetMemoryUsedByList(sorted_entries_);
}
@@ -3091,12 +3058,9 @@
if (!FillReferences()) return false;
- snapshot_->FillChildrenAndRetainers();
+ snapshot_->FillChildren();
snapshot_->RememberLastJSObjectId();
- if (!SetEntriesDominators()) return false;
- if (!CalculateRetainedSizes()) return false;
-
progress_counter_ = progress_total_;
if (!ProgressReport(true)) return false;
return true;
@@ -3138,187 +3102,6 @@
}
-bool HeapSnapshotGenerator::IsUserGlobalReference(const HeapGraphEdge* edge) {
- ASSERT(edge->from() == snapshot_->root());
- return edge->type() == HeapGraphEdge::kShortcut;
-}
-
-
-void HeapSnapshotGenerator::MarkUserReachableObjects() {
- List<HeapEntry*> worklist;
-
- Vector<HeapGraphEdge*> children = snapshot_->root()->children();
- for (int i = 0; i < children.length(); ++i) {
- if (IsUserGlobalReference(children[i])) {
- worklist.Add(children[i]->to());
- }
- }
-
- while (!worklist.is_empty()) {
- HeapEntry* entry = worklist.RemoveLast();
- if (entry->user_reachable()) continue;
- entry->set_user_reachable();
- Vector<HeapGraphEdge*> children = entry->children();
- for (int i = 0; i < children.length(); ++i) {
- HeapEntry* child = children[i]->to();
- if (!child->user_reachable()) {
- worklist.Add(child);
- }
- }
- }
-}
-
-
-static bool IsRetainingEdge(HeapGraphEdge* edge) {
- if (edge->type() == HeapGraphEdge::kShortcut) return false;
- // The edge is not retaining if it goes from system domain
- // (i.e. an object not reachable from window) to the user domain
- // (i.e. a reachable object).
- return edge->from()->user_reachable()
- || !edge->to()->user_reachable();
-}
-
-
-void HeapSnapshotGenerator::FillPostorderIndexes(
- Vector<HeapEntry*>* entries) {
- snapshot_->ClearPaint();
- int current_entry = 0;
- List<HeapEntry*> nodes_to_visit;
- HeapEntry* root = snapshot_->root();
- nodes_to_visit.Add(root);
- snapshot_->root()->paint();
- while (!nodes_to_visit.is_empty()) {
- HeapEntry* entry = nodes_to_visit.last();
- Vector<HeapGraphEdge*> children = entry->children();
- bool has_new_edges = false;
- for (int i = 0; i < children.length(); ++i) {
- if (entry != root && !IsRetainingEdge(children[i])) continue;
- HeapEntry* child = children[i]->to();
- if (!child->painted()) {
- nodes_to_visit.Add(child);
- child->paint();
- has_new_edges = true;
- }
- }
- if (!has_new_edges) {
- entry->set_postorder_index(current_entry);
- (*entries)[current_entry++] = entry;
- nodes_to_visit.RemoveLast();
- }
- }
- ASSERT_EQ(current_entry, entries->length());
-}
-
-
-static int Intersect(int i1, int i2, const Vector<int>& dominators) {
- int finger1 = i1, finger2 = i2;
- while (finger1 != finger2) {
- while (finger1 < finger2) finger1 = dominators[finger1];
- while (finger2 < finger1) finger2 = dominators[finger2];
- }
- return finger1;
-}
-
-
-// The algorithm is based on the article:
-// K. Cooper, T. Harvey and K. Kennedy "A Simple, Fast Dominance Algorithm"
-// Softw. Pract. Exper. 4 (2001), pp. 1-10.
-bool HeapSnapshotGenerator::BuildDominatorTree(
- const Vector<HeapEntry*>& entries,
- Vector<int>* dominators) {
- if (entries.length() == 0) return true;
- HeapEntry* root = snapshot_->root();
- const int entries_length = entries.length(), root_index = entries_length - 1;
- for (int i = 0; i < root_index; ++i) (*dominators)[i] = HeapEntry::kNoEntry;
- (*dominators)[root_index] = root_index;
-
- // The affected array is used to mark entries which dominators
- // have to be racalculated because of changes in their retainers.
- ScopedVector<bool> affected(entries_length);
- for (int i = 0; i < affected.length(); ++i) affected[i] = false;
- // Mark the root direct children as affected.
- Vector<HeapGraphEdge*> children = entries[root_index]->children();
- for (int i = 0; i < children.length(); ++i) {
- affected[children[i]->to()->postorder_index()] = true;
- }
-
- bool changed = true;
- while (changed) {
- changed = false;
- if (!ProgressReport(false)) return false;
- for (int i = root_index - 1; i >= 0; --i) {
- if (!affected[i]) continue;
- affected[i] = false;
- // If dominator of the entry has already been set to root,
- // then it can't propagate any further.
- if ((*dominators)[i] == root_index) continue;
- int new_idom_index = HeapEntry::kNoEntry;
- Vector<HeapGraphEdge*> rets = entries[i]->retainers();
- for (int j = 0; j < rets.length(); ++j) {
- if (rets[j]->from() != root && !IsRetainingEdge(rets[j])) continue;
- int ret_index = rets[j]->from()->postorder_index();
- if (dominators->at(ret_index) != HeapEntry::kNoEntry) {
- new_idom_index = new_idom_index == HeapEntry::kNoEntry
- ? ret_index
- : Intersect(ret_index, new_idom_index, *dominators);
- // If idom has already reached the root, it doesn't make sense
- // to check other retainers.
- if (new_idom_index == root_index) break;
- }
- }
- if (new_idom_index != HeapEntry::kNoEntry
- && dominators->at(i) != new_idom_index) {
- (*dominators)[i] = new_idom_index;
- changed = true;
- Vector<HeapGraphEdge*> children = entries[i]->children();
- for (int j = 0; j < children.length(); ++j) {
- affected[children[j]->to()->postorder_index()] = true;
- }
- }
- }
- }
- return true;
-}
-
-
-bool HeapSnapshotGenerator::SetEntriesDominators() {
- MarkUserReachableObjects();
- // This array is used for maintaining postorder of nodes.
- ScopedVector<HeapEntry*> ordered_entries(snapshot_->entries().length());
- FillPostorderIndexes(&ordered_entries);
- ScopedVector<int> dominators(ordered_entries.length());
- if (!BuildDominatorTree(ordered_entries, &dominators)) return false;
- for (int i = 0; i < ordered_entries.length(); ++i) {
- ASSERT(dominators[i] != HeapEntry::kNoEntry);
- ordered_entries[i]->set_dominator(ordered_entries[dominators[i]]);
- }
- return true;
-}
-
-
-bool HeapSnapshotGenerator::CalculateRetainedSizes() {
- // As for the dominators tree we only know parent nodes, not
- // children, to sum up total sizes we "bubble" node's self size
- // adding it to all of its parents.
- List<HeapEntry>& entries = snapshot_->entries();
- for (int i = 0; i < entries.length(); ++i) {
- HeapEntry* entry = &entries[i];
- entry->set_retained_size(entry->self_size());
- }
- for (int i = 0; i < entries.length(); ++i) {
- int entry_size = entries[i].self_size();
- HeapEntry* current = &entries[i];
- for (HeapEntry* dominator = current->dominator();
- dominator != current;
- current = dominator, dominator = current->dominator()) {
- ASSERT(current->dominator() != NULL);
- dominator->add_retained_size(entry_size);
- }
- }
- return true;
-}
-
-
template<int bytes> struct MaxDecimalDigitsIn;
template<> struct MaxDecimalDigitsIn<4> {
static const int kSigned = 11;
@@ -3417,8 +3200,8 @@
// type, name|index, to_node.
const int HeapSnapshotJSONSerializer::kEdgeFieldsCount = 3;
-// type, name, id, self_size, retained_size, dominator, children_index.
-const int HeapSnapshotJSONSerializer::kNodeFieldsCount = 7;
+// type, name, id, self_size, children_index.
+const int HeapSnapshotJSONSerializer::kNodeFieldsCount = 5;
void HeapSnapshotJSONSerializer::Serialize(v8::OutputStream* stream) {
ASSERT(writer_ == NULL);
@@ -3458,8 +3241,7 @@
(snapshot_->RawSnapshotSize() + MB - 1) / MB);
HeapEntry* message = result->AddEntry(HeapEntry::kString, text, 0, 4);
result->root()->SetIndexedReference(HeapGraphEdge::kElement, 1, message);
- result->FillChildrenAndRetainers();
- result->SetDominatorsToSelf();
+ result->FillChildren();
return result;
}
@@ -3557,11 +3339,10 @@
void HeapSnapshotJSONSerializer::SerializeNode(HeapEntry* entry,
int edges_index) {
- // The buffer needs space for 6 ints, 1 uint32_t, 7 commas, \n and \0
+ // The buffer needs space for 5 uint32_t, 5 commas, \n and \0
static const int kBufferSize =
- 6 * MaxDecimalDigitsIn<sizeof(int)>::kSigned // NOLINT
- + MaxDecimalDigitsIn<sizeof(uint32_t)>::kUnsigned // NOLINT
- + 7 + 1 + 1;
+ 5 * MaxDecimalDigitsIn<sizeof(uint32_t)>::kUnsigned // NOLINT
+ + 5 + 1 + 1;
EmbeddedVector<char, kBufferSize> buffer;
int buffer_pos = 0;
if (entry_index(entry) != 0) {
@@ -3575,10 +3356,6 @@
buffer[buffer_pos++] = ',';
buffer_pos = utoa(entry->self_size(), buffer, buffer_pos);
buffer[buffer_pos++] = ',';
- buffer_pos = utoa(entry->retained_size(), buffer, buffer_pos);
- buffer[buffer_pos++] = ',';
- buffer_pos = utoa(entry_index(entry->dominator()), buffer, buffer_pos);
- buffer[buffer_pos++] = ',';
buffer_pos = utoa(edges_index, buffer, buffer_pos);
buffer[buffer_pos++] = '\n';
buffer[buffer_pos++] = '\0';
@@ -3615,8 +3392,6 @@
JSON_S("name") ","
JSON_S("id") ","
JSON_S("self_size") ","
- JSON_S("retained_size") ","
- JSON_S("dominator") ","
JSON_S("edges_index")) ","
JSON_S("node_types") ":" JSON_A(
JSON_A(
diff --git a/src/profile-generator.h b/src/profile-generator.h
index 92896c2..349226b 100644
--- a/src/profile-generator.h
+++ b/src/profile-generator.h
@@ -529,35 +529,14 @@
void set_name(const char* name) { name_ = name; }
inline SnapshotObjectId id() { return id_; }
int self_size() { return self_size_; }
- int retained_size() { return retained_size_; }
- void add_retained_size(int size) { retained_size_ += size; }
- void set_retained_size(int size) { retained_size_ = size; }
INLINE(int index() const);
- int postorder_index() { return postorder_index_; }
- void set_postorder_index(int value) { postorder_index_ = value; }
int children_count() const { return children_count_; }
INLINE(int set_children_index(int index));
- INLINE(int set_retainers_index(int index));
void add_child(HeapGraphEdge* edge) {
children_arr()[children_count_++] = edge;
}
- void add_retainer(HeapGraphEdge* edge) {
- retainers_arr()[retainers_count_++] = edge;
- }
Vector<HeapGraphEdge*> children() {
return Vector<HeapGraphEdge*>(children_arr(), children_count_); }
- Vector<HeapGraphEdge*> retainers() {
- return Vector<HeapGraphEdge*>(retainers_arr(), retainers_count_); }
- INLINE(HeapEntry* dominator() const);
- void set_dominator(HeapEntry* entry) {
- ASSERT(entry != NULL);
- dominator_ = entry->index();
- }
- void clear_paint() { painted_ = false; }
- bool painted() { return painted_; }
- void paint() { painted_ = true; }
- bool user_reachable() { return user_reachable_; }
- void set_user_reachable() { user_reachable_ = true; }
void SetIndexedReference(
HeapGraphEdge::Type type, int index, HeapEntry* entry);
@@ -571,22 +550,12 @@
private:
INLINE(HeapGraphEdge** children_arr());
- INLINE(HeapGraphEdge** retainers_arr());
const char* TypeAsString();
- unsigned painted_: 1;
- unsigned user_reachable_: 1;
- int dominator_: 30;
unsigned type_: 4;
- int retainers_count_: 28;
- int retainers_index_;
- int children_count_;
+ int children_count_: 28;
int children_index_;
int self_size_;
- union {
- int postorder_index_; // Used during dominator tree building.
- int retained_size_; // At that moment, there is no retained size yet.
- };
SnapshotObjectId id_;
HeapSnapshot* snapshot_;
const char* name_;
@@ -626,7 +595,6 @@
List<HeapEntry>& entries() { return entries_; }
List<HeapGraphEdge>& edges() { return edges_; }
List<HeapGraphEdge*>& children() { return children_; }
- List<HeapGraphEdge*>& retainers() { return retainers_; }
void RememberLastJSObjectId();
SnapshotObjectId max_snapshot_js_object_id() const {
return max_snapshot_js_object_id_;
@@ -640,11 +608,9 @@
HeapEntry* AddGcRootsEntry();
HeapEntry* AddGcSubrootEntry(int tag);
HeapEntry* AddNativesRootEntry();
- void ClearPaint();
HeapEntry* GetEntryById(SnapshotObjectId id);
List<HeapEntry*>* GetSortedEntriesList();
- void SetDominatorsToSelf();
- void FillChildrenAndRetainers();
+ void FillChildren();
void Print(int max_depth);
void PrintEntriesSize();
@@ -661,7 +627,6 @@
List<HeapEntry> entries_;
List<HeapGraphEdge> edges_;
List<HeapGraphEdge*> children_;
- List<HeapGraphEdge*> retainers_;
List<HeapEntry*> sorted_entries_;
SnapshotObjectId max_snapshot_js_object_id_;
@@ -1061,16 +1026,9 @@
bool GenerateSnapshot();
private:
- bool BuildDominatorTree(const Vector<HeapEntry*>& entries,
- Vector<int>* dominators);
- bool CalculateRetainedSizes();
bool FillReferences();
- void FillPostorderIndexes(Vector<HeapEntry*>* entries);
- bool IsUserGlobalReference(const HeapGraphEdge* edge);
- void MarkUserReachableObjects();
void ProgressStep();
bool ProgressReport(bool force = false);
- bool SetEntriesDominators();
void SetProgressTotal(int iterations_count);
HeapSnapshot* snapshot_;
diff --git a/src/regexp-macro-assembler-irregexp.cc b/src/regexp-macro-assembler-irregexp.cc
index aa67919..363b1ab 100644
--- a/src/regexp-macro-assembler-irregexp.cc
+++ b/src/regexp-macro-assembler-irregexp.cc
@@ -203,8 +203,9 @@
}
-void RegExpMacroAssemblerIrregexp::Succeed() {
+bool RegExpMacroAssemblerIrregexp::Succeed() {
Emit(BC_SUCCEED, 0);
+ return false; // Restart matching for global regexp not supported.
}
diff --git a/src/regexp-macro-assembler-irregexp.h b/src/regexp-macro-assembler-irregexp.h
index 25cb68d..d64a3d8 100644
--- a/src/regexp-macro-assembler-irregexp.h
+++ b/src/regexp-macro-assembler-irregexp.h
@@ -1,4 +1,4 @@
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -59,7 +59,7 @@
virtual void Backtrack();
virtual void GoTo(Label* label);
virtual void PushBacktrack(Label* label);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void Fail();
virtual void PopRegister(int register_index);
virtual void PushRegister(int register_index,
diff --git a/src/regexp-macro-assembler-tracer.cc b/src/regexp-macro-assembler-tracer.cc
index b7aeac4..c45fd44 100644
--- a/src/regexp-macro-assembler-tracer.cc
+++ b/src/regexp-macro-assembler-tracer.cc
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -102,14 +102,15 @@
}
-void RegExpMacroAssemblerTracer::Succeed() {
- PrintF(" Succeed();\n");
- assembler_->Succeed();
+bool RegExpMacroAssemblerTracer::Succeed() {
+ bool restart = assembler_->Succeed();
+ PrintF(" Succeed();%s\n", restart ? " [restart for global match]" : "");
+ return restart;
}
void RegExpMacroAssemblerTracer::Fail() {
- PrintF(" Fail();\n");
+ PrintF(" Fail();");
assembler_->Fail();
}
diff --git a/src/regexp-macro-assembler-tracer.h b/src/regexp-macro-assembler-tracer.h
index 3fd4d8b..a915835 100644
--- a/src/regexp-macro-assembler-tracer.h
+++ b/src/regexp-macro-assembler-tracer.h
@@ -98,7 +98,7 @@
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
diff --git a/src/regexp-macro-assembler.cc b/src/regexp-macro-assembler.cc
index b6fb3c5..08568de 100644
--- a/src/regexp-macro-assembler.cc
+++ b/src/regexp-macro-assembler.cc
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -35,7 +35,9 @@
namespace v8 {
namespace internal {
-RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
+RegExpMacroAssembler::RegExpMacroAssembler()
+ : slow_safe_compiler_(false),
+ global_(false) {
}
@@ -149,6 +151,7 @@
input_start,
input_end,
offsets_vector,
+ offsets_vector_length,
isolate);
return res;
}
@@ -161,6 +164,7 @@
const byte* input_start,
const byte* input_end,
int* output,
+ int output_size,
Isolate* isolate) {
ASSERT(isolate == Isolate::Current());
// Ensure that the minimum stack has been allocated.
@@ -174,10 +178,10 @@
input_start,
input_end,
output,
+ output_size,
stack_base,
direct_call,
isolate);
- ASSERT(result <= SUCCESS);
ASSERT(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
diff --git a/src/regexp-macro-assembler.h b/src/regexp-macro-assembler.h
index 8587435..5b2cf4a 100644
--- a/src/regexp-macro-assembler.h
+++ b/src/regexp-macro-assembler.h
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -174,7 +174,8 @@
virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetCurrentPositionFromEnd(int by) = 0;
virtual void SetRegister(int register_index, int to) = 0;
- virtual void Succeed() = 0;
+ // Return whether the matching (with a global regexp) will be restarted.
+ virtual bool Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
@@ -183,8 +184,14 @@
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
+ // Set whether the regular expression has the global flag. Exiting due to
+ // a failure in a global regexp may still mean success overall.
+ void set_global(bool global) { global_ = global; }
+ bool global() { return global_; }
+
private:
bool slow_safe_compiler_;
+ bool global_;
};
@@ -249,6 +256,7 @@
const byte* input_start,
const byte* input_end,
int* output,
+ int output_size,
Isolate* isolate);
};
diff --git a/src/runtime.cc b/src/runtime.cc
index a42f90c..a3c159a 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -3795,62 +3795,73 @@
}
-static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
+static int SearchRegExpNoCaptureMultiple(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSArray> last_match_array,
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
+ ASSERT(regexp->CaptureCount() == 0);
int match_start = -1;
int match_end = 0;
int pos = 0;
- int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
- if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+ int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
- OffsetsVector registers(required_registers, isolate);
+ int max_matches;
+ int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+ registers_per_match,
+ &max_matches);
+ OffsetsVector registers(num_registers, isolate);
Vector<int32_t> register_vector(registers.vector(), registers.length());
int subject_length = subject->length();
bool first = true;
-
for (;;) { // Break on failure, return on exception.
- RegExpImpl::IrregexpResult result =
- RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- pos,
- register_vector);
- if (result == RegExpImpl::RE_SUCCESS) {
- match_start = register_vector[0];
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
- if (match_end < match_start) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- match_end,
- match_start);
+ int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ pos,
+ register_vector);
+ if (num_matches > 0) {
+ for (int match_index = 0; match_index < num_matches; match_index++) {
+ int32_t* current_match = ®ister_vector[match_index * 2];
+ match_start = current_match[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
+ }
+ match_end = current_match[1];
+ HandleScope loop_scope(isolate);
+ if (!first) {
+ builder->Add(*isolate->factory()->NewProperSubString(subject,
+ match_start,
+ match_end));
+ } else {
+ builder->Add(*isolate->factory()->NewSubString(subject,
+ match_start,
+ match_end));
+ first = false;
+ }
}
- match_end = register_vector[1];
- HandleScope loop_scope(isolate);
- if (!first) {
- builder->Add(*isolate->factory()->NewProperSubString(subject,
- match_start,
- match_end));
- } else {
- builder->Add(*isolate->factory()->NewSubString(subject,
- match_start,
- match_end));
- }
+
+ // If we did not get the maximum number of matches, we can stop here
+ // since there are no matches left.
+ if (num_matches < max_matches) break;
+
if (match_start != match_end) {
pos = match_end;
} else {
pos = match_end + 1;
if (pos > subject_length) break;
}
- } else if (result == RegExpImpl::RE_FAILURE) {
+ } else if (num_matches == 0) {
break;
} else {
- ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
- return result;
+ ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
+ return RegExpImpl::RE_EXCEPTION;
}
- first = false;
}
if (match_start >= 0) {
@@ -3872,7 +3883,7 @@
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
-static RegExpImpl::IrregexpResult SearchRegExpMultiple(
+static int SearchRegExpMultiple(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
@@ -3880,17 +3891,20 @@
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
- int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
- if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+ int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
- OffsetsVector registers(required_registers, isolate);
+ int max_matches;
+ int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+ registers_per_match,
+ &max_matches);
+ OffsetsVector registers(num_registers, isolate);
Vector<int32_t> register_vector(registers.vector(), registers.length());
- RegExpImpl::IrregexpResult result =
- RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- 0,
- register_vector);
+ int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ 0,
+ register_vector);
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
@@ -3899,60 +3913,65 @@
int pos = 0;
// End of previous match. Differs from pos if match was empty.
int match_end = 0;
- if (result == RegExpImpl::RE_SUCCESS) {
- bool first = true;
- do {
- int match_start = register_vector[0];
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
- if (match_end < match_start) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- match_end,
- match_start);
- }
- match_end = register_vector[1];
+ bool first = true;
- {
- // Avoid accumulating new handles inside loop.
- HandleScope temp_scope(isolate);
- // Arguments array to replace function is match, captures, index and
- // subject, i.e., 3 + capture count in total.
- Handle<FixedArray> elements =
- isolate->factory()->NewFixedArray(3 + capture_count);
- Handle<String> match;
- if (!first) {
- match = isolate->factory()->NewProperSubString(subject,
- match_start,
- match_end);
- } else {
- match = isolate->factory()->NewSubString(subject,
- match_start,
- match_end);
+ if (num_matches > 0) {
+ do {
+ int match_start = 0;
+ for (int match_index = 0; match_index < num_matches; match_index++) {
+ int32_t* current_match =
+ ®ister_vector[match_index * registers_per_match];
+ match_start = current_match[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
}
- elements->set(0, *match);
- for (int i = 1; i <= capture_count; i++) {
- int start = register_vector[i * 2];
- if (start >= 0) {
- int end = register_vector[i * 2 + 1];
- ASSERT(start <= end);
- Handle<String> substring;
- if (!first) {
- substring = isolate->factory()->NewProperSubString(subject,
- start,
- end);
- } else {
- substring = isolate->factory()->NewSubString(subject, start, end);
- }
- elements->set(i, *substring);
+ match_end = current_match[1];
+
+ {
+ // Avoid accumulating new handles inside loop.
+ HandleScope temp_scope(isolate);
+ // Arguments array to replace function is match, captures, index and
+ // subject, i.e., 3 + capture count in total.
+ Handle<FixedArray> elements =
+ isolate->factory()->NewFixedArray(3 + capture_count);
+ Handle<String> match;
+ if (!first) {
+ match = isolate->factory()->NewProperSubString(subject,
+ match_start,
+ match_end);
} else {
- ASSERT(register_vector[i * 2 + 1] < 0);
- elements->set(i, isolate->heap()->undefined_value());
+ match = isolate->factory()->NewSubString(subject,
+ match_start,
+ match_end);
+ first = false;
}
+ elements->set(0, *match);
+ for (int i = 1; i <= capture_count; i++) {
+ int start = current_match[i * 2];
+ if (start >= 0) {
+ int end = current_match[i * 2 + 1];
+ ASSERT(start <= end);
+ Handle<String> substring =
+ isolate->factory()->NewProperSubString(subject, start, end);
+ elements->set(i, *substring);
+ } else {
+ ASSERT(current_match[i * 2 + 1] < 0);
+ elements->set(i, isolate->heap()->undefined_value());
+ }
+ }
+ elements->set(capture_count + 1, Smi::FromInt(match_start));
+ elements->set(capture_count + 2, *subject);
+ builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
- elements->set(capture_count + 1, Smi::FromInt(match_start));
- elements->set(capture_count + 2, *subject);
- builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
+ // If we did not get the maximum number of matches, we can stop here
+ // since there are no matches left.
+ if (num_matches < max_matches) break;
+
if (match_end > match_start) {
pos = match_end;
} else {
@@ -3962,14 +3981,13 @@
}
}
- result = RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- pos,
- register_vector);
- first = false;
- } while (result == RegExpImpl::RE_SUCCESS);
+ num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ pos,
+ register_vector);
+ } while (num_matches > 0);
- if (result != RegExpImpl::RE_EXCEPTION) {
+ if (num_matches != RegExpImpl::RE_EXCEPTION) {
// Finished matching, with at least one match.
if (match_end < subject_length) {
ReplacementStringBuilder::AddSubjectSlice(builder,
@@ -3993,7 +4011,7 @@
}
}
// No matches at all, return failure or exception result directly.
- return result;
+ return num_matches;
}
@@ -4035,7 +4053,7 @@
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- RegExpImpl::IrregexpResult result;
+ int result;
if (regexp->CaptureCount() == 0) {
result = SearchRegExpNoCaptureMultiple(isolate,
subject,
diff --git a/src/version.cc b/src/version.cc
index 79b6ebd..e475be0 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -34,7 +34,7 @@
// cannot be changed without changing the SCons build script.
#define MAJOR_VERSION 3
#define MINOR_VERSION 11
-#define BUILD_NUMBER 4
+#define BUILD_NUMBER 5
#define PATCH_LEVEL 0
// Use 1 for candidates and 0 otherwise.
// (Boolean macro values are not supported by all preprocessors.)
diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc
index d179d2a..13838ac 100644
--- a/src/x64/code-stubs-x64.cc
+++ b/src/x64/code-stubs-x64.cc
@@ -2864,30 +2864,37 @@
__ IncrementCounter(counters->regexp_entry_native(), 1);
// Isolates: note we add an additional parameter here (isolate pointer).
- static const int kRegExpExecuteArguments = 8;
+ static const int kRegExpExecuteArguments = 9;
int argument_slots_on_stack =
masm->ArgumentStackSlotsForCFunctionCall(kRegExpExecuteArguments);
__ EnterApiExitFrame(argument_slots_on_stack);
- // Argument 8: Pass current isolate address.
+ // Argument 9: Pass current isolate address.
// __ movq(Operand(rsp, (argument_slots_on_stack - 1) * kPointerSize),
// Immediate(ExternalReference::isolate_address()));
__ LoadAddress(kScratchRegister, ExternalReference::isolate_address());
__ movq(Operand(rsp, (argument_slots_on_stack - 1) * kPointerSize),
kScratchRegister);
- // Argument 7: Indicate that this is a direct call from JavaScript.
+ // Argument 8: Indicate that this is a direct call from JavaScript.
__ movq(Operand(rsp, (argument_slots_on_stack - 2) * kPointerSize),
Immediate(1));
- // Argument 6: Start (high end) of backtracking stack memory area.
+ // Argument 7: Start (high end) of backtracking stack memory area.
__ movq(kScratchRegister, address_of_regexp_stack_memory_address);
__ movq(r9, Operand(kScratchRegister, 0));
__ movq(kScratchRegister, address_of_regexp_stack_memory_size);
__ addq(r9, Operand(kScratchRegister, 0));
- // Argument 6 passed in r9 on Linux and on the stack on Windows.
-#ifdef _WIN64
__ movq(Operand(rsp, (argument_slots_on_stack - 3) * kPointerSize), r9);
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ // Argument 6 is passed in r9 on Linux and on the stack on Windows.
+#ifdef _WIN64
+ __ movq(Operand(rsp, (argument_slots_on_stack - 4) * kPointerSize),
+ Immediate(0));
+#else
+ __ Set(r9, 0);
#endif
// Argument 5: static offsets vector buffer.
@@ -2895,7 +2902,7 @@
ExternalReference::address_of_static_offsets_vector(isolate));
// Argument 5 passed in r8 on Linux and on the stack on Windows.
#ifdef _WIN64
- __ movq(Operand(rsp, (argument_slots_on_stack - 4) * kPointerSize), r8);
+ __ movq(Operand(rsp, (argument_slots_on_stack - 5) * kPointerSize), r8);
#endif
// First four arguments are passed in registers on both Linux and Windows.
@@ -2960,7 +2967,9 @@
// Check the result.
Label success;
Label exception;
- __ cmpl(rax, Immediate(NativeRegExpMacroAssembler::SUCCESS));
+ __ cmpl(rax, Immediate(1));
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ j(equal, &success, Label::kNear);
__ cmpl(rax, Immediate(NativeRegExpMacroAssembler::EXCEPTION));
__ j(equal, &exception);
diff --git a/src/x64/full-codegen-x64.cc b/src/x64/full-codegen-x64.cc
index 974269e..81dad6b 100644
--- a/src/x64/full-codegen-x64.cc
+++ b/src/x64/full-codegen-x64.cc
@@ -659,7 +659,7 @@
Label* fall_through) {
ToBooleanStub stub(result_register());
__ push(result_register());
- __ CallStub(&stub);
+ __ CallStub(&stub, condition->test_id());
__ testq(result_register(), result_register());
// The stub returns nonzero for true.
Split(not_zero, if_true, if_false, fall_through);
@@ -2287,7 +2287,7 @@
CallFunctionStub stub(arg_count, flags);
__ movq(rdi, Operand(rsp, (arg_count + 1) * kPointerSize));
- __ CallStub(&stub);
+ __ CallStub(&stub, expr->id());
RecordJSReturnSite(expr);
// Restore context register.
__ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc
index bf232bf..cb1e029 100644
--- a/src/x64/regexp-macro-assembler-x64.cc
+++ b/src/x64/regexp-macro-assembler-x64.cc
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -44,21 +44,23 @@
/*
* This assembler uses the following register assignment convention
- * - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using
- * LoadCurrentCharacter before using any of the dispatch methods.
- * - rdi : current position in input, as negative offset from end of string.
+ * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded
+ * using LoadCurrentCharacter before using any of the dispatch methods.
+ * Temporarily stores the index of capture start after a matching pass
+ * for a global regexp.
+ * - rdi : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character
- * offset! Is always a 32-bit signed (negative) offset, but must be
+ * offset! Is always a 32-bit signed (negative) offset, but must be
* maintained sign-extended to 64 bits, since it is used as index.
- * - rsi : end of input (points to byte after last character in input),
+ * - rsi : End of input (points to byte after last character in input),
* so that rsi+rdi points to the current character.
- * - rbp : frame pointer. Used to access arguments, local variables and
+ * - rbp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
- * - rsp : points to tip of C stack.
- * - rcx : points to tip of backtrack stack. The backtrack stack contains
- * only 32-bit values. Most are offsets from some base (e.g., character
+ * - rsp : Points to tip of C stack.
+ * - rcx : Points to tip of backtrack stack. The backtrack stack contains
+ * only 32-bit values. Most are offsets from some base (e.g., character
* positions from end of string or code location from Code* pointer).
- * - r8 : code object pointer. Used to convert between absolute and
+ * - r8 : Code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
* The registers rax, rbx, r9 and r11 are free to use for computations.
@@ -72,20 +74,22 @@
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
- * - Isolate* isolate (Address of the current isolate)
+ * - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0 call
* through the runtime system)
- * - stack_area_base (High end of the memory area to use as
+ * - stack_area_base (high end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
+ * - end of input (address of end of string)
+ * - start of input (address of first character in string)
* - start index (character index of start)
* - String* input_string (input string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
+ * - success counter (only useful for global regexp to count matches)
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * position -1). Used to initialize capture registers to a non-position.
* - At start of string (if 1, we are starting at the start of the
* string, otherwise 0)
* - register 0 rbp[-n] (Only positions must be stored in the first
@@ -94,7 +98,7 @@
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
- * character of the string). The remaining registers starts out uninitialized.
+ * character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
@@ -744,13 +748,16 @@
void RegExpMacroAssemblerX64::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(rax, 0);
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(rax, FAILURE);
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
+ Label return_rax;
// Finalize code - write the entry point code now we know how many
// registers we need.
// Entry code:
@@ -784,7 +791,7 @@
ASSERT_EQ(kInputStart, -3 * kPointerSize);
ASSERT_EQ(kInputEnd, -4 * kPointerSize);
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
- ASSERT_EQ(kStackHighEnd, -6 * kPointerSize);
+ ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize);
__ push(rdi);
__ push(rsi);
__ push(rdx);
@@ -795,7 +802,8 @@
__ push(rbx); // Callee-save
#endif
- __ push(Immediate(0)); // Make room for "at start" constant.
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
+ __ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -815,14 +823,14 @@
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
__ bind(&stack_limit_hit);
__ Move(code_object_pointer(), masm_.CodeObject());
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
__ testq(rax, rax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
__ bind(&stack_ok);
@@ -847,19 +855,7 @@
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
- if (num_saved_registers_ > 0) {
- // Fill saved registers with initial value = start offset - 1
- // Fill in stack push order, to avoid accessing across an unwritten
- // page (a problem on Windows).
- __ Set(rcx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ movq(Operand(rbp, rcx, times_1, 0), rax);
- __ subq(rcx, Immediate(kPointerSize));
- __ cmpq(rcx,
- Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
- __ j(greater, &init_loop);
- }
+#ifdef WIN32
// Ensure that we have written to each stack page, in order. Skipping a page
// on Windows can cause segmentation faults. Assuming page size is 4k.
const int kPageSize = 4096;
@@ -869,21 +865,49 @@
i += kRegistersPerPage) {
__ movq(register_location(i), rax); // One write every page.
}
+#endif // WIN32
+
+ // Initialize code object pointer.
+ __ Move(code_object_pointer(), masm_.CodeObject());
+
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
+ __ j(not_equal, &load_char_start_regexp, Label::kNear);
+ __ Set(current_character(), '\n');
+ __ jmp(&start_regexp, Label::kNear);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
+ if (num_saved_registers_ > 0) {
+ // Fill saved registers with initial value = start offset - 1
+ // Fill in stack push order, to avoid accessing across an unwritten
+ // page (a problem on Windows).
+ if (num_saved_registers_ > 8) {
+ __ Set(rcx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ movq(Operand(rbp, rcx, times_1, 0), rax);
+ __ subq(rcx, Immediate(kPointerSize));
+ __ cmpq(rcx,
+ Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ movq(register_location(i), rax);
+ }
+ }
+ }
// Initialize backtrack stack pointer.
__ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
- // Initialize code object pointer.
- __ Move(code_object_pointer(), masm_.CodeObject());
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ Set(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -902,6 +926,10 @@
}
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
+ if (i == 0 && global()) {
+ // Keep capture start in rdx for the zero-length check later.
+ __ movq(rdx, rax);
+ }
__ addq(rax, rcx); // Convert to index from start, not end.
if (mode_ == UC16) {
__ sar(rax, Immediate(1)); // Convert byte index to character index.
@@ -909,12 +937,54 @@
__ movl(Operand(rbx, i * kIntSize), rax);
}
}
- __ Set(rax, SUCCESS);
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ incq(Operand(rbp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters));
+ __ subq(rcx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmpq(rcx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ movq(Operand(rbp, kNumOutputRegisters), rcx);
+ // Advance the location for output.
+ __ addq(Operand(rbp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kIntSize));
+
+ // Prepare rax to initialize registers with its value in the next run.
+ __ movq(rax, Operand(rbp, kInputStartMinusOne));
+
+ // Special case for zero-length matches.
+ // rdx: capture start index
+ __ cmpq(rdi, rdx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &load_char_start_regexp);
+ // rdi (offset from the end) is zero if we already reached the end.
+ __ testq(rdi, rdi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ addq(rdi, Immediate(2));
+ } else {
+ __ incq(rdi);
+ }
+ __ jmp(&load_char_start_regexp);
+ } else {
+ __ movq(rax, Immediate(SUCCESS));
+ }
}
- // Exit and return rax
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ movq(rax, Operand(rbp, kSuccessfulCaptures));
+ }
+ __ bind(&return_rax);
#ifdef _WIN64
// Restore callee save registers.
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
@@ -951,7 +1021,7 @@
__ testq(rax, rax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
// Restore registers.
__ Move(code_object_pointer(), masm_.CodeObject());
@@ -1012,7 +1082,7 @@
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
}
FixupCodeRelativePositions();
@@ -1135,8 +1205,9 @@
}
-void RegExpMacroAssemblerX64::Succeed() {
+bool RegExpMacroAssemblerX64::Succeed() {
__ jmp(&success_label_);
+ return global();
}
diff --git a/src/x64/regexp-macro-assembler-x64.h b/src/x64/regexp-macro-assembler-x64.h
index cd24b60..31fc8ef 100644
--- a/src/x64/regexp-macro-assembler-x64.h
+++ b/src/x64/regexp-macro-assembler-x64.h
@@ -1,4 +1,4 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -109,7 +109,7 @@
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
@@ -154,7 +154,12 @@
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value. NumOutputRegisters is passed as 32-bit value. The upper
+ // 32 bit of this 64-bit stack slot may contain garbage.
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
// DirectCall is passed as 32 bit int (values 0 or 1).
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
@@ -167,8 +172,12 @@
static const int kInputStart = kStartIndex - kPointerSize;
static const int kInputEnd = kInputStart - kPointerSize;
static const int kRegisterOutput = kInputEnd - kPointerSize;
- static const int kStackHighEnd = kRegisterOutput - kPointerSize;
- static const int kDirectCall = kFrameAlign;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value.
+ static const int kNumOutputRegisters = kRegisterOutput - kPointerSize;
+ static const int kStackHighEnd = kFrameAlign;
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
#endif
@@ -183,14 +192,14 @@
// AMD64 Calling Convention has only one callee-save register that
// we use. We push this after the frame pointer (and after the
// parameters).
- static const int kBackup_rbx = kStackHighEnd - kPointerSize;
+ static const int kBackup_rbx = kNumOutputRegisters - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#endif
+ static const int kSuccessfulCaptures = kLastCalleeSaveRegister - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
- static const int kInputStartMinusOne =
- kLastCalleeSaveRegister - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
diff --git a/src/x64/simulator-x64.h b/src/x64/simulator-x64.h
index df8423a..8aba701 100644
--- a/src/x64/simulator-x64.h
+++ b/src/x64/simulator-x64.h
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -40,12 +40,12 @@
(entry(p0, p1, p2, p3, p4))
typedef int (*regexp_matcher)(String*, int, const byte*,
- const byte*, int*, Address, int, Isolate*);
+ const byte*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address should
// expect eight int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
- (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7))
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
+ (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
(reinterpret_cast<TryCatch*>(try_catch_address))
diff --git a/test/cctest/test-heap-profiler.cc b/test/cctest/test-heap-profiler.cc
index cbe8d44..c405b33 100644
--- a/test/cctest/test-heap-profiler.cc
+++ b/test/cctest/test-heap-profiler.cc
@@ -7,6 +7,7 @@
#include "v8.h"
#include "cctest.h"
+#include "hashmap.h"
#include "heap-profiler.h"
#include "snapshot.h"
#include "debug.h"
@@ -27,10 +28,14 @@
if (strcmp(entry->name(), "C2") == 0) has_C2 = true;
}
+ static bool AddressesMatch(void* key1, void* key2) {
+ return key1 == key2;
+ }
+
void CheckAllReachables(i::HeapEntry* root) {
+ i::HashMap visited(AddressesMatch);
i::List<i::HeapEntry*> list(10);
list.Add(root);
- root->paint();
CheckEntry(root);
while (!list.is_empty()) {
i::HeapEntry* entry = list.RemoveLast();
@@ -38,11 +43,15 @@
for (int i = 0; i < children.length(); ++i) {
if (children[i]->type() == i::HeapGraphEdge::kShortcut) continue;
i::HeapEntry* child = children[i]->to();
- if (!child->painted()) {
- list.Add(child);
- child->paint();
- CheckEntry(child);
- }
+ i::HashMap::Entry* entry = visited.Lookup(
+ reinterpret_cast<void*>(child),
+ static_cast<uint32_t>(reinterpret_cast<uintptr_t>(child)),
+ true);
+ if (entry->value)
+ continue;
+ entry->value = reinterpret_cast<void*>(1);
+ list.Add(child);
+ CheckEntry(child);
}
}
}
@@ -105,9 +114,6 @@
"var c2 = new C2(a2);");
const v8::HeapSnapshot* snapshot_env2 =
v8::HeapProfiler::TakeSnapshot(v8_str("env2"));
- i::HeapSnapshot* i_snapshot_env2 =
- const_cast<i::HeapSnapshot*>(
- reinterpret_cast<const i::HeapSnapshot*>(snapshot_env2));
const v8::HeapGraphNode* global_env2 = GetGlobalObject(snapshot_env2);
// Verify, that JS global object of env2 has '..2' properties.
@@ -120,9 +126,7 @@
NULL, GetProperty(global_env2, v8::HeapGraphEdge::kProperty, "b2_2"));
CHECK_NE(NULL, GetProperty(global_env2, v8::HeapGraphEdge::kProperty, "c2"));
- // Paint all nodes reachable from global object.
NamedEntriesDetector det;
- i_snapshot_env2->ClearPaint();
det.CheckAllReachables(const_cast<i::HeapEntry*>(
reinterpret_cast<const i::HeapEntry*>(global_env2)));
CHECK(det.has_A2);
@@ -156,9 +160,9 @@
CHECK_NE(NULL, x2);
// Test sizes.
- CHECK_EQ(x->GetSelfSize() * 3, x->GetRetainedSize());
- CHECK_EQ(x1->GetSelfSize(), x1->GetRetainedSize());
- CHECK_EQ(x2->GetSelfSize(), x2->GetRetainedSize());
+ CHECK_NE(0, x->GetSelfSize());
+ CHECK_NE(0, x1->GetSelfSize());
+ CHECK_NE(0, x2->GetSelfSize());
}
@@ -477,66 +481,6 @@
}
-TEST(HeapEntryDominator) {
- // The graph looks like this:
- //
- // -> node1
- // a |^
- // -> node5 ba
- // a v|
- // node6 -> node2
- // b a |^
- // -> node4 ba
- // b v|
- // -> node3
- //
- // The dominator for all nodes is node6.
-
- v8::HandleScope scope;
- LocalContext env;
-
- CompileRun(
- "function X(a, b) { this.a = a; this.b = b; }\n"
- "node6 = new X(new X(new X()), new X(new X(),new X()));\n"
- "(function(){\n"
- "node6.a.a.b = node6.b.a; // node1 -> node2\n"
- "node6.b.a.a = node6.a.a; // node2 -> node1\n"
- "node6.b.a.b = node6.b.b; // node2 -> node3\n"
- "node6.b.b.a = node6.b.a; // node3 -> node2\n"
- "})();");
-
- const v8::HeapSnapshot* snapshot =
- v8::HeapProfiler::TakeSnapshot(v8_str("dominators"));
-
- const v8::HeapGraphNode* global = GetGlobalObject(snapshot);
- CHECK_NE(NULL, global);
- const v8::HeapGraphNode* node6 =
- GetProperty(global, v8::HeapGraphEdge::kProperty, "node6");
- CHECK_NE(NULL, node6);
- const v8::HeapGraphNode* node5 =
- GetProperty(node6, v8::HeapGraphEdge::kProperty, "a");
- CHECK_NE(NULL, node5);
- const v8::HeapGraphNode* node4 =
- GetProperty(node6, v8::HeapGraphEdge::kProperty, "b");
- CHECK_NE(NULL, node4);
- const v8::HeapGraphNode* node3 =
- GetProperty(node4, v8::HeapGraphEdge::kProperty, "b");
- CHECK_NE(NULL, node3);
- const v8::HeapGraphNode* node2 =
- GetProperty(node4, v8::HeapGraphEdge::kProperty, "a");
- CHECK_NE(NULL, node2);
- const v8::HeapGraphNode* node1 =
- GetProperty(node5, v8::HeapGraphEdge::kProperty, "a");
- CHECK_NE(NULL, node1);
-
- CHECK_EQ(node6, node1->GetDominatorNode());
- CHECK_EQ(node6, node2->GetDominatorNode());
- CHECK_EQ(node6, node3->GetDominatorNode());
- CHECK_EQ(node6, node4->GetDominatorNode());
- CHECK_EQ(node6, node5->GetDominatorNode());
-}
-
-
namespace {
class TestJSONStream : public v8::OutputStream {
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index e89e6cd..9b4f905 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -506,8 +506,13 @@
NewStringFromUtf8(CStrVector(input));
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector(""));
- RegExpEngine::Compile(
- &compile_data, false, multiline, pattern, sample_subject, is_ascii);
+ RegExpEngine::Compile(&compile_data,
+ false,
+ false,
+ multiline,
+ pattern,
+ sample_subject,
+ is_ascii);
return compile_data.node;
}
@@ -720,6 +725,7 @@
input_start,
input_end,
captures,
+ 0,
Isolate::Current());
}
@@ -998,11 +1004,11 @@
int output[4];
NativeRegExpMacroAssembler::Result result =
Execute(*code,
- *input,
- 0,
- start_adr,
- start_adr + input->length() * 2,
- output);
+ *input,
+ 0,
+ start_adr,
+ start_adr + input->length() * 2,
+ output);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
diff --git a/test/mjsunit/regexp-global.js b/test/mjsunit/regexp-global.js
new file mode 100644
index 0000000..fac5c6a
--- /dev/null
+++ b/test/mjsunit/regexp-global.js
@@ -0,0 +1,127 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Test that an optional capture is cleared between two matches.
+var str = "ABX X";
+str = str.replace(/(\w)?X/g, function(match, capture) {
+ assertTrue(match.indexOf(capture) >= 0 ||
+ capture === undefined);
+ return capture ? capture.toLowerCase() : "-";
+ });
+assertEquals("Ab -", str);
+
+// Test zero-length matches.
+str = "Als Gregor Samsa eines Morgens";
+str = str.replace(/\b/g, function(match, capture) {
+ return "/";
+ });
+assertEquals("/Als/ /Gregor/ /Samsa/ /eines/ /Morgens/", str);
+
+// Test zero-length matches that have non-zero-length sub-captures.
+str = "It was a pleasure to burn.";
+str = str.replace(/(?=(\w+))\b/g, function(match, capture) {
+ return capture.length;
+ });
+assertEquals("2It 3was 1a 8pleasure 2to 4burn.", str);
+
+// Test multiple captures.
+str = "Try not. Do, or do not. There is no try.";
+str = str.replace(/(not?)|(do)|(try)/gi,
+ function(match, c1, c2, c3) {
+ assertTrue((c1 === undefined && c2 === undefined) ||
+ (c2 === undefined && c3 === undefined) ||
+ (c1 === undefined && c3 === undefined));
+ if (c1) return "-";
+ if (c2) return "+";
+ if (c3) return "="
+ });
+assertEquals("= -. +, or + -. There is - =.", str);
+
+// Test multiple alternate captures.
+str = "FOUR LEGS GOOD, TWO LEGS BAD!";
+str = str.replace(/(FOUR|TWO) LEGS (GOOD|BAD)/g,
+ function(match, num_legs, likeability) {
+ assertTrue(num_legs !== undefined);
+ assertTrue(likeability !== undefined);
+ if (num_legs == "FOUR") assertTrue(likeability == "GOOD");
+ if (num_legs == "TWO") assertTrue(likeability == "BAD");
+ return match.length - 10;
+ });
+assertEquals("4, 2!", str);
+
+
+// The same tests with UC16.
+
+//Test that an optional capture is cleared between two matches.
+str = "AB\u1234 \u1234";
+str = str.replace(/(\w)?\u1234/g,
+ function(match, capture) {
+ assertTrue(match.indexOf(capture) >= 0 ||
+ capture === undefined);
+ return capture ? capture.toLowerCase() : "-";
+ });
+assertEquals("Ab -", str);
+
+// Test zero-length matches.
+str = "Als \u2623\u2642 eines Morgens";
+str = str.replace(/\b/g, function(match, capture) {
+ return "/";
+ });
+assertEquals("/Als/ \u2623\u2642 /eines/ /Morgens/", str);
+
+// Test zero-length matches that have non-zero-length sub-captures.
+str = "It was a pleasure to \u70e7.";
+str = str.replace(/(?=(\w+))\b/g, function(match, capture) {
+ return capture.length;
+ });
+assertEquals("2It 3was 1a 8pleasure 2to \u70e7.", str);
+
+// Test multiple captures.
+str = "Try not. D\u26aa, or d\u26aa not. There is no try.";
+str = str.replace(/(not?)|(d\u26aa)|(try)/gi,
+ function(match, c1, c2, c3) {
+ assertTrue((c1 === undefined && c2 === undefined) ||
+ (c2 === undefined && c3 === undefined) ||
+ (c1 === undefined && c3 === undefined));
+ if (c1) return "-";
+ if (c2) return "+";
+ if (c3) return "="
+ });
+assertEquals("= -. +, or + -. There is - =.", str);
+
+// Test multiple alternate captures.
+str = "FOUR \u817f GOOD, TWO \u817f BAD!";
+str = str.replace(/(FOUR|TWO) \u817f (GOOD|BAD)/g,
+ function(match, num_legs, likeability) {
+ assertTrue(num_legs !== undefined);
+ assertTrue(likeability !== undefined);
+ if (num_legs == "FOUR") assertTrue(likeability == "GOOD");
+ if (num_legs == "TWO") assertTrue(likeability == "BAD");
+ return match.length - 7;
+ });
+assertEquals("4, 2!", str);
diff --git a/tools/fuzz-harness.sh b/tools/fuzz-harness.sh
new file mode 100644
index 0000000..efbf864
--- /dev/null
+++ b/tools/fuzz-harness.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# Copyright 2012 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# A simple harness that downloads and runs 'jsfunfuzz' against d8. This
+# takes a long time because it runs many iterations and is intended for
+# automated usage. The package containing 'jsfunfuzz' can be found as an
+# attachment to this bug:
+# https://bugzilla.mozilla.org/show_bug.cgi?id=jsfunfuzz
+
+JSFUNFUZZ_URL="https://bugzilla.mozilla.org/attachment.cgi?id=310631"
+JSFUNFUZZ_MD5="d0e497201c5cd7bffbb1cdc1574f4e32"
+
+v8_root=$(readlink -f $(dirname $BASH_SOURCE)/../)
+
+if [ -n "$1" ]; then
+ d8="${v8_root}/$1"
+else
+ d8="${v8_root}/d8"
+fi
+
+if [ ! -f "$d8" ]; then
+ echo "Failed to find d8 binary: $d8"
+ exit 1
+fi
+
+jsfunfuzz_file="$v8_root/tools/jsfunfuzz.zip"
+if [ ! -f "$jsfunfuzz_file" ]; then
+ echo "Downloading $jsfunfuzz_file ..."
+ wget -q -O "$jsfunfuzz_file" $JSFUNFUZZ_URL || exit 1
+fi
+
+jsfunfuzz_sum=$(md5sum "$jsfunfuzz_file" | awk '{ print $1 }')
+if [ $jsfunfuzz_sum != $JSFUNFUZZ_MD5 ]; then
+ echo "Failed to verify checksum!"
+ exit 1
+fi
+
+jsfunfuzz_dir="$v8_root/tools/jsfunfuzz"
+if [ ! -d "$jsfunfuzz_dir" ]; then
+ echo "Unpacking into $jsfunfuzz_dir ..."
+ unzip "$jsfunfuzz_file" -d "$jsfunfuzz_dir" || exit 1
+ echo "Patching runner ..."
+ cat << EOF | patch -s -p0 -d "$v8_root"
+--- tools/jsfunfuzz/jsfunfuzz/multi_timed_run.py~
++++ tools/jsfunfuzz/jsfunfuzz/multi_timed_run.py
+@@ -125,7 +125,7 @@
+
+ def many_timed_runs():
+ iteration = 0
+- while True:
++ while iteration < 100:
+ iteration += 1
+ logfilename = "w%d" % iteration
+ one_timed_run(logfilename)
+EOF
+fi
+
+flags='--debug-code --expose-gc --verify-gc'
+python -u "$jsfunfuzz_dir/jsfunfuzz/multi_timed_run.py" 300 \
+ "$d8" $flags "$jsfunfuzz_dir/jsfunfuzz/jsfunfuzz.js"
+exit_code=$(cat w* | grep " looking good" -c)
+exit_code=$((100-exit_code))
+tar -cjf fuzz-results-$(date +%y%m%d).tar.bz2 err-* w*
+rm -f err-* w*
+
+echo "Total failures: $exit_code"
+exit $exit_code
diff --git a/tools/grokdump.py b/tools/grokdump.py
index 29d4755..4071306 100755
--- a/tools/grokdump.py
+++ b/tools/grokdump.py
@@ -111,18 +111,56 @@
def do_dump(reader, heap):
"""Dump all available memory regions."""
def dump_region(reader, start, size, location):
- print "%s - %s" % (reader.FormatIntPtr(start),
- reader.FormatIntPtr(start + size))
- for slot in xrange(start,
- start + size,
- reader.PointerSize()):
- maybe_address = reader.ReadUIntPtr(slot)
- heap_object = heap.FindObject(maybe_address)
- print "%s: %s" % (reader.FormatIntPtr(slot),
- reader.FormatIntPtr(maybe_address))
- if heap_object:
- heap_object.Print(Printer())
- print
+ print
+ while start & 3 != 0:
+ start += 1
+ size -= 1
+ location += 1
+ is_executable = reader.IsProbableExecutableRegion(location, size)
+ is_ascii = reader.IsProbableASCIIRegion(location, size)
+
+ if is_executable is not False:
+ lines = reader.GetDisasmLines(start, size)
+ for line in lines:
+ print FormatDisasmLine(start, heap, line)
+ print
+
+ if is_ascii is not False:
+ # Output in the same format as the Unix hd command
+ addr = start
+ for slot in xrange(location, location + size, 16):
+ hex_line = ""
+ asc_line = ""
+ for i in xrange(0, 16):
+ if slot + i < location + size:
+ byte = ctypes.c_uint8.from_buffer(reader.minidump, slot + i).value
+ if byte >= 0x20 and byte < 0x7f:
+ asc_line += chr(byte)
+ else:
+ asc_line += "."
+ hex_line += " %02x" % (byte)
+ else:
+ hex_line += " "
+ if i == 7:
+ hex_line += " "
+ print "%s %s |%s|" % (reader.FormatIntPtr(addr),
+ hex_line,
+ asc_line)
+ addr += 16
+
+ if is_executable is not True and is_ascii is not True:
+ print "%s - %s" % (reader.FormatIntPtr(start),
+ reader.FormatIntPtr(start + size))
+ for slot in xrange(start,
+ start + size,
+ reader.PointerSize()):
+ maybe_address = reader.ReadUIntPtr(slot)
+ heap_object = heap.FindObject(maybe_address)
+ print "%s: %s" % (reader.FormatIntPtr(slot),
+ reader.FormatIntPtr(maybe_address))
+ if heap_object:
+ heap_object.Print(Printer())
+ print
reader.ForEachMemoryRegion(dump_region)
@@ -470,6 +508,64 @@
elif self.arch == MD_CPU_ARCHITECTURE_X86:
return ctypes.c_uint32.from_buffer(self.minidump, location).value
+ def IsProbableASCIIRegion(self, location, length):
+ ascii_bytes = 0
+ non_ascii_bytes = 0
+ for loc in xrange(location, location + length):
+ byte = ctypes.c_uint8.from_buffer(self.minidump, loc).value
+ if byte >= 0x7f:
+ non_ascii_bytes += 1
+ if byte < 0x20 and byte != 0:
+ non_ascii_bytes += 1
+ if byte < 0x7f and byte >= 0x20:
+ ascii_bytes += 1
+ if byte == 0xa: # newline
+ ascii_bytes += 1
+ if ascii_bytes * 10 <= length:
+ return False
+ if length > 0 and ascii_bytes > non_ascii_bytes * 7:
+ return True
+ if ascii_bytes > non_ascii_bytes * 3:
+ return None # Maybe
+ return False
+
+ def IsProbableExecutableRegion(self, location, length):
+ opcode_bytes = 0
+ sixty_four = self.arch == MD_CPU_ARCHITECTURE_AMD64
+ for loc in xrange(location, location + length):
+ byte = ctypes.c_uint8.from_buffer(self.minidump, loc).value
+ if (byte == 0x8b or # mov
+ byte == 0x89 or # mov reg-reg
+ (byte & 0xf0) == 0x50 or # push/pop
+ (sixty_four and (byte & 0xf0) == 0x40) or # rex prefix
+ byte == 0xc3 or # return
+ byte == 0x74 or # jeq
+ byte == 0x84 or # jeq far
+ byte == 0x75 or # jne
+ byte == 0x85 or # jne far
+ byte == 0xe8 or # call
+ byte == 0xe9 or # jmp far
+ byte == 0xeb): # jmp near
+ opcode_bytes += 1
+ opcode_percent = (opcode_bytes * 100) / length
+ threshold = 20
+ if opcode_percent > threshold + 2:
+ return True
+ if opcode_percent > threshold - 2:
+ return None # Maybe
+ return False
+
+ def FindRegion(self, addr):
+ answer = [-1, -1]
+ def is_in(reader, start, size, location):
+ if addr >= start and addr < start + size:
+ answer[0] = start
+ answer[1] = size
+ self.ForEachMemoryRegion(is_in)
+ if answer[0] == -1:
+ return None
+ return answer
+
def ForEachMemoryRegion(self, cb):
if self.memory_list64 is not None:
for r in self.memory_list64.ranges:
@@ -1099,37 +1195,49 @@
def AnalyzeMinidump(options, minidump_name):
reader = MinidumpReader(options, minidump_name)
+ heap = None
DebugPrint("========================================")
if reader.exception is None:
print "Minidump has no exception info"
- return
- print "Exception info:"
- exception_thread = reader.thread_map[reader.exception.thread_id]
- print " thread id: %d" % exception_thread.id
- print " code: %08X" % reader.exception.exception.code
- print " context:"
- for r in CONTEXT_FOR_ARCH[reader.arch]:
- print " %s: %s" % (r, reader.FormatIntPtr(reader.Register(r)))
- # TODO(vitalyr): decode eflags.
- print " eflags: %s" % bin(reader.exception_context.eflags)[2:]
- print
+ else:
+ print "Exception info:"
+ exception_thread = reader.thread_map[reader.exception.thread_id]
+ print " thread id: %d" % exception_thread.id
+ print " code: %08X" % reader.exception.exception.code
+ print " context:"
+ for r in CONTEXT_FOR_ARCH[reader.arch]:
+ print " %s: %s" % (r, reader.FormatIntPtr(reader.Register(r)))
+ # TODO(vitalyr): decode eflags.
+ print " eflags: %s" % bin(reader.exception_context.eflags)[2:]
+ print
- stack_top = reader.ExceptionSP()
- stack_bottom = exception_thread.stack.start + \
- exception_thread.stack.memory.data_size
- stack_map = {reader.ExceptionIP(): -1}
- for slot in xrange(stack_top, stack_bottom, reader.PointerSize()):
- maybe_address = reader.ReadUIntPtr(slot)
- if not maybe_address in stack_map:
- stack_map[maybe_address] = slot
- heap = V8Heap(reader, stack_map)
+ stack_top = reader.ExceptionSP()
+ stack_bottom = exception_thread.stack.start + \
+ exception_thread.stack.memory.data_size
+ stack_map = {reader.ExceptionIP(): -1}
+ for slot in xrange(stack_top, stack_bottom, reader.PointerSize()):
+ maybe_address = reader.ReadUIntPtr(slot)
+ if not maybe_address in stack_map:
+ stack_map[maybe_address] = slot
+ heap = V8Heap(reader, stack_map)
- print "Disassembly around exception.eip:"
- start = reader.ExceptionIP() - EIP_PROXIMITY
- lines = reader.GetDisasmLines(start, 2 * EIP_PROXIMITY)
- for line in lines:
- print FormatDisasmLine(start, heap, line)
- print
+ print "Disassembly around exception.eip:"
+ disasm_start = reader.ExceptionIP() - EIP_PROXIMITY
+ disasm_bytes = 2 * EIP_PROXIMITY
+ if (options.full):
+ full_range = reader.FindRegion(reader.ExceptionIP())
+ if full_range is not None:
+ disasm_start = full_range[0]
+ disasm_bytes = full_range[1]
+
+ lines = reader.GetDisasmLines(disasm_start, disasm_bytes)
+
+ for line in lines:
+ print FormatDisasmLine(disasm_start, heap, line)
+ print
+
+ if heap is None:
+ heap = V8Heap(reader, None)
if options.full:
do_dump(reader, heap)
@@ -1137,15 +1245,16 @@
if options.shell:
InspectionShell(reader, heap).cmdloop("type help to get help")
else:
- print "Annotated stack (from exception.esp to bottom):"
- for slot in xrange(stack_top, stack_bottom, reader.PointerSize()):
- maybe_address = reader.ReadUIntPtr(slot)
- heap_object = heap.FindObject(maybe_address)
- print "%s: %s" % (reader.FormatIntPtr(slot),
- reader.FormatIntPtr(maybe_address))
- if heap_object:
- heap_object.Print(Printer())
- print
+ if reader.exception is not None:
+ print "Annotated stack (from exception.esp to bottom):"
+ for slot in xrange(stack_top, stack_bottom, reader.PointerSize()):
+ maybe_address = reader.ReadUIntPtr(slot)
+ heap_object = heap.FindObject(maybe_address)
+ print "%s: %s" % (reader.FormatIntPtr(slot),
+ reader.FormatIntPtr(maybe_address))
+ if heap_object:
+ heap_object.Print(Printer())
+ print
reader.Dispose()