Minor bugfixes and optimizations.
Added command line debugger to D8 shell.
Fixed subtle bug that caused the wrong 'this' to be used when calling a caught function in a catch clause.
Inline array loads within loops directly in the code instead of always calling a stub.
git-svn-id: http://v8.googlecode.com/svn/trunk@1031 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/ChangeLog b/ChangeLog
index 8f3a393..17fce33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2009-01-06: Version 0.4.7
+
+ Minor bugfixes and optimizations.
+
+ Added command line debugger to D8 shell.
+
+ Fixed subtle bug that caused the wrong 'this' to be used when
+ calling a caught function in a catch clause.
+
+ Inline array loads within loops directly in the code instead of
+ always calling a stub.
+
+
2008-12-11: Version 0.4.6
Fixed exception reporting bug where certain exceptions were
diff --git a/src/SConscript b/src/SConscript
index d008bdd..c1bd12b 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -73,7 +73,7 @@
D8_FILES = {
'all': [
- 'd8.cc'
+ 'd8.cc', 'd8-debug.cc'
],
'console:readline': [
'd8-readline.cc'
diff --git a/src/api.cc b/src/api.cc
index 546268a..e227824 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -2203,7 +2203,7 @@
const char* v8::V8::GetVersion() {
- return "0.4.6";
+ return "0.4.7";
}
diff --git a/src/assembler-arm.cc b/src/assembler-arm.cc
index 21cd225..4e3ae1a 100644
--- a/src/assembler-arm.cc
+++ b/src/assembler-arm.cc
@@ -320,8 +320,10 @@
no_const_pool_before_ = 0;
last_const_pool_end_ = 0;
last_bound_pos_ = 0;
- last_position_ = RelocInfo::kNoPosition;
- last_position_is_statement_ = false;
+ current_statement_position_ = RelocInfo::kNoPosition;
+ current_position_ = RelocInfo::kNoPosition;
+ written_statement_position_ = current_statement_position_;
+ written_position_ = current_position_;
}
@@ -1306,20 +1308,36 @@
void Assembler::RecordPosition(int pos) {
if (pos == RelocInfo::kNoPosition) return;
ASSERT(pos >= 0);
- if (pos == last_position_) return;
- CheckBuffer();
- RecordRelocInfo(RelocInfo::POSITION, pos);
- last_position_ = pos;
- last_position_is_statement_ = false;
+ current_position_ = pos;
+ WriteRecordedPositions();
}
void Assembler::RecordStatementPosition(int pos) {
- if (pos == last_position_) return;
- CheckBuffer();
- RecordRelocInfo(RelocInfo::STATEMENT_POSITION, pos);
- last_position_ = pos;
- last_position_is_statement_ = true;
+ if (pos == RelocInfo::kNoPosition) return;
+ ASSERT(pos >= 0);
+ current_statement_position_ = pos;
+ WriteRecordedPositions();
+}
+
+
+void Assembler::WriteRecordedPositions() {
+ // Write the statement position if it is different from what was written last
+ // time.
+ if (current_statement_position_ != written_statement_position_) {
+ CheckBuffer();
+ RecordRelocInfo(RelocInfo::STATEMENT_POSITION, current_statement_position_);
+ written_statement_position_ = current_statement_position_;
+ }
+
+ // Write the position if it is different from what was written last time and
+ // also different from the written statement position.
+ if (current_position_ != written_position_ &&
+ current_position_ != written_statement_position_) {
+ CheckBuffer();
+ RecordRelocInfo(RelocInfo::POSITION, current_position_);
+ written_position_ = current_position_;
+ }
}
diff --git a/src/assembler-arm.h b/src/assembler-arm.h
index bd70066..faf402a 100644
--- a/src/assembler-arm.h
+++ b/src/assembler-arm.h
@@ -643,15 +643,11 @@
void RecordPosition(int pos);
void RecordStatementPosition(int pos);
+ void WriteRecordedPositions();
int pc_offset() const { return pc_ - buffer_; }
- int last_position() const { return last_position_; }
- bool last_position_is_statement() const {
- return last_position_is_statement_;
- }
-
- // Temporary helper function. Used by codegen.cc.
- int last_statement_position() const { return last_position_; }
+ int current_position() const { return current_position_; }
+ int current_statement_position() const { return current_statement_position_; }
protected:
int buffer_space() const { return reloc_info_writer.pos() - pc_; }
@@ -754,8 +750,10 @@
int last_bound_pos_;
// source position information
- int last_position_;
- bool last_position_is_statement_;
+ int current_position_;
+ int current_statement_position_;
+ int written_position_;
+ int written_statement_position_;
// Code emission
inline void CheckBuffer();
diff --git a/src/assembler-ia32-inl.h b/src/assembler-ia32-inl.h
index 534d57e..aee847d 100644
--- a/src/assembler-ia32-inl.h
+++ b/src/assembler-ia32-inl.h
@@ -152,6 +152,12 @@
}
+Immediate::Immediate(Label *internal_offset) {
+ x_ = reinterpret_cast<int32_t>(internal_offset);
+ rmode_ = RelocInfo::INTERNAL_REFERENCE;
+}
+
+
Immediate::Immediate(Handle<Object> handle) {
// Verify all Objects referred by code are NOT in new space.
Object* obj = *handle;
@@ -200,11 +206,27 @@
void Assembler::emit(const Immediate& x) {
+ if (x.rmode_ == RelocInfo::INTERNAL_REFERENCE) {
+ Label* label = reinterpret_cast<Label*>(x.x_);
+ emit_code_relative_offset(label);
+ return;
+ }
if (x.rmode_ != RelocInfo::NONE) RecordRelocInfo(x.rmode_);
emit(x.x_);
}
+void Assembler::emit_code_relative_offset(Label* label) {
+ if (label->is_bound()) {
+ int32_t pos;
+ pos = label->pos() + Code::kHeaderSize - kHeapObjectTag;
+ emit(pos);
+ } else {
+ emit_disp(label, Displacement::CODE_RELATIVE);
+ }
+}
+
+
void Assembler::emit_w(const Immediate& x) {
ASSERT(x.rmode_ == RelocInfo::NONE);
uint16_t value = static_cast<uint16_t>(x.x_);
diff --git a/src/assembler-ia32.cc b/src/assembler-ia32.cc
index c7571e3..f487c41 100644
--- a/src/assembler-ia32.cc
+++ b/src/assembler-ia32.cc
@@ -316,8 +316,10 @@
reloc_info_writer.Reposition(buffer_ + buffer_size, pc_);
last_pc_ = NULL;
- last_position_ = RelocInfo::kNoPosition;
- last_statement_position_ = RelocInfo::kNoPosition;
+ current_statement_position_ = RelocInfo::kNoPosition;
+ current_position_ = RelocInfo::kNoPosition;
+ written_statement_position_ = current_statement_position_;
+ written_position_ = current_position_;
}
@@ -342,6 +344,7 @@
desc->buffer_size = buffer_size_;
desc->instr_size = pc_offset();
desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
+ desc->origin = this;
Counters::reloc_info_size.Increment(desc->reloc_size);
}
@@ -420,29 +423,6 @@
}
-void Assembler::push(Label* label, RelocInfo::Mode reloc_mode) {
- ASSERT_NOT_NULL(label);
- EnsureSpace ensure_space(this);
- last_pc_ = pc_;
- // If reloc_mode == NONE, the label is stored as buffer relative.
- ASSERT(reloc_mode == RelocInfo::NONE);
- if (label->is_bound()) {
- // Index of position relative to Code Object-pointer.
- int rel_pos = label->pos() + Code::kHeaderSize - kHeapObjectTag;
- if (rel_pos >= 0 && rel_pos < 256) {
- EMIT(0x6a);
- EMIT(rel_pos);
- } else {
- EMIT(0x68);
- emit(rel_pos);
- }
- } else {
- EMIT(0x68);
- emit_disp(label, Displacement::CODE_RELATIVE);
- }
-}
-
-
void Assembler::pop(Register dst) {
ASSERT(reloc_info_writer.last_pc() != NULL);
if (FLAG_push_pop_elimination && (reloc_info_writer.last_pc() <= last_pc_)) {
@@ -889,21 +869,20 @@
}
-void Assembler::rep_cmpsb() {
+void Assembler::cmpb_al(const Operand& op) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
- EMIT(0xFC); // CLD to ensure forward operation
- EMIT(0xF3); // REP
- EMIT(0xA6); // CMPSB
+ EMIT(0x38); // CMP r/m8, r8
+ emit_operand(eax, op); // eax has same code as register al.
}
-void Assembler::rep_cmpsw() {
+
+void Assembler::cmpw_ax(const Operand& op) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
- EMIT(0xFC); // CLD to ensure forward operation
- EMIT(0xF3); // REP
- EMIT(0x66); // Operand size overide.
- EMIT(0xA7); // CMPS
+ EMIT(0x66);
+ EMIT(0x39); // CMP r/m16, r16
+ emit_operand(eax, op); // eax has same code as register ax.
}
@@ -1988,31 +1967,36 @@
void Assembler::RecordPosition(int pos) {
- if (pos == RelocInfo::kNoPosition) return;
+ ASSERT(pos != RelocInfo::kNoPosition);
ASSERT(pos >= 0);
- last_position_ = pos;
+ current_position_ = pos;
}
void Assembler::RecordStatementPosition(int pos) {
- if (pos == RelocInfo::kNoPosition) return;
+ ASSERT(pos != RelocInfo::kNoPosition);
ASSERT(pos >= 0);
- last_statement_position_ = pos;
+ current_statement_position_ = pos;
}
void Assembler::WriteRecordedPositions() {
- if (last_statement_position_ != RelocInfo::kNoPosition) {
+ // Write the statement position if it is different from what was written last
+ // time.
+ if (current_statement_position_ != written_statement_position_) {
EnsureSpace ensure_space(this);
- RecordRelocInfo(RelocInfo::STATEMENT_POSITION, last_statement_position_);
+ RecordRelocInfo(RelocInfo::STATEMENT_POSITION, current_statement_position_);
+ written_statement_position_ = current_statement_position_;
}
- if ((last_position_ != RelocInfo::kNoPosition) &&
- (last_position_ != last_statement_position_)) {
+
+ // Write the position if it is different from what was written last time and
+ // also different from the written statement position.
+ if (current_position_ != written_position_ &&
+ current_position_ != written_statement_position_) {
EnsureSpace ensure_space(this);
- RecordRelocInfo(RelocInfo::POSITION, last_position_);
+ RecordRelocInfo(RelocInfo::POSITION, current_position_);
+ written_position_ = current_position_;
}
- last_statement_position_ = RelocInfo::kNoPosition;
- last_position_ = RelocInfo::kNoPosition;
}
diff --git a/src/assembler-ia32.h b/src/assembler-ia32.h
index 17c6b82..7db218e 100644
--- a/src/assembler-ia32.h
+++ b/src/assembler-ia32.h
@@ -185,6 +185,10 @@
inline explicit Immediate(Handle<Object> handle);
inline explicit Immediate(Smi* value);
+ static Immediate CodeRelativeOffset(Label* label) {
+ return Immediate(label);
+ }
+
bool is_zero() const { return x_ == 0 && rmode_ == RelocInfo::NONE; }
bool is_int8() const {
return -128 <= x_ && x_ < 128 && rmode_ == RelocInfo::NONE;
@@ -194,6 +198,8 @@
}
private:
+ inline explicit Immediate(Label* value);
+
int x_;
RelocInfo::Mode rmode_;
@@ -497,15 +503,14 @@
void and_(const Operand& dst, const Immediate& x);
void cmpb(const Operand& op, int8_t imm8);
+ void cmpb_al(const Operand& op);
+ void cmpw_ax(const Operand& op);
void cmpw(const Operand& op, Immediate imm16);
void cmp(Register reg, int32_t imm32);
void cmp(Register reg, Handle<Object> handle);
void cmp(Register reg, const Operand& op);
void cmp(const Operand& op, const Immediate& imm);
- void rep_cmpsb();
- void rep_cmpsw();
-
void dec_b(Register dst);
void dec(Register dst);
@@ -707,8 +712,8 @@
void WriteInternalReference(int position, const Label& bound_label);
int pc_offset() const { return pc_ - buffer_; }
- int last_statement_position() const { return last_statement_position_; }
- int last_position() const { return last_position_; }
+ int current_statement_position() const { return current_statement_position_; }
+ int current_position() const { return current_position_; }
// Check if there is less than kGap bytes available in the buffer.
// If this is the case, we need to grow the buffer before emitting
@@ -731,24 +736,6 @@
private:
- // Code buffer:
- // The buffer into which code and relocation info are generated.
- byte* buffer_;
- int buffer_size_;
- // True if the assembler owns the buffer, false if buffer is external.
- bool own_buffer_;
-
- // code generation
- byte* pc_; // the program counter; moves forward
- RelocInfoWriter reloc_info_writer;
-
- // push-pop elimination
- byte* last_pc_;
-
- // source position information
- int last_position_;
- int last_statement_position_;
-
byte* addr_at(int pos) { return buffer_ + pos; }
byte byte_at(int pos) { return buffer_[pos]; }
uint32_t long_at(int pos) {
@@ -766,6 +753,9 @@
inline void emit(const Immediate& x);
inline void emit_w(const Immediate& x);
+ // Emit the code-object-relative offset of the label's position
+ inline void emit_code_relative_offset(Label* label);
+
// instruction generation
void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -794,6 +784,26 @@
friend class CodePatcher;
friend class EnsureSpace;
+
+ // Code buffer:
+ // The buffer into which code and relocation info are generated.
+ byte* buffer_;
+ int buffer_size_;
+ // True if the assembler owns the buffer, false if buffer is external.
+ bool own_buffer_;
+
+ // code generation
+ byte* pc_; // the program counter; moves forward
+ RelocInfoWriter reloc_info_writer;
+
+ // push-pop elimination
+ byte* last_pc_;
+
+ // source position information
+ int current_statement_position_;
+ int current_position_;
+ int written_statement_position_;
+ int written_position_;
};
diff --git a/src/assembler.h b/src/assembler.h
index 971b950..7bd117c 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -382,6 +382,22 @@
};
+// A stack-allocated code region logs a name for the code generated
+// while the region is in effect. This information is used by the
+// profiler to categorize ticks within generated code.
+class CodeRegion BASE_EMBEDDED {
+ public:
+ inline CodeRegion(Assembler* assm, const char *name) : assm_(assm) {
+ LOG(BeginCodeRegionEvent(this, assm, name));
+ }
+ inline ~CodeRegion() {
+ LOG(EndCodeRegionEvent(this, assm_));
+ }
+ private:
+ Assembler* assm_;
+};
+
+
//------------------------------------------------------------------------------
// External function
diff --git a/src/ast.cc b/src/ast.cc
index 2ba4421..1433fe7 100644
--- a/src/ast.cc
+++ b/src/ast.cc
@@ -332,7 +332,7 @@
void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
stream()->Add("(# %i ", that->min());
- if (that->max() == RegExpQuantifier::kInfinity) {
+ if (that->max() == RegExpTree::kInfinity) {
stream()->Add("- ");
} else {
stream()->Add("%i ", that->max());
@@ -381,4 +381,34 @@
}
+RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
+ : alternatives_(alternatives) {
+ RegExpTree* first_alternative = alternatives->at(0);
+ min_match_ = first_alternative->min_match();
+ max_match_ = first_alternative->max_match();
+ for (int i = 1; i < alternatives->length(); i++) {
+ RegExpTree* alternative = alternatives->at(i);
+ min_match_ = Min(min_match_, alternative->min_match());
+ max_match_ = Max(max_match_, alternative->max_match());
+ }
+}
+
+
+RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
+ : nodes_(nodes) {
+ min_match_ = 0;
+ max_match_ = 0;
+ for (int i = 0; i < nodes->length(); i++) {
+ RegExpTree* node = nodes->at(i);
+ min_match_ += node->min_match();
+ int node_max_match = node->max_match();
+ if (kInfinity - max_match_ < node_max_match) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ += node->max_match();
+ }
+ }
+}
+
+
} } // namespace v8::internal
diff --git a/src/ast.h b/src/ast.h
index 528c4f5..150506b 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -81,7 +81,7 @@
V(Throw) \
V(Property) \
V(Call) \
- V(CallEval) \
+ V(CallEval) \
V(CallNew) \
V(CallRuntime) \
V(UnaryOperation) \
@@ -409,15 +409,18 @@
class WithEnterStatement: public Statement {
public:
- explicit WithEnterStatement(Expression* expression)
- : expression_(expression) { }
+ explicit WithEnterStatement(Expression* expression, bool is_catch_block)
+ : expression_(expression), is_catch_block_(is_catch_block) { }
virtual void Accept(AstVisitor* v);
Expression* expression() const { return expression_; }
+ bool is_catch_block() const { return is_catch_block_; }
+
private:
Expression* expression_;
+ bool is_catch_block_;
};
@@ -700,7 +703,7 @@
};
// An array literal has a literals object that is used
-// used for minimizing the work when contructing it at runtime.
+// for minimizing the work when constructing it at runtime.
class ArrayLiteral: public Expression {
public:
ArrayLiteral(Handle<FixedArray> literals,
@@ -1213,11 +1216,14 @@
class RegExpTree: public ZoneObject {
public:
+ static const int kInfinity = kMaxInt;
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) = 0;
virtual bool IsTextElement() { return false; }
+ virtual int min_match() = 0;
+ virtual int max_match() = 0;
virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString();
#define MAKE_ASTYPE(Name) \
@@ -1230,47 +1236,37 @@
class RegExpDisjunction: public RegExpTree {
public:
- explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
- : alternatives_(alternatives) { }
+ explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
ZoneList<RegExpTree*>* alternatives_;
+ int min_match_;
+ int max_match_;
};
class RegExpAlternative: public RegExpTree {
public:
- explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
+ explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
-};
-
-
-class RegExpText: public RegExpTree {
- public:
- RegExpText() : elements_(2) { }
- virtual void* Accept(RegExpVisitor* visitor, void* data);
- virtual RegExpNode* ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success);
- virtual RegExpText* AsText();
- virtual bool IsText();
- virtual bool IsTextElement() { return true; }
- virtual void AppendToText(RegExpText* text);
- void AddElement(TextElement elm) { elements_.Add(elm); }
- ZoneList<TextElement>* elements() { return &elements_; }
- private:
- ZoneList<TextElement> elements_;
+ int min_match_;
+ int max_match_;
};
@@ -1290,33 +1286,73 @@
RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
Type type() { return type_; }
private:
Type type_;
};
+class CharacterSet BASE_EMBEDDED {
+ public:
+ explicit CharacterSet(uc16 standard_set_type)
+ : ranges_(NULL),
+ standard_set_type_(standard_set_type) {}
+ explicit CharacterSet(ZoneList<CharacterRange>* ranges)
+ : ranges_(ranges),
+ standard_set_type_(0) {}
+ ZoneList<CharacterRange>* ranges();
+ uc16 standard_set_type() { return standard_set_type_; }
+ void set_standard_set_type(uc16 special_set_type) {
+ standard_set_type_ = special_set_type;
+ }
+ bool is_standard() { return standard_set_type_ != 0; }
+ private:
+ ZoneList<CharacterRange>* ranges_;
+ // If non-zero, the value represents a standard set (e.g., all whitespace
+ // characters) without having to expand the ranges.
+ uc16 standard_set_type_;
+};
+
+
class RegExpCharacterClass: public RegExpTree {
public:
RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
- : ranges_(ranges),
+ : set_(ranges),
is_negated_(is_negated) { }
explicit RegExpCharacterClass(uc16 type)
- : ranges_(new ZoneList<CharacterRange>(2)),
- is_negated_(false) {
- CharacterRange::AddClassEscape(type, ranges_);
- }
+ : set_(type),
+ is_negated_(false) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return 1; }
+ virtual int max_match() { return 1; }
virtual void AppendToText(RegExpText* text);
- ZoneList<CharacterRange>* ranges() { return ranges_; }
+ CharacterSet character_set() { return set_; }
+ // TODO(lrn): Remove need for complex version of is_standard that
+ // recognizes a mangled standard set and just do { return set_.is_standard(); }
+ bool is_standard();
+ // Returns a value representing the standard character set if is_standard()
+ // returns true.
+ // Currently used values are:
+ // s : unicode whitespace
+ // S : unicode non-whitespace
+ // w : ASCII word character (digit, letter, underscore)
+ // W : anything except an ASCII word character
+ // d : ASCII digit
+ // D : anything except an ASCII digit
+ // . : anything except a unicode newline
+ // * : All characters
+ uc16 standard_type() { return set_.standard_set_type(); }
+ ZoneList<CharacterRange>* ranges() { return set_.ranges(); }
bool is_negated() { return is_negated_; }
private:
- ZoneList<CharacterRange>* ranges_;
+ CharacterSet set_;
bool is_negated_;
};
@@ -1330,20 +1366,53 @@
virtual RegExpAtom* AsAtom();
virtual bool IsAtom();
virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return data_.length(); }
+ virtual int max_match() { return data_.length(); }
virtual void AppendToText(RegExpText* text);
Vector<const uc16> data() { return data_; }
+ int length() { return data_.length(); }
private:
Vector<const uc16> data_;
};
+class RegExpText: public RegExpTree {
+ public:
+ RegExpText() : elements_(2), length_(0) {}
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpText* AsText();
+ virtual bool IsText();
+ virtual bool IsTextElement() { return true; }
+ virtual int min_match() { return length_; }
+ virtual int max_match() { return length_; }
+ virtual void AppendToText(RegExpText* text);
+ void AddElement(TextElement elm) {
+ elements_.Add(elm);
+ length_ += elm.length();
+ };
+ ZoneList<TextElement>* elements() { return &elements_; }
+ private:
+ ZoneList<TextElement> elements_;
+ int length_;
+};
+
+
class RegExpQuantifier: public RegExpTree {
public:
RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
: min_(min),
max_(max),
is_greedy_(is_greedy),
- body_(body) { }
+ body_(body),
+ min_match_(min * body->min_match()) {
+ if (max > 0 && body->max_match() > kInfinity / max) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ = max * body->max_match();
+ }
+ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
@@ -1355,18 +1424,19 @@
RegExpNode* on_success);
virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier();
+ virtual int min_match() { return min_match_; }
+ virtual int max_match() { return max_match_; }
int min() { return min_; }
int max() { return max_; }
bool is_greedy() { return is_greedy_; }
RegExpTree* body() { return body_; }
- // We just use a very large integer value as infinity because 2^30
- // is infinite in practice.
- static const int kInfinity = (1 << 30);
private:
int min_;
int max_;
bool is_greedy_;
RegExpTree* body_;
+ int min_match_;
+ int max_match_;
};
@@ -1389,6 +1459,8 @@
RegExpNode* on_success);
virtual RegExpCapture* AsCapture();
virtual bool IsCapture();
+ virtual int min_match() { return body_->min_match(); }
+ virtual int max_match() { return body_->max_match(); }
RegExpTree* body() { return body_; }
int index() { return index_; }
inline CaptureAvailability available() { return available_; }
@@ -1414,6 +1486,8 @@
RegExpNode* on_success);
virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
private:
@@ -1431,6 +1505,8 @@
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
+ virtual int min_match() { return capture_->min_match(); }
+ virtual int max_match() { return capture_->max_match(); }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
@@ -1446,6 +1522,8 @@
RegExpNode* on_success);
virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty();
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return 0; }
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
static RegExpEmpty kInstance;
diff --git a/src/builtins-ia32.cc b/src/builtins-ia32.cc
index 807a4bb..d773c97 100644
--- a/src/builtins-ia32.cc
+++ b/src/builtins-ia32.cc
@@ -615,6 +615,10 @@
// Use inline caching to speed up access to arguments.
Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
__ call(ic, RelocInfo::CODE_TARGET);
+ // It is important that we do not have a test instruction after the
+ // call. A test instruction after the call is used to indicate that
+ // we have generated an inline version of the keyed load. In this
+ // case, we know that we are not generating a test instruction next.
// Remove IC arguments from the stack and push the nth argument.
__ add(Operand(esp), Immediate(2 * kPointerSize));
diff --git a/src/bytecodes-irregexp.h b/src/bytecodes-irregexp.h
index 64a65cb..5ffc4b3 100644
--- a/src/bytecodes-irregexp.h
+++ b/src/bytecodes-irregexp.h
@@ -51,23 +51,28 @@
V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
-V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
-V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
-V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
-V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
-V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
-V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
-V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
-V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
-V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
-V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
-V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
-V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
-V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
-V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
-V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
-V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
+V(LOAD_2_CURRENT_CHARS, 19, 9) /* load offset32 addr32 */ \
+V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 5) /* load offset32 */ \
+V(LOAD_4_CURRENT_CHARS, 21, 9) /* load offset32 addr32 */ \
+V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 5) /* load offset32 */ \
+V(CHECK_CHAR, 23, 9) /* check_char uint32 addr32 */ \
+V(CHECK_NOT_CHAR, 24, 9) /* check_not_char uint32 addr32 */ \
+V(AND_CHECK_CHAR, 25, 13) /* and_check_char uint32 uint32 addr32 */ \
+V(AND_CHECK_NOT_CHAR, 26, 13) /* and_check_not_char uint32 uint32 addr32 */ \
+V(MINUS_AND_CHECK_NOT_CHAR, 27, 11) /* minus_and_check_not_char uc16 uc16...*/ \
+V(CHECK_LT, 28, 7) /* check_lt uc16 addr32 */ \
+V(CHECK_GT, 29, 7) /* check_gt uc16 addr32 */ \
+V(CHECK_NOT_BACK_REF, 30, 6) /* check_not_back_ref capture_idx addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE, 31, 6) /* check_not_back_ref_no_case captu... */ \
+V(CHECK_NOT_REGS_EQUAL, 32, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
+V(LOOKUP_MAP1, 33, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
+V(LOOKUP_MAP2, 34, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
+V(LOOKUP_MAP8, 35, 99) /* l_map8 start16 byte_map addr32* */ \
+V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
+V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \
+V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \
+V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start addr32 */ \
+V(CHECK_GREEDY, 40, 5) /* check_greedy addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
diff --git a/src/codegen-arm.cc b/src/codegen-arm.cc
index 712a989..e0cae7f 100644
--- a/src/codegen-arm.cc
+++ b/src/codegen-arm.cc
@@ -1043,7 +1043,7 @@
}
// Record the position for debugging purposes.
- __ RecordPosition(position);
+ CodeForSourcePosition(position);
// Use the shared code stub to call the function.
CallFunctionStub call_function(args->length());
@@ -1074,7 +1074,7 @@
void CodeGenerator::VisitBlock(Block* node) {
Comment cmnt(masm_, "[ Block");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
VisitStatements(node->statements());
__ bind(node->break_target());
@@ -1094,6 +1094,7 @@
void CodeGenerator::VisitDeclaration(Declaration* node) {
Comment cmnt(masm_, "[ Declaration");
+ CodeForStatement(node);
Variable* var = node->proxy()->var();
ASSERT(var != NULL); // must have been resolved
Slot* slot = var->slot();
@@ -1159,7 +1160,7 @@
void CodeGenerator::VisitExpressionStatement(ExpressionStatement* node) {
Comment cmnt(masm_, "[ ExpressionStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
Expression* expression = node->expression();
expression->MarkAsStatement();
Load(expression);
@@ -1169,6 +1170,7 @@
void CodeGenerator::VisitEmptyStatement(EmptyStatement* node) {
Comment cmnt(masm_, "// EmptyStatement");
+ CodeForStatement(node);
// nothing to do
}
@@ -1180,7 +1182,7 @@
bool has_then_stm = node->HasThenStatement();
bool has_else_stm = node->HasElseStatement();
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
Label exit;
if (has_then_stm && has_else_stm) {
@@ -1245,7 +1247,7 @@
void CodeGenerator::VisitContinueStatement(ContinueStatement* node) {
Comment cmnt(masm_, "[ ContinueStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
CleanStack(break_stack_height_ - node->target()->break_stack_height());
__ b(node->target()->continue_target());
}
@@ -1253,7 +1255,7 @@
void CodeGenerator::VisitBreakStatement(BreakStatement* node) {
Comment cmnt(masm_, "[ BreakStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
CleanStack(break_stack_height_ - node->target()->break_stack_height());
__ b(node->target()->break_target());
}
@@ -1261,7 +1263,7 @@
void CodeGenerator::VisitReturnStatement(ReturnStatement* node) {
Comment cmnt(masm_, "[ ReturnStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
Load(node->expression());
// Move the function result into r0.
frame_->Pop(r0);
@@ -1272,9 +1274,13 @@
void CodeGenerator::VisitWithEnterStatement(WithEnterStatement* node) {
Comment cmnt(masm_, "[ WithEnterStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
Load(node->expression());
- __ CallRuntime(Runtime::kPushContext, 1);
+ if (node->is_catch_block()) {
+ __ CallRuntime(Runtime::kPushCatchContext, 1);
+ } else {
+ __ CallRuntime(Runtime::kPushContext, 1);
+ }
if (kDebug) {
Label verified_true;
__ cmp(r0, Operand(cp));
@@ -1289,6 +1295,7 @@
void CodeGenerator::VisitWithExitStatement(WithExitStatement* node) {
Comment cmnt(masm_, "[ WithExitStatement");
+ CodeForStatement(node);
// Pop context.
__ ldr(cp, ContextOperand(cp, Context::PREVIOUS_INDEX));
// Update context local.
@@ -1355,7 +1362,7 @@
void CodeGenerator::VisitSwitchStatement(SwitchStatement* node) {
Comment cmnt(masm_, "[ SwitchStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
Load(node->tag());
@@ -1423,7 +1430,7 @@
void CodeGenerator::VisitLoopStatement(LoopStatement* node) {
Comment cmnt(masm_, "[ LoopStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
// simple condition analysis
@@ -1463,7 +1470,7 @@
// Record source position of the statement as this code which is after the
// code for the body actually belongs to the loop statement and not the
// body.
- if (FLAG_debug_info) __ RecordPosition(node->statement_pos());
+ CodeForStatement(node);
ASSERT(node->type() == LoopStatement::FOR_LOOP);
Visit(node->next());
}
@@ -1495,7 +1502,7 @@
void CodeGenerator::VisitForInStatement(ForInStatement* node) {
Comment cmnt(masm_, "[ ForInStatement");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
// We keep stuff on the stack while the body is executing.
// Record it, so that a break/continue crossing this statement
@@ -1683,6 +1690,7 @@
void CodeGenerator::VisitTryCatch(TryCatch* node) {
Comment cmnt(masm_, "[ TryCatch");
+ CodeForStatement(node);
Label try_block, exit;
@@ -1779,6 +1787,7 @@
void CodeGenerator::VisitTryFinally(TryFinally* node) {
Comment cmnt(masm_, "[ TryFinally");
+ CodeForStatement(node);
// State: Used to keep track of reason for entering the finally
// block. Should probably be extended to hold information for
@@ -1920,7 +1929,7 @@
void CodeGenerator::VisitDebuggerStatement(DebuggerStatement* node) {
Comment cmnt(masm_, "[ DebuggerStatament");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
__ CallRuntime(Runtime::kDebugBreak, 0);
// Ignore the return value.
}
@@ -2228,7 +2237,7 @@
void CodeGenerator::VisitAssignment(Assignment* node) {
Comment cmnt(masm_, "[ Assignment");
- if (FLAG_debug_info) RecordStatementPosition(node);
+ CodeForStatement(node);
Reference target(this, node->target());
if (target.is_illegal()) return;
@@ -2259,7 +2268,7 @@
// Assignment ignored - leave the value on the stack.
} else {
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
if (node->op() == Token::INIT_CONST) {
// Dynamic constant initializations must use the function context
// and initialize the actual constant declared. Dynamic variable
@@ -2276,7 +2285,7 @@
Comment cmnt(masm_, "[ Throw");
Load(node->exception());
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ CallRuntime(Runtime::kThrow, 1);
frame_->Push(r0);
}
@@ -2295,7 +2304,7 @@
ZoneList<Expression*>* args = node->arguments();
- RecordStatementPosition(node);
+ CodeForStatement(node);
// Standard function call.
// Check if the function is a variable or a property.
@@ -2331,7 +2340,7 @@
// Setup the receiver register and call the IC initialization code.
Handle<Code> stub = ComputeCallInitialize(args->length());
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ Call(stub, RelocInfo::CODE_TARGET_CONTEXT);
__ ldr(cp, frame_->Context());
// Remove the function from the stack.
@@ -2378,7 +2387,7 @@
// Set the receiver register and call the IC initialization code.
Handle<Code> stub = ComputeCallInitialize(args->length());
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ Call(stub, RelocInfo::CODE_TARGET);
__ ldr(cp, frame_->Context());
@@ -2432,7 +2441,7 @@
ZoneList<Expression*>* args = node->arguments();
Expression* function = node->expression();
- RecordStatementPosition(node);
+ CodeForStatement(node);
// Prepare stack for call to resolved function.
Load(function);
@@ -2462,7 +2471,7 @@
__ str(r1, MemOperand(sp, args->length() * kPointerSize));
// Call the function.
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
CallFunctionStub call_function(args->length());
__ CallStub(&call_function);
@@ -2476,6 +2485,7 @@
void CodeGenerator::VisitCallNew(CallNew* node) {
Comment cmnt(masm_, "[ CallNew");
+ CodeForStatement(node);
// According to ECMA-262, section 11.2.2, page 44, the function
// expression in new calls must be evaluated before the
@@ -2501,7 +2511,7 @@
// Call the construct call builtin that handles allocation and
// constructor invocation.
- __ RecordPosition(RelocInfo::POSITION);
+ CodeForSourcePosition(node->position());
__ Call(Handle<Code>(Builtins::builtin(Builtins::JSConstructCall)),
RelocInfo::CONSTRUCT_CALL);
@@ -2567,6 +2577,19 @@
}
+// Conditionally emits a call to Runtime::kLog for a three-argument log
+// request and always leaves the undefined value on the frame as the result.
+void CodeGenerator::GenerateLog(ZoneList<Expression*>* args) {
+ // See comment in CodeGenerator::GenerateLog in codegen-ia32.cc.
+ ASSERT_EQ(args->length(), 3);
+ // args->at(0) only decides whether the call is generated; it is not loaded.
+ if (ShouldGenerateLog(args->at(0))) {
+ Load(args->at(1));
+ Load(args->at(2));
+ __ CallRuntime(Runtime::kLog, 2);
+ }
+ // Push undefined via r0 so that a value is left on the frame either way.
+ __ mov(r0, Operand(Factory::undefined_value()));
+ frame_->Push(r0);
+}
+
+
void CodeGenerator::GenerateIsNonNegativeSmi(ZoneList<Expression*>* args) {
ASSERT(args->length() == 1);
Load(args->at(0));
@@ -3207,15 +3230,6 @@
}
-void CodeGenerator::RecordStatementPosition(Node* node) {
- if (FLAG_debug_info) {
- int statement_pos = node->statement_pos();
- if (statement_pos == RelocInfo::kNoPosition) return;
- __ RecordStatementPosition(statement_pos);
- }
-}
-
-
#undef __
#define __ masm->
@@ -3243,7 +3257,7 @@
VirtualFrame* frame = cgen_->frame();
Property* property = expression_->AsProperty();
if (property != NULL) {
- __ RecordPosition(property->position());
+ cgen_->CodeForSourcePosition(property->position());
}
switch (type_) {
@@ -3281,6 +3295,9 @@
case KEYED: {
// TODO(1241834): Make sure that this it is safe to ignore the
// distinction between expressions in a typeof and not in a typeof.
+
+ // TODO(181): Implement inlined version of array indexing once
+ // loop nesting is properly tracked on ARM.
Comment cmnt(masm, "[ Load from keyed Property");
ASSERT(property != NULL);
Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
@@ -3309,7 +3326,7 @@
VirtualFrame* frame = cgen_->frame();
Property* property = expression_->AsProperty();
if (property != NULL) {
- __ RecordPosition(property->position());
+ cgen_->CodeForSourcePosition(property->position());
}
switch (type_) {
@@ -3412,7 +3429,7 @@
Comment cmnt(masm, "[ Store to keyed Property");
Property* property = expression_->AsProperty();
ASSERT(property != NULL);
- __ RecordPosition(property->position());
+ cgen_->CodeForSourcePosition(property->position());
// Call IC code.
Handle<Code> ic(Builtins::builtin(Builtins::KeyedStoreIC_Initialize));
diff --git a/src/codegen-arm.h b/src/codegen-arm.h
index 904a3b7..18f7a92 100644
--- a/src/codegen-arm.h
+++ b/src/codegen-arm.h
@@ -195,6 +195,8 @@
Handle<Script> script,
bool is_eval);
+ static bool ShouldGenerateLog(Expression* type);
+
static void SetFunctionInfo(Handle<JSFunction> fun,
int length,
int function_token_position,
@@ -324,6 +326,8 @@
// Fast support for object equality testing.
void GenerateObjectEquals(ZoneList<Expression*>* args);
+ void GenerateLog(ZoneList<Expression*>* args);
+
// Methods and constants for fast case switch statement support.
//
// Only allow fast-case switch if the range of labels is at most
@@ -376,10 +380,11 @@
bool TryGenerateFastCaseSwitchStatement(SwitchStatement* node);
- // Bottle-neck interface to call the Assembler to generate the statement
- // position. This allows us to easily control whether statement positions
- // should be generated or not.
- void RecordStatementPosition(Node* node);
+ // Methods used to indicate the source position for which code is being
+ // generated. Source positions are collected by the assembler and emitted
+ // with the relocation information.
+ void CodeForStatement(Node* node);
+ void CodeForSourcePosition(int pos);
bool is_eval_; // Tells whether code is generated for eval.
Handle<Script> script_;
diff --git a/src/codegen-ia32.cc b/src/codegen-ia32.cc
index 8e5420b..17e4a8d 100644
--- a/src/codegen-ia32.cc
+++ b/src/codegen-ia32.cc
@@ -186,7 +186,7 @@
void CodeGenerator::GenCode(FunctionLiteral* fun) {
// Record the position for debugging purposes.
- __ RecordPosition(fun->start_position());
+ CodeForSourcePosition(fun->start_position());
ZoneList<Statement*>* body = fun->body();
@@ -1322,14 +1322,14 @@
// Call the function just below TOS on the stack with the given
// arguments. The receiver is the TOS.
void CodeGenerator::CallWithArguments(ZoneList<Expression*>* args,
- int position) {
+ int position) {
// Push the arguments ("left-to-right") on the stack.
for (int i = 0; i < args->length(); i++) {
Load(args->at(i));
}
// Record the position for debugging purposes.
- __ RecordPosition(position);
+ CodeForSourcePosition(position);
// Use the shared code stub to call the function.
CallFunctionStub call_function(args->length());
@@ -1365,7 +1365,7 @@
void CodeGenerator::VisitBlock(Block* node) {
Comment cmnt(masm_, "[ Block");
- RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
VisitStatements(node->statements());
__ bind(node->break_target());
@@ -1383,6 +1383,7 @@
void CodeGenerator::VisitDeclaration(Declaration* node) {
Comment cmnt(masm_, "[ Declaration");
+ CodeForStatement(node);
Variable* var = node->proxy()->var();
ASSERT(var != NULL); // must have been resolved
Slot* slot = var->slot();
@@ -1444,7 +1445,7 @@
void CodeGenerator::VisitExpressionStatement(ExpressionStatement* node) {
Comment cmnt(masm_, "[ ExpressionStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
Expression* expression = node->expression();
expression->MarkAsStatement();
Load(expression);
@@ -1455,6 +1456,7 @@
void CodeGenerator::VisitEmptyStatement(EmptyStatement* node) {
Comment cmnt(masm_, "// EmptyStatement");
+ CodeForStatement(node);
// nothing to do
}
@@ -1466,7 +1468,7 @@
bool has_then_stm = node->HasThenStatement();
bool has_else_stm = node->HasElseStatement();
- RecordStatementPosition(node);
+ CodeForStatement(node);
Label exit;
if (has_then_stm && has_else_stm) {
Label then;
@@ -1528,7 +1530,7 @@
void CodeGenerator::VisitContinueStatement(ContinueStatement* node) {
Comment cmnt(masm_, "[ ContinueStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
CleanStack(break_stack_height_ - node->target()->break_stack_height());
__ jmp(node->target()->continue_target());
}
@@ -1536,7 +1538,7 @@
void CodeGenerator::VisitBreakStatement(BreakStatement* node) {
Comment cmnt(masm_, "[ BreakStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
CleanStack(break_stack_height_ - node->target()->break_stack_height());
__ jmp(node->target()->break_target());
}
@@ -1544,7 +1546,7 @@
void CodeGenerator::VisitReturnStatement(ReturnStatement* node) {
Comment cmnt(masm_, "[ ReturnStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
Load(node->expression());
// Move the function result into eax
@@ -1582,9 +1584,13 @@
void CodeGenerator::VisitWithEnterStatement(WithEnterStatement* node) {
Comment cmnt(masm_, "[ WithEnterStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
Load(node->expression());
- __ CallRuntime(Runtime::kPushContext, 1);
+ if (node->is_catch_block()) {
+ __ CallRuntime(Runtime::kPushCatchContext, 1);
+ } else {
+ __ CallRuntime(Runtime::kPushContext, 1);
+ }
if (kDebug) {
Label verified_true;
@@ -1602,6 +1608,7 @@
void CodeGenerator::VisitWithExitStatement(WithExitStatement* node) {
Comment cmnt(masm_, "[ WithExitStatement");
+ CodeForStatement(node);
// Pop context.
__ mov(esi, ContextOperand(esi, Context::PREVIOUS_INDEX));
// Update context local.
@@ -1684,7 +1691,7 @@
void CodeGenerator::VisitSwitchStatement(SwitchStatement* node) {
Comment cmnt(masm_, "[ SwitchStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
Load(node->tag());
@@ -1751,7 +1758,7 @@
void CodeGenerator::VisitLoopStatement(LoopStatement* node) {
Comment cmnt(masm_, "[ LoopStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
node->set_break_stack_height(break_stack_height_);
// simple condition analysis
@@ -1794,8 +1801,7 @@
// Record source position of the statement as this code which is after the
// code for the body actually belongs to the loop statement and not the
// body.
- RecordStatementPosition(node);
- __ RecordPosition(node->statement_pos());
+ CodeForStatement(node);
ASSERT(node->type() == LoopStatement::FOR_LOOP);
Visit(node->next());
}
@@ -1824,7 +1830,7 @@
void CodeGenerator::VisitForInStatement(ForInStatement* node) {
Comment cmnt(masm_, "[ ForInStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
// We keep stuff on the stack while the body is executing.
// Record it, so that a break/continue crossing this statement
@@ -2012,6 +2018,7 @@
void CodeGenerator::VisitTryCatch(TryCatch* node) {
Comment cmnt(masm_, "[ TryCatch");
+ CodeForStatement(node);
Label try_block, exit;
@@ -2057,10 +2064,9 @@
}
// Generate code for the statements in the try block.
- bool was_inside_try = is_inside_try_;
- is_inside_try_ = true;
- VisitStatements(node->try_block()->statements());
- is_inside_try_ = was_inside_try;
+ { TempAssign<bool> temp(&is_inside_try_, true);
+ VisitStatements(node->try_block()->statements());
+ }
// Stop the introduced shadowing and count the number of required unlinks.
// After shadowing stops, the original labels are unshadowed and the
@@ -2117,6 +2123,7 @@
void CodeGenerator::VisitTryFinally(TryFinally* node) {
Comment cmnt(masm_, "[ TryFinally");
+ CodeForStatement(node);
// State: Used to keep track of reason for entering the finally
// block. Should probably be extended to hold information for
@@ -2155,10 +2162,9 @@
}
// Generate code for the statements in the try block.
- bool was_inside_try = is_inside_try_;
- is_inside_try_ = true;
- VisitStatements(node->try_block()->statements());
- is_inside_try_ = was_inside_try;
+ { TempAssign<bool> temp(&is_inside_try_, true);
+ VisitStatements(node->try_block()->statements());
+ }
// Stop the introduced shadowing and count the number of required unlinks.
// After shadowing stops, the original labels are unshadowed and the
@@ -2254,7 +2260,7 @@
void CodeGenerator::VisitDebuggerStatement(DebuggerStatement* node) {
Comment cmnt(masm_, "[ DebuggerStatement");
- RecordStatementPosition(node);
+ CodeForStatement(node);
__ CallRuntime(Runtime::kDebugBreak, 0);
// Ignore the return value.
}
@@ -2610,8 +2616,8 @@
void CodeGenerator::VisitAssignment(Assignment* node) {
Comment cmnt(masm_, "[ Assignment");
+ CodeForStatement(node);
- RecordStatementPosition(node);
Reference target(this, node->target());
if (target.is_illegal()) return;
@@ -2638,7 +2644,7 @@
node->op() != Token::INIT_VAR && node->op() != Token::INIT_CONST) {
// Assignment ignored - leave the value on the stack.
} else {
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
if (node->op() == Token::INIT_CONST) {
// Dynamic constant initializations must use the function context
// and initialize the actual constant declared. Dynamic variable
@@ -2653,9 +2659,9 @@
void CodeGenerator::VisitThrow(Throw* node) {
Comment cmnt(masm_, "[ Throw");
+ CodeForStatement(node);
Load(node->exception());
- __ RecordPosition(node->position());
__ CallRuntime(Runtime::kThrow, 1);
frame_->Push(eax);
}
@@ -2674,7 +2680,7 @@
ZoneList<Expression*>* args = node->arguments();
- RecordStatementPosition(node);
+ CodeForStatement(node);
// Check if the function is a variable or a property.
Expression* function = node->expression();
@@ -2711,7 +2717,7 @@
Handle<Code> stub = (loop_nesting() > 0)
? ComputeCallInitializeInLoop(args->length())
: ComputeCallInitialize(args->length());
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ call(stub, RelocInfo::CODE_TARGET_CONTEXT);
__ mov(esi, frame_->Context());
@@ -2757,7 +2763,7 @@
Handle<Code> stub = (loop_nesting() > 0)
? ComputeCallInitializeInLoop(args->length())
: ComputeCallInitialize(args->length());
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ call(stub, RelocInfo::CODE_TARGET);
__ mov(esi, frame_->Context());
@@ -2800,6 +2806,7 @@
void CodeGenerator::VisitCallNew(CallNew* node) {
Comment cmnt(masm_, "[ CallNew");
+ CodeForStatement(node);
// According to ECMA-262, section 11.2.2, page 44, the function
// expression in new calls must be evaluated before the
@@ -2828,7 +2835,7 @@
// Call the construct call builtin that handles allocation and
// constructor invocation.
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
__ call(Handle<Code>(Builtins::builtin(Builtins::JSConstructCall)),
RelocInfo::CONSTRUCT_CALL);
// Discard the function and "push" the newly created object.
@@ -2846,7 +2853,7 @@
ZoneList<Expression*>* args = node->arguments();
Expression* function = node->expression();
- RecordStatementPosition(node);
+ CodeForStatement(node);
// Prepare stack for call to resolved function.
Load(function);
@@ -2873,7 +2880,7 @@
__ mov(Operand(esp, args->length() * kPointerSize), edx);
// Call the function.
- __ RecordPosition(node->position());
+ CodeForSourcePosition(node->position());
CallFunctionStub call_function(args->length());
__ CallStub(&call_function);
@@ -2893,6 +2900,25 @@
}
+// Emits code for a three-argument log request: an optional call to
+// Runtime::kLog followed by pushing undefined as the expression's value.
+void CodeGenerator::GenerateLog(ZoneList<Expression*>* args) {
+ // Conditionally generate a log call.
+ // Args:
+ // 0 (literal string): The type of logging (corresponds to the flags).
+ // This is used to determine whether or not to generate the log call.
+ // 1 (string): Format string. Access the string at argument index 2
+ // with '%2s' (see Logger::LogRuntime for all the formats).
+ // 2 (array): Arguments to the format string.
+ ASSERT_EQ(args->length(), 3);
+ if (ShouldGenerateLog(args->at(0))) {
+ // Only arguments 1 and 2 are loaded and passed to the runtime call.
+ Load(args->at(1));
+ Load(args->at(2));
+ __ CallRuntime(Runtime::kLog, 2);
+ }
+ // Finally, we're expected to leave a value on the top of the stack.
+ frame_->Push(Immediate(Factory::undefined_value()));
+}
+
+
void CodeGenerator::GenerateIsNonNegativeSmi(ZoneList<Expression*>* args) {
ASSERT(args->length() == 1);
Load(args->at(0));
@@ -3771,14 +3797,38 @@
}
-void CodeGenerator::RecordStatementPosition(Node* node) {
- if (FLAG_debug_info) {
- int pos = node->statement_pos();
- if (pos != RelocInfo::kNoPosition) {
- __ RecordStatementPosition(pos);
- }
+// Deferred code that calls the KeyedLoadIC_Initialize builtin and follows the
+// call with a test instruction recording the distance back to patch_site().
+// is_global selects the relocation mode used for the call.
+class DeferredReferenceGetKeyedValue: public DeferredCode {
+ public:
+ DeferredReferenceGetKeyedValue(CodeGenerator* generator, bool is_global)
+ : DeferredCode(generator), is_global_(is_global) {
+ set_comment("[ DeferredReferenceGetKeyedValue");
}
-}
+
+ virtual void Generate() {
+ Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
+ // Calculate the delta from the IC call instruction to the map
+ // check cmp instruction in the inlined version. This delta is
+ // stored in a test(eax, delta) instruction after the call so that
+ // we can find it in the IC initialization code and patch the cmp
+ // instruction. This means that we cannot allow test instructions
+ // after calls to KeyedLoadIC stubs in other places.
+ int delta_to_patch_site = __ SizeOfCodeGeneratedSince(patch_site());
+ if (is_global_) {
+ __ call(ic, RelocInfo::CODE_TARGET_CONTEXT);
+ } else {
+ __ call(ic, RelocInfo::CODE_TARGET);
+ }
+ // The delta is stored negated in the immediate of the test instruction.
+ __ test(eax, Immediate(-delta_to_patch_site));
+ __ IncrementCounter(&Counters::keyed_load_inline_miss, 1);
+ }
+
+ // Label marking the map check cmp instruction of the inlined load.
+ Label* patch_site() { return &patch_site_; }
+
+ private:
+ Label patch_site_;
+ bool is_global_;
+};
+
#undef __
@@ -3794,8 +3844,6 @@
ASSERT(proxy->AsVariable()->is_global());
return proxy->name();
} else {
- MacroAssembler* masm = cgen_->masm();
- __ RecordPosition(property->position());
Literal* raw_name = property->key()->AsLiteral();
ASSERT(raw_name != NULL);
return Handle<String>(String::cast(*raw_name->handle()));
@@ -3818,45 +3866,97 @@
}
case NAMED: {
- // TODO(1241834): Make sure that this it is safe to ignore the
- // distinction between expressions in a typeof and not in a typeof. If
- // there is a chance that reference errors can be thrown below, we
- // must distinguish between the two kinds of loads (typeof expression
- // loads must not throw a reference error).
+ // TODO(1241834): Make sure that it is safe to ignore the
+ // distinction between expressions in a typeof and not in a
+ // typeof. If there is a chance that reference errors can be
+ // thrown below, we must distinguish between the two kinds of
+ // loads (typeof expression loads must not throw a reference
+ // error).
Comment cmnt(masm, "[ Load from named Property");
Handle<String> name(GetName());
+ Variable* var = expression_->AsVariableProxy()->AsVariable();
Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
// Setup the name register.
__ mov(ecx, name);
-
- Variable* var = expression_->AsVariableProxy()->AsVariable();
if (var != NULL) {
ASSERT(var->is_global());
__ call(ic, RelocInfo::CODE_TARGET_CONTEXT);
} else {
__ call(ic, RelocInfo::CODE_TARGET);
}
- frame->Push(eax); // IC call leaves result in eax, push it out
+ // Push the result.
+ frame->Push(eax);
break;
}
case KEYED: {
- // TODO(1241834): Make sure that this it is safe to ignore the
- // distinction between expressions in a typeof and not in a typeof.
- Comment cmnt(masm, "[ Load from keyed Property");
- Property* property = expression_->AsProperty();
- ASSERT(property != NULL);
- __ RecordPosition(property->position());
- Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
-
+ // TODO(1241834): Make sure that it is safe to ignore the
+ // distinction between expressions in a typeof and not in a
+ // typeof.
Variable* var = expression_->AsVariableProxy()->AsVariable();
- if (var != NULL) {
- ASSERT(var->is_global());
- __ call(ic, RelocInfo::CODE_TARGET_CONTEXT);
+ bool is_global = var != NULL;
+ ASSERT(!is_global || var->is_global());
+ // Inline array load code if inside of a loop. We do not know
+ // the receiver map yet, so we initially generate the code with
+ // a check against an invalid map. In the inline cache code, we
+ // patch the map check if appropriate.
+ if (cgen_->loop_nesting() > 0) {
+ Comment cmnt(masm, "[ Inlined array index load");
+ DeferredReferenceGetKeyedValue* deferred =
+ new DeferredReferenceGetKeyedValue(cgen_, is_global);
+ // Load receiver and check that it is not a smi (only needed
+ // if this is not a load from the global context) and that it
+ // has the expected map.
+ __ mov(edx, Operand(esp, kPointerSize));
+ if (!is_global) {
+ __ test(edx, Immediate(kSmiTagMask));
+ __ j(zero, deferred->enter(), not_taken);
+ }
+ // Initially, use an invalid map. The map is patched in the IC
+ // initialization code.
+ __ bind(deferred->patch_site());
+ __ cmp(FieldOperand(edx, HeapObject::kMapOffset),
+ Immediate(Factory::null_value()));
+ __ j(not_equal, deferred->enter(), not_taken);
+ // Load key and check that it is a smi.
+ __ mov(eax, Operand(esp, 0));
+ __ test(eax, Immediate(kSmiTagMask));
+ __ j(not_zero, deferred->enter(), not_taken);
+ // Shift to get actual index value.
+ __ sar(eax, kSmiTagSize);
+ // Get the elements array from the receiver and check that it
+ // is not a dictionary.
+ __ mov(edx, FieldOperand(edx, JSObject::kElementsOffset));
+ __ cmp(FieldOperand(edx, HeapObject::kMapOffset),
+ Immediate(Factory::hash_table_map()));
+ __ j(equal, deferred->enter(), not_taken);
+ // Check that key is within bounds.
+ __ cmp(eax, FieldOperand(edx, Array::kLengthOffset));
+ __ j(above_equal, deferred->enter(), not_taken);
+ // Load and check that the result is not the hole.
+ __ mov(eax,
+ Operand(edx, eax, times_4, Array::kHeaderSize - kHeapObjectTag));
+ __ cmp(Operand(eax), Immediate(Factory::the_hole_value()));
+ __ j(equal, deferred->enter(), not_taken);
+ __ IncrementCounter(&Counters::keyed_load_inline, 1);
+ __ bind(deferred->exit());
} else {
- __ call(ic, RelocInfo::CODE_TARGET);
+ Comment cmnt(masm, "[ Load from keyed Property");
+ Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
+ if (is_global) {
+ __ call(ic, RelocInfo::CODE_TARGET_CONTEXT);
+ } else {
+ __ call(ic, RelocInfo::CODE_TARGET);
+ }
+ // Make sure that we do not have a test instruction after the
+ // call. A test instruction after the call is used to
+ // indicate that we have generated an inline version of the
+ // keyed load. The explicit nop instruction is here because
+ // the push that follows might be peep-hole optimized away.
+ __ nop();
}
- frame->Push(eax); // IC call leaves result in eax, push it out
+ // Push the result.
+ frame->Push(eax);
break;
}
@@ -3963,9 +4063,6 @@
case KEYED: {
Comment cmnt(masm, "[ Store to keyed Property");
- Property* property = expression_->AsProperty();
- ASSERT(property != NULL);
- __ RecordPosition(property->position());
// Call IC code.
Handle<Code> ic(Builtins::builtin(Builtins::KeyedStoreIC_Initialize));
// TODO(1222589): Make the IC grab the values from the stack.
diff --git a/src/codegen-ia32.h b/src/codegen-ia32.h
index a1525c7..ee1cfc1 100644
--- a/src/codegen-ia32.h
+++ b/src/codegen-ia32.h
@@ -201,6 +201,8 @@
Handle<Script> script,
bool is_eval);
+ static bool ShouldGenerateLog(Expression* type);
+
static void SetFunctionInfo(Handle<JSFunction> fun,
int length,
int function_token_position,
@@ -348,6 +350,8 @@
// Fast support for object equality testing.
void GenerateObjectEquals(ZoneList<Expression*>* args);
+ void GenerateLog(ZoneList<Expression*>* args);
+
// Methods and constants for fast case switch statement support.
//
@@ -400,11 +404,11 @@
// Returns true if the fast-case switch was generated, and false if not.
bool TryGenerateFastCaseSwitchStatement(SwitchStatement* node);
-
- // Bottle-neck interface to call the Assembler to generate the statement
- // position. This allows us to easily control whether statement positions
- // should be generated or not.
- void RecordStatementPosition(Node* node);
+ // Methods used to indicate the source position for which code is being
+ // generated. Source positions are collected by the assembler and emitted
+ // with the relocation information.
+ void CodeForStatement(Node* node);
+ void CodeForSourcePosition(int pos);
bool is_eval_; // Tells whether code is generated for eval.
Handle<Script> script_;
diff --git a/src/codegen.cc b/src/codegen.cc
index 4df51e7..265f6ba 100644
--- a/src/codegen.cc
+++ b/src/codegen.cc
@@ -40,9 +40,11 @@
DeferredCode::DeferredCode(CodeGenerator* generator)
: masm_(generator->masm()),
generator_(generator),
- statement_position_(masm_->last_statement_position()),
- position_(masm_->last_position()) {
+ statement_position_(masm_->current_statement_position()),
+ position_(masm_->current_position()) {
generator->AddDeferred(this);
+ ASSERT(statement_position_ != RelocInfo::kNoPosition);
+ ASSERT(position_ != RelocInfo::kNoPosition);
#ifdef DEBUG
comment_ = "";
#endif
@@ -54,9 +56,7 @@
DeferredCode* code = deferred_.RemoveLast();
MacroAssembler* masm = code->masm();
// Record position of deferred code stub.
- if (code->statement_position() != RelocInfo::kNoPosition) {
- masm->RecordStatementPosition(code->statement_position());
- }
+ masm->RecordStatementPosition(code->statement_position());
if (code->position() != RelocInfo::kNoPosition) {
masm->RecordPosition(code->position());
}
@@ -163,6 +163,19 @@
}
+// Decides whether a log call of the given type should be generated.
+// Returns true only when the logger is enabled, FLAG_log_regexp is set and
+// the type literal equals "regexp"; returns false in every other case.
+bool CodeGenerator::ShouldGenerateLog(Expression* type) {
+ ASSERT(type != NULL);
+ if (!Logger::is_enabled()) return false;
+ // NOTE(review): the result of AsLiteral() is dereferenced without a check;
+ // presumably callers always pass a string literal - confirm.
+ Handle<String> name = Handle<String>::cast(type->AsLiteral()->handle());
+ if (FLAG_log_regexp) {
+ static Vector<const char> kRegexp = CStrVector("regexp");
+ if (name->IsEqualTo(kRegexp))
+ return true;
+ }
+ return false;
+}
+
+
// Sets the function info on a function.
// The start_position points to the first '(' character after the function name
// in the full script source. When counting characters in the script source the
@@ -338,7 +351,9 @@
{&v8::internal::CodeGenerator::GenerateFastCharCodeAt,
"_FastCharCodeAt"},
{&v8::internal::CodeGenerator::GenerateObjectEquals,
- "_ObjectEquals"}
+ "_ObjectEquals"},
+ {&v8::internal::CodeGenerator::GenerateLog,
+ "_Log"}
};
Handle<String> name = node->name();
StringShape shape(*name);
@@ -464,6 +479,26 @@
}
+// Records the statement position of node with the assembler and then records
+// the same position as a plain source position. Does nothing unless
+// FLAG_debug_info is set and the node has a statement position.
+void CodeGenerator::CodeForStatement(Node* node) {
+ if (FLAG_debug_info) {
+ int pos = node->statement_pos();
+ if (pos != RelocInfo::kNoPosition) {
+ masm()->RecordStatementPosition(pos);
+ // Also record the statement position as the current source position.
+ CodeForSourcePosition(pos);
+ }
+ }
+}
+
+
+// Records pos with the assembler as a source position. Does nothing unless
+// FLAG_debug_info is set and pos is not RelocInfo::kNoPosition.
+void CodeGenerator::CodeForSourcePosition(int pos) {
+ if (FLAG_debug_info) {
+ if (pos != RelocInfo::kNoPosition) {
+ masm()->RecordPosition(pos);
+ }
+ }
+}
+
+
const char* RuntimeStub::GetName() {
return Runtime::FunctionForId(id_)->stub_name;
}
diff --git a/src/contexts.h b/src/contexts.h
index 30aa5bb..d6567dc 100644
--- a/src/contexts.h
+++ b/src/contexts.h
@@ -264,6 +264,12 @@
global_context()->set_out_of_memory(Heap::true_value());
}
+ // The exception holder is the object used as a with object in
+ // the implementation of a catch block.
+ // Returns true only if this context is a catch context and its
+ // extension is the given object.
+ bool is_exception_holder(Object* object) {
+ return IsCatchContext() && extension() == object;
+ }
+
#define GLOBAL_CONTEXT_FIELD_ACCESSORS(index, type, name) \
void set_##name(type* value) { \
ASSERT(IsGlobalContext()); \
diff --git a/src/d8.cc b/src/d8.cc
index fbc91d3..24eb722 100644
--- a/src/d8.cc
+++ b/src/d8.cc
@@ -29,6 +29,7 @@
#include <stdlib.h>
#include "d8.h"
+#include "d8-debug.h"
#include "debug.h"
#include "api.h"
#include "natives.h"
@@ -98,17 +99,21 @@
bool report_exceptions) {
HandleScope handle_scope;
TryCatch try_catch;
+ if (i::FLAG_debugger) {
+ // When debugging make exceptions appear to be uncaught.
+ try_catch.SetVerbose(true);
+ }
Handle<Script> script = Script::Compile(source, name);
if (script.IsEmpty()) {
// Print errors that happened during compilation.
- if (report_exceptions)
+ if (report_exceptions && !i::FLAG_debugger)
ReportException(&try_catch);
return false;
} else {
Handle<Value> result = script->Run();
if (result.IsEmpty()) {
// Print errors that happened during execution.
- if (report_exceptions)
+ if (report_exceptions && !i::FLAG_debugger)
ReportException(&try_catch);
return false;
} else {
@@ -212,6 +217,46 @@
}
+// Converts a debug event object to text by calling the "DebugEventToText"
+// function looked up on the utility context's global object. If the call
+// throws, the string form of the caught exception is returned instead.
+Handle<String> Shell::DebugEventToText(Handle<Object> event) {
+ HandleScope handle_scope;
+ Context::Scope context_scope(utility_context_);
+ Handle<Object> global = utility_context_->Global();
+ Handle<Value> fun = global->Get(String::New("DebugEventToText"));
+ TryCatch try_catch;
+ try_catch.SetVerbose(true);
+ static const int kArgc = 1;
+ Handle<Value> argv[kArgc] = { event };
+ Handle<Value> val = Handle<Function>::Cast(fun)->Call(global, kArgc, argv);
+ // Close() hands the result handle out of the local handle scope.
+ if (try_catch.HasCaught()) {
+ return handle_scope.Close(try_catch.Exception()->ToString());
+ } else {
+ return handle_scope.Close(Handle<String>::Cast(val));
+ }
+}
+
+
+// Calls the "DebugCommandToJSONRequest" function looked up on the utility
+// context's global object with the given command and returns its result.
+// NOTE(review): unlike DebugEventToText, no TryCatch or HandleScope is set up
+// here - confirm callers cope with an empty result if the call throws.
+Handle<Value> Shell::DebugCommandToJSONRequest(Handle<String> command) {
+ Context::Scope context_scope(utility_context_);
+ Handle<Object> global = utility_context_->Global();
+ Handle<Value> fun = global->Get(String::New("DebugCommandToJSONRequest"));
+ static const int kArgc = 1;
+ Handle<Value> argv[kArgc] = { command };
+ Handle<Value> val = Handle<Function>::Cast(fun)->Call(global, kArgc, argv);
+ return val;
+}
+
+
+// Calls the "DebugResponseDetails" function looked up on the utility
+// context's global object with the given response and returns the result
+// cast to an object handle.
+// NOTE(review): no TryCatch or HandleScope is set up here - confirm callers
+// cope with an empty result if the call throws.
+Handle<Object> Shell::DebugResponseDetails(Handle<String> response) {
+ Context::Scope context_scope(utility_context_);
+ Handle<Object> global = utility_context_->Global();
+ Handle<Value> fun = global->Get(String::New("DebugResponseDetails"));
+ static const int kArgc = 1;
+ Handle<Value> argv[kArgc] = { response };
+ Handle<Value> val = Handle<Function>::Cast(fun)->Call(global, kArgc, argv);
+ return Handle<Object>::Cast(val);
+}
+
+
int32_t* Counter::Bind(const char* name) {
int i;
for (i = 0; i < kMaxNameSize - 1 && name[i]; i++)
@@ -322,16 +367,16 @@
void Shell::OnExit() {
if (i::FLAG_dump_counters) {
- ::printf("+----------------------------------------+----------+\n");
- ::printf("| Name | Value |\n");
- ::printf("+----------------------------------------+----------+\n");
+ ::printf("+----------------------------------------+-------------+\n");
+ ::printf("| Name | Value |\n");
+ ::printf("+----------------------------------------+-------------+\n");
for (CounterMap::iterator i = counter_map_.begin();
i != counter_map_.end();
i++) {
Counter* counter = (*i).second;
- ::printf("| %-38s | %8i |\n", (*i).first, counter->value());
+ ::printf("| %-38s | %11i |\n", (*i).first, counter->value());
}
- ::printf("+----------------------------------------+----------+\n");
+ ::printf("+----------------------------------------+-------------+\n");
}
if (counters_file_ != NULL)
delete counters_file_;
@@ -388,7 +433,9 @@
Context::Scope context_scope(evaluation_context_);
for (int i = 1; i < argc; i++) {
char* str = argv[i];
- if (strcmp(str, "-f") == 0) {
+ if (strcmp(str, "--shell") == 0) {
+ run_shell = true;
+ } else if (strcmp(str, "-f") == 0) {
// Ignore any -f flags for compatibility with other stand-alone
// JavaScript engines.
continue;
@@ -415,6 +462,8 @@
return 1;
}
}
+ if (i::FLAG_debugger)
+ v8::Debug::AddDebugEventListener(HandleDebugEvent);
if (run_shell)
RunShell();
OnExit();
diff --git a/src/d8.h b/src/d8.h
index 371e529..4614378 100644
--- a/src/d8.h
+++ b/src/d8.h
@@ -88,12 +88,17 @@
static int Main(int argc, char* argv[]);
static Handle<Array> GetCompletions(Handle<String> text,
Handle<String> full);
+ static Handle<String> DebugEventToText(Handle<Object> event);
+ static Handle<Value> DebugCommandToJSONRequest(Handle<String> command);
+ static Handle<Object> DebugResponseDetails(Handle<String> response);
static Handle<Value> Print(const Arguments& args);
static Handle<Value> Quit(const Arguments& args);
static Handle<Value> Version(const Arguments& args);
static Handle<Value> Load(const Arguments& args);
+ static Handle<Context> utility_context() { return utility_context_; }
+
static const char* kHistoryFileName;
static const char* kPrompt;
private:
diff --git a/src/d8.js b/src/d8.js
index cf8b60c..61467b9 100644
--- a/src/d8.js
+++ b/src/d8.js
@@ -31,7 +31,11 @@
if (str.length > this.length)
return false;
return this.substr(0, str.length) == str;
-};
+}
+
+// Base 10 logarithm computed as a ratio of natural logarithms. The result is
+// a floating point approximation and may be slightly off for exact powers
+// of ten.
+function log10(num) {
+  var natural_log = Math.log(num);
+  var log_of_ten = Math.log(10);
+  return natural_log / log_of_ten;
+}
function ToInspectableObject(obj) {
if (!obj && typeof obj === 'object') {
@@ -68,3 +72,857 @@
}
return result;
}
+
+
+// Global object holding debugger related constants and state.
+const Debug = {};
+
+
+// Debug events which can occur in the V8 JavaScript engine. These originate
+// from the API include file debug.h.
+Debug.DebugEvent = { Break: 1,
+ Exception: 2,
+ NewFunction: 3,
+ BeforeCompile: 4,
+ AfterCompile: 5 };
+
+
+// The different types of scripts matching enum ScriptType in objects.h.
+Debug.ScriptType = { Native: 0,
+ Extension: 1,
+ Normal: 2 };
+
+
+// Current debug state. kNoFrame marks that no frame is selected and a
+// currentSourceLine of -1 marks that no source line is known.
+const kNoFrame = -1;
+Debug.State = {
+ currentFrame: kNoFrame,
+ currentSourceLine: -1
+}
+
+
+// Converts a debug event to the text shown to the user. Break and exception
+// events are supported; for these the current frame and source line in
+// Debug.State are updated as a side effect. Any other event type yields an
+// 'Unknown debug event' message.
+function DebugEventToText(event) {
+  var details = '';
+  var underline;
+  if (event.eventType() == Debug.DebugEvent.Break) {
+    // Build the break details.
+    if (event.breakPointsHit()) {
+      details += 'breakpoint';
+      if (event.breakPointsHit().length > 1) {
+        details += 's';
+      }
+      details += ' #';
+      for (var i = 0; i < event.breakPointsHit().length; i++) {
+        if (i > 0) {
+          details += ', #';
+        }
+        // Find the break point number. For break points originating from a
+        // script break point display the script break point number.
+        var break_point = event.breakPointsHit()[i];
+        var script_break_point = break_point.script_break_point();
+        if (script_break_point) {
+          details += script_break_point.number();
+        } else {
+          details += break_point.number();
+        }
+      }
+    } else {
+      details += 'break';
+    }
+    details += ' in ';
+    details += event.executionState().frame(0).invocationText();
+    details += ' at ';
+    details += event.executionState().frame(0).sourceAndPositionText();
+    details += '\n'
+    if (event.func().script()) {
+      // FrameSourceUnderline returns undefined when no source is available;
+      // do not append the text 'undefined' in that case.
+      underline = FrameSourceUnderline(event.executionState().frame(0));
+      if (underline) {
+        details += underline;
+      }
+    }
+    Debug.State.currentSourceLine =
+        event.executionState().frame(0).sourceLine();
+    Debug.State.currentFrame = 0;
+    return details;
+  } else if (event.eventType() == Debug.DebugEvent.Exception) {
+    if (event.uncaught_) {
+      details += 'Uncaught: ';
+    } else {
+      details += 'Exception: ';
+    }
+
+    details += '"';
+    details += event.exception();
+    details += '"';
+    if (event.executionState().frameCount() > 0) {
+      // The quoted exception text has already been appended above, so only
+      // the location is added here.
+      details += ' at ';
+      details += event.executionState().frame(0).sourceAndPositionText();
+      details += '\n';
+      underline = FrameSourceUnderline(event.executionState().frame(0));
+      if (underline) {
+        details += underline;
+      }
+      Debug.State.currentSourceLine =
+          event.executionState().frame(0).sourceLine();
+      Debug.State.currentFrame = 0;
+    } else {
+      details += ' (empty stack)';
+      Debug.State.currentSourceLine = -1;
+      Debug.State.currentFrame = kNoFrame;
+    }
+
+    return details;
+  }
+
+  return 'Unknown debug event ' + event.eventType();
+};
+
+
+// Returns the source line followed by a second line holding a caret (^)
+// under the given position. Returns undefined when there is no source text.
+function SourceUnderline(source_text, position) {
+  if (source_text) {
+    // Pad up to the caret. Tabs in the source are mirrored as tabs in the
+    // padding; every other character becomes a space.
+    var padding = '';
+    var column = 0;
+    while (column < position) {
+      padding += (source_text[column] == '\t') ? '\t' : ' ';
+      column++;
+    }
+
+    return source_text + '\n' + padding + '^';
+  }
+};
+
+
+// Returns the underlined source line for the location of a frame, or
+// undefined when the frame has no source location.
+function FrameSourceUnderline(frame) {
+  var source_location = frame.sourceLocation();
+  if (!source_location) {
+    return;
+  }
+  var line_text = source_location.sourceText();
+  var offset_in_line = source_location.position - source_location.start;
+  return SourceUnderline(line_text, offset_in_line);
+};
+
+
+// Parses a text command and returns the matching JSON request. The result
+// is undefined for commands which are handled without sending a request.
+function DebugCommandToJSONRequest(cmd_line) {
+  var debug_request = new DebugRequest(cmd_line);
+  return debug_request.JSONRequest();
+};
+
+
+// Constructor parsing a text command line. The resulting JSON request is
+// stored in this.request_ and is available through JSONRequest(). Throws an
+// Error for an unknown command.
+function DebugRequest(cmd_line) {
+ // If the very first character is a { assume that a JSON request has been
+ // entered as a command. Converting that to a JSON request is trivial.
+ if (cmd_line && cmd_line.length > 0 && cmd_line.charAt(0) == '{') {
+ this.request_ = cmd_line;
+ return;
+ }
+
+ // Trim string for leading and trailing whitespace.
+ cmd_line = cmd_line.replace(/^\s+|\s+$/g, '');
+
+ // Find the command. Everything after the first space is the (trimmed)
+ // argument string.
+ var pos = cmd_line.indexOf(' ');
+ var cmd;
+ var args;
+ if (pos == -1) {
+ cmd = cmd_line;
+ args = '';
+ } else {
+ cmd = cmd_line.slice(0, pos);
+ args = cmd_line.slice(pos).replace(/^\s+|\s+$/g, '');
+ }
+
+ // Switch on command.
+ switch (cmd) {
+ case 'continue':
+ case 'c':
+ this.request_ = this.continueCommandToJSONRequest_(args);
+ break;
+
+ case 'step':
+ case 's':
+ this.request_ = this.stepCommandToJSONRequest_(args);
+ break;
+
+ case 'backtrace':
+ case 'bt':
+ this.request_ = this.backtraceCommandToJSONRequest_(args);
+ break;
+
+ case 'frame':
+ case 'f':
+ this.request_ = this.frameCommandToJSONRequest_(args);
+ break;
+
+ case 'print':
+ case 'p':
+ this.request_ = this.printCommandToJSONRequest_(args);
+ break;
+
+ case 'source':
+ this.request_ = this.sourceCommandToJSONRequest_(args);
+ break;
+
+ case 'scripts':
+ this.request_ = this.scriptsCommandToJSONRequest_(args);
+ break;
+
+ case 'break':
+ case 'b':
+ this.request_ = this.breakCommandToJSONRequest_(args);
+ break;
+
+ case 'clear':
+ this.request_ = this.clearCommandToJSONRequest_(args);
+ break;
+
+ case 'help':
+ case '?':
+ this.helpCommand_(args);
+ // Leave the request undefined to indicate that there is no JSON to send
+ // (command handled internally).
+ this.request_ = void 0;
+ break;
+
+ default:
+ throw new Error('Unknown command "' + cmd + '"');
+ }
+}
+
+// Returns the JSON request built by the constructor. This is undefined when
+// the command was handled without creating a request.
+DebugRequest.prototype.JSONRequest = function() {
+ return this.request_;
+}
+
+
+// Constructor for a request packet with the given command name. The sequence
+// number is always 0 and the type is always 'request'. Callers may add an
+// 'arguments' property before serializing the packet.
+function RequestPacket(command) {
+ this.seq = 0;
+ this.type = 'request';
+ this.command = command;
+}
+
+
+// Serializes the packet to a JSON string. The seq and type fields are always
+// present; command and arguments are only included when set.
+RequestPacket.prototype.toJSONProtocol = function() {
+  // Collect the members first and join them at the end.
+  var members = [];
+  members.push('"seq":' + this.seq);
+  members.push('"type":"' + this.type + '"');
+  if (this.command) {
+    members.push('"command":' + StringToJSON_(this.command));
+  }
+  if (this.arguments) {
+    // Let the arguments object serialize itself if it knows how to.
+    var encoded_arguments = this.arguments.toJSONProtocol
+        ? this.arguments.toJSONProtocol()
+        : SimpleObjectToJSON_(this.arguments);
+    members.push('"arguments":' + encoded_arguments);
+  }
+  return '{' + members.join(',') + '}';
+};
+
+
+// Creates a new RequestPacket for the given command name.
+DebugRequest.prototype.createRequest = function(command) {
+ return new RequestPacket(command);
+};
+
+
+// Create a JSON request for the continue command. The command takes no
+// arguments; anything passed in args is ignored.
+DebugRequest.prototype.continueCommandToJSONRequest_ = function(args) {
+  return this.createRequest('continue').toJSONProtocol();
+};
+
+
+// Create a JSON request for the step command. The argument string has the
+// form '[in | min | next | out [step count]]' where the single letter forms
+// i, m, n and o are accepted as well. Without arguments the action is
+// 'next'. Throws an Error on too many arguments, an unknown step action or a
+// step count which is not a positive number.
+DebugRequest.prototype.stepCommandToJSONRequest_ = function(args) {
+  // Requesting a step is through the continue command with additional
+  // arguments.
+  var request = this.createRequest('continue');
+  request.arguments = {};
+
+  // Process arguments if any.
+  if (args && args.length > 0) {
+    args = args.split(/\s*[ ]+\s*/g);
+
+    if (args.length > 2) {
+      throw new Error('Invalid step arguments.');
+    }
+
+    // Get step count argument if any.
+    if (args.length == 2) {
+      var stepcount = parseInt(args[1]);
+      if (isNaN(stepcount) || stepcount <= 0) {
+        // Report the offending step count, not the step action.
+        throw new Error('Invalid step count argument "' + args[1] + '".');
+      }
+      request.arguments.stepcount = stepcount;
+    }
+
+    // Get the step action.
+    switch (args[0]) {
+      case 'in':
+      case 'i':
+        request.arguments.stepaction = 'in';
+        break;
+
+      case 'min':
+      case 'm':
+        request.arguments.stepaction = 'min';
+        break;
+
+      case 'next':
+      case 'n':
+        request.arguments.stepaction = 'next';
+        break;
+
+      case 'out':
+      case 'o':
+        request.arguments.stepaction = 'out';
+        break;
+
+      default:
+        throw new Error('Invalid step argument "' + args[0] + '".');
+    }
+  } else {
+    // Default is step next.
+    request.arguments.stepaction = 'next';
+  }
+
+  return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the backtrace command. A frame range is only
+// added to the request when exactly two arguments (start and end frame) are
+// given; any other number of arguments is ignored. Throws an Error when a
+// frame number is not a non-negative number or when start is after end.
+DebugRequest.prototype.backtraceCommandToJSONRequest_ = function(args) {
+ // Build a backtrace request from the text command.
+ var request = this.createRequest('backtrace');
+ args = args.split(/\s*[ ]+\s*/g);
+ if (args.length == 2) {
+ request.arguments = {};
+ var fromFrame = parseInt(args[0]);
+ var toFrame = parseInt(args[1]);
+ if (isNaN(fromFrame) || fromFrame < 0) {
+ throw new Error('Invalid start frame argument "' + args[0] + '".');
+ }
+ if (isNaN(toFrame) || toFrame < 0) {
+ throw new Error('Invalid end frame argument "' + args[1] + '".');
+ }
+ if (fromFrame > toFrame) {
+ throw new Error('Invalid arguments start frame cannot be larger ' +
+ 'than end frame.');
+ }
+ request.arguments.fromFrame = fromFrame;
+ // The end frame entered by the user is sent incremented by one.
+ // NOTE(review): presumably the protocol treats toFrame as exclusive -
+ // confirm against the backtrace request handler.
+ request.arguments.toFrame = toFrame + 1;
+ }
+ return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the frame command. The first argument, if any,
+// is passed on as the frame number; additional arguments are ignored.
+DebugRequest.prototype.frameCommandToJSONRequest_ = function(args) {
+ // Build a frame request from the text command.
+ var request = this.createRequest('frame');
+ args = args.split(/\s*[ ]+\s*/g);
+ if (args.length > 0 && args[0].length > 0) {
+ request.arguments = {};
+ // The frame number is passed on as the string entered; it is neither
+ // parsed nor validated here.
+ request.arguments.number = args[0];
+ }
+ return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the print command. The whole argument string is
+// sent as the expression of an 'evaluate' request. Throws an Error when the
+// expression is empty.
+DebugRequest.prototype.printCommandToJSONRequest_ = function(args) {
+  if (args.length == 0) {
+    throw new Error('Missing expression.');
+  }
+
+  // Build an evaluate request from the text command.
+  var request = this.createRequest('evaluate');
+  request.arguments = { expression: args };
+  return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the source command. The argument string has the
+// form '[from line [num lines]]' where the from line is one-based as shown
+// to the user. Without arguments ten lines starting five lines before the
+// current source line are requested. Throws an Error when an argument is not
+// a number.
+DebugRequest.prototype.sourceCommandToJSONRequest_ = function(args) {
+  // Build a source request from the text command.
+  var request = this.createRequest('source');
+
+  // Default is ten lines starting five lines before the current location.
+  var from = Debug.State.currentSourceLine - 5;
+  var lines = 10;
+
+  // Parse the arguments. Non-numeric input is rejected here as NaN would
+  // otherwise end up in the JSON request.
+  args = args.split(/\s*[ ]+\s*/g);
+  if (args.length > 0 && args[0].length > 0) {
+    from = parseInt(args[0]) - 1;
+    if (isNaN(from)) {
+      throw new Error('Invalid from line argument "' + args[0] + '".');
+    }
+    if (args.length > 1 && args[1].length > 0) {
+      lines = parseInt(args[1]);
+      if (isNaN(lines)) {
+        throw new Error('Invalid number of lines argument "' + args[1] + '".');
+      }
+    }
+  }
+
+  if (from < 0) from = 0;
+  if (lines < 0) lines = 10;
+
+  // Request source around the current source location.
+  request.arguments = {};
+  request.arguments.fromLine = from;
+  request.arguments.toLine = from + lines;
+
+  return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the scripts command. The optional single
+// argument selects the script types: 'natives', 'extensions' or 'all'.
+// Without an argument no types are put in the request. Throws an Error on
+// more than one argument or on an unknown argument.
+DebugRequest.prototype.scriptsCommandToJSONRequest_ = function(args) {
+ // Build a scripts request from the text command.
+ var request = this.createRequest('scripts');
+
+ // Process arguments if any.
+ if (args && args.length > 0) {
+ args = args.split(/\s*[ ]+\s*/g);
+
+ if (args.length > 1) {
+ throw new Error('Invalid scripts arguments.');
+ }
+
+ request.arguments = {};
+ // NOTE(review): ScriptTypeFlag is not defined in the visible part of this
+ // file - confirm that it is available in the context this script runs in.
+ switch (args[0]) {
+ case 'natives':
+ request.arguments.types = ScriptTypeFlag(Debug.ScriptType.Native);
+ break;
+
+ case 'extensions':
+ request.arguments.types = ScriptTypeFlag(Debug.ScriptType.Extension);
+ break;
+
+ case 'all':
+ request.arguments.types =
+ ScriptTypeFlag(Debug.ScriptType.Normal) |
+ ScriptTypeFlag(Debug.ScriptType.Native) |
+ ScriptTypeFlag(Debug.ScriptType.Extension);
+ break;
+
+ default:
+ throw new Error('Invalid argument "' + args[0] + '".');
+ }
+ }
+
+ return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the break command. The argument string has the
+// form 'target [condition]': the text up to the first space is the target
+// and the rest is the condition. The break point type is always 'function'.
+// Throws an Error when no arguments are given.
+DebugRequest.prototype.breakCommandToJSONRequest_ = function(args) {
+ // Build a setbreakpoint request from the text command.
+ var request = this.createRequest('setbreakpoint');
+
+ // Process arguments if any.
+ if (args && args.length > 0) {
+ var target = args;
+ var condition;
+
+ var pos = args.indexOf(' ');
+ if (pos > 0) {
+ target = args.substring(0, pos);
+ condition = args.substring(pos + 1, args.length);
+ }
+
+ request.arguments = {};
+ request.arguments.type = 'function';
+ request.arguments.target = target;
+ // Without a condition this property is left undefined.
+ request.arguments.condition = condition;
+ } else {
+ throw new Error('Invalid break arguments.');
+ }
+
+ return request.toJSONProtocol();
+};
+
+
+// Create a JSON request for the clear command. The argument is the number of
+// the break point to clear. Throws an Error when the argument is missing or
+// is not a number.
+DebugRequest.prototype.clearCommandToJSONRequest_ = function(args) {
+  // Build a clearbreakpoint request from the text command.
+  var request = this.createRequest('clearbreakpoint');
+
+  // The break point number is mandatory.
+  if (!args || args.length == 0) {
+    throw new Error('Invalid clear arguments.');
+  }
+
+  // Reject non-numeric input instead of sending NaN to the debugger.
+  var breakpoint = parseInt(args);
+  if (isNaN(breakpoint)) {
+    throw new Error('Invalid break point number "' + args + '".');
+  }
+
+  request.arguments = {};
+  request.arguments.breakpoint = breakpoint;
+
+  return request.toJSONProtocol();
+};
+
+
+// Handle the help command by printing the list of supported commands. No
+// JSON request is created. Arguments are ignored with a warning.
+DebugRequest.prototype.helpCommand_ = function(args) {
+  // Help is quite simple.
+  if (args && args.length > 0) {
+    print('warning: arguments to \'help\' are ignored');
+  }
+
+  print('break location [condition]');
+  print('clear <breakpoint #>');
+  print('backtrace [from frame #] [to frame #]');
+  print('frame <frame #>');
+  print('step [in | next | out| min [step count]]');
+  print('print <expression>');
+  print('source [from line [num lines]]');
+  print('scripts');
+  print('continue');
+  print('help');
+}
+
+
+// Convert a JSON response to text for display in a text based debugger.
+// Returns an object with the properties 'text' (the text to display) and
+// 'running' (the running state reported by the response). For 'frame'
+// responses the current frame and source line in Debug.State are updated.
+// Errors raised while formatting are reported through the returned text.
+function DebugResponseDetails(json_response) {
+  // All variables are declared locally so that nothing leaks into the global
+  // object.
+  var details = { text: '', running: false };
+  var response;
+  var body;
+  var result;
+  var i;
+
+  try {
+    // Convert the JSON string to an object.
+    // NOTE(review): eval executes whatever text it is given. This is only
+    // acceptable as long as the response comes from the V8 debugger itself.
+    response = eval('(' + json_response + ')');
+
+    if (!response.success) {
+      details.text = response.message;
+      return details;
+    }
+
+    // Get the running state.
+    details.running = response.running;
+
+    body = response.body;
+    switch (response.command) {
+      case 'setbreakpoint':
+        details.text = 'set breakpoint #' + body.breakpoint;
+        break;
+
+      case 'clearbreakpoint':
+        details.text = 'cleared breakpoint #' + body.breakpoint;
+        break;
+
+      case 'backtrace':
+        if (body.totalFrames == 0) {
+          result = '(empty stack)';
+        } else {
+          result = 'Frames #' + body.fromFrame + ' to #' +
+              (body.toFrame - 1) + ' of ' + body.totalFrames + '\n';
+          for (i = 0; i < body.frames.length; i++) {
+            if (i != 0) result += '\n';
+            result += body.frames[i].text;
+          }
+        }
+        details.text = result;
+        break;
+
+      case 'frame':
+        details.text = SourceUnderline(body.sourceLineText, body.column);
+        Debug.State.currentSourceLine = body.line;
+        Debug.State.currentFrame = body.index;
+        break;
+
+      case 'evaluate':
+        details.text = body.text;
+        break;
+
+      case 'source':
+        // Get the source from the response. Line numbers in the response
+        // are zero-based while the numbers displayed are one-based.
+        var from_line = body.fromLine + 1;
+        var lines = body.source.split('\n');
+        // Count digits through the decimal string. A logarithm based count
+        // is one too low for some powers of ten (e.g. 1000) due to floating
+        // point rounding.
+        var maxdigits = String(from_line + lines.length).length;
+        if (maxdigits < 3) {
+          maxdigits = 3;
+        }
+        result = '';
+        for (var num = 0; num < lines.length; num++) {
+          // Check if there's an extra newline at the end.
+          if (num == (lines.length - 1) && lines[num].length == 0) {
+            break;
+          }
+
+          var current_line = from_line + num;
+          if (current_line == Debug.State.currentSourceLine + 1) {
+            // Mark the current line with '>' instead of its number.
+            for (i = 0; i < maxdigits; i++) {
+              result += '>';
+            }
+            result += '  ';
+          } else {
+            // Right-align the line number.
+            var spacer = maxdigits - String(current_line).length;
+            for (i = 0; i < spacer; i++) {
+              result += ' ';
+            }
+            result += current_line + ': ';
+          }
+          result += lines[num];
+          result += '\n';
+        }
+        details.text = result;
+        break;
+
+      case 'scripts':
+        result = '';
+        for (i = 0; i < body.length; i++) {
+          if (i != 0) result += '\n';
+          if (body[i].name) {
+            result += body[i].name;
+          } else {
+            // Show the start of the source for scripts without a name.
+            result += '[unnamed] ';
+            var sourceStart = body[i].sourceStart;
+            if (sourceStart.length > 40) {
+              sourceStart = sourceStart.substring(0, 37) + '...';
+            }
+            result += sourceStart;
+          }
+          result += ' (lines: ';
+          result += body[i].sourceLines;
+          result += ', length: ';
+          result += body[i].sourceLength;
+          if (body[i].type == Debug.ScriptType.Native) {
+            result += ', native';
+          } else if (body[i].type == Debug.ScriptType.Extension) {
+            result += ', extension';
+          }
+          result += ')';
+        }
+        details.text = result;
+        break;
+
+      case 'continue':
+        details.text = "(running)";
+        break;
+
+      default:
+        details.text =
+            'Response for unknown command \'' + response.command + '\'' +
+            ' (' + json_response + ')';
+    }
+  } catch (e) {
+    details.text = 'Error: "' + e + '" formatting response';
+  }
+
+  return details;
+};
+
+
+// Returns the JSON member '"name":value'. Neither the name nor the value is
+// escaped or converted here; the value must already be JSON text.
+function MakeJSONPair_(name, value) {
+ return '"' + name + '":' + value;
+}
+
+
+// Wraps an array of already formatted JSON members in an object literal.
+function ArrayToJSONObject_(content) {
+ return '{' + content.join(',') + '}';
+}
+
+
+// Wraps an array of already formatted JSON values in an array literal.
+function ArrayToJSONArray_(content) {
+ return '[' + content.join(',') + ']';
+}
+
+
+// Formats a boolean as JSON using its string representation.
+function BooleanToJSON_(value) {
+ return String(value);
+}
+
+
+// Formats a number as JSON using its string representation. No special
+// handling is done for NaN or infinite values.
+function NumberToJSON_(value) {
+ return String(value);
+}
+
+
+// Mapping of some control characters to avoid the \uXXXX syntax for most
+// commonly used control characters. The double quote and the backslash are
+// included as they must be escaped in JSON strings as well.
+const ctrlCharMap_ = {
+ '\b': '\\b',
+ '\t': '\\t',
+ '\n': '\\n',
+ '\f': '\\f',
+ '\r': '\\r',
+ '"' : '\\"',
+ '\\': '\\\\'
+};
+
+
+// Regular expression testing for ", \ and control characters (0x00 - 0x1F).
+const ctrlCharTest_ = new RegExp('["\\\\\x00-\x1F]');
+
+
+// Regular expression matching ", \ and control characters (0x00 - 0x1F)
+// globally.
+const ctrlCharMatch_ = new RegExp('["\\\\\x00-\x1F]', 'g');
+
+
+/**
+ * Convert a String to its JSON representation (see http://www.json.org/).
+ * Double quotes, backslashes and control characters (0x00 - 0x1F) are
+ * escaped, using the short escapes from ctrlCharMap_ where available and
+ * \u00XX escapes otherwise.
+ * @param {String} value The String value to format as JSON
+ * @return {string} JSON formatted String value
+ */
+function StringToJSON_(value) {
+  // Check for ", \ and control characters (0x00 - 0x1F).
+  if (ctrlCharTest_.test(value)) {
+    // Replace ", \ and control characters (0x00 - 0x1F).
+    return '"' +
+        value.replace(ctrlCharMatch_, function (ch) {
+          // Use charmap if possible.
+          var mapped = ctrlCharMap_[ch];
+          if (mapped) return mapped;
+          // Convert control character to unicode escape sequence. The
+          // character code is below 0x20, so it has at most two hex digits.
+          var hex = ch.charCodeAt(0).toString(16);
+          return '\\u00' + (hex.length < 2 ? '0' : '') + hex;
+        })
+        + '"';
+  }
+
+  // Simple string with no special characters.
+  return '"' + value + '"';
+}
+
+
+/**
+ * Convert a Date to ISO 8601 format (YYYY-MM-DDTHH:MM:SS.mmmZ in UTC). To
+ * avoid depending on the Date object this method calls the GetUTC*From
+ * functions on the 'builtins' object directly and not through the value.
+ * NOTE(review): 'builtins' is not defined in the visible part of this file -
+ * confirm that it is available in the context this script runs in.
+ * @param {Date} value The Date value to format
+ * @return {string} ISO 8601 formatted Date value
+ */
+function DateToISO8601_(value) {
+ // Pads a number to at least two digits.
+ function f(n) {
+ return n < 10 ? '0' + n : n;
+ }
+ // Pads a number to at least three digits.
+ function g(n) {
+ return n < 10 ? '00' + n : n < 100 ? '0' + n : n;
+ }
+ return builtins.GetUTCFullYearFrom(value) + '-' +
+ f(builtins.GetUTCMonthFrom(value) + 1) + '-' +
+ f(builtins.GetUTCDateFrom(value)) + 'T' +
+ f(builtins.GetUTCHoursFrom(value)) + ':' +
+ f(builtins.GetUTCMinutesFrom(value)) + ':' +
+ f(builtins.GetUTCSecondsFrom(value)) + '.' +
+ g(builtins.GetUTCMillisecondsFrom(value)) + 'Z';
+}
+
+
+/**
+ * Convert a Date to its JSON representation: the ISO 8601 text produced by
+ * DateToISO8601_ enclosed in double quotes.
+ * @param {Date} value The Date value to format as JSON
+ * @return {string} JSON formatted Date value
+ */
+function DateToJSON_(value) {
+ return '"' + DateToISO8601_(value) + '"';
+}
+
+
+/**
+ * Convert an Object to its JSON representation (see http://www.json.org/).
+ * This implementation simply runs through all string property names and adds
+ * each property to the JSON representation for some predefined types. For type
+ * "object" the function calls itself recursively unless the object has the
+ * function property "toJSONProtocol" in which case that is used. A null
+ * property value is formatted as JSON null. Properties of other types (e.g.
+ * undefined and functions) are left out. This is not a general
+ * implementation but sufficient for the debugger. Note that circular
+ * structures will cause infinite recursion.
+ * @param {Object} object The object to format as JSON
+ * @return {string} JSON formatted object value
+ */
+function SimpleObjectToJSON_(object) {
+  var content = [];
+  for (var key in object) {
+    // Only consider string keys.
+    if (typeof key == 'string') {
+      var property_value = object[key];
+
+      // Format the value based on its type.
+      var property_value_json;
+      switch (typeof property_value) {
+        case 'object':
+          // typeof null is 'object' as well; handle it before any property
+          // of the value is accessed.
+          if (property_value === null) {
+            property_value_json = 'null';
+          } else if (typeof property_value.toJSONProtocol == 'function') {
+            property_value_json = property_value.toJSONProtocol(true)
+          } else if (property_value.constructor.name == 'Array'){
+            property_value_json = SimpleArrayToJSON_(property_value);
+          } else {
+            property_value_json = SimpleObjectToJSON_(property_value);
+          }
+          break;
+
+        case 'boolean':
+          property_value_json = BooleanToJSON_(property_value);
+          break;
+
+        case 'number':
+          property_value_json = NumberToJSON_(property_value);
+          break;
+
+        case 'string':
+          property_value_json = StringToJSON_(property_value);
+          break;
+
+        default:
+          property_value_json = null;
+      }
+
+      // Add the property if relevant.
+      if (property_value_json) {
+        content.push(StringToJSON_(key) + ':' + property_value_json);
+      }
+    }
+  }
+
+  // Make JSON object representation.
+  return '{' + content.join(',') + '}';
+}
+
+
+/**
+ * Convert an array to its JSON representation. This is a VERY simple
+ * implementation just to support what is needed for the debugger. Elements
+ * which are null or undefined are formatted as JSON null and nested arrays
+ * are formatted as JSON arrays.
+ * @param {Array} array The array to format as JSON
+ * @return {string} JSON formatted array value
+ */
+function SimpleArrayToJSON_(array) {
+  // Make JSON array representation.
+  var json = '[';
+  for (var i = 0; i < array.length; i++) {
+    if (i != 0) {
+      json += ',';
+    }
+    var elem = array[i];
+    if (elem === null || typeof(elem) === 'undefined') {
+      // Must be checked first as any property access on these values throws.
+      json += 'null';
+    } else if (elem.toJSONProtocol) {
+      json += elem.toJSONProtocol(true)
+    } else if (typeof(elem) === 'object') {
+      // Use the same array detection as SimpleObjectToJSON_.
+      if (elem.constructor.name == 'Array') {
+        json += SimpleArrayToJSON_(elem);
+      } else {
+        json += SimpleObjectToJSON_(elem);
+      }
+    } else if (typeof(elem) === 'boolean') {
+      json += BooleanToJSON_(elem);
+    } else if (typeof(elem) === 'number') {
+      json += NumberToJSON_(elem);
+    } else if (typeof(elem) === 'string') {
+      json += StringToJSON_(elem);
+    } else {
+      json += elem;
+    }
+  }
+  json += ']';
+  return json;
+}
diff --git a/src/debug-delay.js b/src/debug-delay.js
index 6db054e..32a7151 100644
--- a/src/debug-delay.js
+++ b/src/debug-delay.js
@@ -474,10 +474,18 @@
Debug.setBreakPoint = function(func, opt_line, opt_column, opt_condition) {
if (!IS_FUNCTION(func)) throw new Error('Parameters have wrong types.');
+ // Break points in API functions are not supported.
+ if (%FunctionIsAPIFunction(func)) {
+ throw new Error('Cannot set break point in native code.');
+ }
var source_position = this.findFunctionSourcePosition(func, opt_line, opt_column) -
this.sourcePosition(func);
// Find the script for the function.
var script = %FunctionGetScript(func);
+ // Break in builtin JavaScript code is not supported.
+ if (script.type == Debug.ScriptType.Native) {
+ throw new Error('Cannot set break point in native code.');
+ }
// If the script for the function has a name convert this to a script break
// point.
if (script && script.name) {
@@ -1321,6 +1329,9 @@
// Clear break point.
Debug.clearBreakPoint(break_point);
+
+ // Add the cleared break point number to the response.
+ response.body = { breakpoint: break_point }
}
@@ -1380,6 +1391,11 @@
DebugCommandProcessor.prototype.frameRequest_ = function(request, response) {
+ // No frames no source.
+ if (this.exec_state_.frameCount() == 0) {
+ return response.failed('No frames');
+ }
+
// With no arguments just keep the selected frame.
if (request.arguments && request.arguments.number >= 0) {
this.exec_state_.setSelectedFrame(request.arguments.number);
@@ -1445,6 +1461,11 @@
DebugCommandProcessor.prototype.sourceRequest_ = function(request, response) {
+ // No frames no source.
+ if (this.exec_state_.frameCount() == 0) {
+ return response.failed('No source');
+ }
+
var from_line;
var to_line;
var frame = this.exec_state_.frame();
diff --git a/src/debug.cc b/src/debug.cc
index 027c89b..368a975 100644
--- a/src/debug.cc
+++ b/src/debug.cc
@@ -1095,6 +1095,33 @@
}
+// Handle stepping into a function. If the function is called from the frame
+// where step into was requested it is flooded with one-shot break points.
+// |fp| is the frame pointer of the calling frame or 0 to have it looked up
+// on the stack. |is_constructor| tells whether the function is called as a
+// constructor, in which case an extra construct frame is skipped during the
+// lookup.
+void Debug::HandleStepIn(Handle<JSFunction> function,
+ Address fp,
+ bool is_constructor) {
+ // If the frame pointer is not supplied by the caller find it.
+ if (fp == 0) {
+ StackFrameIterator it;
+ it.Advance();
+ // For constructor functions skip another frame.
+ if (is_constructor) {
+ ASSERT(it.frame()->is_construct());
+ it.Advance();
+ }
+ fp = it.frame()->fp();
+ }
+
+ // Flood the function with one-shot break points if it is called from where
+ // step into was requested.
+ if (fp == Debug::step_in_fp()) {
+ // Don't allow step into functions in the native context.
+ if (function->context()->global() != Top::context()->builtins()) {
+ Debug::FloodWithOneShot(Handle<SharedFunctionInfo>(function->shared()));
+ }
+ }
+}
+
+
void Debug::ClearStepping() {
// Clear the various stepping setup.
ClearOneShot();
diff --git a/src/debug.h b/src/debug.h
index fdb80bd..56b8026 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -205,6 +205,9 @@
inline static bool has_break_points() { return has_break_points_; }
static bool StepInActive() { return thread_local_.step_into_fp_ != 0; }
+ static void HandleStepIn(Handle<JSFunction> function,
+ Address fp,
+ bool is_constructor);
static Address step_in_fp() { return thread_local_.step_into_fp_; }
static Address* step_in_fp_addr() { return &thread_local_.step_into_fp_; }
diff --git a/src/execution.cc b/src/execution.cc
index 941712a..7ccef5e 100644
--- a/src/execution.cc
+++ b/src/execution.cc
@@ -38,6 +38,9 @@
#include "simulator-ia32.h"
#endif
+#include "debug.h"
+#include "v8threads.h"
+
namespace v8 { namespace internal {
@@ -500,6 +503,69 @@
}
+// Handles a preemption request: clears the request, notifies the context
+// switcher and yields the CPU. Always returns the undefined value.
+static Object* RuntimePreempt() {
+ // Clear the preempt request flag.
+ StackGuard::Continue(PREEMPT);
+
+ ContextSwitcher::PreemptionReceived();
+
+ {
+ // The unlocker only lives for the duration of the yield.
+ // NOTE(review): presumably this releases the V8 lock so that another
+ // thread can run - confirm against v8::Unlocker.
+ v8::Unlocker unlocker;
+ Thread::YieldCPU();
+ }
+
+ return Heap::undefined_value();
+}
+
+
+// Handles a debug break request by entering the debugger and notifying the
+// debug event listeners. Nothing happens when breaks are disabled, when the
+// top JavaScript frame belongs to a builtins or debugger function, or when
+// the debugger cannot be entered. Always returns the undefined value.
+Object* Execution::DebugBreakHelper() {
+ // Just continue if breaks are disabled.
+ if (Debug::disable_break()) {
+ return Heap::undefined_value();
+ }
+
+ // Don't break in system functions. If the current function is
+ // either in the builtins object of some context or is in the debug
+ // context just return with the debug break stack guard active.
+ JavaScriptFrameIterator it;
+ JavaScriptFrame* frame = it.frame();
+ Object* fun = frame->function();
+ if (fun->IsJSFunction()) {
+ GlobalObject* global = JSFunction::cast(fun)->context()->global();
+ if (global->IsJSBuiltinsObject() || Debug::IsDebugGlobal(global)) {
+ return Heap::undefined_value();
+ }
+ }
+
+ // Clear the debug request flag. This is done only after the checks above
+ // so that the early returns leave the request pending.
+ StackGuard::Continue(DEBUGBREAK);
+
+ HandleScope scope;
+ // Enter the debugger. Just continue if we fail to enter the debugger.
+ EnterDebugger debugger;
+ if (debugger.FailedToEnter()) {
+ return Heap::undefined_value();
+ }
+
+ // Notify the debug event listeners.
+ Debugger::OnDebugBreak(Factory::undefined_value());
+
+ // Return to continue execution.
+ return Heap::undefined_value();
+}
+
+
+// Handles the pending stack guard requests in the order debug break,
+// preemption, interrupt. The results of the debug break and preemption
+// handlers are discarded. Returns the result of Top::StackOverflow() when an
+// interrupt was pending and the undefined value otherwise.
+Object* Execution::HandleStackGuardInterrupt() {
+ if (StackGuard::IsDebugBreak()) DebugBreakHelper();
+ if (StackGuard::IsPreempted()) RuntimePreempt();
+ if (StackGuard::IsInterrupted()) {
+ // Clear the interrupt request before reporting it.
+ StackGuard::Continue(INTERRUPT);
+ return Top::StackOverflow();
+ }
+ return Heap::undefined_value();
+}
+
// --- G C E x t e n s i o n ---
const char* GCExtension::kSource = "native function gc();";
diff --git a/src/execution.h b/src/execution.h
index 25a987f..bd37525 100644
--- a/src/execution.h
+++ b/src/execution.h
@@ -118,6 +118,12 @@
Handle<Object> pos,
Handle<Object> is_global);
+ static Object* DebugBreakHelper();
+
+ // If the stack guard is triggered, but it is not an actual
+ // stack overflow, then handle the interruption accordingly.
+ static Object* HandleStackGuardInterrupt();
+
// Get a function delegate (or undefined) for the given non-function
// object. Used for support calling objects as functions.
static Handle<Object> GetFunctionDelegate(Handle<Object> object);
diff --git a/src/factory.cc b/src/factory.cc
index 2c82b1d..9f6025b 100644
--- a/src/factory.cc
+++ b/src/factory.cc
@@ -130,8 +130,12 @@
Handle<Context> Factory::NewWithContext(Handle<Context> previous,
- Handle<JSObject> extension) {
- CALL_HEAP_FUNCTION(Heap::AllocateWithContext(*previous, *extension), Context);
+ Handle<JSObject> extension,
+ bool is_catch_context) {
+ CALL_HEAP_FUNCTION(Heap::AllocateWithContext(*previous,
+ *extension,
+ is_catch_context),
+ Context);
}
diff --git a/src/factory.h b/src/factory.h
index 429c483..3cbefd8 100644
--- a/src/factory.h
+++ b/src/factory.h
@@ -126,7 +126,8 @@
// Create a 'with' context.
static Handle<Context> NewWithContext(Handle<Context> previous,
- Handle<JSObject> extension);
+ Handle<JSObject> extension,
+ bool is_catch_context);
// Return the Symbol maching the passed in string.
static Handle<String> SymbolFromString(Handle<String> value);
diff --git a/src/flag-definitions.h b/src/flag-definitions.h
index eafbbad..194340b 100644
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -226,6 +226,7 @@
DEFINE_bool(help, false, "Print usage message, including flags, on console")
DEFINE_bool(dump_counters, false, "Dump counters on exit")
+DEFINE_bool(debugger, true, "Enable JavaScript debugger")
DEFINE_string(map_counters, false, "Map counters to a file")
DEFINE_args(js_arguments, JSArguments(),
"Pass all remaining arguments to the script. Alias for \"--\".")
diff --git a/src/flags.cc b/src/flags.cc
index 48b4ef7..cb461ee 100644
--- a/src/flags.cc
+++ b/src/flags.cc
@@ -516,10 +516,11 @@
printf(" shell [options] file1 file2 ... filek\n");
printf(" run JavaScript scripts in file1, file2, ..., filek\n");
printf(" shell [options]\n");
- printf(" shell [options] --shell\n");
- printf(" run an interactive JavaScript shell");
- printf(" d8 [options] file\n");
+ printf(" shell [options] --shell [file1 file2 ... filek]\n");
+ printf(" run an interactive JavaScript shell\n");
+ printf(" d8 [options] file1 file2 ... filek\n");
printf(" d8 [options]\n");
+ printf(" d8 [options] --shell [file1 file2 ... filek]\n");
printf(" run the new debugging shell\n\n");
printf("Options:\n");
for (size_t i = 0; i < num_flags; ++i) {
diff --git a/src/globals.h b/src/globals.h
index e174bf3..2b5f3ac 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -145,6 +145,7 @@
class BreakableStatement;
class Code;
class CodeGenerator;
+class CodeRegion;
class CodeStub;
class Context;
class Debug;
@@ -259,6 +260,7 @@
int buffer_size;
int instr_size;
int reloc_size;
+ Assembler* origin;
};
diff --git a/src/heap.cc b/src/heap.cc
index 53b0774..c4a860b 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -1017,6 +1017,10 @@
obj = AllocateMap(FIXED_ARRAY_TYPE, HeapObject::kHeaderSize);
if (obj->IsFailure()) return false;
+ catch_context_map_ = Map::cast(obj);
+
+ obj = AllocateMap(FIXED_ARRAY_TYPE, HeapObject::kHeaderSize);
+ if (obj->IsFailure()) return false;
global_context_map_ = Map::cast(obj);
obj = AllocateMap(JS_FUNCTION_TYPE, JSFunction::kSize);
@@ -1659,6 +1663,7 @@
// through the self_reference parameter.
code->CopyFrom(desc);
if (sinfo != NULL) sinfo->Serialize(code); // write scope info
+ LOG(CodeAllocateEvent(code, desc.origin));
#ifdef DEBUG
code->Verify();
@@ -2387,11 +2392,13 @@
}
-Object* Heap::AllocateWithContext(Context* previous, JSObject* extension) {
+Object* Heap::AllocateWithContext(Context* previous,
+ JSObject* extension,
+ bool is_catch_context) {
Object* result = Heap::AllocateFixedArray(Context::MIN_CONTEXT_SLOTS);
if (result->IsFailure()) return result;
Context* context = reinterpret_cast<Context*>(result);
- context->set_map(context_map());
+ context->set_map(is_catch_context ? catch_context_map() : context_map());
context->set_closure(previous->closure());
context->set_fcontext(previous->fcontext());
context->set_previous(previous);
diff --git a/src/heap.h b/src/heap.h
index f17a091..3754c0b 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -92,6 +92,7 @@
V(Map, fixed_array_map) \
V(Map, hash_table_map) \
V(Map, context_map) \
+ V(Map, catch_context_map) \
V(Map, global_context_map) \
V(Map, code_map) \
V(Map, oddball_map) \
@@ -430,7 +431,9 @@
static Object* AllocateFunctionContext(int length, JSFunction* closure);
// Allocate a 'with' context.
- static Object* AllocateWithContext(Context* previous, JSObject* extension);
+ static Object* AllocateWithContext(Context* previous,
+ JSObject* extension,
+ bool is_catch_context);
// Allocates a new utility object in the old generation.
static Object* AllocateStruct(InstanceType type);
diff --git a/src/ic-arm.cc b/src/ic-arm.cc
index 55bbee4..4ccefa1 100644
--- a/src/ic-arm.cc
+++ b/src/ic-arm.cc
@@ -502,6 +502,12 @@
}
+// TODO(181): Implement map patching once loop nesting is tracked on
+// the ARM platform so we can generate inlined fast-case code for
+// array indexing in loops.
+void KeyedLoadIC::PatchInlinedMapCheck(Address address, Object* value) { }  // Empty until the TODO above is done: nothing is patched on ARM.
+
+
Object* KeyedLoadIC_Miss(Arguments args);
diff --git a/src/ic-ia32.cc b/src/ic-ia32.cc
index 6021004..5603077 100644
--- a/src/ic-ia32.cc
+++ b/src/ic-ia32.cc
@@ -733,6 +733,27 @@
}
+void KeyedLoadIC::PatchInlinedMapCheck(Address address, Object* value) {  // Stores |value| as the map compared by the inlined fast case, if one exists.
+ static const byte kTestEaxByte = 0xA9;  // First byte of the 5 byte test instruction looked for below.
+ Address test_instruction_address = address + 4; // 4 = stub address, i.e. skip it to reach the instruction after the call.
+ // The keyed load has a fast inlined case if the IC call instruction
+ // is immediately followed by a test instruction.
+ if (*test_instruction_address == kTestEaxByte) {
+ // Fetch the offset from the call instruction to the map cmp
+ // instruction. This offset is stored in the last 4 bytes of the
+ // 5 byte test instruction.
+ Address offset_address = test_instruction_address + 1;  // Skip the one-byte opcode.
+ int offset_value = *(reinterpret_cast<int*>(offset_address));
+ // Compute the map address. The operand-immediate compare
+ // instruction is two bytes larger than a call instruction so we
+ // add 2 to get to the map address.
+ Address map_address = address + offset_value + 2;
+ // Patch the map check by overwriting the embedded map pointer in place.
+ (*(reinterpret_cast<Object**>(map_address))) = value;
+ }  // No test instruction after the call: nothing is written.
+}
+
+
// Defined in ic.cc.
Object* KeyedLoadIC_Miss(Arguments args);
diff --git a/src/ic.cc b/src/ic.cc
index 2dc9742..b28d6f6 100644
--- a/src/ic.cc
+++ b/src/ic.cc
@@ -233,6 +233,10 @@
void KeyedLoadIC::Clear(Address address, Code* target) {  // Points the keyed load IC at |address| back at initialize_stub().
if (target->ic_state() == UNINITIALIZED) return;  // Nothing to clear for an uninitialized target.
+ // Make sure to also clear the map used in inline fast cases. If we
+ // do not clear these maps, cached code can keep objects alive
+ // through the embedded maps.
+ PatchInlinedMapCheck(address, Heap::null_value());  // The null value takes the place of the embedded map.
SetTargetAtAddress(address, initialize_stub());
}
@@ -352,17 +356,16 @@
if (opt->IsJSFunction()) return opt;
}
- // If performing debug step into then flood this function with one-shot
- // break points if it is called from where step into was requested.
- if (Debug::StepInActive() && fp() == Debug::step_in_fp()) {
- // Don't allow step into functions in the native context.
- if (JSFunction::cast(result)->context()->global() !=
- Top::context()->builtins()) {
- HandleScope scope;
- Handle<SharedFunctionInfo> shared(JSFunction::cast(result)->shared());
- Debug::FloodWithOneShot(shared);
- }
+ // Handle stepping into a function if step into is active.
+ if (Debug::StepInActive()) {
+ // Protect the result in a handle as the debugger can allocate and might
+ // cause GC.
+ HandleScope scope;
+ Handle<JSFunction> function(JSFunction::cast(result));
+ Debug::HandleStepIn(function, fp(), false);
+ return *function;
}
+
return result;
}
@@ -719,7 +722,18 @@
// the global object).
bool use_ic = FLAG_use_ic && !object->IsAccessCheckNeeded();
- if (use_ic) set_target(generic_stub());
+ if (use_ic) {
+ set_target(generic_stub());
+ // For JSObjects that are not value wrappers and that do not have
+ // indexed interceptors, we initialize the inlined fast case (if
+ // present) by patching the inlined map check.
+ if (object->IsJSObject() &&
+ !object->IsJSValue() &&
+ !JSObject::cast(*object)->HasIndexedInterceptor()) {
+ Map* map = JSObject::cast(*object)->map();
+ PatchInlinedMapCheck(address(), map);
+ }
+ }
// Get the property.
return Runtime::GetObjectProperty(object, key);
diff --git a/src/ic.h b/src/ic.h
index d32edaa..bbe1f6d 100644
--- a/src/ic.h
+++ b/src/ic.h
@@ -276,6 +276,11 @@
}
static void Clear(Address address, Code* target);
+
+ // Support for patching the map that is checked in an inlined
+ // version of keyed load.
+ static void PatchInlinedMapCheck(Address address, Object* map);
+
friend class IC;
};
diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc
index 1418443..13ba619 100644
--- a/src/interpreter-irregexp.cc
+++ b/src/interpreter-irregexp.cc
@@ -81,17 +81,34 @@
const byte* pc,
int stack_depth,
int current_position,
+ uint32_t current_char,
int bytecode_length,
const char* bytecode_name) {
if (FLAG_trace_regexp_bytecodes) {
- PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
+ bool printable = (current_char < 127 && current_char >= 32);
+ const char* format =
+ printable ?
+ "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
+ "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
+ PrintF(format,
pc - code_base,
stack_depth,
current_position,
+ current_char,
+ printable ? current_char : '.',
bytecode_name);
for (int i = 1; i < bytecode_length; i++) {
printf(", %02x", pc[i]);
}
+ printf(" ");
+ for (int i = 1; i < bytecode_length; i++) {
+ unsigned char b = pc[i];
+ if (b < 127 && b >= 32) {
+ printf("%c", b);
+ } else {
+ printf(".");
+ }
+ }
printf("\n");
}
}
@@ -103,6 +120,7 @@
pc, \
backtrack_sp - backtrack_stack, \
current, \
+ current_char, \
BC_##name##_LENGTH, \
#name);
#else
@@ -117,7 +135,7 @@
Vector<const Char> subject,
int* registers,
int current,
- int current_char) {
+ uint32_t current_char) {
const byte* pc = code_base;
static const int kBacktrackStackSize = 10000;
int backtrack_stack[kBacktrackStackSize];
@@ -233,45 +251,104 @@
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
+ BYTECODE(LOAD_2_CURRENT_CHARS) {
+ int pos = current + Load32(pc + 1);
+ if (pos + 2 > subject.length()) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ Char next = subject[pos + 1];
+ current_char =
+ (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
+ pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
+ int pos = current + Load32(pc + 1);
+ Char next = subject[pos + 1];
+ current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
+ pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
+ break;
+ }
+ BYTECODE(LOAD_4_CURRENT_CHARS) {
+ ASSERT(sizeof(Char) == 1);
+ int pos = current + Load32(pc + 1);
+ if (pos + 4 > subject.length()) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ Char next1 = subject[pos + 1];
+ Char next2 = subject[pos + 2];
+ Char next3 = subject[pos + 3];
+ current_char = (subject[pos] |
+ (next1 << 8) |
+ (next2 << 16) |
+ (next3 << 24));
+ pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
+ ASSERT(sizeof(Char) == 1);
+ int pos = current + Load32(pc + 1);
+ Char next1 = subject[pos + 1];
+ Char next2 = subject[pos + 2];
+ Char next3 = subject[pos + 3];
+ current_char = (subject[pos] |
+ (next1 << 8) |
+ (next2 << 16) |
+ (next3 << 24));
+ pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
+ break;
+ }
BYTECODE(CHECK_CHAR) {
- int c = Load16(pc + 1);
+ uint32_t c = Load32(pc + 1);
if (c == current_char) {
- pc = code_base + Load32(pc + 3);
+ pc = code_base + Load32(pc + 5);
} else {
pc += BC_CHECK_CHAR_LENGTH;
}
break;
}
BYTECODE(CHECK_NOT_CHAR) {
- int c = Load16(pc + 1);
+ uint32_t c = Load32(pc + 1);
if (c != current_char) {
- pc = code_base + Load32(pc + 3);
+ pc = code_base + Load32(pc + 5);
} else {
pc += BC_CHECK_NOT_CHAR_LENGTH;
}
break;
}
- BYTECODE(OR_CHECK_NOT_CHAR) {
- int c = Load16(pc + 1);
- if (c != (current_char | Load16(pc + 3))) {
- pc = code_base + Load32(pc + 5);
+ BYTECODE(AND_CHECK_CHAR) {
+ uint32_t c = Load32(pc + 1);
+ if (c == (current_char & Load32(pc + 5))) {
+ pc = code_base + Load32(pc + 9);
} else {
- pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
+ pc += BC_AND_CHECK_CHAR_LENGTH;
}
break;
}
- BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
- int c = Load16(pc + 1);
- int m = Load16(pc + 3);
- if (c != ((current_char - m) | m)) {
- pc = code_base + Load32(pc + 5);
+ BYTECODE(AND_CHECK_NOT_CHAR) {
+ uint32_t c = Load32(pc + 1);
+ if (c != (current_char & Load32(pc + 5))) {
+ pc = code_base + Load32(pc + 9);
} else {
- pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
+ pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
+ uint32_t c = Load16(pc + 1);
+ uint32_t minus = Load16(pc + 3);
+ uint32_t mask = Load16(pc + 5);
+ if (c != ((current_char - minus) & mask)) {
+ pc = code_base + Load32(pc + 7);
+ } else {
+ pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
}
break;
}
BYTECODE(CHECK_LT) {
- int limit = Load16(pc + 1);
+ uint32_t limit = Load16(pc + 1);
if (current_char < limit) {
pc = code_base + Load32(pc + 3);
} else {
@@ -280,7 +357,7 @@
break;
}
BYTECODE(CHECK_GT) {
- int limit = Load16(pc + 1);
+ uint32_t limit = Load16(pc + 1);
if (current_char > limit) {
pc = code_base + Load32(pc + 3);
} else {
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index b6165c4..6cca7fc 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -214,24 +214,14 @@
vector_ = static_offsets_vector_;
}
}
-
-
inline ~OffsetsVector() {
if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
DeleteArray(vector_);
vector_ = NULL;
}
}
-
-
- inline int* vector() {
- return vector_;
- }
-
-
- inline int length() {
- return offsets_vector_length_;
- }
+ inline int* vector() { return vector_; }
+ inline int length() { return offsets_vector_length_; }
private:
int* vector_;
@@ -270,22 +260,21 @@
"malformed_regexp");
return Handle<Object>::null();
}
- RegExpAtom* atom = parse_result.tree->AsAtom();
- if (atom != NULL && !flags.is_ignore_case()) {
- if (parse_result.has_character_escapes) {
- Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string =
- Factory::NewStringFromTwoByte(atom_pattern);
- result = AtomCompile(re, pattern, flags, atom_string);
- } else {
- result = AtomCompile(re, pattern, flags, pattern);
- }
+
+ if (parse_result.simple && !flags.is_ignore_case()) {
+ // Parse-tree is a single atom that is equal to the pattern.
+ result = AtomCompile(re, pattern, flags, pattern);
+ } else if (parse_result.tree->IsAtom() &&
+ !flags.is_ignore_case() &&
+ parse_result.capture_count == 0) {
+ RegExpAtom* atom = parse_result.tree->AsAtom();
+ Vector<const uc16> atom_pattern = atom->data();
+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
+ result = AtomCompile(re, pattern, flags, atom_string);
+ } else if (FLAG_irregexp) {
+ result = IrregexpPrepare(re, pattern, flags);
} else {
- if (FLAG_irregexp) {
- result = IrregexpPrepare(re, pattern, flags);
- } else {
- result = JscrePrepare(re, pattern, flags);
- }
+ result = JscrePrepare(re, pattern, flags);
}
Object* data = re->data();
if (data->IsFixedArray()) {
@@ -308,7 +297,7 @@
return AtomExec(regexp, subject, index);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExec(regexp, subject, index);
- if (!result.is_null()) {
+ if (!result.is_null() || Top::has_pending_exception()) {
return result;
}
// We couldn't handle the regexp using Irregexp, so fall back
@@ -338,12 +327,13 @@
return AtomExecGlobal(regexp, subject);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExecGlobal(regexp, subject);
- if (!result.is_null()) {
+ if (!result.is_null() || Top::has_pending_exception()) {
return result;
}
- // We couldn't handle the regexp using Irregexp, so fall back
- // on JSCRE.
- // Reset the JSRegExp to use JSCRE.
+ // Empty handle as result but no exception thrown means that
+ // the regexp contains features not yet handled by the irregexp
+ // compiler.
+ // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE.
JscrePrepare(regexp,
Handle<String>(regexp->Pattern()),
regexp->GetFlags());
@@ -383,7 +373,6 @@
return Handle<Smi>(Smi::FromInt(-1));
}
- LOG(RegExpExecEvent(re, start_index, subject));
int value = Runtime::StringMatch(subject, needle, start_index);
if (value == -1) return Factory::null_value();
@@ -403,7 +392,6 @@
int subject_length = subject->length();
int needle_length = needle->length();
while (true) {
- LOG(RegExpExecEvent(re, index, subject));
int value = -1;
if (index + needle_length <= subject_length) {
value = Runtime::StringMatch(subject, needle, index);
@@ -520,8 +508,9 @@
// Throw an exception.
Handle<JSArray> array = Factory::NewJSArray(2);
SetElement(array, 0, pattern);
- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
- (error_message == NULL) ? "Unknown regexp error" : error_message)));
+ const char* message =
+ (error_message == NULL) ? "Unknown regexp error" : error_message;
+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
Handle<Object> regexp_err =
Factory::NewSyntaxError("malformed_regexp", array);
Top::Throw(*regexp_err);
@@ -584,8 +573,6 @@
reinterpret_cast<v8::jscre::JscreRegExp*>(
internal->GetDataStartAddress());
- LOG(RegExpExecEvent(regexp, previous_index, subject));
-
rc = v8::jscre::jsRegExpExecute(js_regexp,
two_byte_subject,
subject->length(),
@@ -682,6 +669,12 @@
// Irregexp implementation.
+// Retrieves a compiled version of the regexp for either ASCII or non-ASCII
+// strings. If the compiled version doesn't already exist, it is compiled
+// from the source pattern.
+// Irregexp is not feature complete yet. If there is something in the
+// regexp that the compiler cannot currently handle, an empty
+// handle is returned, but no exception is thrown.
static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
bool is_ascii) {
ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
@@ -794,7 +787,11 @@
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
- LOG(RegExpExecEvent(regexp, previous_index, subject));
+
+ if (!subject->IsFlat(StringShape(*subject))) {
+ FlattenString(subject);
+ }
+
return IrregexpExecOnce(irregexp,
num_captures,
subject,
@@ -829,11 +826,12 @@
subject->Flatten(shape);
}
- do {
+ while (true) {
if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match.
matches = Factory::null_value();
+ return result;
} else {
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
@@ -842,7 +840,6 @@
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
- LOG(RegExpExecEvent(regexp, previous_index, subject));
matches = IrregexpExecOnce(irregexp,
IrregexpNumberOfCaptures(irregexp),
subject,
@@ -857,17 +854,12 @@
if (offsets.vector()[0] == offsets.vector()[1]) {
previous_index++;
}
+ } else if (matches->IsNull()) {
+ return result;
+ } else {
+ return matches;
}
}
- } while (matches->IsJSArray());
-
- // If we exited the loop with an exception, throw it.
- if (matches->IsNull()) {
- // Exited loop normally.
- return result;
- } else {
- // Exited loop with the exception in matches.
- return matches;
}
}
@@ -878,14 +870,11 @@
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
+ ASSERT(subject->IsFlat(StringShape(*subject)));
bool rc;
int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
- if (!subject->IsFlat(StringShape(*subject))) {
- FlattenString(subject);
- }
-
switch (tag) {
case RegExpMacroAssembler::kIA32Implementation: {
#ifndef ARM
@@ -911,6 +900,8 @@
bool is_ascii = flatshape.IsAsciiRepresentation();
int char_size_shift = is_ascii ? 0 : 1;
+ RegExpMacroAssemblerIA32::Result res;
+
if (flatshape.IsExternal()) {
const byte* address;
if (is_ascii) {
@@ -920,7 +911,7 @@
ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
}
- rc = RegExpMacroAssemblerIA32::Execute(
+ res = RegExpMacroAssemblerIA32::Execute(
*code,
&address,
start_offset << char_size_shift,
@@ -932,7 +923,7 @@
is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
: SeqTwoByteString::cast(*subject)->GetCharsAddress();
int byte_offset = char_address - reinterpret_cast<Address>(*subject);
- rc = RegExpMacroAssemblerIA32::Execute(
+ res = RegExpMacroAssemblerIA32::Execute(
*code,
subject.location(),
byte_offset + (start_offset << char_size_shift),
@@ -941,6 +932,12 @@
previous_index == 0);
}
+ if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
+ ASSERT(Top::has_pending_exception());
+ return Handle<Object>::null();
+ }
+ rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
+
if (rc) {
// Capture values are relative to start_offset only.
for (int i = 0; i < offsets_vector_length; i++) {
@@ -981,9 +978,9 @@
Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
// The captures come in (start, end+1) pairs.
- for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+ for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
array->set(i, Smi::FromInt(offsets_vector[i]));
- array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+ array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
}
return Factory::NewJSArrayWithElements(array);
}
@@ -1177,6 +1174,16 @@
}
+int TextElement::length() {  // Length of this element: the atom's length, or 1 for a character class.
+ if (type == ATOM) {
+ return data.u_atom->length();
+ } else {
+ ASSERT(type == CHAR_CLASS);  // ATOM and CHAR_CLASS are the only types expected here.
+ return 1;
+ }
+}
+
+
DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
if (table_ == NULL) {
table_ = new DispatchTable();
@@ -1318,25 +1325,26 @@
}
-void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro,
+void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler,
int max_register,
OutSet& affected_registers) {
for (int reg = 0; reg <= max_register; reg++) {
- if (affected_registers.Get(reg)) macro->PushRegister(reg);
+ if (affected_registers.Get(reg)) assembler->PushRegister(reg);
}
}
-void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro,
- int max_register,
- OutSet& affected_registers) {
+void GenerationVariant::RestoreAffectedRegisters(
+ RegExpMacroAssembler* assembler,
+ int max_register,
+ OutSet& affected_registers) {
for (int reg = max_register; reg >= 0; reg--) {
- if (affected_registers.Get(reg)) macro->PopRegister(reg);
+ if (affected_registers.Get(reg)) assembler->PopRegister(reg);
}
}
-void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
+void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler,
int max_register,
OutSet& affected_registers) {
for (int reg = 0; reg <= max_register; reg++) {
@@ -1384,13 +1392,13 @@
}
}
if (store_position != -1) {
- macro->WriteCurrentPositionToRegister(reg, store_position);
+ assembler->WriteCurrentPositionToRegister(reg, store_position);
} else {
if (absolute) {
- macro->SetRegister(reg, value);
+ assembler->SetRegister(reg, value);
} else {
if (value != 0) {
- macro->AdvanceRegister(reg, value);
+ assembler->AdvanceRegister(reg, value);
}
}
}
@@ -1402,14 +1410,20 @@
// nodes. It normalises the state of the code generator to ensure we can
// generate generic code.
bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
- RegExpMacroAssembler* macro = compiler->macro_assembler();
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
- ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL);
+ ASSERT(actions_ != NULL ||
+ cp_offset_ != 0 ||
+ backtrack() != NULL ||
+ characters_preloaded_ != 0 ||
+ quick_check_performed_.characters() != 0 ||
+ bound_checked_up_to_ != 0);
if (actions_ == NULL && backtrack() == NULL) {
// Here we just have some deferred cp advances to fix and we are back to
- // a normal situation.
- macro->AdvanceCurrentPosition(cp_offset_);
+ // a normal situation. We may also have to forget some information gained
+ // through a quick check that was already performed.
+ if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
// Create a new trivial state and generate the node with that.
GenerationVariant new_state;
return successor->Emit(compiler, &new_state);
@@ -1418,50 +1432,50 @@
// Generate deferred actions here along with code to undo them again.
OutSet affected_registers;
int max_register = FindAffectedRegisters(&affected_registers);
- PushAffectedRegisters(macro, max_register, affected_registers);
- PerformDeferredActions(macro, max_register, affected_registers);
+ PushAffectedRegisters(assembler, max_register, affected_registers);
+ PerformDeferredActions(assembler, max_register, affected_registers);
if (backtrack() != NULL) {
// Here we have a concrete backtrack location. These are set up by choice
// nodes and so they indicate that we have a deferred save of the current
// position which we may need to emit here.
- macro->PushCurrentPosition();
+ assembler->PushCurrentPosition();
}
if (cp_offset_ != 0) {
- macro->AdvanceCurrentPosition(cp_offset_);
+ assembler->AdvanceCurrentPosition(cp_offset_);
}
// Create a new trivial state and generate the node with that.
Label undo;
- macro->PushBacktrack(&undo);
+ assembler->PushBacktrack(&undo);
GenerationVariant new_state;
bool ok = successor->Emit(compiler, &new_state);
// On backtrack we need to restore state.
- macro->Bind(&undo);
+ assembler->Bind(&undo);
if (!ok) return false;
if (backtrack() != NULL) {
- macro->PopCurrentPosition();
+ assembler->PopCurrentPosition();
}
- RestoreAffectedRegisters(macro, max_register, affected_registers);
+ RestoreAffectedRegisters(assembler, max_register, affected_registers);
if (backtrack() == NULL) {
- macro->Backtrack();
+ assembler->Backtrack();
} else {
- macro->GoTo(backtrack());
+ assembler->GoTo(backtrack());
}
return true;
}
-void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro,
+void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler,
GenerationVariant* variant) {
if (info()->at_end) {
Label succeed;
// LoadCurrentCharacter will go to the label if we are at the end of the
// input string.
- macro->LoadCurrentCharacter(0, &succeed);
- macro->GoTo(variant->backtrack());
- macro->Bind(&succeed);
+ assembler->LoadCurrentCharacter(0, &succeed);
+ assembler->GoTo(variant->backtrack());
+ assembler->Bind(&succeed);
}
}
@@ -1471,16 +1485,16 @@
if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
- RegExpMacroAssembler* macro = compiler->macro_assembler();
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!label()->is_bound()) {
- macro->Bind(label());
+ assembler->Bind(label());
}
- EmitInfoChecks(macro, variant);
- macro->ReadCurrentPositionFromRegister(current_position_register_);
- macro->ReadStackPointerFromRegister(stack_pointer_register_);
+ EmitInfoChecks(assembler, variant);
+ assembler->ReadCurrentPositionFromRegister(current_position_register_);
+ assembler->ReadStackPointerFromRegister(stack_pointer_register_);
// Now that we have unwound the stack we find at the top of the stack the
// backtrack that the BeginSubmatch node got.
- macro->Backtrack();
+ assembler->Backtrack();
return true;
}
@@ -1489,18 +1503,18 @@
if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
- RegExpMacroAssembler* macro = compiler->macro_assembler();
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!label()->is_bound()) {
- macro->Bind(label());
+ assembler->Bind(label());
}
switch (action_) {
case ACCEPT:
- EmitInfoChecks(macro, variant);
- macro->Succeed();
+ EmitInfoChecks(assembler, variant);
+ assembler->Succeed();
return true;
case BACKTRACK:
ASSERT(!info()->at_end);
- macro->GoTo(variant->backtrack());
+ assembler->GoTo(variant->backtrack());
return true;
case NEGATIVE_SUBMATCH_SUCCESS:
// This case is handled in a different virtual method.
@@ -1570,6 +1584,11 @@
#undef DEFINE_ACCEPT
+void LoopChoiceNode::Accept(NodeVisitor* visitor) {  // Hands this node to the visitor's loop-choice callback.
+ visitor->VisitLoopChoice(this);
+}
+
+
// -------------------------------------------------------------------
// Emit code.
@@ -1598,44 +1617,48 @@
static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
-static inline void EmitAtomNonLetters(
+// Only emits non-letters (things that don't have case). Only used for case
+// independent matches.
+static inline bool EmitAtomNonLetter(
RegExpMacroAssembler* macro_assembler,
- TextElement elm,
- Vector<const uc16> quarks,
+ uc16 c,
Label* on_failure,
int cp_offset,
- bool check_offset) {
+ bool check,
+ bool preloaded) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- // It is vital that this loop is backwards due to the unchecked character
- // load below.
- for (int i = quarks.length() - 1; i >= 0; i--) {
- uc16 c = quarks[i];
- int length = uncanonicalize.get(c, '\0', chars);
- if (length <= 1) {
- if (check_offset && i == quarks.length() - 1) {
- macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
- } else {
- // Here we don't need to check against the end of the input string
- // since this character lies before a character that matched.
- macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
- }
- macro_assembler->CheckNotCharacter(c, on_failure);
+ int length = uncanonicalize.get(c, '\0', chars);
+ bool checked = false;
+ if (length <= 1) {
+ if (!preloaded) {
+ macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
+ checked = check;
}
+ macro_assembler->CheckNotCharacter(c, on_failure);
}
+ return checked;
}
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
+ bool ascii,
uc16 c1,
uc16 c2,
Label* on_failure) {
+ uc16 char_mask;
+ if (ascii) {
+ char_mask = String::kMaxAsciiCharCode;
+ } else {
+ char_mask = String::kMaxUC16CharCode;
+ }
uc16 exor = c1 ^ c2;
// Check whether exor has only one bit set.
if (((exor - 1) & exor) == 0) {
// If c1 and c2 differ only by one bit.
// Ecma262UnCanonicalize always gives the highest number last.
ASSERT(c2 > c1);
- macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
+ uc16 mask = char_mask ^ exor;
+ macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
return true;
}
ASSERT(c2 > c1);
@@ -1645,65 +1668,65 @@
// subtract the difference from the found character, then do the masking
// trick. We avoid the theoretical case where negative numbers are
// involved in order to simplify code generation.
- macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
- diff,
- on_failure);
+ uc16 mask = char_mask ^ diff;
+ macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
+ diff,
+ mask,
+ on_failure);
return true;
}
return false;
}
-static inline void EmitAtomLetters(
+// Only emits letters (things that have case). Only used for case independent
+// matches.
+static inline bool EmitAtomLetter(
RegExpMacroAssembler* macro_assembler,
- TextElement elm,
- Vector<const uc16> quarks,
+ bool ascii,
+ uc16 c,
Label* on_failure,
int cp_offset,
- bool check_offset) {
+ bool check,
+ bool preloaded) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- // It is vital that this loop is backwards due to the unchecked character
- // load below.
- for (int i = quarks.length() - 1; i >= 0; i--) {
- uc16 c = quarks[i];
- int length = uncanonicalize.get(c, '\0', chars);
- if (length <= 1) continue;
- if (check_offset && i == quarks.length() - 1) {
- macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
- } else {
- // Here we don't need to check against the end of the input string
- // since this character lies before a character that matched.
- macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
- }
- Label ok;
- ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
- switch (length) {
- case 2: {
- if (ShortCutEmitCharacterPair(macro_assembler,
- chars[0],
- chars[1],
- on_failure)) {
- } else {
- macro_assembler->CheckCharacter(chars[0], &ok);
- macro_assembler->CheckNotCharacter(chars[1], on_failure);
- macro_assembler->Bind(&ok);
- }
- break;
- }
- case 4:
- macro_assembler->CheckCharacter(chars[3], &ok);
- // Fall through!
- case 3:
- macro_assembler->CheckCharacter(chars[0], &ok);
- macro_assembler->CheckCharacter(chars[1], &ok);
- macro_assembler->CheckNotCharacter(chars[2], on_failure);
- macro_assembler->Bind(&ok);
- break;
- default:
- UNREACHABLE();
- break;
- }
+ int length = uncanonicalize.get(c, '\0', chars);
+ if (length <= 1) return false;
+ // We may not need to check against the end of the input string
+ // if this character lies before a character that matched.
+ if (!preloaded) {
+ macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
}
+ Label ok;
+ ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
+ switch (length) {
+ case 2: {
+ if (ShortCutEmitCharacterPair(macro_assembler,
+ ascii,
+ chars[0],
+ chars[1],
+ on_failure)) {
+ } else {
+ macro_assembler->CheckCharacter(chars[0], &ok);
+ macro_assembler->CheckNotCharacter(chars[1], on_failure);
+ macro_assembler->Bind(&ok);
+ }
+ break;
+ }
+ case 4:
+ macro_assembler->CheckCharacter(chars[3], &ok);
+ // Fall through!
+ case 3:
+ macro_assembler->CheckCharacter(chars[0], &ok);
+ macro_assembler->CheckCharacter(chars[1], &ok);
+ macro_assembler->CheckNotCharacter(chars[2], on_failure);
+ macro_assembler->Bind(&ok);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ return true;
}
@@ -1712,7 +1735,16 @@
int cp_offset,
Label* on_failure,
bool check_offset,
- bool ascii) {
+ bool ascii,
+ bool preloaded) {
+ if (cc->is_standard() &&
+ macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
+ cp_offset,
+ check_offset,
+ on_failure)) {
+ return;
+ }
+
ZoneList<CharacterRange>* ranges = cc->ranges();
int max_char;
if (ascii) {
@@ -1758,15 +1790,11 @@
return;
}
- if (check_offset) {
- macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
- } else {
- // Here we don't need to check against the end of the input string
- // since this character lies before a character that matched.
- macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
+ if (!preloaded) {
+ macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
}
- for (int i = 0; i <= last_valid_range; i++) {
+ for (int i = 0; i < last_valid_range; i++) {
CharacterRange& range = ranges->at(i);
Label next_range;
uc16 from = range.from();
@@ -1827,6 +1855,10 @@
}
+RegExpNode::~RegExpNode() {  // Out-of-line destructor with an empty body.
+}
+
+
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
GenerationVariant* variant) {
// TODO(erikcorry): Implement support.
@@ -1877,112 +1909,581 @@
}
+int ActionNode::EatsAtLeast(int recursion_depth) {  // Adds no count of its own.
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;  // Too deep.
+ if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input, so report 0.
+ return on_success()->EatsAtLeast(recursion_depth + 1);
+}
+
+// Returns this node's Length(), extended by the successor's count when short.
+int TextNode::EatsAtLeast(int recursion_depth) {
+ int answer = Length();
+ if (answer >= 4) return answer;  // Long enough; do not look any further.
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
+ return answer + on_success()->EatsAtLeast(recursion_depth + 1);
+}
+
+// Returns the smallest EatsAtLeast() among alternatives != ignore_this_node.
+int ChoiceNode::EatsAtLeastHelper(int recursion_depth,
+ RegExpNode* ignore_this_node) {
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
+ int min = 100;  // Start high; stays 100 if every alternative is skipped.
+ int choice_count = alternatives_->length();
+ for (int i = 0; i < choice_count; i++) {
+ RegExpNode* node = alternatives_->at(i).node();
+ if (node == ignore_this_node) continue;
+ int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1);
+ if (node_eats_at_least < min) min = node_eats_at_least;
+ }
+ return min;
+}
+
+// Same as ChoiceNode::EatsAtLeast except that loop_node_ is not considered.
+int LoopChoiceNode::EatsAtLeast(int recursion_depth) {
+ return EatsAtLeastHelper(recursion_depth, loop_node_);
+}
+
+// Smallest EatsAtLeast() over the alternatives; NULL is passed as the ignore.
+int ChoiceNode::EatsAtLeast(int recursion_depth) {
+ return EatsAtLeastHelper(recursion_depth, NULL);
+}
+
+
+// Sets every bit to the right of the left-most 1-bit (e.g. 0x10 -> 0x1f).
+static inline uint32_t SmearBitsRight(uint32_t v) {
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;  // Shifts of 1+2+4+8+16 reach all 32 bit positions.
+ return v;
+}
+
+// Packs the per-position masks/values into mask_/value_; false if none useful.
+bool QuickCheckDetails::Rationalize(bool asc) {
+ bool found_useful_op = false;
+ uint32_t char_mask;
+ if (asc) {
+ char_mask = String::kMaxAsciiCharCode;
+ } else {
+ char_mask = String::kMaxUC16CharCode;
+ }
+ mask_ = 0;
+ value_ = 0;
+ int char_shift = 0;
+ for (int i = 0; i < characters_; i++) {
+ Position* pos = &positions_[i];
+ if ((pos->mask & String::kMaxAsciiCharCode) != 0) {  // NOTE(review): ASCII mask is used even when !asc - confirm.
+ found_useful_op = true;
+ }
+ mask_ |= (pos->mask & char_mask) << char_shift;
+ value_ |= (pos->value & char_mask) << char_shift;
+ char_shift += asc ? 8 : 16;  // Position i lands at bit offset i*8 or i*16.
+ }
+ return found_useful_op;
+}
+
+// Emits a mask-and-compare quick check for this node; false if none emitted.
+bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
+ GenerationVariant* variant,
+ bool preload_has_checked_bounds,
+ Label* on_possible_success,
+ QuickCheckDetails* details,
+ bool fall_through_on_failure) {
+ if (details->characters() == 0) return false;  // Nothing to compare.
+ GetQuickCheckDetails(details, compiler, 0);
+ if (!details->Rationalize(compiler->ascii())) return false;  // No useful mask.
+ uint32_t mask = details->mask();
+ uint32_t value = details->value();
+
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+
+ if (variant->characters_preloaded() != details->characters()) {  // Load needed.
+ assembler->LoadCurrentCharacter(variant->cp_offset(),
+ variant->backtrack(),
+ !preload_has_checked_bounds,
+ details->characters());
+ }
+
+
+ bool need_mask = true;  // Cleared below when the mask covers every loaded bit.
+
+ if (details->characters() == 1) {
+ // If number of characters preloaded is 1 then we used a byte or 16 bit
+ // load so the value is already masked down.
+ uint32_t char_mask;
+ if (compiler->ascii()) {
+ char_mask = String::kMaxAsciiCharCode;
+ } else {
+ char_mask = String::kMaxUC16CharCode;
+ }
+ if ((mask & char_mask) == char_mask) need_mask = false;
+ mask &= char_mask;
+ } else {
+ // For 2-character preloads in ASCII mode we also use a 16 bit load with
+ // zero extend.
+ if (details->characters() == 2 && compiler->ascii()) {
+ if ((mask & 0xffff) == 0xffff) need_mask = false;
+ } else {
+ if (mask == 0xffffffff) need_mask = false;
+ }
+ }
+
+ if (fall_through_on_failure) {  // Jump on possible match, else fall through.
+ if (need_mask) {
+ assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
+ } else {
+ assembler->CheckCharacter(value, on_possible_success);
+ }
+ } else {  // Jump to the backtrack label on mismatch, else fall through.
+ if (need_mask) {
+ assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack());
+ } else {
+ assembler->CheckNotCharacter(value, variant->backtrack());
+ }
+ }
+ return true;
+}
+
+
+// Here is the meat of GetQuickCheckDetails (see also the comment on the
+// super-class in the .h file).
+//
+// We iterate along the text object, building up for each character a
+// mask and value that can be used to test for a quick failure to match.
+// The masks and values for the positions will be combined into a single
+// machine word for the current character width in order to be used in
+// generating a quick check.
+void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) {
+ ASSERT(characters_filled_in < details->characters());
+ int characters = details->characters();
+ int char_mask;
+ int char_shift;
+ if (compiler->ascii()) {
+ char_mask = String::kMaxAsciiCharCode;
+ char_shift = 8;
+ } else {
+ char_mask = String::kMaxUC16CharCode;
+ char_shift = 16;
+ }
+ for (int k = 0; k < elms_->length(); k++) {
+ TextElement elm = elms_->at(k);
+ if (elm.type == TextElement::ATOM) {
+ Vector<const uc16> quarks = elm.data.u_atom->data();
+ for (int i = 0; i < characters && i < quarks.length(); i++) {
+ QuickCheckDetails::Position* pos =
+ details->positions(characters_filled_in);
+ if (compiler->ignore_case()) {
+ unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ uc16 c = quarks[i];
+ int length = uncanonicalize.get(c, '\0', chars);  // chars[0..length) is used below.
+ if (length < 2) {
+ // This letter has no case equivalents, so it's nice and simple
+ // and the mask-compare will determine definitely whether we have
+ // a match at this character position.
+ pos->mask = char_mask;
+ pos->value = c;
+ pos->determines_perfectly = true;
+ } else {
+ uint32_t common_bits = char_mask;
+ uint32_t bits = chars[0];
+ for (int j = 1; j < length; j++) {
+ uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
+ common_bits ^= differing_bits;
+ bits &= common_bits;
+ }
+ // If length is 2 and common bits has only one zero in it then
+ // our mask and compare instruction will determine definitely
+ // whether we have a match at this character position. Otherwise
+ // it can only be an approximate check.
+ uint32_t one_zero = (common_bits | ~char_mask);
+ if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
+ pos->determines_perfectly = true;
+ }
+ pos->mask = common_bits;
+ pos->value = bits;
+ }
+ } else {
+ // Don't ignore case. Nice simple case where the mask-compare will
+ // determine definitely whether we have a match at this character
+ // position.
+ pos->mask = char_mask;
+ pos->value = quarks[i];
+ pos->determines_perfectly = true;
+ }
+ characters_filled_in++;
+ ASSERT(characters_filled_in <= details->characters());
+ if (characters_filled_in == details->characters()) {
+ return;
+ }
+ }
+ } else {
+ QuickCheckDetails::Position* pos =
+ details->positions(characters_filled_in);
+ RegExpCharacterClass* tree = elm.data.u_char_class;
+ ZoneList<CharacterRange>* ranges = tree->ranges();
+ CharacterRange range = ranges->at(0);  // NOTE(review): needs >= 1 range - confirm.
+ if (tree->is_negated()) {
+ // A quick check uses multi-character mask and compare. There is no
+ // useful way to incorporate a negative char class into this scheme
+ // so we just conservatively create a mask and value that will always
+ // succeed.
+ pos->mask = 0;
+ pos->value = 0;  // determines_perfectly is not written on this path.
+ } else {
+ uint32_t differing_bits = (range.from() ^ range.to());
+ // A mask and compare is only perfect if the differing bits form a
+ // number like 00011111 with one single block of trailing 1s.
+ if ((differing_bits & (differing_bits + 1)) == 0) {
+ pos->determines_perfectly = true;
+ }
+ uint32_t common_bits = ~SmearBitsRight(differing_bits);
+ uint32_t bits = (range.from() & common_bits);
+ for (int i = 1; i < ranges->length(); i++) {
+ // Here we are combining more ranges into the mask and compare
+ // value. With each new range the mask becomes more sparse and
+ // so the chances of a false positive rise. A character class
+ // with multiple ranges is assumed never to be equivalent to a
+ // mask and compare operation.
+ pos->determines_perfectly = false;
+ CharacterRange range = ranges->at(i);
+ uint32_t new_common_bits = (range.from() ^ range.to());
+ new_common_bits = ~SmearBitsRight(new_common_bits);
+ common_bits &= new_common_bits;
+ bits &= new_common_bits;
+ uint32_t differing_bits = (range.from() & common_bits) ^ bits;
+ common_bits ^= differing_bits;
+ bits &= common_bits;
+ }
+ pos->mask = common_bits;
+ pos->value = bits;
+ }
+ characters_filled_in++;
+ ASSERT(characters_filled_in <= details->characters());
+ if (characters_filled_in == details->characters()) {
+ return;
+ }
+ }
+ }
+ ASSERT(characters_filled_in != details->characters());
+ on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);  // Successor fills the remaining positions.
+}
+
+// Zeroes every position currently in use and sets the character count to 0.
+void QuickCheckDetails::Clear() {
+ for (int i = 0; i < characters_; i++) {
+ positions_[i].mask = 0;
+ positions_[i].value = 0;
+ positions_[i].determines_perfectly = false;
+ }
+ characters_ = 0;
+}
+
+// Drops the first `by` positions and shifts the rest down; `ascii` is unused.
+void QuickCheckDetails::Advance(int by, bool ascii) {
+ ASSERT(by > 0);
+ if (by >= characters_) {  // Everything is consumed: nothing left to keep.
+ Clear();
+ return;
+ }
+ for (int i = 0; i < characters_ - by; i++) {
+ positions_[i] = positions_[by + i];
+ }
+ for (int i = characters_ - by; i < characters_; i++) {  // Blank vacated slots.
+ positions_[i].mask = 0;
+ positions_[i].value = 0;
+ positions_[i].determines_perfectly = false;
+ }
+ characters_ -= by;
+ // We could change mask_ and value_ here but we would never advance unless
+ // they had already been used in a check and they won't be used again because
+ // it would gain us nothing. So there's no point.
+}
+
+// Combines other into this, keeping only mask bits on which both sides agree.
+void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
+ ASSERT(characters_ == other->characters_);
+ for (int i = from_index; i < characters_; i++) {
+ QuickCheckDetails::Position* pos = positions(i);
+ QuickCheckDetails::Position* other_pos = other->positions(i);
+ if (pos->mask != other_pos->mask ||
+ pos->value != other_pos->value ||
+ !other_pos->determines_perfectly) {
+ // Our mask-compare operation will be approximate unless we have the
+ // exact same operation on both sides of the alternation.
+ pos->determines_perfectly = false;
+ }
+ pos->mask &= other_pos->mask;
+ pos->value &= pos->mask;
+ other_pos->value &= pos->mask;  // Note: this also modifies `other`.
+ uc16 differing_bits = (pos->value ^ other_pos->value);  // NOTE(review): uc16 keeps the low 16 bits only - confirm.
+ pos->mask &= ~differing_bits;
+ pos->value &= pos->mask;
+ }
+}
+
+// Leaves details untouched when the body can be empty; else uses ChoiceNode's.
+void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) {
+ if (body_can_be_zero_length_) return;
+ return ChoiceNode::GetQuickCheckDetails(details,
+ compiler,
+ characters_filled_in);
+}
+
+// Fills details from alternative 0, then merges in each later alternative.
+void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) {
+ int choice_count = alternatives_->length();
+ ASSERT(choice_count > 0);
+ alternatives_->at(0).node()->GetQuickCheckDetails(details,
+ compiler,
+ characters_filled_in);
+ for (int i = 1; i < choice_count; i++) {
+ QuickCheckDetails new_details(details->characters());  // Scratch copy per alternative.
+ RegExpNode* node = alternatives_->at(i).node();
+ node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
+ // Here we merge the quick match details of the two branches.
+ details->Merge(&new_details, characters_filled_in);
+ }
+}
+
+
+// We call this repeatedly to generate code for each pass over the text node.
+// The passes are in increasing order of difficulty because we hope one
+// of the first passes will fail in which case we are saved the work of the
+// later passes. For example for the case independent regexp /%[asdfghjkl]a/
+// we will check the '%' in the first pass, the case independent 'a' in the
+// second pass and the character class in the last pass.
+//
+// The passes are done from right to left, so for example to test for /bar/
+// we will first test for an 'r' with offset 2, then an 'a' with offset 1
+// and then a 'b' with offset 0. This means we can avoid the end-of-input
+// bounds check most of the time. In the example we only need to check for
+// end-of-input when loading the putative 'r'.
+//
+// A slight complication involves the fact that the first character may already
+// be fetched into a register by the previous node. In this case we want to
+// do the test for that character first. We do this in separate passes. The
+// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
+// pass has been performed then subsequent passes will have true in
+// first_element_checked to indicate that that character does not need to be
+// checked again.
+//
+// In addition to all this we are passed a GenerationVariant, which can
+// contain an AlternativeGeneration object. In this AlternativeGeneration
+// object we can see details of any quick check that was already passed in
+// order to get to the code we are now generating. The quick check can involve
+// loading characters, which means we do not need to recheck the bounds
+// up to the limit the quick check already checked. In addition the quick
+// check can have involved a mask and compare operation which may simplify
+// or obviate the need for further checks at some character positions.
+void TextNode::TextEmitPass(RegExpCompiler* compiler,
+ TextEmitPassType pass,
+ bool preloaded,
+ GenerationVariant* variant,
+ bool first_element_checked,
+ int* checked_up_to) {
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+ bool ascii = compiler->ascii();
+ Label* backtrack = variant->backtrack();
+ QuickCheckDetails* quick_check = variant->quick_check_performed();
+ int element_count = elms_->length();
+ for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {  // Preloaded: element 0 only.
+ TextElement elm = elms_->at(i);
+ int cp_offset = variant->cp_offset() + elm.cp_offset;
+ if (elm.type == TextElement::ATOM) {
+ if (pass == NON_ASCII_MATCH ||
+ pass == CHARACTER_MATCH ||
+ pass == CASE_CHARACTER_MATCH) {
+ Vector<const uc16> quarks = elm.data.u_atom->data();
+ for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {  // Preloaded: char 0 only.
+ bool bound_checked = true; // Most ops will check their bounds.
+ if (first_element_checked && i == 0 && j == 0) continue;  // Already tested.
+ if (quick_check != NULL &&
+ elm.cp_offset + j < quick_check->characters() &&
+ quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
+ continue;
+ }
+ if (pass == NON_ASCII_MATCH) {
+ ASSERT(ascii);
+ if (quarks[j] > String::kMaxAsciiCharCode) {
+ assembler->GoTo(backtrack);  // Emit an unconditional jump to backtrack.
+ return;
+ }
+ } else if (pass == CHARACTER_MATCH) {
+ if (compiler->ignore_case()) {
+ bound_checked = EmitAtomNonLetter(assembler,
+ quarks[j],
+ backtrack,
+ cp_offset + j,
+ *checked_up_to < cp_offset + j,
+ preloaded);
+ } else {
+ if (!preloaded) {
+ assembler->LoadCurrentCharacter(cp_offset + j,
+ backtrack,
+ *checked_up_to < cp_offset + j);
+ }
+ assembler->CheckNotCharacter(quarks[j], backtrack);
+ }
+ } else {
+ ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
+ ASSERT(compiler->ignore_case());
+ bound_checked = EmitAtomLetter(assembler,
+ compiler->ascii(),
+ quarks[j],
+ backtrack,
+ cp_offset + j,
+ *checked_up_to < cp_offset + j,
+ preloaded);
+ }
+ if (pass != NON_ASCII_MATCH && bound_checked) {  // Record the new high-water mark.
+ if (cp_offset + j > *checked_up_to) {
+ *checked_up_to = cp_offset + j;
+ }
+ }
+ }
+ }
+ } else {
+ ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
+ if (first_element_checked && i == 0) continue;
+ if (quick_check != NULL &&
+ elm.cp_offset < quick_check->characters() &&
+ quick_check->positions(elm.cp_offset)->determines_perfectly) {
+ continue;
+ }
+ if (pass == CHARACTER_CLASS_MATCH) {
+ RegExpCharacterClass* cc = elm.data.u_char_class;
+ EmitCharClass(assembler,
+ cc,
+ cp_offset,
+ backtrack,
+ *checked_up_to < cp_offset,
+ ascii,
+ preloaded);
+ if (cp_offset > *checked_up_to) {
+ *checked_up_to = cp_offset;
+ }
+ }
+ }
+ }
+}
+
+// Offset of the last element plus its length (1 if it is not an atom).
+int TextNode::Length() {
+ TextElement elm = elms_->last();
+ ASSERT(elm.cp_offset >= 0);
+ if (elm.type == TextElement::ATOM) {
+ return elm.cp_offset + elm.data.u_atom->data().length();
+ } else {
+ return elm.cp_offset + 1;
+ }
+}
+
+
// This generates the code to match a text node. A text node can contain
// straight character sequences (possibly to be matched in a case-independent
-// way) and character classes. In order to be most efficient we test for the
-// simple things first and then move on to the more complicated things. The
-// simplest thing is a non-letter or a letter if we are matching case. The
-// next-most simple thing is a case-independent letter. The least simple is
-// a character class. Another optimization is that we test the last one first.
-// If that succeeds we don't need to test for the end of the string when we
-// load other characters.
+// way) and character classes. For efficiency we do not do this in a single
+// pass from left to right. Instead we pass over the text node several times,
+// emitting code for some character positions every time. See the comment on
+// TextEmitPass for details.
bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- Label *backtrack = variant->backtrack();
LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == FAIL) return false;
if (limit_result == DONE) return true;
ASSERT(limit_result == CONTINUE);
- int element_count = elms_->length();
- ASSERT(element_count != 0);
- if (info()->at_end) {
- macro_assembler->GoTo(backtrack);
- return true;
- }
- // First check for non-ASCII text.
- // TODO(plesner): We should do this at node level.
- if (compiler->ascii()) {
- for (int i = element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- if (elm.type == TextElement::ATOM) {
- Vector<const uc16> quarks = elm.data.u_atom->data();
- for (int j = quarks.length() - 1; j >= 0; j--) {
- if (quarks[j] > String::kMaxAsciiCharCode) {
- macro_assembler->GoTo(backtrack);
- return true;
- }
- }
- } else {
- ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
- }
- }
- }
- // Second, handle straight character matches.
- int checked_up_to = -1;
- for (int i = element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- ASSERT(elm.cp_offset >= 0);
- int cp_offset = variant->cp_offset() + elm.cp_offset;
- if (elm.type == TextElement::ATOM) {
- Vector<const uc16> quarks = elm.data.u_atom->data();
- int last_cp_offset = cp_offset + quarks.length();
- if (compiler->ignore_case()) {
- EmitAtomNonLetters(macro_assembler,
- elm,
- quarks,
- backtrack,
- cp_offset,
- checked_up_to < last_cp_offset);
- } else {
- macro_assembler->CheckCharacters(quarks,
- cp_offset,
- backtrack,
- checked_up_to < last_cp_offset);
- }
- if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
- } else {
- ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
- }
- }
- // Third, handle case independent letter matches if any.
- if (compiler->ignore_case()) {
- for (int i = element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- int cp_offset = variant->cp_offset() + elm.cp_offset;
- if (elm.type == TextElement::ATOM) {
- Vector<const uc16> quarks = elm.data.u_atom->data();
- int last_cp_offset = cp_offset + quarks.length();
- EmitAtomLetters(macro_assembler,
- elm,
- quarks,
- backtrack,
- cp_offset,
- checked_up_to < last_cp_offset);
- if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
- }
- }
- }
- // If the fast character matches passed then do the character classes.
- for (int i = element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- int cp_offset = variant->cp_offset() + elm.cp_offset;
- if (elm.type == TextElement::CHAR_CLASS) {
- RegExpCharacterClass* cc = elm.data.u_char_class;
- EmitCharClass(macro_assembler,
- cc,
- cp_offset,
- backtrack,
- checked_up_to < cp_offset,
- compiler->ascii());
- if (cp_offset > checked_up_to) checked_up_to = cp_offset;
- }
+ if (info()->follows_word_interest ||
+ info()->follows_newline_interest ||
+ info()->follows_start_interest) {
+ return false;
}
- GenerationVariant new_variant(*variant);
- new_variant.set_cp_offset(checked_up_to + 1);
+ if (info()->at_end) {
+ compiler->macro_assembler()->GoTo(variant->backtrack());
+ return true;
+ }
+
+ if (compiler->ascii()) {
+ int dummy = 0;  // Not updated by the NON_ASCII_MATCH pass.
+ TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
+ }
+
+ bool first_elt_done = false;  // Set once the preloaded passes have run.
+ int bound_checked_to = variant->cp_offset() - 1;
+ bound_checked_to += variant->bound_checked_up_to();
+
+ // If a character is preloaded into the current character register then
+ // check that now.
+ if (variant->characters_preloaded() == 1) {
+ TextEmitPass(compiler,
+ CHARACTER_MATCH,
+ true,
+ variant,
+ false,
+ &bound_checked_to);
+ if (compiler->ignore_case()) {
+ TextEmitPass(compiler,
+ CASE_CHARACTER_MATCH,
+ true,
+ variant,
+ false,
+ &bound_checked_to);
+ }
+ TextEmitPass(compiler,
+ CHARACTER_CLASS_MATCH,
+ true,
+ variant,
+ false,
+ &bound_checked_to);
+ first_elt_done = true;
+ }
+
+ TextEmitPass(compiler,
+ CHARACTER_MATCH,
+ false,
+ variant,
+ first_elt_done,
+ &bound_checked_to);
+ if (compiler->ignore_case()) {
+ TextEmitPass(compiler,
+ CASE_CHARACTER_MATCH,
+ false,
+ variant,
+ first_elt_done,
+ &bound_checked_to);
+ }
+ TextEmitPass(compiler,
+ CHARACTER_CLASS_MATCH,
+ false,
+ variant,
+ first_elt_done,
+ &bound_checked_to);
+
+ GenerationVariant successor_variant(*variant);
+ successor_variant.AdvanceVariant(Length(), compiler->ascii());  // Step past this text.
RecursionCheck rc(compiler);
- return on_success()->Emit(compiler, &new_variant);
+ return on_success()->Emit(compiler, &successor_variant);
+}
+
+// Advances this variant's offsets and quick-check info by `by` characters.
+void GenerationVariant::AdvanceVariant(int by, bool ascii) {
+ ASSERT(by > 0);
+ // We don't have an instruction for shifting the current character register
+ // down or for using a shifted value for anything so let's just forget that
+ // we preloaded any characters into it.
+ characters_preloaded_ = 0;
+ // Adjust the offsets of the quick check performed information. This
+ // information is used to find out what we already determined about the
+ // characters by means of mask and compare.
+ quick_check_performed_.Advance(by, ascii);
+ cp_offset_ += by;
+ bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);  // Clamped at zero.
+}
@@ -2044,6 +2545,20 @@
}
+void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
+ ASSERT_EQ(loop_node_, NULL);  // loop_node_ must not have been set before.
+ AddAlternative(alt);
+ loop_node_ = alt.node();  // Remembered so EatsAtLeast() can skip this node.
+}
+
+// Adds alt as an alternative and records its node in continue_node_.
+void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
+ ASSERT_EQ(continue_node_, NULL);
+ AddAlternative(alt);
+ continue_node_ = alt.node();
+}
+
+
bool LoopChoiceNode::Emit(RegExpCompiler* compiler,
GenerationVariant* variant) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
@@ -2065,6 +2580,155 @@
}
+int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {
+ int preload_characters = EatsAtLeast(0);  // Start from the EatsAtLeast bound.
+#ifdef CAN_READ_UNALIGNED
+ bool ascii = compiler->ascii();
+ if (ascii) {
+ if (preload_characters > 4) preload_characters = 4;
+ // We can't preload 3 characters because there is no machine instruction
+ // to do that. We can't just load 4 because we could be reading
+ // beyond the end of the string, which could cause a memory fault.
+ if (preload_characters == 3) preload_characters = 2;
+ } else {
+ if (preload_characters > 2) preload_characters = 2;  // Non-ASCII: cap at 2.
+ }
+#else
+ if (preload_characters > 1) preload_characters = 1;  // At most one character.
+#endif
+ return preload_characters;
+}
+
+
+// This class is used when generating the alternatives in a choice node. It
+// records the way the alternative is being code generated.
+class AlternativeGeneration: public Malloced {
+ public:
+ AlternativeGeneration()
+ : possible_success(),
+ expects_preload(false),
+ after(),
+ quick_check_details() { }
+ Label possible_success;  // Passed to EmitQuickCheck as on_possible_success.
+ bool expects_preload;  // Set from preload_is_current in ChoiceNode::Emit.
+ Label after;  // Bound right after this alternative's inline code.
+ QuickCheckDetails quick_check_details;  // Filled in by EmitQuickCheck.
+};
+
+
+// Creates a list of AlternativeGenerations. The first kAFew entries live in
+// an array inside this object; any excess is allocated on the heap.
+class AlternativeGenerationList {
+ public:
+ explicit AlternativeGenerationList(int count)
+ : alt_gens_(count) {
+ for (int i = 0; i < count && i < kAFew; i++) {  // Point at the inline array.
+ alt_gens_.Add(a_few_alt_gens_ + i);
+ }
+ for (int i = kAFew; i < count; i++) {  // The remainder comes from `new`.
+ alt_gens_.Add(new AlternativeGeneration());
+ }
+ }
+ ~AlternativeGenerationList() {
+ for (int i = 0; i < alt_gens_.length(); i++) {
+ alt_gens_[i]->possible_success.Unuse();
+ alt_gens_[i]->after.Unuse();
+ }
+ for (int i = kAFew; i < alt_gens_.length(); i++) {  // Only heap entries.
+ delete alt_gens_[i];
+ alt_gens_[i] = NULL;
+ }
+ }
+
+ AlternativeGeneration* at(int i) {
+ return alt_gens_[i];
+ }
+ private:
+ static const int kAFew = 10;  // Number of entries stored inline.
+ ZoneList<AlternativeGeneration*> alt_gens_;
+ AlternativeGeneration a_few_alt_gens_[kAFew];
+};
+
+
+/* Code generation for choice nodes.
+ *
+ * We generate quick checks that do a mask and compare to eliminate a
+ * choice. If the quick check succeeds then it jumps to the continuation to
+ * do slow checks and check subsequent nodes. If it fails (the common case)
+ * it falls through to the next choice.
+ *
+ * Here is the desired flow graph. Nodes directly below each other imply
+ * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
+ * 3 doesn't have a quick check so we have to call the slow check.
+ * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
+ * regexp continuation is generated directly after the Sn node, up to the
+ * next GoTo if we decide to reuse some already generated code. Some
+ * nodes expect preload_characters to be preloaded into the current
+ * character register. R nodes do this preloading. Vertices are marked
+ * F for failures and S for success (possible success in the case of quick
+ * nodes). L, V, < and > are used as arrow heads.
+ *
+ * ----------> R
+ *             |
+ *             V
+ *            Q1 -----> S1
+ *             |   S   /
+ *            F|      /
+ *             |    F/
+ *             |    /
+ *             |   R
+ *             |  /
+ *             V L
+ *            Q2 -----> S2
+ *             |   S   /
+ *            F|      /
+ *             |    F/
+ *             |    /
+ *             |   R
+ *             |  /
+ *             V L
+ *            S3
+ *             |
+ *            F|
+ *             |
+ *             R
+ *             |
+ * backtrack   V
+ * <----------Q4
+ *   \    F    |
+ *    \        |S
+ *     \   F   V
+ *      \-----S4
+ *
+ * For greedy loops we reverse our expectation and expect to match rather
+ * than fail. Therefore we want the loop code to look like this (U is the
+ * unwind code that steps back in the greedy loop). The following alternatives
+ * look the same as above.
+ *               _____
+ *              /     \
+ *              V     |
+ * ----------> S1     |
+ *             /|     |
+ *            / |S    |
+ *          F/  \_____/
+ *          /
+ *         |<-----------
+ *         |            \
+ *         V             \
+ *         Q2 ---> S2     \
+ *         |  S   /       |
+ *        F|     /        |
+ *         |   F/         |
+ *         |   /          |
+ *         |  R           |
+ *         | /            |
+ *    F    VL             |
+ * <------U               |
+ *  back   |S             |
+ *         \______________/
+ */
+
+
bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
@@ -2091,7 +2755,8 @@
int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
bool greedy_loop = false;
Label greedy_loop_label;
- GenerationVariant counter_backtrack_variant(&greedy_loop_label);
+ GenerationVariant counter_backtrack_variant;
+ counter_backtrack_variant.set_backtrack(&greedy_loop_label);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
// and other simple nodes. These are handled by pushing the current
@@ -2105,7 +2770,8 @@
macro_assembler->PushCurrentPosition();
current_variant = &counter_backtrack_variant;
Label greedy_match_failed;
- GenerationVariant greedy_match_variant(&greedy_match_failed);
+ GenerationVariant greedy_match_variant;
+ greedy_match_variant.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
greedy_match_variant.set_stop_node(this);
@@ -2122,32 +2788,79 @@
Label second_choice; // For use in greedy matches.
macro_assembler->Bind(&second_choice);
+ int first_normal_choice = greedy_loop ? 1 : 0;
+
+ int preload_characters = CalculatePreloadCharacters(compiler);
+ bool preload_is_current =
+ (current_variant->characters_preloaded() == preload_characters);
+ bool preload_has_checked_bounds = preload_is_current;
+
+ AlternativeGenerationList alt_gens(choice_count);
+
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
- for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) {
+ for (int i = first_normal_choice; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
- Label after;
+ AlternativeGeneration* alt_gen(alt_gens.at(i));
+ alt_gen->quick_check_details.set_characters(preload_characters);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
GenerationVariant new_variant(*current_variant);
- new_variant.set_backtrack(&after);
- for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, guards->at(j), &new_variant);
+ new_variant.set_characters_preloaded(preload_is_current ?
+ preload_characters :
+ 0);
+ if (preload_has_checked_bounds) {
+ new_variant.set_bound_checked_up_to(preload_characters);
}
- if (!alternative.node()->Emit(compiler, &new_variant)) {
- after.Unuse();
- return false;
+ new_variant.quick_check_performed()->Clear();
+ alt_gen->expects_preload = preload_is_current;
+ bool generate_full_check_inline = false;
+ if (alternative.node()->EmitQuickCheck(compiler,
+ &new_variant,
+ preload_has_checked_bounds,
+ &alt_gen->possible_success,
+ &alt_gen->quick_check_details,
+ i < choice_count - 1)) {
+ // Quick check was generated for this choice.
+ preload_is_current = true;
+ preload_has_checked_bounds = true;
+ // On the last choice in the ChoiceNode we generated the quick
+ // check to fall through on possible success. So now we need to
+ // generate the full check inline.
+ if (i == choice_count - 1) {
+ macro_assembler->Bind(&alt_gen->possible_success);
+ new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
+ new_variant.set_characters_preloaded(preload_characters);
+ new_variant.set_bound_checked_up_to(preload_characters);
+ generate_full_check_inline = true;
+ }
+ } else {
+ // No quick check was generated. Put the full code here.
+ // If this is not the first choice then there could be slow checks from
+ // previous cases that go here when they fail. There's no reason to
+ // insist that they preload characters since the slow check we are about
+ // to generate probably can't use it.
+ if (i != first_normal_choice) {
+ alt_gen->expects_preload = false;
+ new_variant.set_characters_preloaded(0);
+ }
+ if (i < choice_count - 1) {
+ new_variant.set_backtrack(&alt_gen->after);
+ }
+ generate_full_check_inline = true;
}
- macro_assembler->Bind(&after);
+ if (generate_full_check_inline) {
+ for (int j = 0; j < guard_count; j++) {
+ GenerateGuard(macro_assembler, guards->at(j), &new_variant);
+ }
+ if (!alternative.node()->Emit(compiler, &new_variant)) {
+ greedy_loop_label.Unuse();
+ return false;
+ }
+ preload_is_current = false;
+ }
+ macro_assembler->Bind(&alt_gen->after);
}
- GuardedAlternative alternative = alternatives_->at(choice_count - 1);
- ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == NULL) ? 0 : guards->length();
- for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, guards->at(j), current_variant);
- }
- bool ok = alternative.node()->Emit(compiler, current_variant);
- if (!ok) return false;
if (greedy_loop) {
macro_assembler->Bind(&greedy_loop_label);
// If we have unwound to the bottom then backtrack.
@@ -2156,12 +2869,68 @@
macro_assembler->AdvanceCurrentPosition(-text_length);
macro_assembler->GoTo(&second_choice);
}
+ // At this point we need to generate slow checks for the alternatives where
+ // the quick check was inlined. We can recognize these because the associated
+ // label was linked (jumped to) but has not yet been bound.
+ for (int i = first_normal_choice; i < choice_count - 1; i++) {
+ AlternativeGeneration* alt_gen = alt_gens.at(i);
+ if (!EmitOutOfLineContinuation(compiler,
+ current_variant,
+ alternatives_->at(i),
+ alt_gen,
+ preload_characters,
+ alt_gens.at(i + 1)->expects_preload)) {
+ return false;
+ }
+ }
return true;
}
+bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
+ GenerationVariant* variant,
+ GuardedAlternative alternative,
+ AlternativeGeneration* alt_gen,
+ int preload_characters,
+ bool next_expects_preload) {
+ if (!alt_gen->possible_success.is_linked()) return true;
+
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ macro_assembler->Bind(&alt_gen->possible_success);
+ GenerationVariant out_of_line_variant(*variant);
+ out_of_line_variant.set_characters_preloaded(preload_characters);
+ out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details);
+ ZoneList<Guard*>* guards = alternative.guards();
+ int guard_count = (guards == NULL) ? 0 : guards->length();
+ if (next_expects_preload) {
+ Label reload_current_char;
+ out_of_line_variant.set_backtrack(&reload_current_char);
+ for (int j = 0; j < guard_count; j++) {
+ GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
+ }
+ bool ok = alternative.node()->Emit(compiler, &out_of_line_variant);
+ macro_assembler->Bind(&reload_current_char);
+ // Reload the current character, since the next quick check expects that.
+ // We don't need to check bounds here because we only get into this
+ // code through a quick check which already did the checked load.
+ macro_assembler->LoadCurrentCharacter(variant->cp_offset(),
+ NULL,
+ false,
+ preload_characters);
+ macro_assembler->GoTo(&(alt_gen->after));
+ return ok;
+ } else {
+ out_of_line_variant.set_backtrack(&(alt_gen->after));
+ for (int j = 0; j < guard_count; j++) {
+ GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
+ }
+ return alternative.node()->Emit(compiler, &out_of_line_variant);
+ }
+}
+
+
bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
- RegExpMacroAssembler* macro = compiler->macro_assembler();
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == DONE) return true;
if (limit_result == FAIL) return false;
@@ -2193,9 +2962,9 @@
}
case BEGIN_SUBMATCH:
if (!variant->is_trivial()) return variant->Flush(compiler, this);
- macro->WriteCurrentPositionToRegister(
+ assembler->WriteCurrentPositionToRegister(
data_.u_submatch.current_position_register, 0);
- macro->WriteStackPointerToRegister(
+ assembler->WriteStackPointerToRegister(
data_.u_submatch.stack_pointer_register);
return on_success()->Emit(compiler, variant);
case POSITIVE_SUBMATCH_SUCCESS:
@@ -2210,13 +2979,13 @@
Label at_end;
// Load current character jumps to the label if we are beyond the string
// end.
- macro->LoadCurrentCharacter(0, &at_end);
- macro->GoTo(variant->backtrack());
- macro->Bind(&at_end);
+ assembler->LoadCurrentCharacter(0, &at_end);
+ assembler->GoTo(variant->backtrack());
+ assembler->Bind(&at_end);
}
- macro->ReadCurrentPositionFromRegister(
+ assembler->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register);
- macro->ReadStackPointerFromRegister(
+ assembler->ReadStackPointerFromRegister(
data_.u_submatch.stack_pointer_register);
return on_success()->Emit(compiler, variant);
default:
@@ -2228,7 +2997,7 @@
bool BackReferenceNode::Emit(RegExpCompiler* compiler,
GenerationVariant* variant) {
- RegExpMacroAssembler* macro = compiler->macro_assembler();
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
@@ -2244,12 +3013,15 @@
if (info()->at_end) {
// If we are constrained to match at the end of the input then succeed
// iff the back reference is empty.
- macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack());
+ assembler->CheckNotRegistersEqual(start_reg_,
+ end_reg_,
+ variant->backtrack());
} else {
if (compiler->ignore_case()) {
- macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack());
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ variant->backtrack());
} else {
- macro->CheckNotBackReference(start_reg_, variant->backtrack());
+ assembler->CheckNotBackReference(start_reg_, variant->backtrack());
}
}
return on_success()->Emit(compiler, variant);
@@ -2408,17 +3180,6 @@
printer.PrintBit("NI", info->follows_newline_interest);
printer.PrintBit("WI", info->follows_word_interest);
printer.PrintBit("SI", info->follows_start_interest);
- printer.PrintBit("DN", info->determine_newline);
- printer.PrintBit("DW", info->determine_word);
- printer.PrintBit("DS", info->determine_start);
- printer.PrintBit("DDN", info->does_determine_newline);
- printer.PrintBit("DDW", info->does_determine_word);
- printer.PrintBit("DDS", info->does_determine_start);
- printer.PrintPositive("IW", info->is_word);
- printer.PrintPositive("IN", info->is_newline);
- printer.PrintPositive("FN", info->follows_newline);
- printer.PrintPositive("FW", info->follows_word);
- printer.PrintPositive("FS", info->follows_start);
Label* label = that->label();
if (label->is_bound())
printer.PrintPositive("@", label->pos());
@@ -2585,6 +3346,22 @@
// -------------------------------------------------------------------
// Tree to graph conversion
+static const int kSpaceRangeCount = 20;
+static const int kSpaceRangeAsciiCount = 4;
+static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020,
+ 0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A,
+ 0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 };
+
+static const int kWordRangeCount = 8;
+static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_',
+ '_', 'a', 'z' };
+
+static const int kDigitRangeCount = 2;
+static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' };
+
+static const int kLineTerminatorRangeCount = 6;
+static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A,
+ 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 };
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
@@ -2599,6 +3376,77 @@
return new TextNode(elements(), on_success);
}
+static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
+ const uc16* special_class,
+ int length) {
+ ASSERT(ranges->length() != 0);
+ ASSERT(length != 0);
+ ASSERT(special_class[0] != 0);
+ if (ranges->length() != (length >> 1) + 1) {
+ return false;
+ }
+ CharacterRange range = ranges->at(0);
+ if (range.from() != 0) {
+ return false;
+ }
+ for (int i = 0; i < length; i += 2) {
+ if (special_class[i] != (range.to() + 1)) {
+ return false;
+ }
+ range = ranges->at((i >> 1) + 1);
+ if (special_class[i+1] != range.from() - 1) {
+ return false;
+ }
+ }
+ if (range.to() != 0xffff) {
+ return false;
+ }
+ return true;
+}
+
+
+static bool CompareRanges(ZoneList<CharacterRange>* ranges,
+ const uc16* special_class,
+ int length) {
+ if (ranges->length() * 2 != length) {
+ return false;
+ }
+ for (int i = 0; i < length; i += 2) {
+ CharacterRange range = ranges->at(i >> 1);
+ if (range.from() != special_class[i] || range.to() != special_class[i+1]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool RegExpCharacterClass::is_standard() {
+ // TODO(lrn): Remove need for this function, by not throwing away information
+ // along the way.
+ if (is_negated_) {
+ return false;
+ }
+ if (set_.is_standard()) {
+ return true;
+ }
+ if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
+ set_.set_standard_set_type('s');
+ return true;
+ }
+ if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
+ set_.set_standard_set_type('S');
+ return true;
+ }
+ if (CompareInverseRanges(set_.ranges(),
+ kLineTerminatorRanges,
+ kLineTerminatorRangeCount)) {
+ set_.set_standard_set_type('.');
+ return true;
+ }
+ return false;
+}
+
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
@@ -2650,11 +3498,58 @@
//
// TODO(someone): clear captures on repetition and handle empty
// matches.
+
+ // 15.10.2.5 RepeatMatcher algorithm.
+ // The parser has already eliminated the case where max is 0. In the case
+ // where max_match is zero the parser has removed the quantifier if min was
+ // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
+
+ // If we know that we cannot match zero length then things are a little
+ // simpler since we don't need to make the special zero length match check
+ // from step 2.1. If the min and max are small we can unroll a little in
+ // this case.
+ static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
+ static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
+ if (max == 0) return on_success; // This can happen due to recursion.
+ if (body->min_match() > 0) {
+ if (min > 0 && min <= kMaxUnrolledMinMatches) {
+ int new_max = (max == kInfinity) ? max : max - min;
+ // Recurse once to get the loop or optional matches after the fixed ones.
+ RegExpNode* answer =
+ ToNode(0, new_max, is_greedy, body, compiler, on_success);
+ // Unroll the forced matches from 0 to min. This can cause chains of
+ // TextNodes (which the parser does not generate). These should be
+ // combined if it turns out they hinder good code generation.
+ for (int i = 0; i < min; i++) {
+ answer = body->ToNode(compiler, answer);
+ }
+ return answer;
+ }
+ if (max <= kMaxUnrolledMaxMatches) {
+ ASSERT(min == 0);
+ // Unroll the optional matches up to max.
+ RegExpNode* answer = on_success;
+ for (int i = 0; i < max; i++) {
+ ChoiceNode* alternation = new ChoiceNode(2);
+ if (is_greedy) {
+ alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
+ answer)));
+ alternation->AddAlternative(GuardedAlternative(on_success));
+ } else {
+ alternation->AddAlternative(GuardedAlternative(on_success));
+ alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
+ answer)));
+ }
+ answer = alternation;
+ }
+ return answer;
+ }
+ }
bool has_min = min > 0;
- bool has_max = max < RegExpQuantifier::kInfinity;
+ bool has_max = max < RegExpTree::kInfinity;
bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
- ChoiceNode* center = new LoopChoiceNode(2);
+ LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center);
@@ -2670,11 +3565,11 @@
rest_alt.AddGuard(rest_guard);
}
if (is_greedy) {
- center->AddAlternative(body_alt);
- center->AddAlternative(rest_alt);
+ center->AddLoopAlternative(body_alt);
+ center->AddContinueAlternative(rest_alt);
} else {
- center->AddAlternative(rest_alt);
- center->AddAlternative(body_alt);
+ center->AddContinueAlternative(rest_alt);
+ center->AddLoopAlternative(body_alt);
}
if (needs_counter) {
return ActionNode::SetRegister(reg_ctr, 0, center);
@@ -2793,32 +3688,6 @@
}
-static const int kSpaceRangeCount = 20;
-static const uc16 kSpaceRanges[kSpaceRangeCount] = {
- 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
- 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
- 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
-};
-
-
-static const int kWordRangeCount = 8;
-static const uc16 kWordRanges[kWordRangeCount] = {
- '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
-};
-
-
-static const int kDigitRangeCount = 2;
-static const uc16 kDigitRanges[kDigitRangeCount] = {
- '0', '9'
-};
-
-
-static const int kLineTerminatorRangeCount = 6;
-static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
- 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
-};
-
-
static void AddClass(const uc16* elmv,
int elmc,
ZoneList<CharacterRange>* ranges) {
@@ -3014,6 +3883,16 @@
}
+ZoneList<CharacterRange>* CharacterSet::ranges() {
+ if (ranges_ == NULL) {
+ ranges_ = new ZoneList<CharacterRange>(2);
+ CharacterRange::AddClassEscape(standard_set_type_, ranges_);
+ }
+ return ranges_;
+}
+
+
+
// -------------------------------------------------------------------
// Interest propagation
@@ -3030,7 +3909,6 @@
RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
ASSERT_EQ(false, *cloned);
- ASSERT(!info->HasAssertions());
siblings_.Ensure(this);
RegExpNode* result = TryGetSibling(info);
if (result != NULL) return result;
@@ -3262,7 +4140,7 @@
// Analysis
-void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) {
+void Analysis::EnsureAnalyzed(RegExpNode* that) {
if (that->info()->been_analyzed || that->info()->being_analyzed)
return;
that->info()->being_analyzed = true;
@@ -3272,7 +4150,7 @@
}
-void AssertionPropagation::VisitEnd(EndNode* that) {
+void Analysis::VisitEnd(EndNode* that) {
// nothing to do
}
@@ -3295,23 +4173,16 @@
}
-void AssertionPropagation::VisitText(TextNode* that) {
+void Analysis::VisitText(TextNode* that) {
if (ignore_case_) {
that->MakeCaseIndependent();
}
EnsureAnalyzed(that->on_success());
- NodeInfo* info = that->info();
- NodeInfo* next_info = that->on_success()->info();
- // If the following node is interested in what it follows then this
- // node must determine it.
- info->determine_newline = next_info->follows_newline_interest;
- info->determine_word = next_info->follows_word_interest;
- info->determine_start = next_info->follows_start_interest;
that->CalculateOffsets();
}
-void AssertionPropagation::VisitAction(ActionNode* that) {
+void Analysis::VisitAction(ActionNode* that) {
RegExpNode* target = that->on_success();
EnsureAnalyzed(target);
// If the next node is interested in what it follows then this node
@@ -3320,7 +4191,7 @@
}
-void AssertionPropagation::VisitChoice(ChoiceNode* that) {
+void Analysis::VisitChoice(ChoiceNode* that) {
NodeInfo* info = that->info();
for (int i = 0; i < that->alternatives()->length(); i++) {
RegExpNode* node = that->alternatives()->at(i).node();
@@ -3332,212 +4203,28 @@
}
-void AssertionPropagation::VisitBackReference(BackReferenceNode* that) {
+void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
+ NodeInfo* info = that->info();
+ for (int i = 0; i < that->alternatives()->length(); i++) {
+ RegExpNode* node = that->alternatives()->at(i).node();
+ if (node != that->loop_node()) {
+ EnsureAnalyzed(node);
+ info->AddFromFollowing(node->info());
+ }
+ }
+ // Check the loop last since it may need the value of this node
+ // to get a correct result.
+ EnsureAnalyzed(that->loop_node());
+ info->AddFromFollowing(that->loop_node()->info());
+}
+
+
+void Analysis::VisitBackReference(BackReferenceNode* that) {
EnsureAnalyzed(that->on_success());
}
// -------------------------------------------------------------------
-// Assumption expansion
-
-
-RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
- siblings_.Ensure(this);
- NodeInfo new_info = *this->info();
- if (new_info.follows_word_interest)
- new_info.follows_word = info->follows_word;
- if (new_info.follows_newline_interest)
- new_info.follows_newline = info->follows_newline;
- // If the following node should determine something we need to get
- // a sibling that determines it.
- new_info.does_determine_newline = new_info.determine_newline;
- new_info.does_determine_word = new_info.determine_word;
- new_info.does_determine_start = new_info.determine_start;
- RegExpNode* sibling = TryGetSibling(&new_info);
- if (sibling == NULL) {
- sibling = ExpandLocal(&new_info);
- siblings_.Add(sibling);
- sibling->info()->being_expanded = true;
- sibling->ExpandChildren();
- sibling->info()->being_expanded = false;
- sibling->info()->been_expanded = true;
- } else {
- NodeInfo* sib_info = sibling->info();
- if (!sib_info->been_expanded && !sib_info->being_expanded) {
- sibling->info()->being_expanded = true;
- sibling->ExpandChildren();
- sibling->info()->being_expanded = false;
- sibling->info()->been_expanded = true;
- }
- }
- return sibling;
-}
-
-
-RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) {
- ChoiceNode* clone = this->Clone();
- clone->info()->ResetCompilationState();
- clone->info()->AddAssumptions(info);
- return clone;
-}
-
-
-void ChoiceNode::ExpandChildren() {
- ZoneList<GuardedAlternative>* alts = alternatives();
- ZoneList<GuardedAlternative>* new_alts
- = new ZoneList<GuardedAlternative>(alts->length());
- for (int i = 0; i < alts->length(); i++) {
- GuardedAlternative next = alts->at(i);
- next.set_node(next.node()->EnsureExpanded(info()));
- new_alts->Add(next);
- }
- alternatives_ = new_alts;
-}
-
-
-RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
- TextElement last = elements()->last();
- if (last.type == TextElement::CHAR_CLASS) {
- RegExpCharacterClass* char_class = last.data.u_char_class;
- if (info->does_determine_word) {
- ZoneList<CharacterRange>* word = NULL;
- ZoneList<CharacterRange>* non_word = NULL;
- CharacterRange::Split(char_class->ranges(),
- CharacterRange::GetWordBounds(),
- &word,
- &non_word);
- if (non_word == NULL) {
- // This node contains no non-word characters so it must be
- // all word.
- this->info()->is_word = NodeInfo::TRUE;
- } else if (word == NULL) {
- // Vice versa.
- this->info()->is_word = NodeInfo::FALSE;
- } else {
- // If this character class contains both word and non-word
- // characters we need to split it into two.
- ChoiceNode* result = new ChoiceNode(2);
- // Welcome to the family, son!
- result->set_siblings(this->siblings());
- *result->info() = *this->info();
- result->info()->ResetCompilationState();
- result->info()->AddAssumptions(info);
- RegExpNode* word_node
- = new TextNode(new RegExpCharacterClass(word, false),
- on_success());
- word_node->info()->determine_word = true;
- word_node->info()->does_determine_word = true;
- word_node->info()->is_word = NodeInfo::TRUE;
- result->alternatives()->Add(GuardedAlternative(word_node));
- RegExpNode* non_word_node
- = new TextNode(new RegExpCharacterClass(non_word, false),
- on_success());
- non_word_node->info()->determine_word = true;
- non_word_node->info()->does_determine_word = true;
- non_word_node->info()->is_word = NodeInfo::FALSE;
- result->alternatives()->Add(GuardedAlternative(non_word_node));
- return result;
- }
- }
- }
- TextNode* clone = this->Clone();
- clone->info()->ResetCompilationState();
- clone->info()->AddAssumptions(info);
- return clone;
-}
-
-
-void TextNode::ExpandAtomChildren(RegExpAtom* that) {
- NodeInfo new_info = *info();
- uc16 last = that->data()[that->data().length() - 1];
- if (info()->determine_word) {
- new_info.follows_word = IsRegExpWord(last)
- ? NodeInfo::TRUE : NodeInfo::FALSE;
- } else {
- new_info.follows_word = NodeInfo::UNKNOWN;
- }
- if (info()->determine_newline) {
- new_info.follows_newline = IsRegExpNewline(last)
- ? NodeInfo::TRUE : NodeInfo::FALSE;
- } else {
- new_info.follows_newline = NodeInfo::UNKNOWN;
- }
- if (info()->determine_start) {
- new_info.follows_start = NodeInfo::FALSE;
- } else {
- new_info.follows_start = NodeInfo::UNKNOWN;
- }
- set_on_success(on_success()->EnsureExpanded(&new_info));
-}
-
-
-void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) {
- if (info()->does_determine_word) {
- // ASSERT(info()->is_word != NodeInfo::UNKNOWN);
- NodeInfo next_info = *on_success()->info();
- next_info.follows_word = info()->is_word;
- set_on_success(on_success()->EnsureExpanded(&next_info));
- } else {
- set_on_success(on_success()->EnsureExpanded(info()));
- }
-}
-
-
-void TextNode::ExpandChildren() {
- TextElement last = elements()->last();
- switch (last.type) {
- case TextElement::ATOM:
- ExpandAtomChildren(last.data.u_atom);
- break;
- case TextElement::CHAR_CLASS:
- ExpandCharClassChildren(last.data.u_char_class);
- break;
- default:
- UNREACHABLE();
- }
-}
-
-
-RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) {
- ActionNode* clone = this->Clone();
- clone->info()->ResetCompilationState();
- clone->info()->AddAssumptions(info);
- return clone;
-}
-
-
-void ActionNode::ExpandChildren() {
- set_on_success(on_success()->EnsureExpanded(info()));
-}
-
-
-RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) {
- BackReferenceNode* clone = this->Clone();
- clone->info()->ResetCompilationState();
- clone->info()->AddAssumptions(info);
- return clone;
-}
-
-
-void BackReferenceNode::ExpandChildren() {
- set_on_success(on_success()->EnsureExpanded(info()));
-}
-
-
-RegExpNode* EndNode::ExpandLocal(NodeInfo* info) {
- EndNode* clone = this->Clone();
- clone->info()->ResetCompilationState();
- clone->info()->AddAssumptions(info);
- return clone;
-}
-
-
-void EndNode::ExpandChildren() {
- // nothing to do
-}
-
-
-// -------------------------------------------------------------------
// Dispatch table construction
@@ -3647,110 +4334,6 @@
}
-#ifdef DEBUG
-
-
-class VisitNodeScope {
- public:
- explicit VisitNodeScope(RegExpNode* node) : node_(node) {
- ASSERT(!node->info()->visited);
- node->info()->visited = true;
- }
- ~VisitNodeScope() {
- node_->info()->visited = false;
- }
- private:
- RegExpNode* node_;
-};
-
-
-class NodeValidator : public NodeVisitor {
- public:
- virtual void ValidateInfo(NodeInfo* info) = 0;
-#define DECLARE_VISIT(Type) \
- virtual void Visit##Type(Type##Node* that);
-FOR_EACH_NODE_TYPE(DECLARE_VISIT)
-#undef DECLARE_VISIT
-};
-
-
-class PostAnalysisNodeValidator : public NodeValidator {
- public:
- virtual void ValidateInfo(NodeInfo* info);
-};
-
-
-class PostExpansionNodeValidator : public NodeValidator {
- public:
- virtual void ValidateInfo(NodeInfo* info);
-};
-
-
-void PostAnalysisNodeValidator::ValidateInfo(NodeInfo* info) {
- ASSERT(info->been_analyzed);
-}
-
-
-void PostExpansionNodeValidator::ValidateInfo(NodeInfo* info) {
- ASSERT_EQ(info->determine_newline, info->does_determine_newline);
- ASSERT_EQ(info->determine_start, info->does_determine_start);
- ASSERT_EQ(info->determine_word, info->does_determine_word);
- ASSERT_EQ(info->follows_word_interest,
- (info->follows_word != NodeInfo::UNKNOWN));
- if (false) {
- // These are still unimplemented.
- ASSERT_EQ(info->follows_start_interest,
- (info->follows_start != NodeInfo::UNKNOWN));
- ASSERT_EQ(info->follows_newline_interest,
- (info->follows_newline != NodeInfo::UNKNOWN));
- }
-}
-
-
-void NodeValidator::VisitAction(ActionNode* that) {
- if (that->info()->visited) return;
- VisitNodeScope scope(that);
- ValidateInfo(that->info());
- that->on_success()->Accept(this);
-}
-
-
-void NodeValidator::VisitBackReference(BackReferenceNode* that) {
- if (that->info()->visited) return;
- VisitNodeScope scope(that);
- ValidateInfo(that->info());
- that->on_success()->Accept(this);
-}
-
-
-void NodeValidator::VisitChoice(ChoiceNode* that) {
- if (that->info()->visited) return;
- VisitNodeScope scope(that);
- ValidateInfo(that->info());
- ZoneList<GuardedAlternative>* alts = that->alternatives();
- for (int i = 0; i < alts->length(); i++)
- alts->at(i).node()->Accept(this);
-}
-
-
-void NodeValidator::VisitEnd(EndNode* that) {
- if (that->info()->visited) return;
- VisitNodeScope scope(that);
- ValidateInfo(that->info());
-}
-
-
-void NodeValidator::VisitText(TextNode* that) {
- if (that->info()->visited) return;
- VisitNodeScope scope(that);
- ValidateInfo(that->info());
- that->on_success()->Accept(this);
-}
-
-
-#endif
-
-
Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
bool ignore_case,
bool is_multiline,
@@ -3768,48 +4351,21 @@
// since we don't even handle ^ yet I'm saving that optimization for
// later.
RegExpNode* node = RegExpQuantifier::ToNode(0,
- RegExpQuantifier::kInfinity,
+ RegExpTree::kInfinity,
false,
new RegExpCharacterClass('*'),
&compiler,
captured_body);
- AssertionPropagation analysis(ignore_case);
+ data->node = node;
+ Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
NodeInfo info = *node->info();
- data->has_lookbehind = info.HasLookbehind();
- if (data->has_lookbehind) {
- // If this node needs information about the preceding text we let
- // it start with a character class that consumes a single character
- // and proceeds to wherever is appropriate. This means that if
- // has_lookbehind is set the code generator must start one character
- // before the start position.
- node = new TextNode(new RegExpCharacterClass('*'), node);
- analysis.EnsureAnalyzed(node);
- }
-
-#ifdef DEBUG
- PostAnalysisNodeValidator post_analysis_validator;
- node->Accept(&post_analysis_validator);
-#endif
-
- node = node->EnsureExpanded(&info);
-
-#ifdef DEBUG
- PostExpansionNodeValidator post_expansion_validator;
- node->Accept(&post_expansion_validator);
-#endif
-
- data->node = node;
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
- if (data->has_lookbehind) {
- return Handle<FixedArray>::null();
- }
-
if (FLAG_irregexp_native) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
diff --git a/src/jsregexp.h b/src/jsregexp.h
index dbeb6e2..a2e1647 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -439,6 +439,7 @@
explicit TextElement(Type t) : type(t), cp_offset(-1) { }
static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class);
+ int length();
Type type;
union {
RegExpAtom* u_atom;
@@ -459,23 +460,10 @@
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
- being_expanded(false),
- been_expanded(false),
- determine_word(false),
- determine_newline(false),
- determine_start(false),
- does_determine_word(false),
- does_determine_newline(false),
- does_determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
follows_start_interest(false),
- is_word(UNKNOWN),
- is_newline(UNKNOWN),
at_end(false),
- follows_word(UNKNOWN),
- follows_newline(UNKNOWN),
- follows_start(UNKNOWN),
visited(false) { }
// Returns true if the interests and assumptions of this node
@@ -484,19 +472,7 @@
return (at_end == that->at_end) &&
(follows_word_interest == that->follows_word_interest) &&
(follows_newline_interest == that->follows_newline_interest) &&
- (follows_start_interest == that->follows_start_interest) &&
- (follows_word == that->follows_word) &&
- (follows_newline == that->follows_newline) &&
- (follows_start == that->follows_start) &&
- (does_determine_word == that->does_determine_word) &&
- (does_determine_newline == that->does_determine_newline) &&
- (does_determine_start == that->does_determine_start);
- }
-
- bool HasAssertions() {
- return (follows_word != UNKNOWN) ||
- (follows_newline != UNKNOWN) ||
- (follows_start != UNKNOWN);
+ (follows_start_interest == that->follows_start_interest);
}
// Updates the interests of this node given the interests of the
@@ -508,26 +484,6 @@
follows_start_interest |= that->follows_start_interest;
}
- void AddAssumptions(NodeInfo* that) {
- if (that->follows_word != UNKNOWN) {
- ASSERT(follows_word == UNKNOWN || follows_word == that->follows_word);
- follows_word = that->follows_word;
- }
- if (that->follows_newline != UNKNOWN) {
- ASSERT(follows_newline == UNKNOWN ||
- follows_newline == that->follows_newline);
- follows_newline = that->follows_newline;
- }
- if (that->follows_start != UNKNOWN) {
- ASSERT(follows_start == UNKNOWN ||
- follows_start == that->follows_start);
- follows_start = that->follows_start;
- }
- does_determine_word = that->does_determine_word;
- does_determine_newline = that->does_determine_newline;
- does_determine_start = that->does_determine_start;
- }
-
bool HasLookbehind() {
return follows_word_interest ||
follows_newline_interest ||
@@ -545,25 +501,10 @@
void ResetCompilationState() {
being_analyzed = false;
been_analyzed = false;
- being_expanded = false;
- been_expanded = false;
}
bool being_analyzed: 1;
bool been_analyzed: 1;
- bool being_expanded: 1;
- bool been_expanded: 1;
-
- // These bits are set if this node must propagate forward information
- // about the last character it consumed (or, in the case of 'start',
- // if it is at the start of the input).
- bool determine_word: 1;
- bool determine_newline: 1;
- bool determine_start: 1;
-
- bool does_determine_word: 1;
- bool does_determine_newline: 1;
- bool does_determine_start: 1;
// These bits are set of this node has to know what the preceding
// character was.
@@ -571,35 +512,11 @@
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
- TriBool is_word: 2;
- TriBool is_newline: 2;
-
bool at_end: 1;
-
- // These bits are set if the node can make assumptions about what
- // the previous character was.
- TriBool follows_word: 2;
- TriBool follows_newline: 2;
- TriBool follows_start: 2;
-
bool visited: 1;
};
-class ExpansionGuard {
- public:
- explicit inline ExpansionGuard(NodeInfo* info) : info_(info) {
- ASSERT(!info->being_expanded);
- info->being_expanded = true;
- }
- inline ~ExpansionGuard() {
- info_->being_expanded = false;
- }
- private:
- NodeInfo* info_;
-};
-
-
class SiblingList {
public:
SiblingList() : list_(NULL) { }
@@ -619,24 +536,84 @@
};
+// Details of a quick mask-compare check that can look ahead in the
+// input stream.
+class QuickCheckDetails {
+ public:
+ QuickCheckDetails()
+ : characters_(0),
+ mask_(0),
+ value_(0) { }
+ explicit QuickCheckDetails(int characters)
+ : characters_(characters),
+ mask_(0),
+ value_(0) { }
+ bool Rationalize(bool ascii);
+ // Merge in the information from another branch of an alternation.
+ void Merge(QuickCheckDetails* other, int from_index);
+ // Advance the current position by some amount.
+ void Advance(int by, bool ascii);
+ void Clear();
+ struct Position {
+ Position() : mask(0), value(0), determines_perfectly(false) { }
+ uc16 mask;
+ uc16 value;
+ bool determines_perfectly;
+ };
+ int characters() { return characters_; }
+ void set_characters(int characters) { characters_ = characters; }
+ Position* positions(int index) {
+ ASSERT(index >= 0);
+ ASSERT(index < characters_);
+ return positions_ + index;
+ }
+ uint32_t mask() { return mask_; }
+ uint32_t value() { return value_; }
+
+ private:
+ // The number of characters we have quick check information for. This is
+ // the same for all branches of a choice node.
+ int characters_;
+ Position positions_[4];
+ // These values are the condensate of the above array after Rationalize().
+ uint32_t mask_;
+ uint32_t value_;
+};
+
+
class RegExpNode: public ZoneObject {
public:
RegExpNode() : variants_generated_(0) { }
- virtual ~RegExpNode() { }
+ virtual ~RegExpNode();
virtual void Accept(NodeVisitor* visitor) = 0;
// Generates a goto to this node or actually generates the code at this point.
// Until the implementation is complete we will return true for success and
// false for failure.
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant) = 0;
+ // How many characters must this node consume at a minimum in order to
+ // succeed.
+ virtual int EatsAtLeast(int recursion_depth) = 0;
+ // Emits some quick code that checks whether the preloaded characters match.
+ // Falls through on certain failure, jumps to the label on possible success.
+ // If the node cannot make a quick check it does nothing and returns false.
+ bool EmitQuickCheck(RegExpCompiler* compiler,
+ GenerationVariant* variant,
+ bool preload_has_checked_bounds,
+ Label* on_possible_success,
+ QuickCheckDetails* details_return,
+ bool fall_through_on_failure);
+ // For a given number of characters this returns a mask and a value. The
+ // next n characters are anded with the mask and compared with the value.
+ // A comparison failure indicates the node cannot match the next n characters.
+ // A comparison success indicates the node may match.
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) = 0;
static const int kNodeIsTooComplexForGreedyLoops = -1;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
Label* label() { return &label_; }
static const int kMaxVariantsGenerated = 10;
- RegExpNode* EnsureExpanded(NodeInfo* info);
- virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
- virtual void ExpandChildren() = 0;
-
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
@@ -720,8 +697,12 @@
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
- virtual RegExpNode* ExpandLocal(NodeInfo* info);
- virtual void ExpandChildren();
+ virtual int EatsAtLeast(int recursion_depth);
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int filled_in) {
+ return on_success()->GetQuickCheckDetails(details, compiler, filled_in);
+ }
virtual RegExpNode* PropagateForward(NodeInfo* info);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
@@ -767,9 +748,11 @@
}
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateForward(NodeInfo* info);
- virtual RegExpNode* ExpandLocal(NodeInfo* info);
- virtual void ExpandChildren();
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+ virtual int EatsAtLeast(int recursion_depth);
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
virtual int GreedyLoopTextLength();
@@ -779,10 +762,21 @@
return result;
}
void CalculateOffsets();
- private:
- void ExpandAtomChildren(RegExpAtom* that);
- void ExpandCharClassChildren(RegExpCharacterClass* that);
+ private:
+ enum TextEmitPassType {
+ NON_ASCII_MATCH,
+ CHARACTER_MATCH,
+ CASE_CHARACTER_MATCH,
+ CHARACTER_CLASS_MATCH
+ };
+ void TextEmitPass(RegExpCompiler* compiler,
+ TextEmitPassType pass,
+ bool preloaded,
+ GenerationVariant* variant,
+ bool first_element_checked,
+ int* checked_up_to);
+ int Length();
ZoneList<TextElement>* elms_;
};
@@ -799,9 +793,13 @@
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+ virtual int EatsAtLeast(int recursion_depth) { return 0; }
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) {
+ return;
+ }
virtual RegExpNode* PropagateForward(NodeInfo* info);
- virtual RegExpNode* ExpandLocal(NodeInfo* info);
- virtual void ExpandChildren();
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
private:
@@ -816,9 +814,14 @@
explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+ virtual int EatsAtLeast(int recursion_depth) { return 0; }
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in) {
+ // Returning 0 from EatsAtLeast should ensure we never get here.
+ UNREACHABLE();
+ }
virtual RegExpNode* PropagateForward(NodeInfo* info);
- virtual RegExpNode* ExpandLocal(NodeInfo* info);
- virtual void ExpandChildren();
virtual EndNode* Clone() { return new EndNode(*this); }
protected:
@@ -875,6 +878,9 @@
};
+class AlternativeGeneration;
+
+
class ChoiceNode: public RegExpNode {
public:
explicit ChoiceNode(int expected_size)
@@ -886,9 +892,12 @@
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
DispatchTable* GetTable(bool ignore_case);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+ virtual int EatsAtLeast(int recursion_depth);
+ int EatsAtLeastHelper(int recursion_depth, RegExpNode* ignore_this_node);
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in);
virtual RegExpNode* PropagateForward(NodeInfo* info);
- virtual RegExpNode* ExpandLocal(NodeInfo* info);
- virtual void ExpandChildren();
virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
bool being_calculated() { return being_calculated_; }
@@ -900,10 +909,17 @@
private:
friend class DispatchTableConstructor;
- friend class AssertionPropagation;
+ friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
GenerationVariant* variant);
+ int CalculatePreloadCharacters(RegExpCompiler* compiler);
+ bool EmitOutOfLineContinuation(RegExpCompiler* compiler,
+ GenerationVariant* variant,
+ GuardedAlternative alternative,
+ AlternativeGeneration* alt_gen,
+ int preload_characters,
+ bool next_expects_preload);
DispatchTable* table_;
bool being_calculated_;
};
@@ -911,9 +927,35 @@
class LoopChoiceNode: public ChoiceNode {
public:
- explicit LoopChoiceNode(int expected_size) : ChoiceNode(expected_size) { }
+ explicit LoopChoiceNode(bool body_can_be_zero_length)  // Choice node for a loop; records whether the loop body may match the empty string.
+ : ChoiceNode(2),  // Passes 2 to the ChoiceNode constructor; presumably one loop and one continue alternative - TODO confirm.
+ loop_node_(NULL),  // Unset at construction.
+ continue_node_(NULL),  // Unset at construction.
+ body_can_be_zero_length_(body_can_be_zero_length) { }
+ void AddLoopAlternative(GuardedAlternative alt);  // Defined out of line; NOTE(review): presumably registers the alternative that re-enters the loop and sets loop_node_.
+ void AddContinueAlternative(GuardedAlternative alt);  // Defined out of line; NOTE(review): presumably registers the exit alternative and sets continue_node_.
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+ virtual int EatsAtLeast(int recursion_depth); // Returns 0.
+ virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in);
virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
+ RegExpNode* loop_node() { return loop_node_; }  // Plain accessor; NULL until set.
+ RegExpNode* continue_node() { return continue_node_; }  // Plain accessor; NULL until set.
+ bool body_can_be_zero_length() { return body_can_be_zero_length_; }  // Value given to the constructor.
+ virtual void Accept(NodeVisitor* visitor);  // Visitor entry point, declared separately from the inherited one.
+
+ private:
+ // AddAlternative is made private for loop nodes because alternatives
+ // should not be added freely, we need to keep track of which node
+ // goes back to the node itself.
+ void AddAlternative(GuardedAlternative node) {
+ ChoiceNode::AddAlternative(node);  // Forwards unchanged to the base class.
+ }
+
+ RegExpNode* loop_node_;
+ RegExpNode* continue_node_;
+ bool body_can_be_zero_length_;
};
@@ -963,42 +1005,51 @@
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
};
- explicit GenerationVariant(Label* backtrack)
- : cp_offset_(0),
- actions_(NULL),
- backtrack_(backtrack),
- stop_node_(NULL),
- loop_label_(NULL) { }
GenerationVariant()
: cp_offset_(0),
actions_(NULL),
backtrack_(NULL),
stop_node_(NULL),
- loop_label_(NULL) { }
+ loop_label_(NULL),
+ characters_preloaded_(0),
+ bound_checked_up_to_(0) { }
bool Flush(RegExpCompiler* compiler, RegExpNode* successor);
int cp_offset() { return cp_offset_; }
DeferredAction* actions() { return actions_; }
bool is_trivial() {
- return backtrack_ == NULL && actions_ == NULL && cp_offset_ == 0;
+ return backtrack_ == NULL &&
+ actions_ == NULL &&
+ cp_offset_ == 0 &&
+ characters_preloaded_ == 0 &&
+ bound_checked_up_to_ == 0 &&
+ quick_check_performed_.characters() == 0;
}
Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; }
- // These set methods should be used only on new GenerationVariants - the
- // intention is that GenerationVariants are immutable after creation.
+ int characters_preloaded() { return characters_preloaded_; }
+ int bound_checked_up_to() { return bound_checked_up_to_; }
+ QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
+ bool mentions_reg(int reg);
+ // These set methods and AdvanceVariant should be used only on new
+ // GenerationVariants - the intention is that GenerationVariants are
+ // immutable after creation.
void add_action(DeferredAction* new_action) {
ASSERT(new_action->next_ == NULL);
new_action->next_ = actions_;
actions_ = new_action;
}
- void set_cp_offset(int new_cp_offset) {
- ASSERT(new_cp_offset >= cp_offset_);
- cp_offset_ = new_cp_offset;
- }
void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
void set_stop_node(RegExpNode* node) { stop_node_ = node; }
void set_loop_label(Label* label) { loop_label_ = label; }
- bool mentions_reg(int reg);
+ void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; }
+ void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
+ void set_quick_check_performed(QuickCheckDetails* d) {
+ quick_check_performed_ = *d;
+ }
+ void clear_quick_check_performed() {  // NOTE(review): the body is empty, so quick_check_performed_ is left untouched despite the name - confirm this is intended.
+ }
+ void AdvanceVariant(int by, bool ascii);
private:
int FindAffectedRegisters(OutSet* affected_registers);
void PerformDeferredActions(RegExpMacroAssembler* macro,
@@ -1015,7 +1066,12 @@
Label* backtrack_;
RegExpNode* stop_node_;
Label* loop_label_;
+ int characters_preloaded_;
+ int bound_checked_up_to_;
+ QuickCheckDetails quick_check_performed_;
};
+
+
class NodeVisitor {
public:
virtual ~NodeVisitor() { }
@@ -1023,6 +1079,7 @@
virtual void Visit##Type(Type##Node* that) = 0;
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
+ virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
};
@@ -1070,33 +1127,9 @@
// +-------+ ---> +------------+
// | word? | | check word |
// +-------+ +------------+
-//
-// At a later phase all nodes that determine information for their
-// following nodes are split into several 'sibling' nodes. In this
-// case the first '.' is split into one node that only matches words
-// and one that only matches non-words. The second '.' is also split,
-// into one node that assumes that the previous character was a word
-// character and one that assumes that is was non-word. In this case
-// the result is
-//
-// +------------------+ +------------------+
-// /--> | intersect(., \w) | ---> | intersect(., \W) |
-// | +------------------+ +------------------+
-// | | follows \w |
-// | +------------------+
-// --?
-// | +------------------+ +------------------+
-// \--> | intersect(., \W) | ---> | intersect(., \w) |
-// +------------------+ +------------------+
-// | follows \W |
-// +------------------+
-//
-// This way we don't need to explicitly check the previous character
-// but can always assume that whoever consumed the previous character
-// has propagated the relevant information forward.
-class AssertionPropagation: public NodeVisitor {
+class Analysis: public NodeVisitor {
public:
- explicit AssertionPropagation(bool ignore_case)
+ explicit Analysis(bool ignore_case)
: ignore_case_(ignore_case) { }
void EnsureAnalyzed(RegExpNode* node);
@@ -1104,11 +1137,12 @@
virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
+ virtual void VisitLoopChoice(LoopChoiceNode* that);
private:
bool ignore_case_;
- DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
@@ -1116,13 +1150,11 @@
RegExpCompileData()
: tree(NULL),
node(NULL),
- has_lookbehind(false),
- has_character_escapes(false),
+ simple(true),
capture_count(0) { }
RegExpTree* tree;
RegExpNode* node;
- bool has_lookbehind;
- bool has_character_escapes;
+ bool simple;
Handle<String> error;
int capture_count;
};
diff --git a/src/log.cc b/src/log.cc
index 4fa398c..7ecf2ac 100644
--- a/src/log.cc
+++ b/src/log.cc
@@ -32,6 +32,7 @@
#include "log.h"
#include "platform.h"
#include "string-stream.h"
+#include "macro-assembler.h"
namespace v8 { namespace internal {
@@ -349,11 +350,19 @@
#ifdef ENABLE_LOGGING_AND_PROFILING
-void Logger::LogString(Handle<String> str) {
+void Logger::LogString(Handle<String> str, bool show_impl_info) {
StringShape shape(*str);
int len = str->length(shape);
- if (len > 256)
- len = 256;
+ if (len > 0x1000)
+ len = 0x1000;
+ if (show_impl_info) {
+ fputc(shape.IsAsciiRepresentation() ? 'a' : '2', logfile_);
+ if (shape.IsExternal())
+ fputc('e', logfile_);
+ if (shape.IsSymbol())
+ fputc('#', logfile_);
+ fprintf(logfile_, ":%i:", str->length());
+ }
for (int i = 0; i < len; i++) {
uc32 c = str->Get(shape, i);
if (c > 0xff) {
@@ -388,7 +397,7 @@
break;
}
fprintf(logfile_, "/");
- LogString(Handle<String>::cast(source));
+ LogString(Handle<String>::cast(source), false);
fprintf(logfile_, "/");
// global flag
@@ -422,19 +431,40 @@
}
-void Logger::RegExpExecEvent(Handle<JSRegExp> regexp,
- int start_index,
- Handle<String> input_string) {
-#ifdef ENABLE_LOGGING_AND_PROFILING
- if (logfile_ == NULL || !FLAG_log_regexp) return;
+// Writes one log line built from |format|, substituting elements of |args|.
+// A directive has the form %<digit><type>: <digit> ('0'..'9') selects the
+// element of |args| and <type> selects how it is printed:
+//   s  string, contents only          S  string with representation info
+//   r  regexp source                  x  Smi in hex        i  Smi in decimal
+// Every other character is copied to the log verbatim, and a newline is
+// appended at the end.
+void Logger::LogRuntime(Vector<const char> format, JSArray* args) {
+#ifdef ENABLE_LOGGING_AND_PROFILING
+ // Like the other event functions, do nothing when no log file is open
+ // instead of writing through a NULL FILE*.
+ if (logfile_ == NULL) return;
ScopedLock sl(mutex_);
-
- fprintf(logfile_, "regexp-run,");
- LogRegExpSource(regexp);
- fprintf(logfile_, ",");
- LogString(input_string);
- fprintf(logfile_, ",%d..%d\n", start_index, input_string->length());
-#endif
+ HandleScope scope;
+ for (int i = 0; i < format.length(); i++) {
+ char c = format[i];
+ // A directive needs three characters ('%', digit, type). Require both
+ // trailing characters to be present so format[i] below never reads past
+ // the end; a truncated directive is copied out literally instead.
+ if (c == '%' && i + 2 < format.length()) {
+ i++;
+ ASSERT('0' <= format[i] && format[i] <= '9');
+ Object* obj = args->GetElement(format[i] - '0');
+ i++;
+ switch (format[i]) {
+ case 's':
+ Logger::LogString(Handle<String>(String::cast(obj)), false);
+ break;
+ case 'S':
+ Logger::LogString(Handle<String>(String::cast(obj)), true);
+ break;
+ case 'r':
+ Logger::LogRegExpSource(Handle<JSRegExp>(JSRegExp::cast(obj)));
+ break;
+ case 'x':
+ fprintf(logfile_, "0x%x", Smi::cast(obj)->value());
+ break;
+ case 'i':
+ fprintf(logfile_, "%i", Smi::cast(obj)->value());
+ break;
+ default:
+ UNREACHABLE();
+ }
+ } else {
+ fputc(c, logfile_);
+ }
+ }
+ fputc('\n', logfile_);
+#endif
}
@@ -556,6 +586,18 @@
}
+void Logger::CodeAllocateEvent(Code* code, Assembler* assem) {  // Emits a "code-allocate,<code address>,<assembler pointer>" line.
+#ifdef ENABLE_LOGGING_AND_PROFILING
+ if (logfile_ == NULL || !FLAG_log_code) return;  // Silent no-op without a log file or with FLAG_log_code off.
+ ScopedLock sl(mutex_);  // Held for the rest of the function.
+
+ fprintf(logfile_, "code-allocate,0x%x,0x%x\n",
+ reinterpret_cast<unsigned int>(code->address()),  // NOTE(review): casting a pointer to unsigned int only works where pointers fit in an unsigned int.
+ reinterpret_cast<unsigned int>(assem));
+#endif
+}
+
+
void Logger::CodeMoveEvent(Address from, Address to) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log_code) return;
@@ -576,6 +618,33 @@
}
+void Logger::BeginCodeRegionEvent(CodeRegion* region,  // Emits "begin-code-region,<region>,<assembler>,<pc offset>,<name>".
+ Assembler* masm,
+ const char* name) {
+#ifdef ENABLE_LOGGING_AND_PROFILING
+ if (logfile_ == NULL || !FLAG_log_code) return;  // Silent no-op without a log file or with FLAG_log_code off.
+ ScopedLock sl(mutex_);  // Held for the rest of the function.
+ fprintf(logfile_, "begin-code-region,0x%x,0x%x,0x%x,%s\n",
+ reinterpret_cast<unsigned int>(region),  // NOTE(review): pointer-to-unsigned-int cast only works where pointers fit in an unsigned int.
+ reinterpret_cast<unsigned int>(masm),
+ masm->pc_offset(),  // Assembler offset at the time of the call.
+ name);  // Printed with %s, so it must be a NUL-terminated string.
+#endif
+}
+
+
+void Logger::EndCodeRegionEvent(CodeRegion* region, Assembler* masm) {  // Emits "end-code-region,<region>,<assembler>,<pc offset>".
+#ifdef ENABLE_LOGGING_AND_PROFILING
+ if (logfile_ == NULL || !FLAG_log_code) return;  // Silent no-op without a log file or with FLAG_log_code off.
+ ScopedLock sl(mutex_);  // Held for the rest of the function.
+ fprintf(logfile_, "end-code-region,0x%x,0x%x,0x%x\n",
+ reinterpret_cast<unsigned int>(region),  // NOTE(review): pointer-to-unsigned-int cast only works where pointers fit in an unsigned int.
+ reinterpret_cast<unsigned int>(masm),
+ masm->pc_offset());  // Assembler offset at the time of the call.
+#endif
+}
+
+
void Logger::ResourceEvent(const char* name, const char* tag) {
#ifdef ENABLE_LOGGING_AND_PROFILING
if (logfile_ == NULL || !FLAG_log) return;
diff --git a/src/log.h b/src/log.h
index 89d3e8a..93ac6a0 100644
--- a/src/log.h
+++ b/src/log.h
@@ -72,7 +72,11 @@
#undef LOG
#ifdef ENABLE_LOGGING_AND_PROFILING
-#define LOG(Call) v8::internal::Logger::Call
+#define LOG(Call) /* Invokes Logger::Call only when Logger::is_enabled() is true. */ \
+ do { /* do/while(false) makes the expansion a single statement. */ \
+ if (v8::internal::Logger::is_enabled()) /* Arguments of Call are not evaluated when this is false. */ \
+ v8::internal::Logger::Call; \
+ } while (false)
#else
#define LOG(Call) ((void) 0)
#endif
@@ -160,10 +164,16 @@
static void CodeCreateEvent(const char* tag, Code* code, const char* source);
static void CodeCreateEvent(const char* tag, Code* code, String* name);
static void CodeCreateEvent(const char* tag, Code* code, int args_count);
+ static void CodeAllocateEvent(Code* code, Assembler* assem);
// Emits a code move event.
static void CodeMoveEvent(Address from, Address to);
// Emits a code delete event.
static void CodeDeleteEvent(Address from);
+ // Emits region delimiters
+ static void BeginCodeRegionEvent(CodeRegion* region,
+ Assembler* masm,
+ const char* name);
+ static void EndCodeRegionEvent(CodeRegion* region, Assembler* masm);
// ==== Events logged by --log-gc. ====
// Heap sampling events: start, end, and individual types.
@@ -183,9 +193,8 @@
static void RegExpCompileEvent(Handle<JSRegExp> regexp, bool in_cache);
- static void RegExpExecEvent(Handle<JSRegExp> regexp,
- int start_index,
- Handle<String> input_string);
+ // Log an event reported from generated code
+ static void LogRuntime(Vector<const char> format, JSArray* args);
#ifdef ENABLE_LOGGING_AND_PROFILING
static StateTag state() {
@@ -193,13 +202,15 @@
}
#endif
+ static bool is_enabled() { return logfile_ != NULL; }
+
#ifdef ENABLE_LOGGING_AND_PROFILING
private:
// Emits the source code of a regexp. Used by regexp events.
static void LogRegExpSource(Handle<JSRegExp> regexp);
- static void LogString(Handle<String> str);
+ static void LogString(Handle<String> str, bool show_impl_info);
// Emits a profiler tick event. Used by the profiler thread.
static void TickEvent(TickSample* sample, bool overflow);
diff --git a/src/macro-assembler-ia32.h b/src/macro-assembler-ia32.h
index 8bcc651..b8fb3b9 100644
--- a/src/macro-assembler-ia32.h
+++ b/src/macro-assembler-ia32.h
@@ -305,6 +305,7 @@
MacroAssembler masm_; // Macro assembler used to generate the code.
};
+
// -----------------------------------------------------------------------------
// Static helper functions.
diff --git a/src/objects-inl.h b/src/objects-inl.h
index 5c70463..0bad5a1 100644
--- a/src/objects-inl.h
+++ b/src/objects-inl.h
@@ -348,10 +348,17 @@
bool Object::IsContext() {
return Object::IsHeapObject()
&& (HeapObject::cast(this)->map() == Heap::context_map() ||
+ HeapObject::cast(this)->map() == Heap::catch_context_map() ||
HeapObject::cast(this)->map() == Heap::global_context_map());
}
+bool Object::IsCatchContext() {  // True only for heap objects whose map is Heap::catch_context_map().
+ return Object::IsHeapObject()  // Checked first, before the HeapObject cast below.
+ && HeapObject::cast(this)->map() == Heap::catch_context_map();
+}
+
+
bool Object::IsGlobalContext() {
return Object::IsHeapObject()
&& HeapObject::cast(this)->map() == Heap::global_context_map();
diff --git a/src/objects.h b/src/objects.h
index f400e99..c79838a 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -48,7 +48,7 @@
// - GlobalObject
// - JSGlobalObject
// - JSBuiltinsObject
-// _ JSGlobalProxy
+// - JSGlobalProxy
// - JSValue
// - Script
// - Array
@@ -616,6 +616,7 @@
inline bool IsFixedArray();
inline bool IsDescriptorArray();
inline bool IsContext();
+ inline bool IsCatchContext();
inline bool IsGlobalContext();
inline bool IsJSFunction();
inline bool IsCode();
diff --git a/src/parser.cc b/src/parser.cc
index 7236cb4..488389b 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -120,7 +120,10 @@
Statement* ParseContinueStatement(bool* ok);
Statement* ParseBreakStatement(ZoneStringList* labels, bool* ok);
Statement* ParseReturnStatement(bool* ok);
- Block* WithHelper(Expression* obj, ZoneStringList* labels, bool* ok);
+ Block* WithHelper(Expression* obj,
+ ZoneStringList* labels,
+ bool is_catch_block,
+ bool* ok);
Statement* ParseWithStatement(ZoneStringList* labels, bool* ok);
CaseClause* ParseCaseClause(bool* default_seen_ptr, bool* ok);
SwitchStatement* ParseSwitchStatement(ZoneStringList* labels, bool* ok);
@@ -470,9 +473,8 @@
} else if (terms_.length() > 0) {
ASSERT(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
- if (atom->IsLookahead() || atom->IsAssertion()) {
- // Guaranteed not to match a non-empty string.
- // Assertion as an atom can happen as, e.g., (?:\b)
+ if (atom->max_match() == 0) {
+ // Guaranteed to only match an empty string.
LAST(ADD_TERM);
if (min == 0) {
return;
@@ -527,7 +529,9 @@
void Advance(int dist);
void Reset(int pos);
- bool HasCharacterEscapes();
+ // Reports whether the pattern might be used as a literal search string.
+ // Only use if the result of the parse is a single atom node.
+ bool simple();
int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
int position() { return next_pos_ - 1; }
@@ -548,7 +552,7 @@
int next_pos_;
FlatStringReader* in_;
Handle<String>* error_;
- bool has_character_escapes_;
+ bool simple_;
ZoneList<RegExpCapture*>* captures_;
bool is_scanned_for_captures_;
// The capture count is only valid after we have scanned for captures.
@@ -1318,6 +1322,9 @@
Block* result = NEW(Block(labels, 1, false));
Target target(this, result);
TryStatement* statement = ParseTryStatement(CHECK_OK);
+ if (statement) {
+ statement->set_statement_pos(statement_pos);
+ }
if (result) result->AddStatement(statement);
return result;
}
@@ -1919,7 +1926,10 @@
}
-Block* Parser::WithHelper(Expression* obj, ZoneStringList* labels, bool* ok) {
+Block* Parser::WithHelper(Expression* obj,
+ ZoneStringList* labels,
+ bool is_catch_block,
+ bool* ok) {
// Parse the statement and collect escaping labels.
ZoneList<Label*>* label_list = NEW(ZoneList<Label*>(0));
LabelCollector collector(label_list);
@@ -1936,7 +1946,7 @@
Block* result = NEW(Block(NULL, 2, false));
if (result) {
- result->AddStatement(NEW(WithEnterStatement(obj)));
+ result->AddStatement(NEW(WithEnterStatement(obj, is_catch_block)));
// Create body block.
Block* body = NEW(Block(NULL, 1, false));
@@ -1972,7 +1982,7 @@
Expression* expr = ParseExpression(true, CHECK_OK);
Expect(Token::RPAREN, CHECK_OK);
- return WithHelper(expr, labels, CHECK_OK);
+ return WithHelper(expr, labels, false, CHECK_OK);
}
@@ -2133,7 +2143,7 @@
catch_var = top_scope_->NewTemporary(Factory::catch_var_symbol());
Expression* obj = MakeCatchContext(name, catch_var);
{ Target target(this, &catch_collector);
- catch_block = WithHelper(obj, NULL, CHECK_OK);
+ catch_block = WithHelper(obj, NULL, true, CHECK_OK);
}
} else {
Expect(Token::LBRACE, CHECK_OK);
@@ -3502,7 +3512,7 @@
next_pos_(0),
in_(in),
error_(error),
- has_character_escapes_(false),
+ simple_(true),
captures_(NULL),
is_scanned_for_captures_(false),
capture_count_(0),
@@ -3550,11 +3560,8 @@
}
-// Reports whether the parsed string atoms contain any characters that were
-// escaped in the original pattern. If not, all atoms are proper substrings
-// of the original pattern.
-bool RegExpParser::HasCharacterEscapes() {
- return has_character_escapes_;
+bool RegExpParser::simple() {  // Accessor for simple_, the flag replacing has_character_escapes_ in this patch.
+ return simple_;  // Starts true in the constructor; the parser sets it to false elsewhere in this patch.
}
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
@@ -3769,7 +3776,7 @@
Advance(2);
break;
}
- has_character_escapes_ = true;
+ simple_ = false;
break;
case '{': {
int dummy;
@@ -3795,12 +3802,12 @@
// {
case '*':
min = 0;
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
break;
case '+':
min = 1;
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
break;
case '?':
@@ -3822,6 +3829,7 @@
is_greedy = false;
Advance();
}
+ simple_ = false; // Adding quantifier might *remove* look-ahead.
builder.AddQuantifierToAtom(min, max, is_greedy);
}
}
@@ -3965,7 +3973,7 @@
} else if (current() == ',') {
Advance();
if (current() == '}') {
- max = RegExpQuantifier::kInfinity;
+ max = RegExpTree::kInfinity;
Advance();
} else {
while (IsDecimalDigit(current())) {
@@ -4184,6 +4192,8 @@
Advance(2);
return CharacterRange::Singleton(0); // Return dummy value.
}
+ case kEndMarker:
+ ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);
default:
uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
return CharacterRange::Singleton(c);
@@ -4307,15 +4317,17 @@
// Make sure we have a stack guard.
StackGuard guard;
RegExpParser parser(input, &result->error, multiline);
- result->tree = parser.ParsePattern();
+ RegExpTree* tree = parser.ParsePattern();
if (parser.failed()) {
- ASSERT(result->tree == NULL);
+ ASSERT(tree == NULL);
ASSERT(!result->error.is_null());
} else {
- ASSERT(result->tree != NULL);
+ ASSERT(tree != NULL);
ASSERT(result->error.is_null());
- result->has_character_escapes = parser.HasCharacterEscapes();
- result->capture_count = parser.captures_started();
+ result->tree = tree;
+ int capture_count = parser.captures_started();
+ result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
+ result->capture_count = capture_count;
}
return !parser.failed();
}
diff --git a/src/regexp-delay.js b/src/regexp-delay.js
index 4baa9cd..4b0d537 100644
--- a/src/regexp-delay.js
+++ b/src/regexp-delay.js
@@ -178,6 +178,7 @@
return null;
}
+ %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
// matchIndices is an array of integers with length of captures*2,
// each pair of integers specified the start and the end of index
// in the string.
diff --git a/src/regexp-macro-assembler-ia32.cc b/src/regexp-macro-assembler-ia32.cc
index be2990e..89b416c 100644
--- a/src/regexp-macro-assembler-ia32.cc
+++ b/src/regexp-macro-assembler-ia32.cc
@@ -93,6 +93,7 @@
entry_label_(),
start_label_(),
success_label_(),
+ backtrack_label_(),
exit_label_(),
self_(Heap::undefined_value()) {
__ jmp(&entry_label_); // We'll write the entry code later.
@@ -106,7 +107,9 @@
entry_label_.Unuse();
start_label_.Unuse();
success_label_.Unuse();
+ backtrack_label_.Unuse();
exit_label_.Unuse();
+ check_preempt_label_.Unuse();
}
@@ -126,9 +129,7 @@
void RegExpMacroAssemblerIA32::Backtrack() {
- __ pop(ecx);
- __ add(Operand(ecx), Immediate(self_));
- __ jmp(Operand(ecx));
+ SafeReturn();
}
@@ -155,7 +156,7 @@
}
-void RegExpMacroAssemblerIA32::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
__ cmp(current_character(), c);
BranchOrBacktrack(equal, on_equal);
}
@@ -194,46 +195,28 @@
BranchOrBacktrack(greater, on_failure);
}
- if (str.length() <= kMaxInlineStringTests) {
- for (int i = 0; i < str.length(); i++) {
- if (mode_ == ASCII) {
- __ cmpb(Operand(esi, edi, times_1, byte_offset + i),
- static_cast<int8_t>(str[i]));
- } else {
- ASSERT(mode_ == UC16);
- __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)),
- Immediate(str[i]));
- }
- BranchOrBacktrack(not_equal, on_failure);
- }
- return;
+ Label backtrack;
+ if (on_failure == NULL) {
+ // Avoid inlining the Backtrack macro for each test.
+ Label skip_backtrack;
+ __ jmp(&skip_backtrack);
+ __ bind(&backtrack);
+ Backtrack();
+ __ bind(&skip_backtrack);
+ on_failure = &backtrack;
}
- ArraySlice constant_buffer = constants_.GetBuffer(str.length(), char_size());
- if (mode_ == ASCII) {
- for (int i = 0; i < str.length(); i++) {
- constant_buffer.at<char>(i) = static_cast<char>(str[i]);
+ for (int i = 0; i < str.length(); i++) {
+ if (mode_ == ASCII) {
+ __ cmpb(Operand(esi, edi, times_1, byte_offset + i),
+ static_cast<int8_t>(str[i]));
+ } else {
+ ASSERT(mode_ == UC16);
+ __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)),
+ Immediate(str[i]));
}
- } else {
- memcpy(constant_buffer.location(),
- str.start(),
- str.length() * sizeof(uc16));
+ BranchOrBacktrack(not_equal, on_failure);
}
-
- __ mov(eax, edi);
- __ mov(ebx, esi);
- __ lea(edi, Operand(esi, edi, times_1, byte_offset));
- LoadConstantBufferAddress(esi, &constant_buffer);
- __ mov(ecx, str.length());
- if (char_size() == 1) {
- __ rep_cmpsb();
- } else {
- ASSERT(char_size() == 2);
- __ rep_cmpsw();
- }
- __ mov(esi, ebx);
- __ mov(edi, eax);
- BranchOrBacktrack(not_equal, on_failure);
}
@@ -251,62 +234,58 @@
int start_reg,
Label* on_no_match) {
Label fallthrough;
- __ mov(eax, register_location(start_reg));
+ __ mov(edx, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
- __ sub(ecx, Operand(eax)); // Length to check.
+ __ sub(ecx, Operand(edx)); // Length to check.
BranchOrBacktrack(less, on_no_match);
__ j(equal, &fallthrough);
if (mode_ == ASCII) {
Label success;
Label fail;
- __ push(esi);
+ Label loop_increment;
__ push(edi);
+ __ add(edx, Operand(esi));
__ add(edi, Operand(esi));
- __ add(esi, Operand(eax));
+ __ add(ecx, Operand(edi));
+
Label loop;
__ bind(&loop);
- __ rep_cmpsb();
- __ j(equal, &success);
+ __ movzx_b(eax, Operand(edi, 0));
+ __ cmpb_al(Operand(edx, 0));
+ __ j(equal, &loop_increment);
+
// Compare lower-case if letters.
- __ movzx_b(eax, Operand(edi, -1));
- __ or_(eax, 0x20); // To-lower-case
+ __ or_(eax, 0x20); // To lower-case.
__ lea(ebx, Operand(eax, -'a'));
__ cmp(ebx, static_cast<int32_t>('z' - 'a'));
__ j(above, &fail);
- __ movzx_b(ebx, Operand(esi, -1));
+ __ movzx_b(ebx, Operand(edx, 0));
__ or_(ebx, 0x20); // To-lower-case
__ cmp(eax, Operand(ebx));
__ j(not_equal, &fail);
- __ or_(ecx, Operand(ecx));
- __ j(not_equal, &loop);
+
+ __ bind(&loop_increment);
+ __ add(Operand(edx), Immediate(1));
+ __ add(Operand(edi), Immediate(1));
+ __ cmp(edi, Operand(ecx));
+ __ j(below, &loop, taken);
__ jmp(&success);
__ bind(&fail);
__ pop(edi);
- __ pop(esi);
BranchOrBacktrack(no_condition, on_no_match);
__ bind(&success);
__ pop(eax); // discard original value of edi
- __ pop(esi);
__ sub(edi, Operand(esi));
} else {
- // store state
+ ASSERT(mode_ == UC16);
__ push(esi);
__ push(edi);
__ push(ecx);
- // align stack
- int frameAlignment = OS::ActivationFrameAlignment();
- if (frameAlignment != 0) {
- __ mov(ebx, esp);
- __ sub(Operand(esp), Immediate(5 * kPointerSize)); // args + esp.
- ASSERT(IsPowerOf2(frameAlignment));
- __ and_(esp, -frameAlignment);
- __ mov(Operand(esp, 4 * kPointerSize), ebx);
- } else {
- __ sub(Operand(esp), Immediate(4 * kPointerSize));
- }
+ const int four_arguments = 4;
+ FrameAlign(four_arguments);
// Put arguments on stack.
__ mov(Operand(esp, 3 * kPointerSize), ecx);
__ mov(ebx, Operand(ebp, kInputEndOffset));
@@ -317,17 +296,11 @@
__ mov(eax, Operand(ebp, kInputBuffer));
__ mov(Operand(esp, 0 * kPointerSize), eax);
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
- __ mov(Operand(eax),
- Immediate(reinterpret_cast<int32_t>(function_address)));
- __ call(Operand(eax));
- if (frameAlignment != 0) {
- __ mov(esp, Operand(esp, 4 * kPointerSize));
- } else {
- __ add(Operand(esp), Immediate(4 * sizeof(int32_t)));
- }
+ CallCFunction(function_address, four_arguments);
__ pop(ecx);
__ pop(edi);
__ pop(esi);
+
__ or_(eax, Operand(eax));
BranchOrBacktrack(zero, on_no_match);
__ add(edi, Operand(ecx));
@@ -340,30 +313,47 @@
int start_reg,
Label* on_no_match) {
Label fallthrough;
- __ mov(eax, register_location(start_reg));
+ Label success;
+ Label fail;
+ __ mov(edx, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
- __ sub(ecx, Operand(eax)); // Length to check.
+ __ sub(ecx, Operand(edx)); // Length to check.
BranchOrBacktrack(less, on_no_match);
__ j(equal, &fallthrough);
// Check that there are sufficient characters left in the input.
+
__ mov(ebx, edi);
__ add(ebx, Operand(ecx));
BranchOrBacktrack(greater, on_no_match);
__ mov(ebx, edi);
- __ mov(edx, esi);
__ add(edi, Operand(esi));
- __ add(esi, Operand(eax));
- __ rep_cmpsb();
- __ mov(esi, edx);
- Label success;
- __ j(equal, &success);
+ __ add(edx, Operand(esi));
+ __ add(ecx, Operand(edi));
+
+ Label loop;
+ __ bind(&loop);
+ if (mode_ == ASCII) {
+ __ movzx_b(eax, Operand(edx, 0));
+ __ cmpb_al(Operand(edi, 0));
+ } else {
+ ASSERT(mode_ == UC16);
+ __ movzx_w(eax, Operand(edx, 0));
+ __ cmpw_ax(Operand(edi, 0));
+ }
+ __ j(not_equal, &fail);
+ __ add(Operand(edx), Immediate(char_size()));
+ __ add(Operand(edi), Immediate(char_size()));
+ __ cmp(edi, Operand(ecx));
+ __ j(below, &loop);
+ __ jmp(&success);
+
+ __ bind(&fail);
__ mov(edi, ebx);
BranchOrBacktrack(no_condition, on_no_match);
__ bind(&success);
__ sub(edi, Operand(esi));
-
__ bind(&fallthrough);
}
@@ -377,32 +367,144 @@
}
-void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
+void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
+ Label* on_not_equal) {
__ cmp(current_character(), c);
BranchOrBacktrack(not_equal, on_not_equal);
}
-void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
- uc16 mask,
- Label* on_not_equal) {
+void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_equal) {
__ mov(eax, current_character());
- __ or_(eax, mask);
+ __ and_(eax, mask);
+ __ cmp(eax, c);
+ BranchOrBacktrack(equal, on_equal);
+}
+
+
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_not_equal) {
+ __ mov(eax, current_character());
+ __ and_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
-void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
uc16 c,
+ uc16 minus,
uc16 mask,
Label* on_not_equal) {
- __ lea(eax, Operand(current_character(), -mask));
- __ or_(eax, mask);
+ ASSERT(minus < String::kMaxUC16CharCode);
+ __ lea(eax, Operand(current_character(), -minus));
+ __ and_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
+// Emits inline matching code for one of the predefined character classes
+// selected by |type| ('s', 'S', 'd', 'D', '.', '*'), applied to the character
+// |cp_offset| positions from the current position.
+// If |check_offset| is true the position is bounds-checked before loading;
+// otherwise the character is loaded unchecked.
+// The emitted code goes to |on_no_match| (or backtracks if it is NULL) when
+// the character is not in the class and falls through when it is.
+// Returns true if code implementing the class was emitted, and false if this
+// class has no custom implementation for the current mode. Note that for 'S'
+// in UC16 mode the character load has already been emitted when false is
+// returned.
+// The emitted code may modify the current-character register.
+bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match) {
+ // Range checks (c in min..max) are generally implemented by an unsigned
+ // (c - min) <= (max - min) check, i.e. the character is outside the range
+ // exactly when (c - min) is unsigned-above (max - min).
+ switch (type) {
+ case 's':
+ // Match space-characters
+ if (mode_ == ASCII) {
+ // ASCII space characters are '\t'..'\r' and ' '.
+ if (check_offset) {
+ LoadCurrentCharacter(cp_offset, on_no_match, true, 1);
+ } else {
+ LoadCurrentCharacterUnchecked(cp_offset, 1);
+ }
+ Label success;
+ __ cmp(current_character(), ' ');
+ __ j(equal, &success);
+ // Check range 0x09..0x0d
+ __ sub(Operand(current_character()), Immediate('\t'));
+ __ cmp(current_character(), '\r' - '\t');
+ // Fail only when strictly above the range, so that '\r' itself matches.
+ BranchOrBacktrack(above, on_no_match);
+ __ bind(&success);
+ return true;
+ }
+ return false;
+ case 'S':
+ // Match non-space characters.
+ if (check_offset) {
+ LoadCurrentCharacter(cp_offset, on_no_match, true, 1);
+ } else {
+ LoadCurrentCharacterUnchecked(cp_offset, 1);
+ }
+ if (mode_ == ASCII) {
+ // ASCII space characters are '\t'..'\r' and ' '.
+ __ cmp(current_character(), ' ');
+ BranchOrBacktrack(equal, on_no_match);
+ __ sub(Operand(current_character()), Immediate('\t'));
+ __ cmp(current_character(), '\r' - '\t');
+ // Fail for the whole range 0x09..0x0d, including '\r'.
+ BranchOrBacktrack(below_equal, on_no_match);
+ return true;
+ }
+ return false;
+ case 'd':
+ // Match ASCII digits ('0'..'9')
+ if (check_offset) {
+ LoadCurrentCharacter(cp_offset, on_no_match, true, 1);
+ } else {
+ LoadCurrentCharacterUnchecked(cp_offset, 1);
+ }
+ __ sub(Operand(current_character()), Immediate('0'));
+ __ cmp(current_character(), '9' - '0');
+ // Unsigned comparison: characters below '0' wrap around to large values
+ // and are rejected too; '9' itself (difference == 9) must match.
+ BranchOrBacktrack(above, on_no_match);
+ return true;
+ case 'D':
+ // Match non ASCII-digits
+ if (check_offset) {
+ LoadCurrentCharacter(cp_offset, on_no_match, true, 1);
+ } else {
+ LoadCurrentCharacterUnchecked(cp_offset, 1);
+ }
+ __ sub(Operand(current_character()), Immediate('0'));
+ __ cmp(current_character(), '9' - '0');
+ // Fail for the whole range '0'..'9', including '9'.
+ BranchOrBacktrack(below_equal, on_no_match);
+ return true;
+ case '.': {
+ // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ if (check_offset) {
+ LoadCurrentCharacter(cp_offset, on_no_match, true, 1);
+ } else {
+ LoadCurrentCharacterUnchecked(cp_offset, 1);
+ }
+ // Compute hash value so exactly 0x0a and 0x0d become zero:
+ // with x = c - '\n', (x & 1) ^ (x >> 1) is zero only for x == 0 and x == 3.
+ __ sub(Operand(current_character()), Immediate('\n'));
+ __ mov(eax, current_character());
+ __ and_(current_character(), 0x01);
+ __ shr(eax, 1);
+ __ xor_(current_character(), Operand(eax));
+ BranchOrBacktrack(equal, on_no_match);
+ if (mode_ == UC16) {
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed ((current_char - '\n') >> 1) in eax.
+ __ cmp(eax, (0x2028 - '\n') >> 1);
+ BranchOrBacktrack(equal, on_no_match);
+ }
+ return true;
+ }
+ case '*':
+ // Match any character.
+ if (check_offset) {
+ CheckPosition(cp_offset, on_no_match);
+ }
+ return true;
+ // No custom implementation (yet): w, W, s(UC16), S(UC16).
+ default:
+ return false;
+ }
+}
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
@@ -528,7 +630,7 @@
Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start);
- LoadCurrentCharacterUnchecked(-1); // Load previous char.
+ LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
__ jmp(&start_label_);
__ bind(&at_start);
__ mov(current_character(), '\n');
@@ -536,25 +638,25 @@
// Exit code:
- // Success
- __ bind(&success_label_);
- if (num_saved_registers_ > 0) {
- // copy captures to output
- __ mov(ebx, Operand(ebp, kRegisterOutput));
- __ mov(ecx, Operand(ebp, kInputEndOffset));
- __ sub(ecx, Operand(ebp, kInputStartOffset));
- for (int i = 0; i < num_saved_registers_; i++) {
- __ mov(eax, register_location(i));
- __ add(eax, Operand(ecx)); // Convert to index from start, not end.
- if (char_size() > 1) {
- ASSERT(char_size() == 2);
- __ sar(eax, 1); // Convert to character index, not byte.
+ if (success_label_.is_linked()) {
+ // Success
+ __ bind(&success_label_);
+ if (num_saved_registers_ > 0) {
+ // copy captures to output
+ __ mov(ebx, Operand(ebp, kRegisterOutput));
+ __ mov(ecx, Operand(ebp, kInputEndOffset));
+ __ sub(ecx, Operand(ebp, kInputStartOffset));
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ mov(eax, register_location(i));
+ __ add(eax, Operand(ecx)); // Convert to index from start, not end.
+ if (mode_ == UC16) {
+ __ sar(eax, 1); // Convert byte index to character index.
+ }
+ __ mov(Operand(ebx, i * kPointerSize), eax);
}
- __ mov(Operand(ebx, i * kPointerSize), eax);
}
+ __ mov(eax, Immediate(1));
}
- __ mov(eax, Immediate(1));
-
// Exit and return eax
__ bind(&exit_label_);
__ leave();
@@ -563,6 +665,53 @@
__ pop(esi);
__ ret(0);
+ // Backtrack code (branch target for conditional backtracks).
+ if (backtrack_label_.is_linked()) {
+ __ bind(&backtrack_label_);
+ Backtrack();
+ }
+
+ // Preempt-code: shared out-of-line handler reached through SafeCall() from
+ // CheckStackLimit() when esp is at or below the stack guard limit.
+ if (check_preempt_label_.is_linked()) {
+ __ bind(&check_preempt_label_);
+ // TODO(lrn): call C function to check the stack guard and return current
+ // stack state (0 = ok, positive = out of stack, negative = preempt).
+ // Then dispatch to an action depending on state, and loop.
+ // Preserve the current position across the C call.
+ __ push(edi);
+
+ Label retry;
+ Label stack_overflow;
+
+ __ bind(&retry);
+ int num_arguments = 2;
+ FrameAlign(num_arguments);
+ // Argument 1: the code object being executed.
+ __ mov(Operand(esp, 1 * kPointerSize), Immediate(self_));
+ // Argument 0: address of the slot just below esp, which is where the call
+ // instruction emitted by CallCFunction will store its return address.
+ __ lea(eax, Operand(esp, -kPointerSize));
+ __ mov(Operand(esp, 0 * kPointerSize), eax);
+ CallCFunction(FUNCTION_ADDR(&CheckStackGuardState), num_arguments);
+
+ ExternalReference stack_guard_limit =
+ ExternalReference::address_of_stack_guard_limit();
+
+ // A non-zero result means an exception is pending.
+ __ or_(eax, Operand(eax));
+ __ j(not_equal, &stack_overflow);
+
+ // Still at or below the limit: handle the next pending request.
+ __ cmp(esp, Operand::StaticVariable(stack_guard_limit));
+ __ j(below_equal, &retry);
+
+ __ pop(edi);
+ // String might have moved: Recompute esi from scratch.
+ // The frame slots are addressed relative to ebp, like every other use of
+ // kInputBuffer and kInputEndOffset in this file; esp points into the
+ // backtrack stack here and must not be used as the base.
+ __ mov(esi, Operand(ebp, kInputBuffer));
+ __ mov(esi, Operand(esi, 0));
+ __ add(esi, Operand(ebp, kInputEndOffset));
+ SafeReturn();
+
+ __ bind(&stack_overflow);
+ // Exit with result -1 to signal thrown exception.
+ __ mov(eax, -1);
+ __ jmp(&exit_label_);
+ }
+
CodeDesc code_desc;
masm_->GetCode(&code_desc);
Handle<Code> code = Factory::NewCode(code_desc,
@@ -602,12 +751,13 @@
+// Loads |characters| characters, starting |cp_offset| characters after the
+// current position, into the current-character register.
+// When |check_bounds| is true the last character to be loaded is first
+// checked to lie inside the input; if it does not, control goes to
+// |on_end_of_input| (or backtracks if that label is NULL). When it is false
+// no check is emitted and the caller is responsible for the position being
+// valid.
void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input) {
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters) {
ASSERT(cp_offset >= 0);
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
- __ cmp(edi, -cp_offset * char_size());
- BranchOrBacktrack(greater_equal, on_end_of_input);
- LoadCurrentCharacterUnchecked(cp_offset);
+ if (check_bounds) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ }
+ LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -622,8 +772,8 @@
void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
- // CheckStackLimit(); // Not ready yet.
- __ push(label, RelocInfo::NONE);
+ __ push(Immediate::CodeRelativeOffset(label));
+ CheckStackLimit();
}
@@ -710,6 +860,38 @@
}
+// Called from generated regexp code when the stack guard is triggered.
+// |return_address| is the address of the stack slot that holds the return
+// address of this call into the generated code (it is written through below),
+// and |re_code| is the code object that made the call.
+// Returns 1 if an exception is pending (real stack overflow, or the interrupt
+// handler returned an exception) and 0 otherwise. If the code object changed
+// location while the interrupt was handled, the stored return address is
+// adjusted before returning.
+int RegExpMacroAssemblerIA32::CheckStackGuardState(Address return_address,
+ Code* re_code) {
+ if (StackGuard::IsStackOverflow()) {
+ Top::StackOverflow();
+ return 1;
+ }
+
+ // If not real stack overflow the stack guard was used to interrupt
+ // execution for another purpose.
+
+ // Prepare for possible GC.
+ Handle<Code> code_handle(re_code);
+#ifdef DEBUG
+ // Validate the return address stored in the slot, not the address of the
+ // slot itself (which lies on the stack, outside the code object).
+ Address stored_return_address = *reinterpret_cast<Address*>(return_address);
+ CHECK(re_code->instruction_start() <= stored_return_address);
+ CHECK(stored_return_address <=
+ re_code->instruction_start() + re_code->instruction_size());
+#endif
+
+ Object* result = Execution::HandleStackGuardInterrupt();
+
+ if (*code_handle != re_code) { // Return address no longer valid
+ // NOTE(review): this is a Code* pointer difference; it equals the byte
+ // distance only if sizeof(Code) == 1 - confirm.
+ int delta = *code_handle - re_code;
+ *reinterpret_cast<int32_t*>(return_address) += delta;
+ }
+
+ if (result->IsException()) {
+ return 1;
+ }
+ return 0;
+}
+
+
Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
ASSERT(register_index < (1<<30));
if (num_registers_ <= register_index) {
@@ -729,6 +911,13 @@
}
+void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
+ Label* on_outside_input) {
+ __ cmp(edi, -cp_offset * char_size()); // NOTE(review): edi looks like the current position as a byte offset relative to the input end - confirm.
+ BranchOrBacktrack(greater_equal, on_outside_input); // Backtracks instead when on_outside_input is NULL.
+}
+
+
void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition < 0) { // No condition
@@ -740,61 +929,95 @@
return;
}
if (to == NULL) {
- Label skip;
- __ j(NegateCondition(condition), &skip);
- Backtrack();
- __ bind(&skip);
+ __ j(condition, &backtrack_label_);
return;
}
__ j(condition, to);
}
+void RegExpMacroAssemblerIA32::SafeCall(Label* to) {
+ Label return_to; // Marks the instruction following the jump.
+ __ push(Immediate::CodeRelativeOffset(&return_to)); // Push a code-relative offset rather than an absolute address.
+ __ jmp(to);
+ __ bind(&return_to); // SafeReturn() resumes execution here.
+}
+
+
+void RegExpMacroAssemblerIA32::SafeReturn() {
+ __ pop(ecx); // Code-relative offset pushed by SafeCall(); clobbers ecx.
+ __ add(Operand(ecx), Immediate(self_)); // Rebase onto the code object referenced by self_.
+ __ jmp(Operand(ecx));
+}
+
+
void RegExpMacroAssemblerIA32::CheckStackLimit() {
if (FLAG_check_stack) {
// Check for preemption first.
Label no_preempt;
- Label retry_preempt;
// Check for preemption.
ExternalReference stack_guard_limit =
ExternalReference::address_of_stack_guard_limit();
__ cmp(esp, Operand::StaticVariable(stack_guard_limit));
__ j(above, &no_preempt, taken);
- __ push(edi); // Current position.
- __ push(edx); // Current character.
- // Restore original edi, esi.
- __ mov(edi, Operand(ebp, kBackup_edi));
- __ mov(esi, Operand(ebp, kBackup_esi));
- __ bind(&retry_preempt);
- // simulate stack for Runtime call.
- __ push(eax);
- __ push(Immediate(Smi::FromInt(0))); // Dummy receiver
- __ CallRuntime(Runtime::kStackGuard, 1);
- __ pop(eax);
-
- __ cmp(esp, Operand::StaticVariable(stack_guard_limit));
- __ j(below_equal, &retry_preempt);
-
- __ pop(edx);
- __ pop(edi);
- __ mov(esi, Operand(ebp, kInputBuffer));
- __ mov(esi, Operand(esi, 0));
- __ add(esi, Operand(ebp, kInputEndOffset));
+ SafeCall(&check_preempt_label_);
__ bind(&no_preempt);
}
}
-void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
- if (mode_ == ASCII) {
- __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
- return;
+void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments) {
+ int frameAlignment = OS::ActivationFrameAlignment();
+ if (frameAlignment != 0) {
+ // Make stack end at alignment and make room for num_arguments words
+ // and the original value of esp.
+ __ mov(ebx, esp);
+ __ sub(Operand(esp), Immediate((num_arguments + 1) * kPointerSize));
+ ASSERT(IsPowerOf2(frameAlignment));
+ __ and_(esp, -frameAlignment);
+ __ mov(Operand(esp, num_arguments * kPointerSize), ebx);
+ } else {
+ __ sub(Operand(esp), Immediate(num_arguments * kPointerSize));
}
- ASSERT(mode_ == UC16);
- __ movzx_w(current_character(),
+}
+
+
+// Emits a call to the C function at |function_address| and then releases the
+// argument area that was set up by FrameAlign(num_arguments). The arguments
+// must already have been stored in esp[0], esp[4], ... by the caller.
+// Clobbers eax (used to hold the call target).
+void RegExpMacroAssemblerIA32::CallCFunction(Address function_address,
+ int num_arguments) {
+ __ mov(Operand(eax), Immediate(reinterpret_cast<int32_t>(function_address)));
+ __ call(Operand(eax));
+ if (OS::ActivationFrameAlignment() != 0) {
+ // FrameAlign saved the original esp just above the arguments.
+ __ mov(esp, Operand(esp, num_arguments * kPointerSize));
+ } else {
+ // Mirror FrameAlign, which reserved num_arguments * kPointerSize bytes.
+ __ add(Operand(esp), Immediate(num_arguments * kPointerSize));
+ }
+}
+
+
+void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
+ int characters) {
+ if (mode_ == ASCII) {
+ if (characters == 4) {
+ __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
+ } else if (characters == 2) {
+ __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
+ } else {
+ ASSERT(characters == 1);
+ __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
+ }
+ } else {
+ ASSERT(mode_ == UC16);
+ if (characters == 2) {
+ __ mov(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
+ } else {
+ ASSERT(characters == 1);
+ __ movzx_w(current_character(),
+ Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
+ }
+ }
}
diff --git a/src/regexp-macro-assembler-ia32.h b/src/regexp-macro-assembler-ia32.h
index 78ab2bd..ecf0326 100644
--- a/src/regexp-macro-assembler-ia32.h
+++ b/src/regexp-macro-assembler-ia32.h
@@ -33,7 +33,8 @@
class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
public:
// Type of input string to generate code for.
- enum Mode {ASCII = 1, UC16 = 2};
+ enum Mode { ASCII = 1, UC16 = 2 };
+ enum Result { EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
RegExpMacroAssemblerIA32(Mode mode, int registers_to_save);
virtual ~RegExpMacroAssemblerIA32();
@@ -42,7 +43,10 @@
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
- virtual void CheckCharacter(uc16 c, Label* on_equal);
+ virtual void CheckCharacter(uint32_t c, Label* on_equal);
+ virtual void CheckCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
@@ -55,11 +59,18 @@
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
- virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
- virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusOr(uc16 c,
- uc16 mask,
- Label* on_not_equal);
+ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+ virtual void CheckNotCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_not_equal);
+ virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+ uc16 minus,
+ uc16 mask,
+ Label* on_not_equal);
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
@@ -76,9 +87,10 @@
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
- virtual void LoadCurrentCharacterUnchecked(int cp_offset);
-
+ virtual void LoadCurrentCharacter(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds = true,
+ int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
@@ -92,16 +104,21 @@
virtual void WriteStackPointerToRegister(int reg);
template <typename T>
- static inline bool Execute(Code* code,
- T** input,
- int start_offset,
- int end_offset,
- int* output,
- bool at_start) {
- typedef bool (*matcher)(T**, int, int, int*, int);
+ static inline Result Execute(Code* code,
+ T** input,
+ int start_offset,
+ int end_offset,
+ int* output,
+ bool at_start) {
+ typedef int (*matcher)(T**, int, int, int*, int);
matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
int at_start_val = at_start ? 1 : 0;
- return matcher_func(input, start_offset, end_offset, output, at_start_val);
+ int result = matcher_func(input,
+ start_offset,
+ end_offset,
+ output,
+ at_start_val);
+ return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE);
}
private:
@@ -120,15 +137,30 @@
static const size_t kRegExpCodeSize = 1024;
// Initial size of constant buffers allocated during compilation.
static const int kRegExpConstantsSize = 256;
- // Only unroll loops up to this length.
- static const int kMaxInlineStringTests = 8;
+ // Only unroll loops up to this length. TODO(lrn): Actually use this.
+ static const int kMaxInlineStringTests = 32;
- // Compares two-byte strings case insenstively.
+ // Compares two-byte strings case insensitively.
static int CaseInsensitiveCompareUC16(uc16** buffer,
int byte_offset1,
int byte_offset2,
size_t byte_length);
+ void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
+
+ // Adds code that checks whether preemption has been requested
+ // (and checks if we have hit the stack limit too).
+ void CheckStackLimit();
+
+ // Called from RegExp if the stack-guard is triggered.
+ // If the code object is relocated, the return address is fixed before
+ // returning.
+ static int CheckStackGuardState(Address return_address, Code* re_code);
+
+ // Checks whether the given offset from the current position is before
+ // the end of the string.
+ void CheckPosition(int cp_offset, Label* on_outside_input);
+
// The ebp-relative location of a regexp register.
Operand register_location(int register_index);
@@ -147,9 +179,20 @@
// and an offset. Uses no extra registers.
void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);
- // Adds code that checks whether preemption has been requested
- // (and checks if we have hit the stack limit too).
- void CheckStackLimit();
+ // Call and return internally in the generated code in a way that
+ // is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
+ void SafeCall(Label* to);
+ void SafeReturn();
+
+ // Before calling a C-function from generated code, align arguments on stack.
+ // After aligning the frame, arguments must be stored in esp[0], esp[4],
+ // etc., not pushed. The argument count assumes all arguments are word sized.
+ void FrameAlign(int num_arguments);
+ // Calls a C function and cleans up the space for arguments allocated
+ // by FrameAlign. The called function is not allowed to trigger a garbage
+ // collection, since that might move the code and invalidate the return
+ // address
+ void CallCFunction(Address function_address, int num_arguments);
MacroAssembler* masm_;
// Constant buffer provider. Allocates external storage for storing
@@ -166,7 +209,9 @@
Label entry_label_;
Label start_label_;
Label success_label_;
+ Label backtrack_label_;
Label exit_label_;
+ Label check_preempt_label_;
// Handle used to represent the generated code object itself.
Handle<Object> self_;
};
diff --git a/src/regexp-macro-assembler-irregexp.cc b/src/regexp-macro-assembler-irregexp.cc
index 44fa33c..cfcae32 100644
--- a/src/regexp-macro-assembler-irregexp.cc
+++ b/src/regexp-macro-assembler-irregexp.cc
@@ -44,6 +44,7 @@
RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
+ if (backtrack_.is_linked()) backtrack_.Unuse();
}
@@ -196,17 +197,32 @@
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
- Label* on_failure) {
- Emit(BC_LOAD_CURRENT_CHAR);
+ Label* on_failure,
+ bool check_bounds,
+ int characters) {
+ int bytecode;
+ if (check_bounds) {
+ if (characters == 4) {
+ bytecode = BC_LOAD_4_CURRENT_CHARS;
+ } else if (characters == 2) {
+ bytecode = BC_LOAD_2_CURRENT_CHARS;
+ } else {
+ ASSERT(characters == 1);
+ bytecode = BC_LOAD_CURRENT_CHAR;
+ }
+ } else {
+ if (characters == 4) {
+ bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
+ } else if (characters == 2) {
+ bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
+ } else {
+ ASSERT(characters == 1);
+ bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
+ }
+ }
+ Emit(bytecode);
Emit32(cp_offset);
- EmitOrLink(on_failure);
-}
-
-
-void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
- int cp_offset) {
- Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
- Emit32(cp_offset);
+ if (check_bounds) EmitOrLink(on_failure);
}
@@ -226,9 +242,9 @@
}
-void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
Emit(BC_CHECK_CHAR);
- Emit16(c);
+ Emit32(c);
EmitOrLink(on_equal);
}
@@ -239,31 +255,44 @@
}
-void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c,
+void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_CHAR);
- Emit16(c);
+ Emit32(c);
EmitOrLink(on_not_equal);
}
-void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(
- uc16 c,
- uc16 mask,
+void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
+ uint32_t c,
+ uint32_t mask,
+ Label* on_equal) {
+ Emit(BC_AND_CHECK_CHAR);
+ Emit32(c);
+ Emit32(mask);
+ EmitOrLink(on_equal);
+}
+
+
+void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
+ uint32_t c,
+ uint32_t mask,
Label* on_not_equal) {
- Emit(BC_OR_CHECK_NOT_CHAR);
- Emit16(c);
- Emit16(mask);
+ Emit(BC_AND_CHECK_NOT_CHAR);
+ Emit32(c);
+ Emit32(mask);
EmitOrLink(on_not_equal);
}
-void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
uc16 c,
+ uc16 minus,
uc16 mask,
Label* on_not_equal) {
- Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
+ Emit(BC_MINUS_AND_CHECK_NOT_CHAR);
Emit16(c);
+ Emit16(minus);
Emit16(mask);
EmitOrLink(on_not_equal);
}
@@ -344,7 +373,7 @@
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR);
- Emit16(str[i]);
+ Emit32(str[i]);
EmitOrLink(on_failure);
}
}
diff --git a/src/regexp-macro-assembler-irregexp.h b/src/regexp-macro-assembler-irregexp.h
index 722e779..871e1e5 100644
--- a/src/regexp-macro-assembler-irregexp.h
+++ b/src/regexp-macro-assembler-irregexp.h
@@ -66,18 +66,26 @@
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
- virtual void LoadCurrentCharacterUnchecked(int cp_offset);
- virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+ virtual void LoadCurrentCharacter(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds = true,
+ int characters = 1);
+ virtual void CheckCharacter(uint32_t c, Label* on_equal);
+ virtual void CheckCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
- virtual void CheckCharacter(uc16 c, Label* on_equal);
+ virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
- virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusOr(uc16 c,
- uc16 mask,
- Label* on_not_equal);
+ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+ virtual void CheckNotCharacterAfterAnd(uint32_t c,
+ uint32_t mask,
+ Label* on_not_equal);
+ virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+ uc16 minus,
+ uc16 mask,
+ Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
diff --git a/src/regexp-macro-assembler-tracer.cc b/src/regexp-macro-assembler-tracer.cc
index fc3629c..94ede51 100644
--- a/src/regexp-macro-assembler-tracer.cc
+++ b/src/regexp-macro-assembler-tracer.cc
@@ -164,18 +164,19 @@
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input) {
- PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters) {
+ const char* check_msg = check_bounds ? "" : " (unchecked)";
+ PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars));\n",
cp_offset,
- on_end_of_input);
- assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
-}
-
-
-void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
- PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
- cp_offset);
- assembler_->LoadCurrentCharacterUnchecked(cp_offset);
+ on_end_of_input,
+ check_msg,
+ characters);
+ assembler_->LoadCurrentCharacter(cp_offset,
+ on_end_of_input,
+ check_bounds,
+ characters);
}
@@ -192,7 +193,7 @@
}
-void RegExpMacroAssemblerTracer::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerTracer::CheckCharacter(uint32_t c, Label* on_equal) {
PrintF(" CheckCharacter(c='u%04x', label[%08x]);\n", c, on_equal);
assembler_->CheckCharacter(c, on_equal);
}
@@ -204,28 +205,49 @@
}
-void RegExpMacroAssemblerTracer::CheckNotCharacter(uc16 c,
+void RegExpMacroAssemblerTracer::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
PrintF(" CheckNotCharacter(c='u%04x', label[%08x]);\n", c, on_not_equal);
assembler_->CheckNotCharacter(c, on_not_equal);
}
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterOr(uc16 c, uc16 mask,
- Label* on_not_equal) {
- PrintF(" CheckNotCharacterAfterOr(c='u%04x', mask=0x%04x, label[%08x]);\n", c,
- mask, on_not_equal);
- assembler_->CheckNotCharacterAfterOr(c, mask, on_not_equal);
+void RegExpMacroAssemblerTracer::CheckCharacterAfterAnd(
+ uint32_t c,
+ uint32_t mask,
+ Label* on_equal) {
+ PrintF(" CheckCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
+ c,
+ mask,
+ on_equal);
+ assembler_->CheckCharacterAfterAnd(c, mask, on_equal);
}
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerTracer::CheckNotCharacterAfterAnd(
+ uint32_t c,
+ uint32_t mask,
+ Label* on_not_equal) {
+ PrintF(" CheckNotCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
+ c,
+ mask,
+ on_not_equal);
+ assembler_->CheckNotCharacterAfterAnd(c, mask, on_not_equal);
+}
+
+
+void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
uc16 c,
+ uc16 minus,
uc16 mask,
Label* on_not_equal) {
- PrintF(" CheckNotCharacterAfterMinusOr(c='u%04x', mask=0x%04x, "
- "label[%08x]);\n", c, mask, on_not_equal);
- assembler_->CheckNotCharacterAfterMinusOr(c, mask, on_not_equal);
+ PrintF(" CheckNotCharacterAfterMinusAnd(c='u%04x', minus=%04x, mask=0x%04x, "
+ "label[%08x]);\n",
+ c,
+ minus,
+ mask,
+ on_not_equal);
+ assembler_->CheckNotCharacterAfterMinusAnd(c, minus, mask, on_not_equal);
}
@@ -273,16 +295,36 @@
void RegExpMacroAssemblerTracer::CheckBitmap(uc16 start, Label* bitmap,
Label* on_zero) {
- PrintF(" CheckBitmap(start=u$04x, <bitmap>, label[%08x]);\n", start, on_zero);
+ PrintF(" CheckBitmap(start=u%04x, <bitmap>, label[%08x]);\n", start, on_zero);
assembler_->CheckBitmap(start, bitmap, on_zero);
}
+bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
+ uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match) {
+ bool supported = assembler_->CheckSpecialCharacterClass(type,
+ cp_offset,
+ check_offset,
+ on_no_match);
+ PrintF(" CheckSpecialCharacterClass(type='%c', offset=%d, "
+ "check_offset=%s, label[%08x]): %s;\n",
+ type,
+ cp_offset,
+ check_offset ? "true" : "false",
+ on_no_match,
+ supported ? "true" : "false");
+ return supported;
+}
+
+
void RegExpMacroAssemblerTracer::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchHalfNibbleMap(start=u$04x, <half_nibble_map>, [", start);
+ PrintF(" DispatchHalfNibbleMap(start=u%04x, <half_nibble_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
@@ -297,7 +339,7 @@
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchByteMap(start=u$04x, <byte_map>, [", start);
+ PrintF(" DispatchByteMap(start=u%04x, <byte_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
@@ -312,7 +354,7 @@
byte start,
Label* byte_map,
const Vector<Label*>& destinations) {
- PrintF(" DispatchHighByteMap(start=u$04x, <byte_map>, [", start);
+ PrintF(" DispatchHighByteMap(start=u%04x, <byte_map>, [", start);
for (int i = 0; i < destinations.length(); i++) {
if (i > 0)
PrintF(", ");
diff --git a/src/regexp-macro-assembler-tracer.h b/src/regexp-macro-assembler-tracer.h
index 88d4cc1..0f3eb3f 100644
--- a/src/regexp-macro-assembler-tracer.h
+++ b/src/regexp-macro-assembler-tracer.h
@@ -41,7 +41,10 @@
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
- virtual void CheckCharacter(uc16 c, Label* on_equal);
+ virtual void CheckCharacter(uint32_t c, Label* on_equal);
+ virtual void CheckCharacterAfterAnd(uint32_t c,
+ uint32_t and_with,
+ Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(
@@ -55,13 +58,18 @@
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
- virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
- virtual void CheckNotCharacterAfterOr(uc16 c,
- uc16 or_with,
- Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusOr(uc16 c,
- uc16 minus_then_or_with,
- Label* on_not_equal);
+ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+ virtual void CheckNotCharacterAfterAnd(uint32_t c,
+ uint32_t and_with,
+ Label* on_not_equal);
+ virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+ uc16 minus,
+ uc16 and_with,
+ Label* on_not_equal);
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match);
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
@@ -81,8 +89,10 @@
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
- virtual void LoadCurrentCharacterUnchecked(int cp_offset);
+ virtual void LoadCurrentCharacter(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds = true,
+ int characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/src/regexp-macro-assembler.h b/src/regexp-macro-assembler.h
index 5ed1523..9d3ce5f 100644
--- a/src/regexp-macro-assembler.h
+++ b/src/regexp-macro-assembler.h
@@ -58,7 +58,12 @@
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
- virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
+ virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0;
+ // Bitwise and the current character with the given constant and then
+ // check for a match with c.
+ virtual void CheckCharacterAfterAnd(uint32_t c,
+ uint32_t and_with,
+ Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
@@ -81,20 +86,29 @@
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
- virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
- // Bitwise or the current character with the given constant and then
- // check for a match with c.
- virtual void CheckNotCharacterAfterOr(uc16 c,
- uc16 or_with,
- Label* on_not_equal) = 0;
+ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal) = 0;
+ virtual void CheckNotCharacterAfterAnd(uint32_t c,
+ uint32_t and_with,
+ Label* on_not_equal) = 0;
// Subtract a constant from the current character, then or with the given
// constant and then check for a match with c.
- virtual void CheckNotCharacterAfterMinusOr(uc16 c,
- uc16 minus_then_or_with,
- Label* on_not_equal) = 0;
+ virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+ uc16 minus,
+ uc16 and_with,
+ Label* on_not_equal) = 0;
virtual void CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) = 0;
+ // Check whether a standard/default character class matches the current
+ // character. Returns false if the type of special character class does
+ // not have custom support.
+ // May clobber the current loaded character.
+ virtual bool CheckSpecialCharacterClass(uc16 type,
+ int cp_offset,
+ bool check_offset,
+ Label* on_no_match) {
+ return false;
+ }
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
@@ -122,8 +136,10 @@
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0;
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
- virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
+ virtual void LoadCurrentCharacter(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds = true,
+ int characters = 1) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0;
diff --git a/src/runtime.cc b/src/runtime.cc
index 8c3d043..931c2a0 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -919,6 +919,18 @@
}
+static Object* Runtime_FunctionIsAPIFunction(Arguments args) {
+ NoHandleAllocation ha;
+ ASSERT(args.length() == 1);
+
+ CONVERT_CHECKED(JSFunction, f, args[0]);
+ // The function_data field of the shared function info is used exclusively by
+ // the API.
+ return !f->shared()->function_data()->IsUndefined() ? Heap::true_value()
+ : Heap::false_value();
+}
+
+
static Object* Runtime_SetCode(Arguments args) {
HandleScope scope;
ASSERT(args.length() == 2);
@@ -1049,7 +1061,7 @@
};
// buffers reused by BoyerMoore
-static int bad_char_occurence[kBMAlphabetSize];
+static int bad_char_occurrence[kBMAlphabetSize];
static BMGoodSuffixBuffers bmgs_buffers;
// Compute the bad-char table for Boyer-Moore in the static buffer.
@@ -1062,16 +1074,16 @@
int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1
: kBMAlphabetSize;
if (start == 0) { // All patterns less than kBMMaxShift in length.
- memset(bad_char_occurence, -1, table_size * sizeof(*bad_char_occurence));
+ memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));
} else {
for (int i = 0; i < table_size; i++) {
- bad_char_occurence[i] = start - 1;
+ bad_char_occurrence[i] = start - 1;
}
}
for (int i = start; i < pattern.length() - 1; i++) {
pchar c = pattern[i];
int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;
- bad_char_occurence[bucket] = i;
+ bad_char_occurrence[bucket] = i;
}
}
@@ -1126,28 +1138,27 @@
}
template <typename schar, typename pchar>
-static inline int CharOccurence(int char_code) {
+static inline int CharOccurrence(int char_code) {
if (sizeof(schar) == 1) {
- return bad_char_occurence[char_code];
+ return bad_char_occurrence[char_code];
}
if (sizeof(pchar) == 1) {
if (char_code > String::kMaxAsciiCharCode) {
return -1;
}
- return bad_char_occurence[char_code];
+ return bad_char_occurrence[char_code];
}
- return bad_char_occurence[char_code % kBMAlphabetSize];
+ return bad_char_occurrence[char_code % kBMAlphabetSize];
}
-// Restricted simplified Boyer-Moore string matching. Restricts tables to a
-// suffix of long pattern strings and handles only equivalence classes
-// of the full alphabet. This allows us to ensure that tables take only
-// a fixed amount of space.
+// Restricted simplified Boyer-Moore string matching.
+// Uses only the bad-shift table of Boyer-Moore and only uses it
+// for the character compared to the last character of the needle.
template <typename schar, typename pchar>
-static int BoyerMooreSimplified(Vector<const schar> subject,
- Vector<const pchar> pattern,
- int start_index,
- bool* complete) {
+static int BoyerMooreHorsepool(Vector<const schar> subject,
+ Vector<const pchar> pattern,
+ int start_index,
+ bool* complete) {
int n = subject.length();
int m = pattern.length();
// Only preprocess at most kBMMaxShift last characters of pattern.
@@ -1158,12 +1169,13 @@
int badness = -m; // How bad we are doing without a good-suffix table.
int idx; // No matches found prior to this index.
pchar last_char = pattern[m - 1];
+ int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char);
// Perform search
for (idx = start_index; idx <= n - m;) {
int j = m - 1;
int c;
while (last_char != (c = subject[idx + j])) {
- int bc_occ = CharOccurence<schar, pchar>(c);
+ int bc_occ = CharOccurrence<schar, pchar>(c);
int shift = j - bc_occ;
idx += shift;
badness += 1 - shift; // at most zero, so badness cannot increase.
@@ -1173,19 +1185,17 @@
}
}
j--;
- while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
+ while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
if (j < 0) {
*complete = true;
return idx;
} else {
- int bc_occ = CharOccurence<schar, pchar>(c);
- int shift = bc_occ < j ? j - bc_occ : 1;
- idx += shift;
+ idx += last_char_shift;
// Badness increases by the number of characters we have
// checked, and decreases by the number of characters we
// can skip by shifting. It's a measure of how we are doing
// compared to reading each character exactly once.
- badness += (m - j) - shift;
+ badness += (m - j) - last_char_shift;
if (badness > 0) {
*complete = false;
return idx;
@@ -1214,7 +1224,7 @@
int j = m - 1;
schar c;
while (last_char != (c = subject[idx + j])) {
- int shift = j - CharOccurence<schar, pchar>(c);
+ int shift = j - CharOccurrence<schar, pchar>(c);
idx += shift;
if (idx > n - m) {
return -1;
@@ -1225,12 +1235,15 @@
return idx;
} else if (j < start) {
// we have matched more than our tables allow us to be smart about.
- idx += 1;
+ // Fall back on BMH shift.
+ idx += m - 1 - CharOccurrence<schar, pchar>(last_char);
} else {
int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift.
- int bc_occ = CharOccurence<schar, pchar>(c);
+ int bc_occ = CharOccurrence<schar, pchar>(c);
int shift = j - bc_occ; // Bad-char shift.
- shift = (gs_shift > shift) ? gs_shift : shift;
+ if (gs_shift > shift) {
+ shift = gs_shift;
+ }
idx += shift;
}
} while (idx <= n - m);
@@ -1274,7 +1287,7 @@
badness++;
if (badness > 0) {
*complete = false;
- return (i);
+ return i;
}
if (subject[i] != pattern_first_char) continue;
int j = 1;
@@ -1345,7 +1358,7 @@
bool complete;
int idx = SimpleIndexOf(sub, pat, start_index, &complete);
if (complete) return idx;
- idx = BoyerMooreSimplified(sub, pat, idx, &complete);
+ idx = BoyerMooreHorsepool(sub, pat, idx, &complete);
if (complete) return idx;
return BoyerMooreIndexOf(sub, pat, idx);
}
@@ -3310,16 +3323,10 @@
if (constructor->IsJSFunction()) {
JSFunction* function = JSFunction::cast(constructor);
- // Handle steping into constructors.
+ // Handle stepping into constructors if step into is active.
if (Debug::StepInActive()) {
- StackFrameIterator it;
- it.Advance();
- ASSERT(it.frame()->is_construct());
- it.Advance();
- if (it.frame()->fp() == Debug::step_in_fp()) {
- HandleScope scope;
- Debug::FloodWithOneShot(Handle<SharedFunctionInfo>(function->shared()));
- }
+ HandleScope scope;
+ Debug::HandleStepIn(Handle<JSFunction>(function), 0, true);
}
if (function->has_initial_map() &&
@@ -3415,19 +3422,15 @@
return result; // non-failure
}
-
-static Object* Runtime_PushContext(Arguments args) {
- NoHandleAllocation ha;
- ASSERT(args.length() == 1);
-
+static Object* PushContextHelper(Object* object, bool is_catch_context) {
// Convert the object to a proper JavaScript object.
- Object* object = args[0];
- if (!object->IsJSObject()) {
- object = object->ToObject();
- if (object->IsFailure()) {
- if (!Failure::cast(object)->IsInternalError()) return object;
+ Object* js_object = object;
+ if (!js_object->IsJSObject()) {
+ js_object = js_object->ToObject();
+ if (js_object->IsFailure()) {
+ if (!Failure::cast(js_object)->IsInternalError()) return js_object;
HandleScope scope;
- Handle<Object> handle(args[0]);
+ Handle<Object> handle(object);
Handle<Object> result =
Factory::NewTypeError("with_expression", HandleVector(&handle, 1));
return Top::Throw(*result);
@@ -3435,15 +3438,32 @@
}
Object* result =
- Heap::AllocateWithContext(Top::context(), JSObject::cast(object));
+ Heap::AllocateWithContext(Top::context(),
+ JSObject::cast(js_object),
+ is_catch_context);
if (result->IsFailure()) return result;
- Top::set_context(Context::cast(result));
+ Context* context = Context::cast(result);
+ Top::set_context(context);
return result;
}
+static Object* Runtime_PushContext(Arguments args) {
+ NoHandleAllocation ha;
+ ASSERT(args.length() == 1);
+ return PushContextHelper(args[0], false);
+}
+
+
+static Object* Runtime_PushCatchContext(Arguments args) {
+ NoHandleAllocation ha;
+ ASSERT(args.length() == 1);
+ return PushContextHelper(args[0], true);
+}
+
+
static Object* Runtime_LookupContext(Arguments args) {
HandleScope scope;
ASSERT(args.length() == 2);
@@ -3541,9 +3561,14 @@
if (!holder.is_null() && holder->IsJSObject()) {
ASSERT(Handle<JSObject>::cast(holder)->HasProperty(*name));
JSObject* object = JSObject::cast(*holder);
- JSObject* receiver = (object->IsGlobalObject())
- ? GlobalObject::cast(object)->global_receiver()
- : ComputeReceiverForNonGlobal(object);
+ JSObject* receiver;
+ if (object->IsGlobalObject()) {
+ receiver = GlobalObject::cast(object)->global_receiver();
+ } else if (context->is_exception_holder(*holder)) {
+ receiver = Top::context()->global()->global_receiver();
+ } else {
+ receiver = ComputeReceiverForNonGlobal(object);
+ }
// No need to unhole the value here. This is taken care of by the
// GetProperty function.
Object* value = object->GetProperty(*name);
@@ -3664,61 +3689,9 @@
}
-static Object* RuntimePreempt(Arguments args) {
- // Clear the preempt request flag.
- StackGuard::Continue(PREEMPT);
-
- ContextSwitcher::PreemptionReceived();
-
- {
- v8::Unlocker unlocker;
- Thread::YieldCPU();
- }
-
- return Heap::undefined_value();
-}
-
-
-static Object* DebugBreakHelper() {
- // Just continue if breaks are disabled.
- if (Debug::disable_break()) {
- return Heap::undefined_value();
- }
-
- // Don't break in system functions. If the current function is
- // either in the builtins object of some context or is in the debug
- // context just return with the debug break stack guard active.
- JavaScriptFrameIterator it;
- JavaScriptFrame* frame = it.frame();
- Object* fun = frame->function();
- if (fun->IsJSFunction()) {
- GlobalObject* global = JSFunction::cast(fun)->context()->global();
- if (global->IsJSBuiltinsObject() || Debug::IsDebugGlobal(global)) {
- return Heap::undefined_value();
- }
- }
-
- // Clear the debug request flag.
- StackGuard::Continue(DEBUGBREAK);
-
- HandleScope scope;
- // Enter the debugger. Just continue if we fail to enter the debugger.
- EnterDebugger debugger;
- if (debugger.FailedToEnter()) {
- return Heap::undefined_value();
- }
-
- // Notify the debug event listeners.
- Debugger::OnDebugBreak(Factory::undefined_value());
-
- // Return to continue execution.
- return Heap::undefined_value();
-}
-
-
static Object* Runtime_DebugBreak(Arguments args) {
ASSERT(args.length() == 0);
- return DebugBreakHelper();
+ return Execution::DebugBreakHelper();
}
@@ -3728,16 +3701,7 @@
// First check if this is a real stack overflow.
if (StackGuard::IsStackOverflow()) return Runtime_StackOverflow(args);
- // If not real stack overflow the stack guard was used to interrupt
- // execution for another purpose.
- if (StackGuard::IsDebugBreak()) DebugBreakHelper();
- if (StackGuard::IsPreempted()) RuntimePreempt(args);
- if (StackGuard::IsInterrupted()) {
- // interrupt
- StackGuard::Continue(INTERRUPT);
- return Top::StackOverflow();
- }
- return Heap::undefined_value();
+ return Execution::HandleStackGuardInterrupt();
}
@@ -5268,7 +5232,9 @@
Handle<Context> previous(context_chain->previous());
Handle<JSObject> extension(JSObject::cast(context_chain->extension()));
return Factory::NewWithContext(
- CopyWithContextChain(function_context, previous), extension);
+ CopyWithContextChain(function_context, previous),
+ extension,
+ context_chain->IsCatchContext());
}
@@ -5874,6 +5840,16 @@
#endif
+static Object* Runtime_Log(Arguments args) {
+ ASSERT(args.length() == 2);
+ String* format = String::cast(args[0]);
+ Vector<const char> chars = format->ToAsciiVector();
+ JSArray* elms = JSArray::cast(args[1]);
+ Logger::LogRuntime(chars, elms);
+ return Heap::undefined_value();
+}
+
+
static Object* Runtime_IS_VAR(Arguments args) {
UNREACHABLE(); // implemented as macro in the parser
return NULL;
diff --git a/src/runtime.h b/src/runtime.h
index 98559d4..cb8d40b 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -160,6 +160,7 @@
F(FunctionGetSourceCode, 1) \
F(FunctionGetScript, 1) \
F(FunctionGetScriptSourcePosition, 1) \
+ F(FunctionIsAPIFunction, 1) \
F(GetScript, 1) \
\
F(ClassOf, 1) \
@@ -263,6 +264,7 @@
/* Contexts */ \
F(NewContext, 1) \
F(PushContext, 1) \
+ F(PushCatchContext, 1) \
F(LookupContext, 2) \
F(LoadContextSlot, 2) \
F(LoadContextSlotNoReferenceError, 2) \
@@ -283,6 +285,8 @@
F(DebugBreak, 0) \
F(FunctionGetAssemblerCode, 1) \
F(Abort, 2) \
+ /* Logging */ \
+ F(Log, 2) \
\
/* Pseudo functions - handled as macros by parser */ \
F(IS_VAR, 1)
diff --git a/src/string.js b/src/string.js
index 78606a6..614d541 100644
--- a/src/string.js
+++ b/src/string.js
@@ -152,6 +152,7 @@
var subject = ToString(this);
if (!regexp.global) return regexp.exec(subject);
+ %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
var matches = DoRegExpExecGlobal(regexp, subject);
// If the regexp did not match, return null.
@@ -185,6 +186,7 @@
// Delegate to one of the regular expression variants if necessary.
if (IS_REGEXP(search)) {
+ %_Log('regexp', 'regexp-replace,%0r,%1S', [search, subject]);
if (IS_FUNCTION(replace)) {
return StringReplaceRegExpWithFunction(subject, search, replace);
} else {
@@ -513,7 +515,13 @@
var currentIndex = 0;
var startIndex = 0;
- var sep = IS_REGEXP(separator) ? separator : ToString(separator);
+ var sep;
+ if (IS_REGEXP(separator)) {
+ sep = separator;
+ %_Log('regexp', 'regexp-split,%0S,%1r', [subject, sep]);
+ } else {
+ sep = ToString(separator);
+ }
if (length === 0) {
if (splitMatch(sep, subject, 0, 0) != null) return result;
diff --git a/src/stub-cache-ia32.cc b/src/stub-cache-ia32.cc
index c2be65d..f35877f 100644
--- a/src/stub-cache-ia32.cc
+++ b/src/stub-cache-ia32.cc
@@ -227,8 +227,8 @@
// are loaded directly otherwise the property is loaded from the properties
// fixed array.
void StubCompiler::GenerateFastPropertyLoad(MacroAssembler* masm,
- Register dst, Register src,
- JSObject* holder, int index) {
+ Register dst, Register src,
+ JSObject* holder, int index) {
// Adjust for the number of properties stored in the holder.
index -= holder->map()->inobject_properties();
if (index < 0) {
diff --git a/src/utils.h b/src/utils.h
index 4a47841..c8a4386 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -372,6 +372,23 @@
};
+// A temporary assignment sets a (non-local) variable to a value on
+// construction and resets it to the old value on destruction.
+template <typename T>
+class TempAssign {
+ public:
+ TempAssign(T* var, T value): var_(var), old_value_(*var) {
+ *var = value;
+ }
+
+ ~TempAssign() { *var_ = old_value_; }
+
+ private:
+ T* var_;
+ T old_value_;
+};
+
+
template <typename T, int kSize>
class EmbeddedVector : public Vector<T> {
public:
diff --git a/src/v8-counters.h b/src/v8-counters.h
index 586e002..93e8970 100644
--- a/src/v8-counters.h
+++ b/src/v8-counters.h
@@ -113,6 +113,8 @@
SC(keyed_load_field, V8.KeyedLoadField) \
SC(keyed_load_callback, V8.KeyedLoadCallback) \
SC(keyed_load_interceptor, V8.KeyedLoadInterceptor) \
+ SC(keyed_load_inline, V8.KeyedLoadInline) \
+ SC(keyed_load_inline_miss, V8.KeyedLoadInlineMiss) \
SC(keyed_store_field, V8.KeyedStoreField) \
SC(for_in, V8.ForIn) \
SC(enum_cache_hits, V8.EnumCacheHits) \
diff --git a/src/v8threads.cc b/src/v8threads.cc
index 2b4a027..6b7533b 100644
--- a/src/v8threads.cc
+++ b/src/v8threads.cc
@@ -269,62 +269,64 @@
}
+// This is the ContextSwitcher singleton. There is at most a single thread
+// running which delivers preemption events to V8 threads.
+ContextSwitcher* ContextSwitcher::singleton_ = NULL;
+
+
ContextSwitcher::ContextSwitcher(int every_n_ms)
- : preemption_semaphore_(OS::CreateSemaphore(0)),
- keep_going_(true),
+ : keep_going_(true),
sleep_ms_(every_n_ms) {
}
-static v8::internal::ContextSwitcher* switcher;
-
-
+// Set the scheduling interval of V8 threads. This function starts the
+// ContextSwitcher thread if needed.
void ContextSwitcher::StartPreemption(int every_n_ms) {
ASSERT(Locker::IsLocked());
- if (switcher == NULL) {
- switcher = new ContextSwitcher(every_n_ms);
- switcher->Start();
+ if (singleton_ == NULL) {
+ // If the ContextSwitcher thread is not running at the moment start it now.
+ singleton_ = new ContextSwitcher(every_n_ms);
+ singleton_->Start();
} else {
- switcher->sleep_ms_ = every_n_ms;
+ // ContextSwitcher thread is already running, so we just change the
+ // scheduling interval.
+ singleton_->sleep_ms_ = every_n_ms;
}
}
+// Disable preemption of V8 threads. If multiple threads want to use V8 they
+// must cooperatively schedule amongst them from this point on.
void ContextSwitcher::StopPreemption() {
ASSERT(Locker::IsLocked());
- if (switcher != NULL) {
- switcher->Stop();
- delete(switcher);
- switcher = NULL;
+ if (singleton_ != NULL) {
+ // The ContextSwitcher thread is running. We need to stop it and release
+ // its resources.
+ singleton_->keep_going_ = false;
+ singleton_->Join(); // Wait for the ContextSwitcher thread to exit.
+ // Thread has exited, now we can delete it.
+ delete(singleton_);
+ singleton_ = NULL;
}
}
+// Main loop of the ContextSwitcher thread: Preempt the currently running V8
+// thread at regular intervals.
void ContextSwitcher::Run() {
while (keep_going_) {
OS::Sleep(sleep_ms_);
StackGuard::Preempt();
- WaitForPreemption();
}
}
-void ContextSwitcher::Stop() {
- ASSERT(Locker::IsLocked());
- keep_going_ = false;
- preemption_semaphore_->Signal();
- Join();
-}
-
-
-void ContextSwitcher::WaitForPreemption() {
- preemption_semaphore_->Wait();
-}
-
-
+// Acknowledge the preemption by the receiving thread.
void ContextSwitcher::PreemptionReceived() {
ASSERT(Locker::IsLocked());
- switcher->preemption_semaphore_->Signal();
+ // There is currently no accounting being done for this, but there could be
+ // in the future, which is why we leave this in.
}
diff --git a/src/v8threads.h b/src/v8threads.h
index f5c844d..557a8f1 100644
--- a/src/v8threads.h
+++ b/src/v8threads.h
@@ -87,19 +87,31 @@
};
+// The ContextSwitcher thread is used to schedule regular preemptions to
+// multiple running V8 threads. Generally it is necessary to call
+// StartPreemption if there is more than one thread running. If not, a single
+// JavaScript can take full control of V8 and not allow other threads to run.
class ContextSwitcher: public Thread {
public:
- void Run();
+ // Set the preemption interval for the ContextSwitcher thread.
static void StartPreemption(int every_n_ms);
+
+ // Stop sending preemption requests to threads.
static void StopPreemption();
+
+ // Preempted thread needs to call back to the ContextSwitcher to acknowledge
+ // the handling of a preemption request.
static void PreemptionReceived();
+
private:
explicit ContextSwitcher(int every_n_ms);
- void WaitForPreemption();
- void Stop();
- Semaphore* preemption_semaphore_;
+
+ void Run();
+
bool keep_going_;
int sleep_ms_;
+
+ static ContextSwitcher* singleton_;
};
} } // namespace v8::internal
diff --git a/test/cctest/SConscript b/test/cctest/SConscript
index c2a96f2..c70b07d 100644
--- a/test/cctest/SConscript
+++ b/test/cctest/SConscript
@@ -38,7 +38,7 @@
'test-ast.cc', 'test-heap.cc', 'test-utils.cc', 'test-compiler.cc',
'test-spaces.cc', 'test-mark-compact.cc', 'test-lock.cc',
'test-conversions.cc', 'test-strings.cc', 'test-serialize.cc',
- 'test-decls.cc', 'test-alloc.cc', 'test-regexp.cc'
+ 'test-decls.cc', 'test-alloc.cc', 'test-regexp.cc', 'test-threads.cc'
],
'arch:arm': ['test-assembler-arm.cc', 'test-disasm-arm.cc'],
'arch:ia32': ['test-assembler-ia32.cc', 'test-disasm-ia32.cc'],
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
index 0f6ceab..926272e 100644
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -5094,6 +5094,12 @@
static int GetSurvivingGlobalObjectsCount() {
int count = 0;
+ // We need to collect all garbage twice to be sure that everything
+ // has been collected. This is because inline caches are cleared in
+ // the first garbage collection but some of the maps have already
+ // been marked at that point. Therefore some of the maps are not
+ // collected until the second garbage collection.
+ v8::internal::Heap::CollectAllGarbage();
v8::internal::Heap::CollectAllGarbage();
v8::internal::HeapIterator it;
while (it.has_next()) {
@@ -5114,10 +5120,6 @@
v8::V8::Initialize();
- // TODO(121): when running "cctest test-api", the initial count is 2,
- // after second GC, the counter drops to 1. Needs to figure out why
- // one GC is not enough to collect all garbage.
- GetSurvivingGlobalObjectsCount();
int count = GetSurvivingGlobalObjectsCount();
for (int i = 0; i < 5; i++) {
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index 782bb11..19ced16 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -63,7 +63,7 @@
return output;
}
-static bool ParseEscapes(const char* input) {
+static bool CheckSimple(const char* input) {
V8::Initialize(NULL);
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
@@ -73,13 +73,39 @@
CHECK(v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
- return result.has_character_escapes;
+ return result.simple;
+}
+
+struct MinMaxPair {
+ int min_match;
+ int max_match;
+};
+
+static MinMaxPair CheckMinMaxMatch(const char* input) {
+ V8::Initialize(NULL);
+ v8::HandleScope scope;
+ unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ FlatStringReader reader(CStrVector(input));
+ RegExpCompileData result;
+ CHECK(v8::internal::ParseRegExp(&reader, false, &result));
+ CHECK(result.tree != NULL);
+ CHECK(result.error.is_null());
+ int min_match = result.tree->min_match();
+ int max_match = result.tree->max_match();
+ MinMaxPair pair = { min_match, max_match };
+ return pair;
}
+
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
-#define CHECK_ESCAPES(input, has_escapes) CHECK_EQ(has_escapes, \
- ParseEscapes(input));
+#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
+#define CHECK_MIN_MAX(input, min, max) \
+ { MinMaxPair min_max = CheckMinMaxMatch(input); \
+ CHECK_EQ(min, min_max.min_match); \
+ CHECK_EQ(max, min_max.max_match); \
+ }
TEST(Parser) {
V8::Initialize(NULL);
@@ -168,6 +194,11 @@
CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
+ CHECK_PARSE_EQ("(?=a)?a", "'a'");
+ CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
+ CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
+ CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
+ CHECK_PARSE_EQ("(?!a)?a", "'a'");
CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");
@@ -186,47 +217,50 @@
CHECK_PARSE_EQ("\\u003z", "'u003z'");
CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
- CHECK_ESCAPES("a", false);
- CHECK_ESCAPES("a|b", false);
- CHECK_ESCAPES("a\\n", true);
- CHECK_ESCAPES("^a", false);
- CHECK_ESCAPES("a$", false);
- CHECK_ESCAPES("a\\b!", false);
- CHECK_ESCAPES("a\\Bb", false);
- CHECK_ESCAPES("a*", false);
- CHECK_ESCAPES("a*?", false);
- CHECK_ESCAPES("a?", false);
- CHECK_ESCAPES("a??", false);
- CHECK_ESCAPES("a{0,1}?", false);
- CHECK_ESCAPES("a{1,1}?", false);
- CHECK_ESCAPES("a{1,2}?", false);
- CHECK_ESCAPES("a+?", false);
- CHECK_ESCAPES("(a)", false);
- CHECK_ESCAPES("(a)\\1", false);
- CHECK_ESCAPES("(\\1a)", false);
- CHECK_ESCAPES("\\1(a)", false);
- CHECK_ESCAPES("a\\s", false);
- CHECK_ESCAPES("a\\S", false);
- CHECK_ESCAPES("a\\d", false);
- CHECK_ESCAPES("a\\D", false);
- CHECK_ESCAPES("a\\w", false);
- CHECK_ESCAPES("a\\W", false);
- CHECK_ESCAPES("a.", false);
- CHECK_ESCAPES("a\\q", true);
- CHECK_ESCAPES("a[a]", false);
- CHECK_ESCAPES("a[^a]", false);
- CHECK_ESCAPES("a[a-z]", false);
- CHECK_ESCAPES("a[\\q]", false);
- CHECK_ESCAPES("a(?:b)", false);
- CHECK_ESCAPES("a(?=b)", false);
- CHECK_ESCAPES("a(?!b)", false);
- CHECK_ESCAPES("\\x60", true);
- CHECK_ESCAPES("\\u0060", true);
- CHECK_ESCAPES("\\cA", true);
- CHECK_ESCAPES("\\q", true);
- CHECK_ESCAPES("\\1112", true);
- CHECK_ESCAPES("\\0", true);
- CHECK_ESCAPES("(a)\\1", false);
+ CHECK_SIMPLE("a", true);
+ CHECK_SIMPLE("a|b", false);
+ CHECK_SIMPLE("a\\n", false);
+ CHECK_SIMPLE("^a", false);
+ CHECK_SIMPLE("a$", false);
+ CHECK_SIMPLE("a\\b!", false);
+ CHECK_SIMPLE("a\\Bb", false);
+ CHECK_SIMPLE("a*", false);
+ CHECK_SIMPLE("a*?", false);
+ CHECK_SIMPLE("a?", false);
+ CHECK_SIMPLE("a??", false);
+ CHECK_SIMPLE("a{0,1}?", false);
+ CHECK_SIMPLE("a{1,1}?", false);
+ CHECK_SIMPLE("a{1,2}?", false);
+ CHECK_SIMPLE("a+?", false);
+ CHECK_SIMPLE("(a)", false);
+ CHECK_SIMPLE("(a)\\1", false);
+ CHECK_SIMPLE("(\\1a)", false);
+ CHECK_SIMPLE("\\1(a)", false);
+ CHECK_SIMPLE("a\\s", false);
+ CHECK_SIMPLE("a\\S", false);
+ CHECK_SIMPLE("a\\d", false);
+ CHECK_SIMPLE("a\\D", false);
+ CHECK_SIMPLE("a\\w", false);
+ CHECK_SIMPLE("a\\W", false);
+ CHECK_SIMPLE("a.", false);
+ CHECK_SIMPLE("a\\q", false);
+ CHECK_SIMPLE("a[a]", false);
+ CHECK_SIMPLE("a[^a]", false);
+ CHECK_SIMPLE("a[a-z]", false);
+ CHECK_SIMPLE("a[\\q]", false);
+ CHECK_SIMPLE("a(?:b)", false);
+ CHECK_SIMPLE("a(?=b)", false);
+ CHECK_SIMPLE("a(?!b)", false);
+ CHECK_SIMPLE("\\x60", false);
+ CHECK_SIMPLE("\\u0060", false);
+ CHECK_SIMPLE("\\cA", false);
+ CHECK_SIMPLE("\\q", false);
+ CHECK_SIMPLE("\\1112", false);
+ CHECK_SIMPLE("\\0", false);
+ CHECK_SIMPLE("(a)\\1", false);
+ CHECK_SIMPLE("(?=a)?a", false);
+ CHECK_SIMPLE("(?!a)?a\\1", false);
+ CHECK_SIMPLE("(?:(?=a))a\\1", false);
CHECK_PARSE_EQ("a{}", "'a{}'");
CHECK_PARSE_EQ("a{,}", "'a{,}'");
@@ -244,6 +278,55 @@
CHECK_PARSE_EQ("{12z}", "'{12z}'");
CHECK_PARSE_EQ("{12,", "'{12,'");
CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
+
+ CHECK_MIN_MAX("a", 1, 1);
+ CHECK_MIN_MAX("abc", 3, 3);
+ CHECK_MIN_MAX("a[bc]d", 3, 3);
+ CHECK_MIN_MAX("a|bc", 1, 2);
+ CHECK_MIN_MAX("ab|c", 1, 2);
+ CHECK_MIN_MAX("a||bc", 0, 2);
+ CHECK_MIN_MAX("|", 0, 0);
+ CHECK_MIN_MAX("(?:ab)", 2, 2);
+ CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
+ CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
+ CHECK_MIN_MAX("(ab)", 2, 2);
+ CHECK_MIN_MAX("(ab|cde)", 2, 3);
+ CHECK_MIN_MAX("(ab)\\1", 4, 4);
+ CHECK_MIN_MAX("(ab|cde)\\1", 4, 6);
+ CHECK_MIN_MAX("(?:ab)?", 0, 2);
+ CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a?", 0, 1);
+ CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a??", 0, 1);
+ CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)?", 0, 1);
+ CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a{0}", 0, 0);
+ CHECK_MIN_MAX("(?:a+){0}", 0, 0);
+ CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
+ CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
+ CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
+ CHECK_MIN_MAX("a\\bc", 2, 2);
+ CHECK_MIN_MAX("a\\Bc", 2, 2);
+ CHECK_MIN_MAX("a\\sc", 3, 3);
+ CHECK_MIN_MAX("a\\Sc", 3, 3);
+ CHECK_MIN_MAX("a(?=b)c", 2, 2);
+ CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
+ CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
}
TEST(ParserRegression) {
@@ -567,12 +650,26 @@
#ifndef ARM // IA32 only tests.
-TEST(MacroAssemblerIA32Success) {
- V8::Initialize(NULL);
+class ContextInitializer {
+ public:
+ ContextInitializer() : env_(), scope_(), stack_guard_() {
+ env_ = v8::Context::New();
+ env_->Enter();
+ }
+ ~ContextInitializer() {
+ env_->Exit();
+ env_.Dispose();
+ }
+ private:
+ v8::Persistent<v8::Context> env_;
+ v8::HandleScope scope_;
+ v8::internal::StackGuard stack_guard_;
+};
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+
+TEST(MacroAssemblerIA32Success) {
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
@@ -589,14 +686,15 @@
int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
int end_offset = start_offset + seq_input->length();
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- captures,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ captures,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(-1, captures[0]);
CHECK_EQ(-1, captures[1]);
CHECK_EQ(-1, captures[2]);
@@ -605,11 +703,8 @@
TEST(MacroAssemblerIA32Simple) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
@@ -636,14 +731,15 @@
int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
int end_offset = start_offset + seq_input->length();
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- captures,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ captures,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]);
@@ -655,23 +751,20 @@
start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
end_offset = start_offset + seq_input->length();
- success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- captures,
- true);
+ result = RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ captures,
+ true);
- CHECK(!success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
TEST(MacroAssemblerIA32SimpleUC16) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4);
@@ -700,14 +793,15 @@
int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
int end_offset = start_offset + seq_input->length() * sizeof(uc16);
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- captures,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ captures,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(-1, captures[2]);
@@ -720,23 +814,20 @@
start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
end_offset = start_offset + seq_input->length() * sizeof(uc16);
- success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- captures,
- true);
+ result = RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ captures,
+ true);
- CHECK(!success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
TEST(MacroAssemblerIA32Backtrack) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
@@ -761,23 +852,21 @@
int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
int end_offset = start_offset + seq_input->length();
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- NULL,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ NULL,
+ true);
- CHECK(!success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
-TEST(MacroAssemblerIA32BackReference) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+TEST(MacroAssemblerIA32BackReferenceASCII) {
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
@@ -807,26 +896,75 @@
int end_offset = start_offset + seq_input->length();
int output[3];
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- output,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ output,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(2, output[1]);
CHECK_EQ(6, output[2]);
}
-TEST(MacroAssemblerIA32AtStart) {
- V8::Initialize(NULL);
+TEST(MacroAssemblerIA32BackReferenceUC16) {
+ v8::V8::Initialize();
+ ContextInitializer initializer;
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 3);
+
+ m.WriteCurrentPositionToRegister(0, 0);
+ m.AdvanceCurrentPosition(2);
+ m.WriteCurrentPositionToRegister(1, 0);
+ Label nomatch;
+ m.CheckNotBackReference(0, &nomatch);
+ m.Fail();
+ m.Bind(&nomatch);
+ m.AdvanceCurrentPosition(2);
+ Label missing_match;
+ m.CheckNotBackReference(0, &missing_match);
+ m.WriteCurrentPositionToRegister(2, 0);
+ m.Succeed();
+ m.Bind(&missing_match);
+ m.Fail();
+
+ Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
+ Handle<Object> code_object = m.GetCode(source);
+ Handle<Code> code = Handle<Code>::cast(code_object);
+
+ const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
+ Handle<String> input =
+ Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
+ Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
+ Address start_adr = seq_input->GetCharsAddress();
+
+ int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
+ int end_offset = start_offset + seq_input->length() * sizeof(input_data[0]);
+
+ int output[3];
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ output,
+ true);
+
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
+ CHECK_EQ(0, output[0]);
+ CHECK_EQ(2, output[1]);
+ CHECK_EQ(6, output[2]);
+}
+
+
+
+TEST(MacroAssemblerIA32AtStart) {
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
@@ -861,35 +999,33 @@
int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
int end_offset = start_offset + seq_input->length();
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- NULL,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ NULL,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
start_offset += 3;
- success = RegExpMacroAssemblerIA32::Execute(*code,
+ result = RegExpMacroAssemblerIA32::Execute(*code,
seq_input.location(),
start_offset,
end_offset,
NULL,
false);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
}
TEST(MacroAssemblerIA32BackRefNoCase) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
@@ -928,14 +1064,15 @@
int end_offset = start_offset + seq_input->length();
int output[4];
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- output,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ output,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(12, output[1]);
CHECK_EQ(0, output[2]);
@@ -945,11 +1082,8 @@
TEST(MacroAssemblerIA32Registers) {
- V8::Initialize(NULL);
-
- // regexp-macro-assembler-ia32 needs a handle scope to allocate
- // byte-arrays for constants.
- v8::HandleScope scope;
+ v8::V8::Initialize();
+ ContextInitializer initializer;
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 5);
@@ -1031,14 +1165,15 @@
int end_offset = start_offset + seq_input->length();
int output[5];
- bool success = RegExpMacroAssemblerIA32::Execute(*code,
- seq_input.location(),
- start_offset,
- end_offset,
- output,
- true);
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ output,
+ true);
- CHECK(success);
+ CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
CHECK_EQ(0, output[0]);
CHECK_EQ(3, output[1]);
CHECK_EQ(6, output[2]);
@@ -1046,6 +1181,45 @@
CHECK_EQ(9, output[4]);
}
+
+TEST(MacroAssemblerIA32StackOverflow) {
+ v8::V8::Initialize();
+ ContextInitializer initializer;
+
+ RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
+
+ Label loop;
+ m.Bind(&loop);
+ m.PushBacktrack(&loop);
+ m.GoTo(&loop);
+
+ Handle<String> source =
+ Factory::NewStringFromAscii(CStrVector("<stack overflow test>"));
+ Handle<Object> code_object = m.GetCode(source);
+ Handle<Code> code = Handle<Code>::cast(code_object);
+
+ // String long enough for test (content doesn't matter).
+ Handle<String> input =
+ Factory::NewStringFromAscii(CStrVector("dummy"));
+ Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
+ Address start_adr = seq_input->GetCharsAddress();
+ int start_offset = start_adr - reinterpret_cast<Address>(*seq_input);
+ int end_offset = start_offset + seq_input->length();
+
+ RegExpMacroAssemblerIA32::Result result =
+ RegExpMacroAssemblerIA32::Execute(*code,
+ seq_input.location(),
+ start_offset,
+ end_offset,
+ NULL,
+ true);
+
+ CHECK_EQ(RegExpMacroAssemblerIA32::EXCEPTION, result);
+ CHECK(Top::has_pending_exception());
+ Top::clear_pending_exception();
+}
+
+
#endif // !defined ARM
TEST(AddInverseToTable) {
@@ -1292,5 +1466,5 @@
TEST(Graph) {
V8::Initialize(NULL);
- Execute("\\bboy\\b", false, true, true);
+ Execute("\\b\\w+\\b", false, true, true);
}
diff --git a/test/mjsunit/debug-stepin-constructor.js b/test/mjsunit/debug-stepin-constructor.js
index ecd1283..ec35ce1 100644
--- a/test/mjsunit/debug-stepin-constructor.js
+++ b/test/mjsunit/debug-stepin-constructor.js
@@ -68,7 +68,7 @@
break_break_point_hit_count = 0;
g();
-assertEquals(5, break_break_point_hit_count);
+assertEquals(4, break_break_point_hit_count);
// Get rid of the debug event listener.
Debug.removeListener(listener);
diff --git a/test/mjsunit/fuzz-natives.js b/test/mjsunit/fuzz-natives.js
index 5b5d6d9..a2c3217 100644
--- a/test/mjsunit/fuzz-natives.js
+++ b/test/mjsunit/fuzz-natives.js
@@ -123,7 +123,8 @@
"CreateObjectLiteralBoilerplate": true,
"CloneObjectLiteralBoilerplate": true,
"IS_VAR": true,
- "ResolvePossiblyDirectEval": true
+ "ResolvePossiblyDirectEval": true,
+ "Log": true
};
var currentlyUncallable = {
diff --git a/test/mjsunit/mjsunit.status b/test/mjsunit/mjsunit.status
index 6483b45..e030878 100644
--- a/test/mjsunit/mjsunit.status
+++ b/test/mjsunit/mjsunit.status
@@ -47,7 +47,6 @@
# Bug number 1020483: Debug tests fail on ARM.
debug-constructor: CRASH, FAIL
debug-continue: SKIP
-debug-backtrace: FAIL
debug-evaluate-recursive: CRASH, FAIL if $mode == debug
debug-changebreakpoint: CRASH, FAIL if $mode == debug
debug-clearbreakpoint: CRASH, FAIL if $mode == debug
diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js
index 46374cc..4422211 100644
--- a/test/mjsunit/regexp.js
+++ b/test/mjsunit/regexp.js
@@ -306,3 +306,13 @@
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));
+
+// Test that merging of quick test masks gets it right.
+assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
+assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
+assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
+assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
+assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');
diff --git a/tools/tickprocessor.py b/tools/tickprocessor.py
index e3df951..5ef6b5f 100644
--- a/tools/tickprocessor.py
+++ b/tools/tickprocessor.py
@@ -35,9 +35,12 @@
self.tick_count = 0
self.name = name
- def IncrementTickCount(self):
+ def Tick(self, pc):
self.tick_count += 1
+ def RegionTicks(self):
+ return None
+
def SetStartAddress(self, start_addr):
self.start_addr = start_addr
@@ -59,10 +62,45 @@
class JSCodeEntry(CodeEntry):
- def __init__(self, start_addr, name, type, size):
+ def __init__(self, start_addr, name, type, size, assembler):
CodeEntry.__init__(self, start_addr, name)
self.type = type
self.size = size
+ self.assembler = assembler
+ self.region_ticks = None
+
+ def Tick(self, pc):
+ super(JSCodeEntry, self).Tick(pc)
+ if not pc is None:
+ offset = pc - self.start_addr
+ seen = []
+ narrowest = None
+ narrowest_width = None
+ for region in self.Regions():
+ if region.Contains(offset):
+ if (not region.name in seen):
+ seen.append(region.name)
+ if narrowest is None or region.Width() < narrowest.Width():
+ narrowest = region
+ if len(seen) == 0:
+ return
+ if self.region_ticks is None:
+ self.region_ticks = {}
+ for name in seen:
+ if not name in self.region_ticks:
+ self.region_ticks[name] = [0, 0]
+ self.region_ticks[name][0] += 1
+ if name == narrowest.name:
+ self.region_ticks[name][1] += 1
+
+ def RegionTicks(self):
+ return self.region_ticks
+
+ def Regions(self):
+ if self.assembler:
+ return self.assembler.regions
+ else:
+ return []
def ToString(self):
name = self.name
@@ -70,12 +108,39 @@
return self.type + ': ' + name
+class CodeRegion(object):
+
+ def __init__(self, start_offset, name):
+ self.start_offset = start_offset
+ self.name = name
+ self.end_offset = None
+
+ def Contains(self, pc):
+ return (self.start_offset <= pc) and (pc <= self.end_offset)
+
+ def Width(self):
+ return self.end_offset - self.start_offset
+
+
+class Assembler(object):
+
+ def __init__(self):
+ # Mapping from region ids to open regions
+ self.pending_regions = {}
+ self.regions = []
+
+
class TickProcessor(object):
def __init__(self):
self.log_file = ''
self.deleted_code = []
self.vm_extent = {}
+ # Map from assembler ids to the pending assembler objects
+ self.pending_assemblers = {}
+ # Map from code addresses that have been allocated but not yet officially
+ # created to their assemblers.
+ self.assemblers = {}
self.js_entries = splaytree.SplayTree()
self.cpp_entries = splaytree.SplayTree()
self.total_number_of_ticks = 0
@@ -104,6 +169,12 @@
elif row[0] == 'shared-library':
self.AddSharedLibraryEntry(row[1], int(row[2], 16), int(row[3], 16))
self.ParseVMSymbols(row[1], int(row[2], 16), int(row[3], 16))
+ elif row[0] == 'begin-code-region':
+ self.ProcessBeginCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16), row[4])
+ elif row[0] == 'end-code-region':
+ self.ProcessEndCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16))
+ elif row[0] == 'code-allocate':
+ self.ProcessCodeAllocate(int(row[1], 16), int(row[2], 16))
finally:
logfile.close()
@@ -121,8 +192,17 @@
def ParseVMSymbols(self, filename, start, end):
return
+ def ProcessCodeAllocate(self, addr, assem):
+ if assem in self.pending_assemblers:
+ assembler = self.pending_assemblers.pop(assem)
+ self.assemblers[addr] = assembler
+
def ProcessCodeCreation(self, type, addr, size, name):
- self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size))
+ if addr in self.assemblers:
+ assembler = self.assemblers.pop(addr)
+ else:
+ assembler = None
+ self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size, assembler))
def ProcessCodeMove(self, from_addr, to_addr):
try:
@@ -139,6 +219,18 @@
except 'KeyNotFound':
print('Code delete event for unknown code: 0x%x' % from_addr)
+ def ProcessBeginCodeRegion(self, id, assm, start, name):
+ if not assm in self.pending_assemblers:
+ self.pending_assemblers[assm] = Assembler()
+ assembler = self.pending_assemblers[assm]
+ assembler.pending_regions[id] = CodeRegion(start, name)
+
+ def ProcessEndCodeRegion(self, id, assm, end):
+ assm = self.pending_assemblers[assm]
+ region = assm.pending_regions.pop(id)
+ region.end_offset = end
+ assm.regions.append(region)
+
def IncludeTick(self, pc, sp, state):
return (self.included_state is None) or (self.included_state == state)
@@ -152,12 +244,13 @@
entry = self.cpp_entries.FindGreatestsLessThan(pc).value
if entry.IsSharedLibraryEntry():
self.number_of_library_ticks += 1
- entry.IncrementTickCount()
+ entry.Tick(None)
return
max = self.js_entries.FindMax()
min = self.js_entries.FindMin()
if max != None and pc < max.key and pc > min.key:
- self.js_entries.FindGreatestsLessThan(pc).value.IncrementTickCount()
+ code_obj = self.js_entries.FindGreatestsLessThan(pc).value
+ code_obj.Tick(pc)
return
self.unaccounted_number_of_ticks += 1
@@ -201,6 +294,17 @@
'nonlib' : non_library_percentage,
'name' : entry.ToString()
})
+ region_ticks = entry.RegionTicks()
+ if not region_ticks is None:
+ items = region_ticks.items()
+ items.sort(key=lambda e: e[1][1], reverse=True)
+ for (name, ticks) in items:
+ print(' flat cum')
+ print(' %(flat)5.1f%% %(accum)5.1f%% %(name)s' % {
+ 'flat' : ticks[1] * 100.0 / entry.tick_count,
+ 'accum' : ticks[0] * 100.0 / entry.tick_count,
+ 'name': name
+ })
if __name__ == '__main__':
sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')
diff --git a/tools/v8.xcodeproj/project.pbxproj b/tools/v8.xcodeproj/project.pbxproj
index 893082f..d65d342 100644
--- a/tools/v8.xcodeproj/project.pbxproj
+++ b/tools/v8.xcodeproj/project.pbxproj
@@ -43,6 +43,8 @@
897FF1C60E719D7100D62E90 /* pcre_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 897FF0ED0E719B3500D62E90 /* pcre_tables.cpp */; };
897FF1C70E719D7300D62E90 /* pcre_ucp_searchfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 897FF0EE0E719B3500D62E90 /* pcre_ucp_searchfuncs.cpp */; };
897FF1C80E719D7600D62E90 /* pcre_xclass.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 897FF0EF0E719B3500D62E90 /* pcre_xclass.cpp */; };
+ 898BD20E0EF6CC930068B00A /* debug-ia32.cc in Sources */ = {isa = PBXBuildFile; fileRef = 898BD20D0EF6CC850068B00A /* debug-ia32.cc */; };
+ 898BD20F0EF6CC9A0068B00A /* debug-arm.cc in Sources */ = {isa = PBXBuildFile; fileRef = 898BD20C0EF6CC850068B00A /* debug-arm.cc */; };
89A15C7B0EE466EB00B48DEB /* regexp-macro-assembler-ia32.cc in Sources */ = {isa = PBXBuildFile; fileRef = 89A15C720EE466D000B48DEB /* regexp-macro-assembler-ia32.cc */; };
89A15C810EE4674900B48DEB /* regexp-macro-assembler.cc in Sources */ = {isa = PBXBuildFile; fileRef = 89A15C790EE466D000B48DEB /* regexp-macro-assembler.cc */; };
89A15C830EE4675E00B48DEB /* regexp-macro-assembler-irregexp.cc in Sources */ = {isa = PBXBuildFile; fileRef = 89A15C750EE466D000B48DEB /* regexp-macro-assembler-irregexp.cc */; };
@@ -470,6 +472,8 @@
897FF1B60E719C2300D62E90 /* js2c.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = js2c.py; sourceTree = "<group>"; };
897FF1B70E719C2E00D62E90 /* macros.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; name = macros.py; path = ../src/macros.py; sourceTree = "<group>"; };
897FF1BF0E719CB600D62E90 /* libjscre.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libjscre.a; sourceTree = BUILT_PRODUCTS_DIR; };
+ 898BD20C0EF6CC850068B00A /* debug-arm.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "debug-arm.cc"; sourceTree = "<group>"; };
+ 898BD20D0EF6CC850068B00A /* debug-ia32.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "debug-ia32.cc"; sourceTree = "<group>"; };
89A15C630EE4661A00B48DEB /* bytecodes-irregexp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "bytecodes-irregexp.h"; sourceTree = "<group>"; };
89A15C660EE4665300B48DEB /* interpreter-irregexp.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "interpreter-irregexp.cc"; sourceTree = "<group>"; };
89A15C670EE4665300B48DEB /* interpreter-irregexp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "interpreter-irregexp.h"; sourceTree = "<group>"; };
@@ -636,6 +640,8 @@
897FF1250E719B8F00D62E90 /* cpu.h */,
897FF1260E719B8F00D62E90 /* dateparser.cc */,
897FF1270E719B8F00D62E90 /* dateparser.h */,
+ 898BD20C0EF6CC850068B00A /* debug-arm.cc */,
+ 898BD20D0EF6CC850068B00A /* debug-ia32.cc */,
897FF1280E719B8F00D62E90 /* debug.cc */,
897FF1290E719B8F00D62E90 /* debug.h */,
897FF12A0E719B8F00D62E90 /* disasm-arm.cc */,
@@ -1046,6 +1052,7 @@
89A88DFC0E71A6460043BA31 /* counters.cc in Sources */,
89A88DFD0E71A6470043BA31 /* cpu-ia32.cc in Sources */,
89A88DFE0E71A6480043BA31 /* dateparser.cc in Sources */,
+ 898BD20E0EF6CC930068B00A /* debug-ia32.cc in Sources */,
89A88DFF0E71A6530043BA31 /* debug.cc in Sources */,
89A88E000E71A6540043BA31 /* disasm-ia32.cc in Sources */,
89A88E010E71A6550043BA31 /* disassembler.cc in Sources */,
@@ -1147,6 +1154,7 @@
89F23C4F0E78D5B2006B2466 /* counters.cc in Sources */,
89F23C9A0E78D5EC006B2466 /* cpu-arm.cc in Sources */,
89F23C510E78D5B2006B2466 /* dateparser.cc in Sources */,
+ 898BD20F0EF6CC9A0068B00A /* debug-arm.cc in Sources */,
89F23C520E78D5B2006B2466 /* debug.cc in Sources */,
89F23C9B0E78D5EE006B2466 /* disasm-arm.cc in Sources */,
89F23C540E78D5B2006B2466 /* disassembler.cc in Sources */,
diff --git a/tools/visual_studio/d8.vcproj b/tools/visual_studio/d8.vcproj
index ac38bf3..7f91ba7 100644
--- a/tools/visual_studio/d8.vcproj
+++ b/tools/visual_studio/d8.vcproj
@@ -148,6 +148,14 @@
>
</File>
<File
+ RelativePath="..\..\src\d8-debug.cc"
+ >
+ </File>
+ <File
+ RelativePath="..\..\src\d8-debug.h"
+ >
+ </File>
+ <File
RelativePath="..\..\src\d8.js"
>
<FileConfiguration