Update V8 to r5017 as required by WebKit r62496

Change-Id: I1b4b7718d1d77ceef07f543e9150a2cb3a628f3a
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index f8d98db..f5ff43a 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -1802,6 +1802,16 @@
 
 
 void Assembler::vmov(const DwVfpRegister dst,
+                     const DwVfpRegister src,
+                     const Condition cond) {
+  // Dd = Dm
+  // Instruction details available in ARM DDI 0406B, A8-642.
+  emit(cond | 0xE*B24 | 0xB*B20 |
+       dst.code()*B12 | 0x5*B9 | B8 | B6 | src.code());
+}
+
+
+void Assembler::vmov(const DwVfpRegister dst,
                      const Register src1,
                      const Register src2,
                      const Condition cond) {
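
For reference, a hand-worked check of what the new double-register move emits
(assuming V8's usual encoding constants, al == 0xE << 28 and Bn == 1 << n):

    __ vmov(d0, d1);      // al | 0xE*B24 | 0xB*B20 | 0x5*B9 | B8 | B6 | 1
                          //   == 0xEEB00B41, the DDI 0406B A8-642 encoding
                          //   of vmov.f64 d0, d1
    __ vmov(d2, d3, eq);  // same encoding, predicated on the Z flag
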
@@ -2112,6 +2122,18 @@
 }
 
 
+
+void Assembler::vsqrt(const DwVfpRegister dst,
+                      const DwVfpRegister src,
+                      const Condition cond) {
+  // cond(31-28) | 11101 (27-23) | D=?(22) | 11 (21-20) | 0001 (19-16) |
+  // Vd(15-12) | 101 (11-9) | sz(8)=1 | 11 (7-6) | M(5)=? | 0(4) | Vm(3-0)
+  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  emit(cond | 0xE*B24 | B23 | 0x3*B20 | B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | 3*B6 | src.code());
+}
+
+
 // Pseudo instructions.
 void Assembler::nop(int type) {
   // This is mov rx, rx.
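
Worked through the same way, the new vsqrt matches the bit layout in its
comment; for example (hand-computed, assuming al == 0xE << 28):

    __ vsqrt(d0, d0);  // al | 0xE*B24 | B23 | 0x3*B20 | B16 | 0x5*B9 | B8 | 3*B6
                       //   == 0xEEB10BC0, i.e. vsqrt.f64 d0, d0
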
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h
index 869227a..6a4fb23 100644
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -930,6 +930,10 @@
             const Register base,
             int offset,  // Offset must be a multiple of 4.
             const Condition cond = al);
+
+  void vmov(const DwVfpRegister dst,
+            const DwVfpRegister src,
+            const Condition cond = al);
   void vmov(const DwVfpRegister dst,
             const Register src1,
             const Register src2,
@@ -988,6 +992,9 @@
             const Condition cond = al);
   void vmrs(const Register dst,
             const Condition cond = al);
+  void vsqrt(const DwVfpRegister dst,
+             const DwVfpRegister src,
+             const Condition cond = al);
 
   // Pseudo instructions
   void nop(int type = 0);
@@ -1110,6 +1117,7 @@
   void EndBlockConstPool() {
     const_pool_blocked_nesting_--;
   }
+  bool is_const_pool_blocked() const { return const_pool_blocked_nesting_ > 0; }
 
  private:
   // Code buffer:
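
The new is_const_pool_blocked() accessor exposes the nesting count that
BlockConstPoolScope maintains, so emitters can assert that a patchable
sequence cannot be split by a constant pool. A usage sketch (illustrative,
not part of this patch):

    { Assembler::BlockConstPoolScope block_const_pool(masm);
      ASSERT(masm->is_const_pool_blocked());
      // Instructions emitted here stay contiguous; no constant pool can
      // be dumped into the middle of the sequence.
    }
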
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc
index 8e87614..4d18727 100644
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -157,6 +157,7 @@
       state_(NULL),
       loop_nesting_(0),
       type_info_(NULL),
+      function_return_(JumpTarget::BIDIRECTIONAL),
       function_return_is_shadowed_(false) {
 }
 
@@ -218,7 +219,7 @@
       // for stack overflow.
       frame_->AllocateStackSlots();
 
-      VirtualFrame::SpilledScope spilled_scope(frame_);
+      frame_->AssertIsSpilled();
       int heap_slots = scope()->num_heap_slots() - Context::MIN_CONTEXT_SLOTS;
       if (heap_slots > 0) {
         // Allocate local context.
@@ -257,6 +258,7 @@
         // order: such a parameter is copied repeatedly into the same
         // context location and thus the last value is what is seen inside
         // the function.
+        frame_->AssertIsSpilled();
         for (int i = 0; i < scope()->num_parameters(); i++) {
           Variable* par = scope()->parameter(i);
           Slot* slot = par->slot();
@@ -282,8 +284,7 @@
 
       // Initialize ThisFunction reference if present.
       if (scope()->is_function_scope() && scope()->function() != NULL) {
-        __ mov(ip, Operand(Factory::the_hole_value()));
-        frame_->EmitPush(ip);
+        frame_->EmitPushRoot(Heap::kTheHoleValueRootIndex);
         StoreToSlot(scope()->function()->slot(), NOT_CONST_INIT);
       }
     } else {
@@ -510,7 +511,6 @@
         has_valid_frame() &&
         !has_cc() &&
         frame_->height() == original_height) {
-      frame_->SpillAll();
       true_target->Jump();
     }
   }
@@ -535,22 +535,18 @@
 
   if (has_cc()) {
     // Convert cc_reg_ into a boolean value.
-    VirtualFrame::SpilledScope scope(frame_);
     JumpTarget loaded;
     JumpTarget materialize_true;
     materialize_true.Branch(cc_reg_);
-    __ LoadRoot(r0, Heap::kFalseValueRootIndex);
-    frame_->EmitPush(r0);
+    frame_->EmitPushRoot(Heap::kFalseValueRootIndex);
     loaded.Jump();
     materialize_true.Bind();
-    __ LoadRoot(r0, Heap::kTrueValueRootIndex);
-    frame_->EmitPush(r0);
+    frame_->EmitPushRoot(Heap::kTrueValueRootIndex);
     loaded.Bind();
     cc_reg_ = al;
   }
 
   if (true_target.is_linked() || false_target.is_linked()) {
-    VirtualFrame::SpilledScope scope(frame_);
     // We have at least one condition value that has been "translated"
     // into a branch, thus it needs to be loaded explicitly.
     JumpTarget loaded;
@@ -561,8 +557,7 @@
     // Load "true" if necessary.
     if (true_target.is_linked()) {
       true_target.Bind();
-      __ LoadRoot(r0, Heap::kTrueValueRootIndex);
-      frame_->EmitPush(r0);
+      frame_->EmitPushRoot(Heap::kTrueValueRootIndex);
     }
     // If both "true" and "false" need to be loaded jump across the code for
     // "false".
@@ -572,8 +567,7 @@
     // Load "false" if necessary.
     if (false_target.is_linked()) {
       false_target.Bind();
-      __ LoadRoot(r0, Heap::kFalseValueRootIndex);
-      frame_->EmitPush(r0);
+      frame_->EmitPushRoot(Heap::kFalseValueRootIndex);
     }
     // A value is loaded on all paths reaching this point.
     loaded.Bind();
@@ -592,11 +586,11 @@
 
 
 void CodeGenerator::LoadGlobalReceiver(Register scratch) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
-  __ ldr(scratch, ContextOperand(cp, Context::GLOBAL_INDEX));
-  __ ldr(scratch,
-         FieldMemOperand(scratch, GlobalObject::kGlobalReceiverOffset));
-  frame_->EmitPush(scratch);
+  Register reg = frame_->GetTOSRegister();
+  __ ldr(reg, ContextOperand(cp, Context::GLOBAL_INDEX));
+  __ ldr(reg,
+         FieldMemOperand(reg, GlobalObject::kGlobalReceiverOffset));
+  frame_->EmitPush(reg);
 }
 
 
@@ -613,8 +607,6 @@
 
 
 void CodeGenerator::StoreArgumentsObject(bool initial) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
-
   ArgumentsAllocationMode mode = ArgumentsMode();
   ASSERT(mode != NO_ARGUMENTS_ALLOCATION);
 
@@ -623,9 +615,9 @@
     // When using lazy arguments allocation, we store the hole value
     // as a sentinel indicating that the arguments object hasn't been
     // allocated yet.
-    __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
-    frame_->EmitPush(ip);
+    frame_->EmitPushRoot(Heap::kTheHoleValueRootIndex);
   } else {
+    frame_->SpillAll();
     ArgumentsAccessStub stub(ArgumentsAccessStub::NEW_OBJECT);
     __ ldr(r2, frame_->Function());
     // The receiver is below the arguments, the return address, and the
@@ -649,9 +641,9 @@
     // already been written to. This can happen if a function
     // has a local variable named 'arguments'.
     LoadFromSlot(scope()->arguments()->var()->slot(), NOT_INSIDE_TYPEOF);
-    frame_->EmitPop(r0);
+    Register arguments = frame_->PopToRegister();
     __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
-    __ cmp(r0, ip);
+    __ cmp(arguments, ip);
     done.Branch(ne);
   }
   StoreToSlot(arguments->slot(), NOT_CONST_INIT);
@@ -754,40 +746,45 @@
 // may jump to 'false_target' in case the register converts to 'false'.
 void CodeGenerator::ToBoolean(JumpTarget* true_target,
                               JumpTarget* false_target) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   // Note: The generated code snippet does not change stack variables.
   //       Only the condition code should be set.
-  frame_->EmitPop(r0);
+  bool known_smi = frame_->KnownSmiAt(0);
+  Register tos = frame_->PopToRegister();
 
   // Fast case checks
 
   // Check if the value is 'false'.
-  __ LoadRoot(ip, Heap::kFalseValueRootIndex);
-  __ cmp(r0, ip);
-  false_target->Branch(eq);
+  if (!known_smi) {
+    __ LoadRoot(ip, Heap::kFalseValueRootIndex);
+    __ cmp(tos, ip);
+    false_target->Branch(eq);
 
-  // Check if the value is 'true'.
-  __ LoadRoot(ip, Heap::kTrueValueRootIndex);
-  __ cmp(r0, ip);
-  true_target->Branch(eq);
+    // Check if the value is 'true'.
+    __ LoadRoot(ip, Heap::kTrueValueRootIndex);
+    __ cmp(tos, ip);
+    true_target->Branch(eq);
 
-  // Check if the value is 'undefined'.
-  __ LoadRoot(ip, Heap::kUndefinedValueRootIndex);
-  __ cmp(r0, ip);
-  false_target->Branch(eq);
+    // Check if the value is 'undefined'.
+    __ LoadRoot(ip, Heap::kUndefinedValueRootIndex);
+    __ cmp(tos, ip);
+    false_target->Branch(eq);
+  }
 
   // Check if the value is a smi.
-  __ cmp(r0, Operand(Smi::FromInt(0)));
-  false_target->Branch(eq);
-  __ tst(r0, Operand(kSmiTagMask));
-  true_target->Branch(eq);
+  __ cmp(tos, Operand(Smi::FromInt(0)));
 
-  // Slow case: call the runtime.
-  frame_->EmitPush(r0);
-  frame_->CallRuntime(Runtime::kToBool, 1);
-  // Convert the result (r0) to a condition code.
-  __ LoadRoot(ip, Heap::kFalseValueRootIndex);
-  __ cmp(r0, ip);
+  if (!known_smi) {
+    false_target->Branch(eq);
+    __ tst(tos, Operand(kSmiTagMask));
+    true_target->Branch(eq);
+
+    // Slow case: call the runtime.
+    frame_->EmitPush(tos);
+    frame_->CallRuntime(Runtime::kToBool, 1);
+    // Convert the result (r0) to a condition code.
+    __ LoadRoot(ip, Heap::kFalseValueRootIndex);
+    __ cmp(r0, ip);
+  }
 
   cc_reg_ = ne;
 }
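
With the new KnownSmiAt(0) fast path, a value the type oracle has already
proven to be a smi skips every root comparison and the runtime fallback;
the function then collapses to a sketch like:

    Register tos = frame_->PopToRegister();
    __ cmp(tos, Operand(Smi::FromInt(0)));  // only the zero smi is false
    cc_reg_ = ne;
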
@@ -935,7 +932,15 @@
 };
 
 
+
+// On entry the non-constant side of the binary operation is in tos_register_
+// and the constant smi side is not held in any register or frame slot; it is
+// re-materialized from value_.  The tos_register_ is not used by the virtual
+// frame.  On exit the answer is in the tos_register_ and the virtual frame is
+// unchanged.
 void DeferredInlineSmiOperation::Generate() {
+  VirtualFrame copied_frame(*frame_state()->frame());
+  copied_frame.SpillAll();
+
   Register lhs = r1;
   Register rhs = r0;
   switch (op_) {
@@ -969,44 +974,19 @@
     case Token::MOD:
     case Token::BIT_OR:
     case Token::BIT_XOR:
-    case Token::BIT_AND: {
-      if (reversed_) {
-        if (tos_register_.is(r0)) {
-          __ mov(r1, Operand(Smi::FromInt(value_)));
-        } else {
-          ASSERT(tos_register_.is(r1));
-          __ mov(r0, Operand(Smi::FromInt(value_)));
-          lhs = r0;
-          rhs = r1;
-        }
-      } else {
-        if (tos_register_.is(r1)) {
-          __ mov(r0, Operand(Smi::FromInt(value_)));
-        } else {
-          ASSERT(tos_register_.is(r0));
-          __ mov(r1, Operand(Smi::FromInt(value_)));
-          lhs = r0;
-          rhs = r1;
-        }
-      }
-      break;
-    }
-
+    case Token::BIT_AND:
     case Token::SHL:
     case Token::SHR:
     case Token::SAR: {
-      if (!reversed_) {
-        if (tos_register_.is(r1)) {
-          __ mov(r0, Operand(Smi::FromInt(value_)));
-        } else {
-          ASSERT(tos_register_.is(r0));
-          __ mov(r1, Operand(Smi::FromInt(value_)));
+      if (tos_register_.is(r1)) {
+        __ mov(r0, Operand(Smi::FromInt(value_)));
+      } else {
+        ASSERT(tos_register_.is(r0));
+        __ mov(r1, Operand(Smi::FromInt(value_)));
+      }
+      if (reversed_ == tos_register_.is(r1)) {
           lhs = r0;
           rhs = r1;
-        }
-      } else {
-        ASSERT(op_ == Token::SHL);
-        __ mov(r1, Operand(Smi::FromInt(value_)));
       }
       break;
     }
@@ -1019,11 +999,17 @@
 
   GenericBinaryOpStub stub(op_, overwrite_mode_, lhs, rhs, value_);
   __ CallStub(&stub);
+
   // The generic stub returns its value in r0, but that's not
   // necessarily what we want.  We want whatever the inlined code
   // expected, which is that the answer is in the same register as
   // the operand was.
   __ Move(tos_register_, r0);
+
+  // The tos register was not in use for the virtual frame that we
+  // came into this function with, so we can merge back to that frame
+  // without trashing it.
+  copied_frame.MergeTo(frame_state()->frame());
 }
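
The copy/spill/merge pair added here is the pattern that lets deferred code
coexist with register allocation: the stub call needs a fully spilled frame,
but the inline code that jumps over the deferred block expects the entry
layout. Condensed, the protocol is (names as in the patch):

    VirtualFrame copied_frame(*frame_state()->frame());  // snapshot entry state
    copied_frame.SpillAll();           // deferred code runs fully spilled
    // ... materialize the constant, call the stub ...
    __ Move(tos_register_, r0);        // answer where the inline code expects it
    copied_frame.MergeTo(frame_state()->frame());        // restore entry layout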
 
 
@@ -1124,12 +1110,6 @@
 
   // We move the top of stack to a register (normally no move is involved).
   Register tos = frame_->PopToRegister();
-  // All other registers are spilled.  The deferred code expects one argument
-  // in a register and all other values are flushed to the stack.  The
-  // answer is returned in the same register that the top of stack argument was
-  // in.
-  frame_->SpillAll();
-
   switch (op) {
     case Token::ADD: {
       DeferredCode* deferred =
@@ -1448,8 +1428,6 @@
 void CodeGenerator::CallWithArguments(ZoneList<Expression*>* args,
                                       CallFunctionFlags flags,
                                       int position) {
-  frame_->AssertIsSpilled();
-
   // Push the arguments ("left-to-right") on the stack.
   int arg_count = args->length();
   for (int i = 0; i < arg_count; i++) {
@@ -1482,7 +1460,6 @@
   // stack, as receiver and arguments, and calls x.
   // In the implementation comments, we call x the applicand
   // and y the receiver.
-  VirtualFrame::SpilledScope spilled_scope(frame_);
 
   ASSERT(ArgumentsMode() == LAZY_ARGUMENTS_ALLOCATION);
   ASSERT(arguments->IsArguments());
@@ -1500,6 +1477,15 @@
   Load(receiver);
   LoadFromSlot(scope()->arguments()->var()->slot(), NOT_INSIDE_TYPEOF);
 
+  // At this point the top two stack elements are probably in registers
+  // since they were just loaded.  Ensure they are in registers and get
+  // hold of those registers.
+  Register receiver_reg = frame_->Peek2();
+  Register arguments_reg = frame_->Peek();
+
+  // From now on the frame is spilled.
+  frame_->SpillAll();
+
   // Emit the source position information after having loaded the
   // receiver and the arguments.
   CodeForSourcePosition(position);
@@ -1513,32 +1499,30 @@
   // already. If so, just use that instead of copying the arguments
   // from the stack. This also deals with cases where a local variable
   // named 'arguments' has been introduced.
-  __ ldr(r0, MemOperand(sp, 0));
-
-  Label slow, done;
+  JumpTarget slow;
+  Label done;
   __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
-  __ cmp(ip, r0);
-  __ b(ne, &slow);
+  __ cmp(ip, arguments_reg);
+  slow.Branch(ne);
 
   Label build_args;
   // Get rid of the arguments object probe.
   frame_->Drop();
   // Stack now has 3 elements on it.
   // Contents of stack at this point:
-  //   sp[0]: receiver
+  //   sp[0]: receiver - in the receiver_reg register.
   //   sp[1]: applicand.apply
   //   sp[2]: applicand.
 
   // Check that the receiver really is a JavaScript object.
-  __ ldr(r0, MemOperand(sp, 0));
-  __ BranchOnSmi(r0, &build_args);
+  __ BranchOnSmi(receiver_reg, &build_args);
   // We allow all JSObjects including JSFunctions.  As long as
   // JS_FUNCTION_TYPE is the last instance type and it is right
   // after LAST_JS_OBJECT_TYPE, we do not have to check the upper
   // bound.
   ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
   ASSERT(JS_FUNCTION_TYPE == LAST_JS_OBJECT_TYPE + 1);
-  __ CompareObjectType(r0, r1, r2, FIRST_JS_OBJECT_TYPE);
+  __ CompareObjectType(receiver_reg, r2, r3, FIRST_JS_OBJECT_TYPE);
   __ b(lt, &build_args);
 
   // Check that applicand.apply is Function.prototype.apply.
@@ -1627,7 +1611,7 @@
   StoreArgumentsObject(false);
 
   // Stack and frame now have 4 elements.
-  __ bind(&slow);
+  slow.Bind();
 
   // Generic computation of x.apply(y, args) with no special optimization.
   // Flip applicand.apply and applicand on the stack, so
@@ -1652,7 +1636,6 @@
 
 
 void CodeGenerator::Branch(bool if_true, JumpTarget* target) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   ASSERT(has_cc());
   Condition cc = if_true ? cc_reg_ : NegateCondition(cc_reg_);
   target->Branch(cc);
@@ -1661,7 +1644,7 @@
 
 
 void CodeGenerator::CheckStack() {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
+  frame_->SpillAll();
   Comment cmnt(masm_, "[ check stack");
   __ LoadRoot(ip, Heap::kStackLimitRootIndex);
   // Put the lr setup instruction in the delay slot.  kInstrSize is added to
@@ -1683,7 +1666,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   for (int i = 0; frame_ != NULL && i < statements->length(); i++) {
     Visit(statements->at(i));
   }
@@ -1695,7 +1677,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ Block");
   CodeForStatementPosition(node);
   node->break_target()->SetExpectedHeight();
@@ -1713,7 +1694,6 @@
   frame_->EmitPush(Operand(pairs));
   frame_->EmitPush(Operand(Smi::FromInt(is_eval() ? 1 : 0)));
 
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   frame_->CallRuntime(Runtime::kDeclareGlobals, 3);
   // The result is discarded.
 }
@@ -1754,7 +1734,6 @@
       frame_->EmitPush(Operand(0));
     }
 
-    VirtualFrame::SpilledScope spilled_scope(frame_);
     frame_->CallRuntime(Runtime::kDeclareContextSlot, 4);
     // Ignore the return value (declarations are statements).
 
@@ -1772,11 +1751,15 @@
     val = node->fun();  // NULL if we don't have a function
   }
 
+
   if (val != NULL) {
+    WriteBarrierCharacter wb_info =
+        val->type()->IsLikelySmi() ? LIKELY_SMI : UNLIKELY_SMI;
+    if (val->AsLiteral() != NULL) wb_info = NEVER_NEWSPACE;
     // Set initial value.
     Reference target(this, node->proxy());
     Load(val);
-    target.SetValue(NOT_CONST_INIT);
+    target.SetValue(NOT_CONST_INIT, wb_info);
 
     // Get rid of the assigned value (declarations are statements).
     frame_->Drop();
@@ -1899,7 +1882,6 @@
 
 
 void CodeGenerator::VisitContinueStatement(ContinueStatement* node) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ ContinueStatement");
   CodeForStatementPosition(node);
   node->target()->continue_target()->Jump();
@@ -1907,7 +1889,6 @@
 
 
 void CodeGenerator::VisitBreakStatement(BreakStatement* node) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ BreakStatement");
   CodeForStatementPosition(node);
   node->target()->break_target()->Jump();
@@ -1915,7 +1896,7 @@
 
 
 void CodeGenerator::VisitReturnStatement(ReturnStatement* node) {
-  VirtualFrame::SpilledScope spilled_scope(frame_);
+  frame_->SpillAll();
   Comment cmnt(masm_, "[ ReturnStatement");
 
   CodeForStatementPosition(node);
@@ -1926,7 +1907,7 @@
   } else {
     // Pop the result from the frame and prepare the frame for
     // returning thus making it easier to merge.
-    frame_->EmitPop(r0);
+    frame_->PopToR0();
     frame_->PrepareForReturn();
     if (function_return_.is_bound()) {
       // If the function return label is already bound we reuse the
@@ -1986,7 +1967,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ WithEnterStatement");
   CodeForStatementPosition(node);
   Load(node->expression());
@@ -2012,7 +1992,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ WithExitStatement");
   CodeForStatementPosition(node);
   // Pop context.
@@ -2027,7 +2006,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ SwitchStatement");
   CodeForStatementPosition(node);
   node->break_target()->SetExpectedHeight();
@@ -2055,8 +2033,7 @@
     next_test.Bind();
     next_test.Unuse();
     // Duplicate TOS.
-    __ ldr(r0, frame_->Top());
-    frame_->EmitPush(r0);
+    frame_->Dup();
     Comparison(eq, NULL, clause->label(), true);
     Branch(false, &next_test);
 
@@ -2094,7 +2071,7 @@
     default_entry.Bind();
     VisitStatements(default_clause->statements());
     // If control flow can fall out of the default and there is a case after
-    // it, jup to that case's body.
+    // it, jump to that case's body.
     if (frame_ != NULL && default_exit.is_bound()) {
       default_exit.Jump();
     }
@@ -2116,7 +2093,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ DoWhileStatement");
   CodeForStatementPosition(node);
   node->break_target()->SetExpectedHeight();
@@ -2191,7 +2167,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ WhileStatement");
   CodeForStatementPosition(node);
 
@@ -2209,7 +2184,7 @@
   node->continue_target()->Bind();
 
   if (info == DONT_KNOW) {
-    JumpTarget body;
+    JumpTarget body(JumpTarget::BIDIRECTIONAL);
     LoadCondition(node->cond(), &body, node->break_target(), true);
     if (has_valid_frame()) {
       // A NULL frame indicates that control did not fall out of the
@@ -2242,7 +2217,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ ForStatement");
   CodeForStatementPosition(node);
   if (node->init() != NULL) {
@@ -2521,13 +2495,13 @@
       if (each.size() > 0) {
         __ ldr(r0, frame_->ElementAt(each.size()));
         frame_->EmitPush(r0);
-        each.SetValue(NOT_CONST_INIT);
+        each.SetValue(NOT_CONST_INIT, UNLIKELY_SMI);
         frame_->Drop(2);
       } else {
         // If the reference was to a slot we rely on the convenient property
         // that it doesn't matter whether a value (eg, r3 pushed above) is
         // right on top of or right underneath a zero-sized reference.
-        each.SetValue(NOT_CONST_INIT);
+        each.SetValue(NOT_CONST_INIT, UNLIKELY_SMI);
         frame_->Drop();
       }
     }
@@ -2931,7 +2905,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ Conditional");
   JumpTarget then;
   JumpTarget else_;
@@ -2972,10 +2945,8 @@
                                     &done);
 
     slow.Bind();
-    VirtualFrame::SpilledScope spilled_scope(frame_);
     frame_->EmitPush(cp);
-    __ mov(r0, Operand(slot->var()->name()));
-    frame_->EmitPush(r0);
+    frame_->EmitPush(Operand(slot->var()->name()));
 
     if (typeof_state == INSIDE_TYPEOF) {
       frame_->CallRuntime(Runtime::kLoadContextSlotNoReferenceError, 2);
@@ -2990,16 +2961,17 @@
     Register scratch = VirtualFrame::scratch0();
     TypeInfo info = type_info(slot);
     frame_->EmitPush(SlotOperand(slot, scratch), info);
+
     if (slot->var()->mode() == Variable::CONST) {
       // Const slots may contain 'the hole' value (the constant hasn't been
       // initialized yet) which needs to be converted into the 'undefined'
       // value.
       Comment cmnt(masm_, "[ Unhole const");
-      frame_->EmitPop(scratch);
+      Register tos = frame_->PopToRegister();
       __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
-      __ cmp(scratch, ip);
-      __ LoadRoot(scratch, Heap::kUndefinedValueRootIndex, eq);
-      frame_->EmitPush(scratch);
+      __ cmp(tos, ip);
+      __ LoadRoot(tos, Heap::kUndefinedValueRootIndex, eq);
+      frame_->EmitPush(tos);
     }
   }
 }
@@ -3007,6 +2979,7 @@
 
 void CodeGenerator::LoadFromSlotCheckForArguments(Slot* slot,
                                                   TypeofState state) {
+  VirtualFrame::RegisterAllocationScope scope(this);
   LoadFromSlot(slot, state);
 
   // Bail out quickly if we're not using lazy arguments allocation.
@@ -3015,17 +2988,15 @@
   // ... or if the slot isn't a non-parameter arguments slot.
   if (slot->type() == Slot::PARAMETER || !slot->is_arguments()) return;
 
-  VirtualFrame::SpilledScope spilled_scope(frame_);
-
-  // Load the loaded value from the stack into r0 but leave it on the
+  // Load the loaded value from the stack into a register but leave it on the
   // stack.
-  __ ldr(r0, MemOperand(sp, 0));
+  Register tos = frame_->Peek();
 
   // If the loaded value is the sentinel that indicates that we
   // haven't loaded the arguments object yet, we need to do it now.
   JumpTarget exit;
   __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
-  __ cmp(r0, ip);
+  __ cmp(tos, ip);
   exit.Branch(ne);
   frame_->Drop();
   StoreArgumentsObject(false);
@@ -3035,14 +3006,13 @@
 
 void CodeGenerator::StoreToSlot(Slot* slot, InitState init_state) {
   ASSERT(slot != NULL);
+  VirtualFrame::RegisterAllocationScope scope(this);
   if (slot->type() == Slot::LOOKUP) {
-    VirtualFrame::SpilledScope spilled_scope(frame_);
     ASSERT(slot->var()->is_dynamic());
 
     // For now, just do a runtime call.
     frame_->EmitPush(cp);
-    __ mov(r0, Operand(slot->var()->name()));
-    frame_->EmitPush(r0);
+    frame_->EmitPush(Operand(slot->var()->name()));
 
     if (init_state == CONST_INIT) {
       // Same as the case for a normal store, but ignores attribute
@@ -3071,7 +3041,7 @@
   } else {
     ASSERT(!slot->var()->is_dynamic());
     Register scratch = VirtualFrame::scratch0();
-    VirtualFrame::RegisterAllocationScope scope(this);
+    Register scratch2 = VirtualFrame::scratch1();
 
     // The frame must be spilled when branching to this target.
     JumpTarget exit;
@@ -3085,7 +3055,6 @@
       __ ldr(scratch, SlotOperand(slot, scratch));
       __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
       __ cmp(scratch, ip);
-      frame_->SpillAll();
       exit.Branch(ne);
     }
 
@@ -3104,18 +3073,18 @@
       // Skip write barrier if the written value is a smi.
       __ tst(tos, Operand(kSmiTagMask));
       // We don't use tos any more after here.
-      VirtualFrame::SpilledScope spilled_scope(frame_);
       exit.Branch(eq);
       // scratch is loaded with context when calling SlotOperand above.
       int offset = FixedArray::kHeaderSize + slot->index() * kPointerSize;
-      // r1 could be identical with tos, but that doesn't matter.
-      __ RecordWrite(scratch, Operand(offset), r3, r1);
+      // We need an extra register.  Until we have a way to do that in the
+      // virtual frame we will cheat and ask for a free TOS register.
+      Register scratch3 = frame_->GetTOSRegister();
+      __ RecordWrite(scratch, Operand(offset), scratch2, scratch3);
     }
     // If we definitely did not jump over the assignment, we do not need
     // to bind the exit label.  Doing so can defeat peephole
     // optimization.
     if (init_state == CONST_INIT || slot->type() == Slot::CONTEXT) {
-      frame_->SpillAll();
       exit.Bind();
     }
   }
@@ -3289,42 +3258,51 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ RexExp Literal");
 
+  Register tmp = VirtualFrame::scratch0();
+  // Free up a TOS register that can be used to push the literal.
+  Register literal = frame_->GetTOSRegister();
+
   // Retrieve the literal array and check the allocated entry.
 
   // Load the function of this activation.
-  __ ldr(r1, frame_->Function());
+  __ ldr(tmp, frame_->Function());
 
   // Load the literals array of the function.
-  __ ldr(r1, FieldMemOperand(r1, JSFunction::kLiteralsOffset));
+  __ ldr(tmp, FieldMemOperand(tmp, JSFunction::kLiteralsOffset));
 
   // Load the literal at the ast saved index.
   int literal_offset =
       FixedArray::kHeaderSize + node->literal_index() * kPointerSize;
-  __ ldr(r2, FieldMemOperand(r1, literal_offset));
+  __ ldr(literal, FieldMemOperand(tmp, literal_offset));
 
   JumpTarget done;
   __ LoadRoot(ip, Heap::kUndefinedValueRootIndex);
-  __ cmp(r2, ip);
+  __ cmp(literal, ip);
+  // This branch locks the virtual frame at the done label to match the
+  // one we have here, where the literal register is not on the stack and
+  // nothing is spilled.
   done.Branch(ne);
 
-  // If the entry is undefined we call the runtime system to computed
+  // If the entry is undefined we call the runtime system to compute
   // the literal.
-  frame_->EmitPush(r1);  // literal array  (0)
-  __ mov(r0, Operand(Smi::FromInt(node->literal_index())));
-  frame_->EmitPush(r0);  // literal index  (1)
-  __ mov(r0, Operand(node->pattern()));  // RegExp pattern (2)
-  frame_->EmitPush(r0);
-  __ mov(r0, Operand(node->flags()));  // RegExp flags   (3)
-  frame_->EmitPush(r0);
+  // literal array  (0)
+  frame_->EmitPush(tmp);
+  // literal index  (1)
+  frame_->EmitPush(Operand(Smi::FromInt(node->literal_index())));
+  // RegExp pattern (2)
+  frame_->EmitPush(Operand(node->pattern()));
+  // RegExp flags   (3)
+  frame_->EmitPush(Operand(node->flags()));
   frame_->CallRuntime(Runtime::kMaterializeRegExpLiteral, 4);
-  __ mov(r2, Operand(r0));
+  __ Move(literal, r0);
 
+  // This call to bind will get us back to the virtual frame we had before
+  // where things are not spilled and the literal register is not on the stack.
   done.Bind();
   // Push the literal.
-  frame_->EmitPush(r2);
+  frame_->EmitPush(literal);
   ASSERT_EQ(original_height + 1, frame_->height());
 }
 
@@ -3333,20 +3311,20 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ ObjectLiteral");
 
+  Register literal = frame_->GetTOSRegister();
   // Load the function of this activation.
-  __ ldr(r3, frame_->Function());
+  __ ldr(literal, frame_->Function());
   // Literal array.
-  __ ldr(r3, FieldMemOperand(r3, JSFunction::kLiteralsOffset));
+  __ ldr(literal, FieldMemOperand(literal, JSFunction::kLiteralsOffset));
+  frame_->EmitPush(literal);
   // Literal index.
-  __ mov(r2, Operand(Smi::FromInt(node->literal_index())));
+  frame_->EmitPush(Operand(Smi::FromInt(node->literal_index())));
   // Constant properties.
-  __ mov(r1, Operand(node->constant_properties()));
+  frame_->EmitPush(Operand(node->constant_properties()));
   // Should the object literal have fast elements?
-  __ mov(r0, Operand(Smi::FromInt(node->fast_elements() ? 1 : 0)));
-  frame_->EmitPushMultiple(4, r3.bit() | r2.bit() | r1.bit() | r0.bit());
+  frame_->EmitPush(Operand(Smi::FromInt(node->fast_elements() ? 1 : 0)));
   if (node->depth() > 1) {
     frame_->CallRuntime(Runtime::kCreateObjectLiteral, 4);
   } else {
@@ -3369,37 +3347,33 @@
         if (key->handle()->IsSymbol()) {
           Handle<Code> ic(Builtins::builtin(Builtins::StoreIC_Initialize));
           Load(value);
-          frame_->EmitPop(r0);
+          frame_->PopToR0();
+          // Fetch the object literal.
+          frame_->SpillAllButCopyTOSToR1();
           __ mov(r2, Operand(key->handle()));
-          __ ldr(r1, frame_->Top());  // Load the receiver.
           frame_->CallCodeObject(ic, RelocInfo::CODE_TARGET, 0);
           break;
         }
         // else fall through
       case ObjectLiteral::Property::PROTOTYPE: {
-        __ ldr(r0, frame_->Top());
-        frame_->EmitPush(r0);  // dup the result
+        frame_->Dup();
         Load(key);
         Load(value);
         frame_->CallRuntime(Runtime::kSetProperty, 3);
         break;
       }
       case ObjectLiteral::Property::SETTER: {
-        __ ldr(r0, frame_->Top());
-        frame_->EmitPush(r0);
+        frame_->Dup();
         Load(key);
-        __ mov(r0, Operand(Smi::FromInt(1)));
-        frame_->EmitPush(r0);
+        frame_->EmitPush(Operand(Smi::FromInt(1)));
         Load(value);
         frame_->CallRuntime(Runtime::kDefineAccessor, 4);
         break;
       }
       case ObjectLiteral::Property::GETTER: {
-        __ ldr(r0, frame_->Top());
-        frame_->EmitPush(r0);
+        frame_->Dup();
         Load(key);
-        __ mov(r0, Operand(Smi::FromInt(0)));
-        frame_->EmitPush(r0);
+        frame_->EmitPush(Operand(Smi::FromInt(0)));
         Load(value);
         frame_->CallRuntime(Runtime::kDefineAccessor, 4);
         break;
@@ -3414,16 +3388,16 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ ArrayLiteral");
 
+  Register tos = frame_->GetTOSRegister();
   // Load the function of this activation.
-  __ ldr(r2, frame_->Function());
+  __ ldr(tos, frame_->Function());
   // Load the literals array of the function.
-  __ ldr(r2, FieldMemOperand(r2, JSFunction::kLiteralsOffset));
-  __ mov(r1, Operand(Smi::FromInt(node->literal_index())));
-  __ mov(r0, Operand(node->constant_elements()));
-  frame_->EmitPushMultiple(3, r2.bit() | r1.bit() | r0.bit());
+  __ ldr(tos, FieldMemOperand(tos, JSFunction::kLiteralsOffset));
+  frame_->EmitPush(tos);
+  frame_->EmitPush(Operand(Smi::FromInt(node->literal_index())));
+  frame_->EmitPush(Operand(node->constant_elements()));
   int length = node->values()->length();
   if (node->depth() > 1) {
     frame_->CallRuntime(Runtime::kCreateArrayLiteral, 3);
@@ -3450,10 +3424,10 @@
 
     // The property must be set by generated code.
     Load(value);
-    frame_->EmitPop(r0);
-
+    frame_->PopToR0();
     // Fetch the object literal.
-    __ ldr(r1, frame_->Top());
+    frame_->SpillAllButCopyTOSToR1();
+
     // Get the elements array.
     __ ldr(r1, FieldMemOperand(r1, JSObject::kElementsOffset));
 
@@ -3682,6 +3656,8 @@
   // Evaluate the receiver subexpression.
   Load(prop->obj());
 
+  WriteBarrierCharacter wb_info;
+
   // Change to slow case in the beginning of an initialization block to
   // avoid the quadratic behavior of repeatedly adding fast properties.
   if (node->starts_initialization_block()) {
@@ -3703,7 +3679,7 @@
   // [tos]   : key
   // [tos+1] : receiver
   // [tos+2] : receiver if at the end of an initialization block
-
+  //
   // Evaluate the right-hand side.
   if (node->is_compound()) {
     // For a compound assignment the right-hand side is a binary operation
@@ -3735,9 +3711,13 @@
                              overwrite_value ? OVERWRITE_RIGHT : NO_OVERWRITE,
                              inline_smi);
     }
+    wb_info = node->type()->IsLikelySmi() ? LIKELY_SMI : UNLIKELY_SMI;
   } else {
     // For non-compound assignment just load the right-hand side.
     Load(node->value());
+    wb_info = node->value()->AsLiteral() != NULL ?
+        NEVER_NEWSPACE :
+        (node->value()->type()->IsLikelySmi() ? LIKELY_SMI : UNLIKELY_SMI);
   }
 
   // Stack layout:
@@ -3749,7 +3729,7 @@
   // Perform the assignment.  It is safe to ignore constants here.
   ASSERT(node->op() != Token::INIT_CONST);
   CodeForSourcePosition(node->position());
-  EmitKeyedStore(prop->key()->type());
+  EmitKeyedStore(prop->key()->type(), wb_info);
   frame_->EmitPush(r0);
 
   // Stack layout:
@@ -3863,7 +3843,6 @@
   // ------------------------------------------------------------------------
 
   if (var != NULL && var->is_possibly_eval()) {
-    VirtualFrame::SpilledScope spilled_scope(frame_);
     // ----------------------------------
     // JavaScript example: 'eval(arg)'  // eval is not known to be shadowed
     // ----------------------------------
@@ -3877,8 +3856,7 @@
     Load(function);
 
     // Allocate a frame slot for the receiver.
-    __ LoadRoot(r2, Heap::kUndefinedValueRootIndex);
-    frame_->EmitPush(r2);
+    frame_->EmitPushRoot(Heap::kUndefinedValueRootIndex);
 
     // Load the arguments.
     int arg_count = args->length();
@@ -3886,6 +3864,8 @@
       Load(args->at(i));
     }
 
+    VirtualFrame::SpilledScope spilled_scope(frame_);
+
     // If we know that eval can only be shadowed by eval-introduced
     // variables we attempt to load the global eval function directly
     // in generated code. If we succeed, there is no need to perform a
@@ -4315,22 +4295,205 @@
 }
 
 
-// Generates the Math.pow method - currently just calls runtime.
+// Generates the Math.pow method.
 void CodeGenerator::GenerateMathPow(ZoneList<Expression*>* args) {
   ASSERT(args->length() == 2);
   Load(args->at(0));
   Load(args->at(1));
-  frame_->CallRuntime(Runtime::kMath_pow, 2);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+    Label exponent_nonsmi, base_nonsmi, powi, not_minus_half, allocate_return;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get base and exponent to registers.
+    Register exponent = frame_->PopToRegister();
+    Register base = frame_->PopToRegister(exponent);
+    Register heap_number_map = no_reg;
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    __ BranchOnNotSmi(exponent, &exponent_nonsmi);
+    __ BranchOnNotSmi(base, &base_nonsmi);
+
+    heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+
+    // Exponent is a smi and base is a smi. Get the smi value into vfp register
+    // d1.
+    __ SmiToDoubleVFPRegister(base, d1, scratch1, s0);
+    __ b(&powi);
+
+    __ bind(&base_nonsmi);
+    // Exponent is smi and base is non smi. Get the double value from the base
+    // into vfp register d1.
+    __ ObjectToDoubleVFPRegister(base, d1,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label());
+
+    __ bind(&powi);
+
+    // Load 1.0 into d0.
+    __ mov(scratch2, Operand(0x3ff00000));
+    __ mov(scratch1, Operand(0));
+    __ vmov(d0, scratch1, scratch2);
+
+    // Get the absolute untagged value of the exponent and use that for the
+    // calculation.
+    __ mov(scratch1, Operand(exponent, ASR, kSmiTagSize), SetCC);
+    __ rsb(scratch1, scratch1, Operand(0), LeaveCC, mi);  // Negate if negative.
+    __ vmov(d2, d0, mi);  // 1.0 needed in d2 later if exponent is negative.
+
+    // Run through all the bits in the exponent. The result is calculated in d0
+    // and d1 holds base^(2^i) for the current bit position i.
+    Label more_bits;
+    __ bind(&more_bits);
+    __ mov(scratch1, Operand(scratch1, LSR, 1), SetCC);
+    __ vmul(d0, d0, d1, cs);  // Multiply by base^(2^i) if bit i is set.
+    __ vmul(d1, d1, d1, ne);  // Don't bother calculating next d1 if done.
+    __ b(ne, &more_bits);
+
+    // If exponent is positive we are done.
+    __ cmp(exponent, Operand(0));
+    __ b(ge, &allocate_return);
+
+    // If exponent is negative result is 1/result (d2 already holds 1.0 in that
+    // case). However if d0 has reached infinity this will not provide the
+    // correct result, so call runtime if that is the case.
+    __ mov(scratch2, Operand(0x7FF00000));
+    __ mov(scratch1, Operand(0));
+    __ vmov(d1, scratch1, scratch2);  // Load infinity into d1.
+    __ vcmp(d0, d1);
+    __ vmrs(pc);
+    runtime.Branch(eq);  // d0 reached infinity.
+    __ vdiv(d0, d2, d0);
+    __ b(&allocate_return);
+
+    __ bind(&exponent_nonsmi);
+    // Special handling of raising to the power of -0.5 and 0.5. First check
+    // that the value is a heap number and that its lower mantissa bits are
+    // zero (as they are for both of these values).
+    heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapObject::kMapOffset));
+    __ ldr(scratch2, FieldMemOperand(exponent, HeapNumber::kMantissaOffset));
+    __ cmp(scratch1, heap_number_map);
+    runtime.Branch(ne);
+    __ tst(scratch2, scratch2);
+    runtime.Branch(ne);
+
+    // Load the higher bits (which contains the floating point exponent).
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapNumber::kExponentOffset));
+
+    // Compare exponent with -0.5.
+    __ cmp(scratch1, Operand(0xbfe00000));
+    __ b(ne, &not_minus_half);
+
+    // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+
+    // Load 1.0 into d2.
+    __ mov(scratch2, Operand(0x3ff00000));
+    __ mov(scratch1, Operand(0));
+    __ vmov(d2, scratch1, scratch2);
+
+    // Calculate the reciprocal of the square root. 1/sqrt(x) = sqrt(1/x).
+    __ vdiv(d0, d2, d0);
+    __ vsqrt(d0, d0);
+
+    __ b(&allocate_return);
+
+    __ bind(&not_minus_half);
+    // Compare exponent with 0.5.
+    __ cmp(scratch1, Operand(0x3fe00000));
+    runtime.Branch(ne);
+
+    // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+    __ vsqrt(d0, d0);
+
+    __ bind(&allocate_return);
+    Register scratch3 = r5;
+    __ AllocateHeapNumberWithValue(scratch3, d0, scratch1, scratch2,
+                                   heap_number_map, runtime.entry_label());
+    __ mov(base, scratch3);
+    done.Jump();
+
+    runtime.Bind();
+
+    // Push the arguments back for the runtime call.
+    frame_->EmitPush(base);
+    frame_->EmitPush(exponent);
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    __ Move(base, r0);
+
+    done.Bind();
+    frame_->EmitPush(base);
+  }
 }
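
The bit loop above is binary exponentiation: d1 tracks base^(2^i) while d0
accumulates the product over the set bits of |exponent|. A hypothetical C++
rendering of the same loop (illustrative only; the stub operates on VFP
doubles and bails to the runtime when 1/result would divide an infinity):

    double powi(double base, int exponent) {
      unsigned n = exponent < 0 ? -exponent : exponent;  // rsb ..., mi
      double result = 1.0;                               // d0
      double square = base;                              // d1 == base^(2^i)
      while (n != 0) {
        if (n & 1) result *= square;  // vmul(d0, d0, d1, cs)
        n >>= 1;                      // mov(scratch1, LSR 1, SetCC)
        square *= square;             // vmul(d1, d1, d1, ne)
      }
      return exponent < 0 ? 1.0 / result : result;       // vdiv(d0, d2, d0)
    }

The exponents -0.5 and 0.5 take the separate heap-number path above because
the loop only handles integral smis; there 1/sqrt(x) is computed as
sqrt(1/x), needing a single vdiv followed by vsqrt.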
 
 
-// Generates the Math.sqrt method - currently just calls runtime.
+// Generates the Math.sqrt method.
 void CodeGenerator::GenerateMathSqrt(ZoneList<Expression*>* args) {
   ASSERT(args->length() == 1);
   Load(args->at(0));
-  frame_->CallRuntime(Runtime::kMath_sqrt, 1);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get the value from the frame.
+    Register tos = frame_->PopToRegister();
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    Register heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+
+    // Get the double value from the heap number into vfp register d0.
+    __ ObjectToDoubleVFPRegister(tos, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label());
+
+    // Calculate the square root of d0 and place result in a heap number object.
+    __ vsqrt(d0, d0);
+    __ AllocateHeapNumberWithValue(
+        tos, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
+    done.Jump();
+
+    runtime.Bind();
+    // Push the argument back for the runtime call.
+    frame_->EmitPush(tos);
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    __ Move(tos, r0);
+
+    done.Bind();
+    frame_->EmitPush(tos);
+  }
 }
 
 
@@ -5201,7 +5364,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   Comment cmnt(masm_, "[ UnaryOperation");
 
   Token::Value op = node->op();
@@ -5273,8 +5435,7 @@
         break;
 
       case Token::SUB: {
-        VirtualFrame::SpilledScope spilled(frame_);
-        frame_->EmitPop(r0);
+        frame_->PopToR0();
         GenericUnaryOpStub stub(Token::SUB, overwrite);
         frame_->CallStub(&stub, 0);
         frame_->EmitPush(r0);  // r0 has result
@@ -5282,23 +5443,28 @@
       }
 
       case Token::BIT_NOT: {
-        // smi check
-        VirtualFrame::SpilledScope spilled(frame_);
-        frame_->EmitPop(r0);
-        JumpTarget smi_label;
+        Register tos = frame_->PopToRegister();
+        JumpTarget not_smi_label;
         JumpTarget continue_label;
-        __ tst(r0, Operand(kSmiTagMask));
-        smi_label.Branch(eq);
+        // Smi check.
+        __ tst(tos, Operand(kSmiTagMask));
+        not_smi_label.Branch(ne);
 
-        GenericUnaryOpStub stub(Token::BIT_NOT, overwrite);
-        frame_->CallStub(&stub, 0);
+        __ mvn(tos, Operand(tos));
+        __ bic(tos, tos, Operand(kSmiTagMask));  // Bit-clear inverted smi-tag.
+        frame_->EmitPush(tos);
+        // The fast case is the first to jump to the continue label, so it gets
+        // to decide the virtual frame layout.
         continue_label.Jump();
 
-        smi_label.Bind();
-        __ mvn(r0, Operand(r0));
-        __ bic(r0, r0, Operand(kSmiTagMask));  // bit-clear inverted smi-tag
+        not_smi_label.Bind();
+        frame_->SpillAll();
+        __ Move(r0, tos);
+        GenericUnaryOpStub stub(Token::BIT_NOT, overwrite);
+        frame_->CallStub(&stub, 0);
+        frame_->EmitPush(r0);
+
         continue_label.Bind();
-        frame_->EmitPush(r0);  // r0 has result
         break;
       }
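
The mvn/bic pair is the entire inline BIT_NOT for smis: inverting a tagged
smi also flips the tag bit to 1, and clearing it again restores a valid tag.
A worked example: the smi 5 is tagged as binary ...01010;

    mvn:  ~...01010        ==  ...10101   (inverted tag bit set)
    bic:   ...10101 & ~1   ==  ...10100   ==  Smi(-6)  ==  Smi(~5)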
 
@@ -5308,16 +5474,16 @@
         break;
 
       case Token::ADD: {
-        VirtualFrame::SpilledScope spilled(frame_);
-        frame_->EmitPop(r0);
+        Register tos = frame_->Peek();
         // Smi check.
         JumpTarget continue_label;
-        __ tst(r0, Operand(kSmiTagMask));
+        __ tst(tos, Operand(kSmiTagMask));
         continue_label.Branch(eq);
-        frame_->EmitPush(r0);
+
         frame_->InvokeBuiltin(Builtins::TO_NUMBER, CALL_JS, 1);
+        frame_->EmitPush(r0);
+
         continue_label.Bind();
-        frame_->EmitPush(r0);  // r0 has result
         break;
       }
       default:
@@ -5335,6 +5501,7 @@
   int original_height = frame_->height();
 #endif
   Comment cmnt(masm_, "[ CountOperation");
+  VirtualFrame::RegisterAllocationScope scope(this);
 
   bool is_postfix = node->is_postfix();
   bool is_increment = node->op() == Token::INC;
@@ -5358,7 +5525,7 @@
       __ sub(value, value, Operand(Smi::FromInt(1)));
     }
     frame_->EmitPush(value);
-    target.SetValue(NOT_CONST_INIT);
+    target.SetValue(NOT_CONST_INIT, LIKELY_SMI);
     if (is_postfix) frame_->Pop();
     ASSERT_EQ(original_height + 1, frame_->height());
     return;
@@ -5457,7 +5624,7 @@
     // Set the target with the result, leaving the result on
     // top of the stack.  Removes the target from the stack if
     // it has a non-zero size.
-    if (!is_const) target.SetValue(NOT_CONST_INIT);
+    if (!is_const) target.SetValue(NOT_CONST_INIT, LIKELY_SMI);
   }
 
   // Postfix: Discard the new value and use the old.
@@ -5478,7 +5645,6 @@
   // after evaluating the left hand side (due to the shortcut
   // semantics), but the compiler must (statically) know if the result
   // of compiling the binary operation is materialized or not.
-  VirtualFrame::SpilledScope spilled_scope(frame_);
   if (node->op() == Token::AND) {
     JumpTarget is_true;
     LoadCondition(node->left(), &is_true, false_target(), false);
@@ -5663,8 +5829,6 @@
     if (left_is_null || right_is_null) {
       Load(left_is_null ? right : left);
       Register tos = frame_->PopToRegister();
-      // JumpTargets can't cope with register allocation yet.
-      frame_->SpillAll();
       __ LoadRoot(ip, Heap::kNullValueRootIndex);
       __ cmp(tos, ip);
 
@@ -5707,9 +5871,6 @@
     LoadTypeofExpression(operation->expression());
     Register tos = frame_->PopToRegister();
 
-    // JumpTargets can't cope with register allocation yet.
-    frame_->SpillAll();
-
     Register scratch = VirtualFrame::scratch0();
 
     if (check->Equals(Heap::number_symbol())) {
@@ -5830,7 +5991,6 @@
       break;
 
     case Token::IN: {
-      VirtualFrame::SpilledScope scope(frame_);
       Load(left);
       Load(right);
       frame_->InvokeBuiltin(Builtins::IN, CALL_JS, 2);
@@ -5839,7 +5999,6 @@
     }
 
     case Token::INSTANCEOF: {
-      VirtualFrame::SpilledScope scope(frame_);
       Load(left);
       Load(right);
       InstanceofStub stub;
@@ -5937,10 +6096,15 @@
 };
 
 
+// Takes the key and the receiver in r0 and r1 or vice versa.  Returns the
+// result in r0.
 void DeferredReferenceGetKeyedValue::Generate() {
   ASSERT((key_.is(r0) && receiver_.is(r1)) ||
          (key_.is(r1) && receiver_.is(r0)));
 
+  VirtualFrame copied_frame(*frame_state()->frame());
+  copied_frame.SpillAll();
+
   Register scratch1 = VirtualFrame::scratch0();
   Register scratch2 = VirtualFrame::scratch1();
   __ DecrementCounter(&Counters::keyed_load_inline, 1, scratch1, scratch2);
@@ -5961,6 +6125,13 @@
     // keyed load has been inlined.
     __ nop(PROPERTY_ACCESS_INLINED);
 
+    // Now go back to the frame that we entered with.  This will not overwrite
+    // the receiver or key registers since they were not in use when we came
+    // in.  The instructions emitted by this merge are skipped over by the
+    // inline load patching mechanism when looking for the branch instruction
+    // that tells it where the code to patch is.
+    copied_frame.MergeTo(frame_state()->frame());
+
     // Block the constant pool for one more instruction after leaving this
     // constant pool block scope to include the branch instruction ending the
     // deferred code.
@@ -6114,7 +6285,6 @@
     bool key_is_known_smi = frame_->KnownSmiAt(0);
     Register key = frame_->PopToRegister();
     Register receiver = frame_->PopToRegister(key);
-    VirtualFrame::SpilledScope spilled(frame_);
 
     // The deferred code expects key and receiver in registers.
     DeferredReferenceGetKeyedValue* deferred =
@@ -6152,10 +6322,12 @@
       // Get the elements array from the receiver and check that it
       // is not a dictionary.
       __ ldr(scratch1, FieldMemOperand(receiver, JSObject::kElementsOffset));
-      __ ldr(scratch2, FieldMemOperand(scratch1, JSObject::kMapOffset));
-      __ LoadRoot(ip, Heap::kFixedArrayMapRootIndex);
-      __ cmp(scratch2, ip);
-      deferred->Branch(ne);
+      if (FLAG_debug_code) {
+        __ ldr(scratch2, FieldMemOperand(scratch1, JSObject::kMapOffset));
+        __ LoadRoot(ip, Heap::kFixedArrayMapRootIndex);
+        __ cmp(scratch2, ip);
+        __ Assert(eq, "JSObject with fast elements map has slow elements");
+      }
 
       // Check that key is within bounds. Use unsigned comparison to handle
       // negative keys.
@@ -6176,7 +6348,7 @@
 
       __ mov(r0, scratch1);
       // Make sure that the expected number of instructions are generated.
-      ASSERT_EQ(kInlinedKeyedLoadInstructionsAfterPatch,
+      ASSERT_EQ(GetInlinedKeyedLoadInstructionsAfterPatch(),
                 masm_->InstructionsGeneratedSince(&check_inlined_codesize));
     }
 
@@ -6185,7 +6357,8 @@
 }
 
 
-void CodeGenerator::EmitKeyedStore(StaticType* key_type) {
+void CodeGenerator::EmitKeyedStore(StaticType* key_type,
+                                   WriteBarrierCharacter wb_info) {
   // Generate inlined version of the keyed store if the code is in a loop
   // and the key is likely to be a smi.
   if (loop_nesting() > 0 && key_type->IsLikelySmi()) {
@@ -6201,12 +6374,21 @@
     __ IncrementCounter(&Counters::keyed_store_inline, 1,
                         scratch1, scratch2);
 
+
+
     // Load the value, key and receiver from the stack.
+    bool value_is_harmless = frame_->KnownSmiAt(0);
+    if (wb_info == NEVER_NEWSPACE) value_is_harmless = true;
+    bool key_is_smi = frame_->KnownSmiAt(1);
     Register value = frame_->PopToRegister();
     Register key = frame_->PopToRegister(value);
+    VirtualFrame::SpilledScope spilled(frame_);
     Register receiver = r2;
     frame_->EmitPop(receiver);
-    VirtualFrame::SpilledScope spilled(frame_);
+
+#ifdef DEBUG
+    bool we_remembered_the_write_barrier = value_is_harmless;
+#endif
 
     // The deferred code expects value, key and receiver in registers.
     DeferredReferenceSetKeyedValue* deferred =
@@ -6214,12 +6396,23 @@
 
     // Check that the value is a smi. As this inlined code does not set the
     // write barrier it is only possible to store smi values.
-    __ tst(value, Operand(kSmiTagMask));
-    deferred->Branch(ne);
+    if (!value_is_harmless) {
+      // If the value is not likely to be a Smi then let's test the fixed array
+      // for new space instead.  See below.
+      if (wb_info == LIKELY_SMI) {
+        __ tst(value, Operand(kSmiTagMask));
+        deferred->Branch(ne);
+#ifdef DEBUG
+        we_remembered_the_write_barrier = true;
+#endif
+      }
+    }
 
-    // Check that the key is a smi.
-    __ tst(key, Operand(kSmiTagMask));
-    deferred->Branch(ne);
+    if (!key_is_smi) {
+      // Check that the key is a smi.
+      __ tst(key, Operand(kSmiTagMask));
+      deferred->Branch(ne);
+    }
 
     // Check that the receiver is a heap object.
     __ tst(receiver, Operand(kSmiTagMask));
@@ -6235,24 +6428,35 @@
     __ cmp(scratch1, key);
     deferred->Branch(ls);  // Unsigned less equal.
 
+    // Get the elements array from the receiver.
+    __ ldr(scratch1, FieldMemOperand(receiver, JSObject::kElementsOffset));
+    if (!value_is_harmless && wb_info != LIKELY_SMI) {
+      Label ok;
+      __ and_(scratch2, scratch1, Operand(ExternalReference::new_space_mask()));
+      __ cmp(scratch2, Operand(ExternalReference::new_space_start()));
+      __ tst(value, Operand(kSmiTagMask), ne);
+      deferred->Branch(ne);
+#ifdef DEBUG
+      we_remembered_the_write_barrier = true;
+#endif
+    }
+    // Check that the elements array is not a dictionary.
+    __ ldr(scratch2, FieldMemOperand(scratch1, JSObject::kMapOffset));
     // The following instructions are the part of the inlined store keyed
     // property code which can be patched. Therefore the exact number of
     // instructions generated need to be fixed, so the constant pool is blocked
     // while generating this code.
     { Assembler::BlockConstPoolScope block_const_pool(masm_);
-      // Get the elements array from the receiver and check that it
-      // is not a dictionary.
-      __ ldr(scratch1, FieldMemOperand(receiver, JSObject::kElementsOffset));
-      __ ldr(scratch2, FieldMemOperand(scratch1, JSObject::kMapOffset));
+#ifdef DEBUG
+      Label check_inlined_codesize;
+      masm_->bind(&check_inlined_codesize);
+#endif
+
       // Read the fixed array map from the constant pool (not from the root
       // array) so that the value can be patched.  When debugging, we patch this
       // comparison to always fail so that we will hit the IC call in the
       // deferred code which will allow the debugger to break for fast case
       // stores.
-#ifdef DEBUG
-    Label check_inlined_codesize;
-    masm_->bind(&check_inlined_codesize);
-#endif
       __ mov(scratch3, Operand(Factory::fixed_array_map()));
       __ cmp(scratch2, scratch3);
       deferred->Branch(ne);
@@ -6269,6 +6473,8 @@
                 masm_->InstructionsGeneratedSince(&check_inlined_codesize));
     }
 
+    ASSERT(we_remembered_the_write_barrier);
+
     deferred->BindExit();
   } else {
     frame()->CallKeyedStoreIC();
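
The wb_info hint consumed here is produced by the callers changed earlier in
the patch; pulled together, the classification is (a paraphrase of that code,
not a new API):

    WriteBarrierCharacter wb_info =
        value->AsLiteral() != NULL
            ? NEVER_NEWSPACE  // literals never require the barrier
            : (value->type()->IsLikelySmi() ? LIKELY_SMI : UNLIKELY_SMI);

LIKELY_SMI keeps the cheap inline smi test on the value; UNLIKELY_SMI swaps
it for the new-space test on the elements array, so a store into a new-space
array proceeds inline while any other non-smi store bails to the deferred
code and takes the write barrier.
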
@@ -6284,7 +6490,6 @@
 #undef __
 #define __ ACCESS_MASM(masm)
 
-
 Handle<String> Reference::GetName() {
   ASSERT(type_ == NAMED);
   Property* property = expression_->AsProperty();
@@ -6367,7 +6572,7 @@
 }
 
 
-void Reference::SetValue(InitState init_state) {
+void Reference::SetValue(InitState init_state, WriteBarrierCharacter wb_info) {
   ASSERT(!is_illegal());
   ASSERT(!cgen_->has_cc());
   MacroAssembler* masm = cgen_->masm();
@@ -6399,7 +6604,7 @@
       Property* property = expression_->AsProperty();
       ASSERT(property != NULL);
       cgen_->CodeForSourcePosition(property->position());
-      cgen_->EmitKeyedStore(property->key()->type());
+      cgen_->EmitKeyedStore(property->key()->type(), wb_info);
       frame->EmitPush(r0);
       set_unloaded();
       break;
@@ -6648,7 +6853,7 @@
   __ bind(&not_special);
   // Count leading zeros.  Uses mantissa for a scratch register on pre-ARM5.
   // Gets the wrong answer for 0, but we already checked for that case above.
-  __ CountLeadingZeros(source_, mantissa, zeros_);
+  __ CountLeadingZeros(zeros_, source_, mantissa);
   // Compute exponent and or it into the exponent register.
   // We use mantissa as a scratch register here.  Use a fudge factor to
   // divide the constant 31 + HeapNumber::kExponentBias, 0x41d, into two parts
@@ -7073,22 +7278,42 @@
 
 
 // Fast negative check for symbol-to-symbol equality.
-static void EmitCheckForSymbols(MacroAssembler* masm, Label* slow) {
+static void EmitCheckForSymbolsOrObjects(MacroAssembler* masm,
+                                         Label* possible_strings,
+                                         Label* not_both_strings) {
   // r2 is object type of r0.
   // Ensure that no non-strings have the symbol bit set.
-  ASSERT(kNotStringTag + kIsSymbolMask > LAST_TYPE);
+  Label object_test;
   ASSERT(kSymbolTag != 0);
+  __ tst(r2, Operand(kIsNotStringMask));
+  __ b(ne, &object_test);
   __ tst(r2, Operand(kIsSymbolMask));
-  __ b(eq, slow);
-  __ ldr(r3, FieldMemOperand(r1, HeapObject::kMapOffset));
-  __ ldrb(r3, FieldMemOperand(r3, Map::kInstanceTypeOffset));
+  __ b(eq, possible_strings);
+  __ CompareObjectType(r1, r3, r3, FIRST_NONSTRING_TYPE);
+  __ b(ge, not_both_strings);
   __ tst(r3, Operand(kIsSymbolMask));
-  __ b(eq, slow);
+  __ b(eq, possible_strings);
 
   // Both are symbols.  We already checked they weren't the same pointer
   // so they are not equal.
   __ mov(r0, Operand(1));   // Non-zero indicates not equal.
   __ mov(pc, Operand(lr));  // Return.
+
+  __ bind(&object_test);
+  __ cmp(r2, Operand(FIRST_JS_OBJECT_TYPE));
+  __ b(lt, not_both_strings);
+  __ CompareObjectType(r1, r2, r3, FIRST_JS_OBJECT_TYPE);
+  __ b(lt, not_both_strings);
+  // If both objects are undetectable, they are equal.  Otherwise, they
+  // are not equal, since they are different objects and an object is not
+  // equal to undefined.
+  __ ldr(r3, FieldMemOperand(r0, HeapObject::kMapOffset));
+  __ ldrb(r2, FieldMemOperand(r2, Map::kBitFieldOffset));
+  __ ldrb(r3, FieldMemOperand(r3, Map::kBitFieldOffset));
+  __ and_(r0, r2, Operand(r3));
+  __ and_(r0, r0, Operand(1 << Map::kIsUndetectable));
+  __ eor(r0, r0, Operand(1 << Map::kIsUndetectable));
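+  // r0 is now zero only if both bit fields had the undetectable bit set;
+  // zero signals equality to the caller.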
+  __ mov(pc, Operand(lr));  // Return.
 }
 
 
@@ -7204,7 +7429,8 @@
 
 
 void RecordWriteStub::Generate(MacroAssembler* masm) {
-  __ RecordWriteHelper(object_, Operand(offset_), offset_, scratch_);
+  __ add(offset_, object_, Operand(offset_));
+  __ RecordWriteHelper(object_, offset_, scratch_);
   __ Ret();
 }
 
@@ -7301,9 +7527,10 @@
   // In the strict case the EmitStrictTwoHeapObjectCompare already took care of
   // symbols.
   if (cc_ == eq && !strict_) {
-    // Either jumps to slow or returns the answer.  Assumes that r2 is the type
-    // of r0 on entry.
-    EmitCheckForSymbols(masm, &flat_string_check);
+    // Returns an answer for two symbols or two detectable objects.
+    // Otherwise jumps to string case or not both strings case.
+    // Assumes that r2 is the type of r0 on entry.
+    EmitCheckForSymbolsOrObjects(masm, &flat_string_check, &slow);
   }
 
   // Check for both being sequential ASCII strings, and inline if that is the
@@ -7377,7 +7604,7 @@
 
     // If we have floating point hardware, inline ADD, SUB, MUL, and DIV,
     // using registers d7 and d6 for the double values.
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       __ mov(r7, Operand(rhs, ASR, kSmiTagSize));
       __ vmov(s15, r7);
@@ -7385,8 +7612,12 @@
       __ mov(r7, Operand(lhs, ASR, kSmiTagSize));
       __ vmov(s13, r7);
       __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+        __ vmov(r0, r1, d6);
+      }
     } else {
-      // Write Smi from rhs to r3 and r2 in double format.  r3 is scratch.
+      // Write Smi from rhs to r3 and r2 in double format.  r9 is scratch.
       __ mov(r7, Operand(rhs));
       ConvertToDoubleStub stub1(r3, r2, r7, r9);
       __ push(lr);
@@ -7461,12 +7692,15 @@
     __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
     }
 
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       // Convert smi in r0 to double in d7.
       __ mov(r7, Operand(r0, ASR, kSmiTagSize));
       __ vmov(s15, r7);
       __ vcvt_f64_s32(d7, s15);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+      }
     } else {
       // Write Smi from r0 to r3 and r2 in double format.
       __ mov(r7, Operand(r0));
@@ -7517,12 +7751,15 @@
     __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
     }
 
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       // Convert smi in r1 to double in d6.
       __ mov(r7, Operand(r1, ASR, kSmiTagSize));
       __ vmov(s13, r7);
       __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r0, r1, d6);
+      }
     } else {
       // Write Smi from r1 to r1 and r0 in double format.
       __ mov(r7, Operand(r1));
@@ -7806,7 +8043,11 @@
       // The code below for writing into heap numbers isn't capable of writing
       // the register as an unsigned int so we go to slow case if we hit this
       // case.
-      __ b(mi, &slow);
+      if (CpuFeatures::IsSupported(VFP3)) {
+        __ b(mi, &result_not_a_smi);
+      } else {
+        __ b(mi, &slow);
+      }
       break;
     case Token::SHL:
       // Use only the 5 least significant bits of the shift count.
@@ -7850,10 +8091,24 @@
   // result.
   __ mov(r0, Operand(r5));
 
-  // Tail call that writes the int32 in r2 to the heap number in r0, using
-  // r3 as scratch.  r0 is preserved and returned.
-  WriteInt32ToHeapNumberStub stub(r2, r0, r3);
-  __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET);
+  if (CpuFeatures::IsSupported(VFP3)) {
+    // Convert the int32 in r2 to the heap number in r0. r3 is corrupted.
+    CpuFeatures::Scope scope(VFP3);
+    __ vmov(s0, r2);
+    if (op_ == Token::SHR) {
+      __ vcvt_f64_u32(d0, s0);
+    } else {
+      __ vcvt_f64_s32(d0, s0);
+    }
+    __ sub(r3, r0, Operand(kHeapObjectTag));
+    __ vstr(d0, r3, HeapNumber::kValueOffset);
+    __ Ret();
+  } else {
+    // Tail call that writes the int32 in r2 to the heap number in r0, using
+    // r3 as scratch.  r0 is preserved and returned.
+    WriteInt32ToHeapNumberStub stub(r2, r0, r3);
+    __ TailCallStub(&stub);
+  }
 
   if (mode_ != NO_OVERWRITE) {
     __ bind(&have_to_allocate);
@@ -7969,6 +8224,173 @@
 }
 
 
+// This uses versions of the sum-of-digits-to-see-if-a-number-is-divisible-by-3
+// trick.  See http://en.wikipedia.org/wiki/Divisibility_rule
+// Takes the sum of the digits base (mask + 1) repeatedly until we have a
+// number from 0 to mask.  On exit the 'eq' condition flags are set if the
+// answer is exactly the mask.
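+// For example, with mask 3 (base 4): 13 = b1101 has base-4 digits 3 and 1,
+// which sum to 4 = b100, whose digits sum to 1, and indeed 13 % 3 == 1.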
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              int mask,
+                              int shift,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures the mask fits in an immediate
+                         // operand, so we don't need ip to hold it.
+  Label loop;
+  __ bind(&loop);
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
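+// As above, but for multiples with a hole in their binary representation,
+// e.g. 5 * 11 = b110111.  For these (mask + 1) is congruent to
+// 1 + (1 << (shift1 - shift2)) modulo the multiple (64 = 55 + 9 and
+// 9 = 1 + 8), so the high digit is added in twice: once plain and once
+// shifted.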
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              Register scratch,
+                              int mask,
+                              int shift1,
+                              int shift2,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures the mask fits in an immediate
+                         // operand, so we don't need ip to hold it.
+  Label loop;
+  __ bind(&loop);
+  __ bic(scratch, lhs, Operand(mask));
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift1));
+  __ add(lhs, lhs, Operand(scratch, LSR, shift2));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
+// Splits the number into two halves (bottom half has shift bits).  The top
+// half is subtracted from the bottom half.  If the result is negative then
+// rhs is added.
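+// This works because (1 << shift) is congruent to -1 modulo rhs.  E.g. for
+// shift 2 and rhs 5: 14 = b1110 splits into hi 3 and lo 2, 2 - 3 = -1 is
+// negative so 5 is added, giving 4, which is indeed 14 % 5.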
+void IntegerModStub::ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                                Register lhs,
+                                                int shift,
+                                                int rhs) {
+  int mask = (1 << shift) - 1;
+  __ and_(ip, lhs, Operand(mask));
+  __ sub(lhs, ip, Operand(lhs, LSR, shift), SetCC);
+  __ add(lhs, lhs, Operand(rhs), LeaveCC, mi);
+}
+
+
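+// Reduces lhs, known to be at most max, modulo denominator by conditionally
+// subtracting power-of-two multiples of the denominator, largest first.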
+void IntegerModStub::ModReduce(MacroAssembler* masm,
+                               Register lhs,
+                               int max,
+                               int denominator) {
+  int limit = denominator;
+  while (limit * 2 <= max) limit *= 2;
+  while (limit >= denominator) {
+    __ cmp(lhs, Operand(limit));
+    __ sub(lhs, lhs, Operand(limit), LeaveCC, ge);
+    limit >>= 1;
+  }
+}
+
+
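+// Assembles the final answer: the numerator bits saved in mask_bits go back
+// in below the residue of the odd part, shifted up by the power of two.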
+void IntegerModStub::ModAnswer(MacroAssembler* masm,
+                               Register result,
+                               Register shift_distance,
+                               Register mask_bits,
+                               Register sum_of_digits) {
+  __ add(result, mask_bits, Operand(sum_of_digits, LSL, shift_distance));
+  __ Ret();
+}
+
+
+// See comment for class.
+void IntegerModStub::Generate(MacroAssembler* masm) {
+  __ mov(lhs_, Operand(lhs_, LSR, shift_distance_));
+  __ bic(odd_number_, odd_number_, Operand(1));
+  __ mov(odd_number_, Operand(odd_number_, LSL, 1));
+  // We now have (odd_number_ - 1) * 2 in the register.
+  // Build a switch out of branches instead of data because it avoids
+  // having to teach the assembler about intra-code-object pointers
+  // that are not in relative branch instructions.
+  Label mod3, mod5, mod7, mod9, mod11, mod13, mod15, mod17, mod19;
+  Label mod21, mod23, mod25;
+  { Assembler::BlockConstPoolScope block_const_pool(masm);
+    __ add(pc, pc, Operand(odd_number_));
+    // When you read pc it is always 8 ahead, but when you write it you always
+    // write the actual value.  So we put in two nops to take up the slack.
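+    // E.g. odd_number_ 3 became (3 - 1) * 2 = 4 above, so the add targets
+    // the instruction 12 bytes past itself: b(&mod3), just after the nops.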
+    __ nop();
+    __ nop();
+    __ b(&mod3);
+    __ b(&mod5);
+    __ b(&mod7);
+    __ b(&mod9);
+    __ b(&mod11);
+    __ b(&mod13);
+    __ b(&mod15);
+    __ b(&mod17);
+    __ b(&mod19);
+    __ b(&mod21);
+    __ b(&mod23);
+    __ b(&mod25);
+  }
+
+  // For each denominator we find a multiple that is almost only ones
+  // when expressed in binary.  Then we do the sum-of-digits trick for
+  // that number.  If the multiple is not 1 then we have to do a little
+// more work afterwards to get the answer into the 0 to denominator-1
+  // range.
+  DigitSum(masm, lhs_, 3, 2, &mod3);  // 3 = b11.
+  __ sub(lhs_, lhs_, Operand(3), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod5);  // 5 * 3 = b1111.
+  ModGetInRangeBySubtraction(masm, lhs_, 2, 5);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 7, 3, &mod7);  // 7 = b111.
+  __ sub(lhs_, lhs_, Operand(7), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod9);  // 7 * 9 = b111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 3, 9);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x3f, 6, 3, &mod11);  // 5 * 11 = b110111.
+  ModReduce(masm, lhs_, 0x3f, 11);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod13);  // 19 * 13 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 13);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod15);  // 15 = b1111.
+  __ sub(lhs_, lhs_, Operand(15), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xff, 8, &mod17);  // 15 * 17 = b11111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 4, 17);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod19);  // 13 * 19 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 19);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod21);  // 3 * 21 = b111111.
+  ModReduce(masm, lhs_, 0x3f, 21);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 7, &mod23);  // 11 * 23 = b11111101.
+  ModReduce(masm, lhs_, 0xff, 23);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x7f, 7, 6, &mod25);  // 5 * 25 = b1111101.
+  ModReduce(masm, lhs_, 0x7f, 25);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+}
+
+
 const char* GenericBinaryOpStub::GetName() {
   if (name_ != NULL) return name_;
   const int len = 100;
@@ -8096,7 +8518,7 @@
     case Token::MOD: {
       Label not_smi;
       if (ShouldGenerateSmiCode() && specialized_on_rhs_) {
-        Label smi_is_unsuitable;
+        Label lhs_is_unsuitable;
         __ BranchOnNotSmi(lhs, &not_smi);
         if (IsPowerOf2(constant_rhs_)) {
           if (op_ == Token::MOD) {
@@ -8117,14 +8539,14 @@
             __ eor(rhs, rhs, Operand(0x80000000u), SetCC);
             // Next two instructions are conditional on the answer being -0.
             __ mov(rhs, Operand(Smi::FromInt(constant_rhs_)), LeaveCC, eq);
-            __ b(eq, &smi_is_unsuitable);
+            __ b(eq, &lhs_is_unsuitable);
             // We need to subtract the dividend.  Eg. -3 % 4 == -3.
             __ sub(result, rhs, Operand(Smi::FromInt(constant_rhs_)));
           } else {
             ASSERT(op_ == Token::DIV);
             __ tst(lhs,
                    Operand(0x80000000u | ((constant_rhs_ << kSmiTagSize) - 1)));
-            __ b(ne, &smi_is_unsuitable);  // Go slow on negative or remainder.
+            __ b(ne, &lhs_is_unsuitable);  // Go slow on negative or remainder.
             int shift = 0;
             int d = constant_rhs_;
             while ((d & 1) == 0) {
@@ -8137,7 +8559,7 @@
         } else {
           // Not a power of 2.
           __ tst(lhs, Operand(0x80000000u));
-          __ b(ne, &smi_is_unsuitable);
+          __ b(ne, &lhs_is_unsuitable);
           // Find a fixed point reciprocal of the divisor so we can divide by
           // multiplying.
           double divisor = 1.0 / constant_rhs_;
@@ -8172,7 +8594,7 @@
           // (lhs / rhs) where / indicates integer division.
           if (op_ == Token::DIV) {
             __ cmp(lhs, Operand(scratch, LSL, required_scratch_shift));
-            __ b(ne, &smi_is_unsuitable);  // There was a remainder.
+            __ b(ne, &lhs_is_unsuitable);  // There was a remainder.
             __ mov(result, Operand(scratch2, LSL, kSmiTagSize));
           } else {
             ASSERT(op_ == Token::MOD);
@@ -8180,14 +8602,21 @@
           }
         }
         __ Ret();
-        __ bind(&smi_is_unsuitable);
+        __ bind(&lhs_is_unsuitable);
       } else if (op_ == Token::MOD &&
                  runtime_operands_type_ != BinaryOpIC::HEAP_NUMBERS &&
                  runtime_operands_type_ != BinaryOpIC::STRINGS) {
         // Do generate a bit of smi code for modulus even though the default for
         // modulus is not to do it, but as the ARM processor has no coprocessor
-        // support for modulus checking for smis makes sense.
+        // support for modulus, checking for smis makes sense.  We can handle
+        // 1 to 25 times any power of 2.  This covers over half the numbers from
+        // 1 to 100, including all of the first 25.  (Actually the constants < 10
+        // are handled above by reciprocal multiplication.  We only get here for
+        // those cases if the right hand side is not a constant or for cases
+        // like 192 which is 3*2^6 and ends up in the 3 case in the integer mod
+        // stub.)
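+        // For example, rhs 40 = 5 * 8 lands in the mod-5 case with a shift
+        // distance of 4 (3 for the power of 2 plus 1 for the smi tag).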
         Label slow;
+        Label not_power_of_2;
         ASSERT(!ShouldGenerateSmiCode());
         ASSERT(kSmiTag == 0);  // Adjust code below.
         // Check for two positive smis.
@@ -8195,13 +8624,42 @@
         __ tst(smi_test_reg, Operand(0x80000000u | kSmiTagMask));
         __ b(ne, &slow);
         // Check that rhs is a power of two and not zero.
+        Register mask_bits = r3;
         __ sub(scratch, rhs, Operand(1), SetCC);
         __ b(mi, &slow);
-        __ tst(rhs, scratch);
-        __ b(ne, &slow);
+        __ and_(mask_bits, rhs, Operand(scratch), SetCC);
+        __ b(ne, &not_power_of_2);
         // Calculate power of two modulus.
         __ and_(result, lhs, Operand(scratch));
         __ Ret();
+
+        __ bind(&not_power_of_2);
+        __ eor(scratch, scratch, Operand(mask_bits));
+        // At least two bits are set in the modulus.  The high one(s) are in
+        // mask_bits and the low one is scratch + 1.
+        __ and_(mask_bits, scratch, Operand(lhs));
+        Register shift_distance = scratch;
+        scratch = no_reg;
+
+        // The rhs consists of a power of 2 multiplied by some odd number.
+        // The power-of-2 part we handle by putting the corresponding bits
+        // from the lhs in the mask_bits register, and the power in the
+        // shift_distance register.  Shift distance is never 0 due to Smi
+        // tagging.
+        __ CountLeadingZeros(r4, shift_distance, shift_distance);
+        __ rsb(shift_distance, r4, Operand(32));
+
+        // Now we need to find out what the odd number is. The last bit is
+        // always 1.
+        Register odd_number = r4;
+        __ mov(odd_number, Operand(rhs, LSR, shift_distance));
+        __ cmp(odd_number, Operand(25));
+        __ b(gt, &slow);
+
+        IntegerModStub stub(
+            result, shift_distance, odd_number, mask_bits, lhs, r5);
+        __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET);  // Tail call.
+
         __ bind(&slow);
       }
       HandleBinaryOpSlowCases(
@@ -8499,12 +8957,21 @@
       __ mov(r0, Operand(r2));
     }
 
-    // WriteInt32ToHeapNumberStub does not trigger GC, so we do not
-    // have to set up a frame.
-    WriteInt32ToHeapNumberStub stub(r1, r0, r2);
-    __ push(lr);
-    __ Call(stub.GetCode(), RelocInfo::CODE_TARGET);
-    __ pop(lr);
+    if (CpuFeatures::IsSupported(VFP3)) {
+      // Convert the int32 in r1 to the heap number in r0. r2 is corrupted.
+      CpuFeatures::Scope scope(VFP3);
+      __ vmov(s0, r1);
+      __ vcvt_f64_s32(d0, s0);
+      __ sub(r2, r0, Operand(kHeapObjectTag));
+      __ vstr(d0, r2, HeapNumber::kValueOffset);
+    } else {
+      // WriteInt32ToHeapNumberStub does not trigger GC, so we do not
+      // have to set up a frame.
+      WriteInt32ToHeapNumberStub stub(r1, r0, r2);
+      __ push(lr);
+      __ Call(stub.GetCode(), RelocInfo::CODE_TARGET);
+      __ pop(lr);
+    }
   } else {
     UNIMPLEMENTED();
   }
diff --git a/src/arm/codegen-arm.h b/src/arm/codegen-arm.h
index be4d556..855723d 100644
--- a/src/arm/codegen-arm.h
+++ b/src/arm/codegen-arm.h
@@ -44,6 +44,7 @@
 enum InitState { CONST_INIT, NOT_CONST_INIT };
 enum TypeofState { INSIDE_TYPEOF, NOT_INSIDE_TYPEOF };
 enum GenerateInlineSmi { DONT_GENERATE_INLINE_SMI, GENERATE_INLINE_SMI };
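+// Hint about the value in a keyed store, used to decide how much write
+// barrier code to emit (inferred from usage: a smi needs no barrier, and a
+// value never allocated in new space needs no region mark).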
+enum WriteBarrierCharacter { UNLIKELY_SMI, LIKELY_SMI, NEVER_NEWSPACE };
 
 
 // -------------------------------------------------------------------------
@@ -100,7 +101,7 @@
   // on the expression stack.  The  value is stored in the location specified
   // by the reference, and is left on top of the stack, after the reference
   // is popped from beneath it (unloaded).
-  void SetValue(InitState init_state);
+  void SetValue(InitState init_state, WriteBarrierCharacter wb);
 
   // This is in preparation for something that uses the reference on the stack.
   // If we need this reference afterwards get then dup it now.  Otherwise mark
@@ -276,7 +277,9 @@
   static int InlineRuntimeCallArgumentsCount(Handle<String> name);
 
   // Constants related to patching of inlined load/store.
-  static const int kInlinedKeyedLoadInstructionsAfterPatch = 17;
+  static int GetInlinedKeyedLoadInstructionsAfterPatch() {
+    return FLAG_debug_code ? 27 : 13;
+  }
   static const int kInlinedKeyedStoreInstructionsAfterPatch = 5;
 
  private:
@@ -382,7 +385,7 @@
 
   // Store a keyed property. Key and receiver are on the stack and the value is
   // in r0. Result is returned in r0.
-  void EmitKeyedStore(StaticType* key_type);
+  void EmitKeyedStore(StaticType* key_type, WriteBarrierCharacter wb_info);
 
   void LoadFromGlobalSlotCheckExtensions(Slot* slot,
                                          TypeofState typeof_state,
@@ -879,6 +882,102 @@
 };
 
 
+// This stub can do a fast mod operation without using fp.
+// It is tail called from the GenericBinaryOpStub and it always
+// returns an answer.  It never causes GC so it doesn't need a real frame.
+//
+// The inputs are always positive Smis.  This is never called
+// where the denominator is a power of 2.  We handle that separately.
+//
+// If we consider the denominator as an odd number multiplied by a power of 2,
+// then:
+// * The exponent (power of 2) is in the shift_distance register.
+// * The odd number is in the odd_number register.  It is always in the range
+//   of 3 to 25.
+// * The bits from the numerator that are to be copied to the answer (there are
+//   shift_distance of them) are in the mask_bits register.
+// * The other bits of the numerator have been shifted down and are in the lhs
+//   register.
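+//
+// For example, ignoring the smi tag: for lhs 38 and rhs 24 = 3 * 8 we get
+// shift_distance 3, odd_number 3, mask_bits 38 & 7 = 6 and lhs 38 >> 3 = 4;
+// since 4 % 3 == 1 the answer is (1 << 3) + 6 = 14, which is 38 % 24.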
+class IntegerModStub : public CodeStub {
+ public:
+  IntegerModStub(Register result,
+                 Register shift_distance,
+                 Register odd_number,
+                 Register mask_bits,
+                 Register lhs,
+                 Register scratch)
+      : result_(result),
+        shift_distance_(shift_distance),
+        odd_number_(odd_number),
+        mask_bits_(mask_bits),
+        lhs_(lhs),
+        scratch_(scratch) {
+    // We don't code these in the minor key, so they should always be the same.
+    // We don't really want to fix that since this stub is rather large and we
+    // don't want many copies of it.
+    ASSERT(shift_distance_.is(r9));
+    ASSERT(odd_number_.is(r4));
+    ASSERT(mask_bits_.is(r3));
+    ASSERT(scratch_.is(r5));
+  }
+
+ private:
+  Register result_;
+  Register shift_distance_;
+  Register odd_number_;
+  Register mask_bits_;
+  Register lhs_;
+  Register scratch_;
+
+  // Minor key encoding in 16 bits.
+  class ResultRegisterBits: public BitField<int, 0, 4> {};
+  class LhsRegisterBits: public BitField<int, 4, 4> {};
+
+  Major MajorKey() { return IntegerMod; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return ResultRegisterBits::encode(result_.code())
+           | LhsRegisterBits::encode(lhs_.code());
+  }
+
+  void Generate(MacroAssembler* masm);
+
+  const char* GetName() { return "IntegerModStub"; }
+
+  // Utility functions.
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                int mask,
+                int shift,
+                Label* entry);
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                Register scratch,
+                int mask,
+                int shift1,
+                int shift2,
+                Label* entry);
+  void ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                  Register lhs,
+                                  int shift,
+                                  int rhs);
+  void ModReduce(MacroAssembler* masm,
+                 Register lhs,
+                 int max,
+                 int denominator);
+  void ModAnswer(MacroAssembler* masm,
+                 Register result,
+                 Register shift_distance,
+                 Register mask_bits,
+                 Register sum_of_digits);
+
+#ifdef DEBUG
+  void Print() { PrintF("IntegerModStub\n"); }
+#endif
+};
+
+
 // This stub can convert a signed int32 to a heap number (double).  It does
 // not work for int32s that are in Smi range!  No GC occurs during this stub
 // so you don't have to set up the frame.
diff --git a/src/arm/constants-arm.cc b/src/arm/constants-arm.cc
index 4e186d1..002e4c1 100644
--- a/src/arm/constants-arm.cc
+++ b/src/arm/constants-arm.cc
@@ -85,7 +85,7 @@
 
 const char* VFPRegisters::Name(int reg, bool is_double) {
   ASSERT((0 <= reg) && (reg < kNumVFPRegisters));
-  return names_[reg + is_double ? kNumVFPSingleRegisters : 0];
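+  // '+' binds tighter than '?:', so without the parentheses the old code
+  // indexed at kNumVFPSingleRegisters whenever reg + is_double was non-zero.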
+  return names_[reg + (is_double ? kNumVFPSingleRegisters : 0)];
 }
 
 
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc
index 4005369..a52417b 100644
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -1038,7 +1038,8 @@
 // Dd = vmul(Dn, Dm)
 // Dd = vdiv(Dn, Dm)
 // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
 void Decoder::DecodeTypeVFP(Instr* instr) {
   ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
   ASSERT(instr->Bits(11, 9) == 0x5);
@@ -1046,7 +1047,14 @@
   if (instr->Bit(4) == 0) {
     if (instr->Opc1Field() == 0x7) {
       // Other data processing instructions
-      if ((instr->Opc2Field() == 0x7) && (instr->Opc3Field() == 0x3)) {
+      if ((instr->Opc2Field() == 0x0) && (instr->Opc3Field() == 0x1)) {
+        // vmov register to register.
+        if (instr->SzField() == 0x1) {
+          Format(instr, "vmov.f64'cond 'Dd, 'Dm");
+        } else {
+          Unknown(instr);  // Not used by V8.
+        }
+      } else if ((instr->Opc2Field() == 0x7) && (instr->Opc3Field() == 0x3)) {
         DecodeVCVTBetweenDoubleAndSingle(instr);
       } else if ((instr->Opc2Field() == 0x8) && (instr->Opc3Field() & 0x1)) {
         DecodeVCVTBetweenFloatingPointAndInteger(instr);
@@ -1056,6 +1064,8 @@
       } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                  (instr->Opc3Field() & 0x1)) {
         DecodeVCMP(instr);
+      } else if ((instr->Opc2Field() == 0x1) && (instr->Opc3Field() == 0x3)) {
+        Format(instr, "vsqrt.f64'cond 'Dd, 'Dm");
       } else {
         Unknown(instr);  // Not used by V8.
       }
diff --git a/src/arm/ic-arm.cc b/src/arm/ic-arm.cc
index c6de4d8..97e6148 100644
--- a/src/arm/ic-arm.cc
+++ b/src/arm/ic-arm.cc
@@ -47,71 +47,80 @@
 
 #define __ ACCESS_MASM(masm)
 
-// Helper function used from LoadIC/CallIC GenerateNormal.
-// receiver: Receiver. It is not clobbered if a jump to the miss label is
-//           done
-// name:     Property name. It is not clobbered if a jump to the miss label is
-//           done
-// result:   Register for the result. It is only updated if a jump to the miss
-//           label is not done. Can be the same as receiver or name clobbering
-//           one of these in the case of not jumping to the miss label.
-// The three scratch registers need to be different from the receiver, name and
-// result.
-static void GenerateDictionaryLoad(MacroAssembler* masm,
-                                   Label* miss,
-                                   Register receiver,
-                                   Register name,
-                                   Register result,
-                                   Register scratch1,
-                                   Register scratch2,
-                                   Register scratch3,
-                                   DictionaryCheck check_dictionary) {
-  // Main use of the scratch registers.
-  // scratch1: Used to hold the property dictionary.
-  // scratch2: Used as temporary and to hold the capacity of the property
-  //           dictionary.
-  // scratch3: Used as temporary.
 
-  Label done;
+static void GenerateGlobalInstanceTypeCheck(MacroAssembler* masm,
+                                            Register type,
+                                            Label* global_object) {
+  // Register usage:
+  //   type: holds the receiver instance type on entry.
+  __ cmp(type, Operand(JS_GLOBAL_OBJECT_TYPE));
+  __ b(eq, global_object);
+  __ cmp(type, Operand(JS_BUILTINS_OBJECT_TYPE));
+  __ b(eq, global_object);
+  __ cmp(type, Operand(JS_GLOBAL_PROXY_TYPE));
+  __ b(eq, global_object);
+}
 
-  // Check for the absence of an interceptor.
-  // Load the map into scratch1.
-  __ ldr(scratch1, FieldMemOperand(receiver, JSObject::kMapOffset));
 
-  // Bail out if the receiver has a named interceptor.
-  __ ldrb(scratch2, FieldMemOperand(scratch1, Map::kBitFieldOffset));
-  __ tst(scratch2, Operand(1 << Map::kHasNamedInterceptor));
+// Generated code falls through if the receiver is a regular non-global
+// JS object with slow properties and no interceptors.
+static void GenerateStringDictionaryReceiverCheck(MacroAssembler* masm,
+                                                  Register receiver,
+                                                  Register elements,
+                                                  Register t0,
+                                                  Register t1,
+                                                  Label* miss) {
+  // Register usage:
+  //   receiver: holds the receiver on entry and is unchanged.
+  //   elements: holds the property dictionary on fall through.
+  // Scratch registers:
+  //   t0: used to hold the receiver map.
+  //   t1: used to hold the receiver instance type, receiver bit mask and
+  //       elements map.
+
+  // Check that the receiver isn't a smi.
+  __ tst(receiver, Operand(kSmiTagMask));
+  __ b(eq, miss);
+
+  // Check that the receiver is a valid JS object.
+  __ CompareObjectType(receiver, t0, t1, FIRST_JS_OBJECT_TYPE);
+  __ b(lt, miss);
+
+  // If this assert fails, we have to check upper bound too.
+  ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
+
+  GenerateGlobalInstanceTypeCheck(masm, t1, miss);
+
+  // Check that the receiver does not require access checks and does not have
+  // a named interceptor.
+  __ ldrb(t1, FieldMemOperand(t0, Map::kBitFieldOffset));
+  __ tst(t1, Operand((1 << Map::kIsAccessCheckNeeded) |
+                     (1 << Map::kHasNamedInterceptor)));
   __ b(nz, miss);
 
-  // Bail out if we have a JS global proxy object.
-  __ ldrb(scratch2, FieldMemOperand(scratch1, Map::kInstanceTypeOffset));
-  __ cmp(scratch2, Operand(JS_GLOBAL_PROXY_TYPE));
-  __ b(eq, miss);
+  __ ldr(elements, FieldMemOperand(receiver, JSObject::kPropertiesOffset));
+  __ ldr(t1, FieldMemOperand(elements, HeapObject::kMapOffset));
+  __ LoadRoot(ip, Heap::kHashTableMapRootIndex);
+  __ cmp(t1, ip);
+  __ b(nz, miss);
+}
 
-  // Possible work-around for http://crbug.com/16276.
-  // See also: http://codereview.chromium.org/155418.
-  __ cmp(scratch2, Operand(JS_GLOBAL_OBJECT_TYPE));
-  __ b(eq, miss);
-  __ cmp(scratch2, Operand(JS_BUILTINS_OBJECT_TYPE));
-  __ b(eq, miss);
 
-  // Load the properties array.
-  __ ldr(scratch1, FieldMemOperand(receiver, JSObject::kPropertiesOffset));
-
-  // Check that the properties array is a dictionary.
-  if (check_dictionary == CHECK_DICTIONARY) {
-    __ ldr(scratch2, FieldMemOperand(scratch1, HeapObject::kMapOffset));
-    __ LoadRoot(ip, Heap::kHashTableMapRootIndex);
-    __ cmp(scratch2, ip);
-    __ b(ne, miss);
-  }
-
+// Probe the string dictionary in the |elements| register. Jump to the
+// |done| label if a property with the given name is found. Jump to
+// the |miss| label otherwise.
+static void GenerateStringDictionaryProbes(MacroAssembler* masm,
+                                           Label* miss,
+                                           Label* done,
+                                           Register elements,
+                                           Register name,
+                                           Register scratch1,
+                                           Register scratch2) {
   // Compute the capacity mask.
   const int kCapacityOffset = StringDictionary::kHeaderSize +
       StringDictionary::kCapacityIndex * kPointerSize;
-  __ ldr(scratch2, FieldMemOperand(scratch1, kCapacityOffset));
-  __ mov(scratch2, Operand(scratch2, ASR, kSmiTagSize));  // convert smi to int
-  __ sub(scratch2, scratch2, Operand(1));
+  __ ldr(scratch1, FieldMemOperand(elements, kCapacityOffset));
+  __ mov(scratch1, Operand(scratch1, ASR, kSmiTagSize));  // convert smi to int
+  __ sub(scratch1, scratch1, Operand(1));
 
   const int kElementsStartOffset = StringDictionary::kHeaderSize +
       StringDictionary::kElementsStartIndex * kPointerSize;
@@ -122,44 +131,141 @@
   static const int kProbes = 4;
   for (int i = 0; i < kProbes; i++) {
     // Compute the masked index: (hash + i + i * i) & mask.
-    __ ldr(scratch3, FieldMemOperand(name, String::kHashFieldOffset));
+    __ ldr(scratch2, FieldMemOperand(name, String::kHashFieldOffset));
     if (i > 0) {
       // Add the probe offset (i + i * i) left shifted to avoid right shifting
       // the hash in a separate instruction. The value hash + i + i * i is right
       // shifted in the following and instruction.
       ASSERT(StringDictionary::GetProbeOffset(i) <
              1 << (32 - String::kHashFieldOffset));
-      __ add(scratch3, scratch3, Operand(
+      __ add(scratch2, scratch2, Operand(
           StringDictionary::GetProbeOffset(i) << String::kHashShift));
     }
-    __ and_(scratch3, scratch2, Operand(scratch3, LSR, String::kHashShift));
+    __ and_(scratch2, scratch1, Operand(scratch2, LSR, String::kHashShift));
 
     // Scale the index by multiplying by the element size.
     ASSERT(StringDictionary::kEntrySize == 3);
-    // scratch3 = scratch3 * 3.
-    __ add(scratch3, scratch3, Operand(scratch3, LSL, 1));
+    // scratch2 = scratch2 * 3.
+    __ add(scratch2, scratch2, Operand(scratch2, LSL, 1));
 
     // Check if the key is identical to the name.
-    __ add(scratch3, scratch1, Operand(scratch3, LSL, 2));
-    __ ldr(ip, FieldMemOperand(scratch3, kElementsStartOffset));
+    __ add(scratch2, elements, Operand(scratch2, LSL, 2));
+    __ ldr(ip, FieldMemOperand(scratch2, kElementsStartOffset));
     __ cmp(name, Operand(ip));
     if (i != kProbes - 1) {
-      __ b(eq, &done);
+      __ b(eq, done);
     } else {
       __ b(ne, miss);
     }
   }
+}
 
-  // Check that the value is a normal property.
-  __ bind(&done);  // scratch3 == scratch1 + 4 * index
-  __ ldr(scratch2,
-         FieldMemOperand(scratch3, kElementsStartOffset + 2 * kPointerSize));
-  __ tst(scratch2, Operand(PropertyDetails::TypeField::mask() << kSmiTagSize));
+
+// Helper function used from LoadIC/CallIC GenerateNormal.
+//
+// elements: Property dictionary. It is not clobbered if a jump to the miss
+//           label is done.
+// name:     Property name. It is not clobbered if a jump to the miss label is
+//           done.
+// result:   Register for the result. It is only updated if a jump to the miss
+//           label is not done. Can be the same as elements or name,
+//           clobbering one of these in the case of not jumping to the miss
+//           label.
+// The two scratch registers need to be different from elements, name and
+// result.
+// The generated code assumes that the receiver has slow properties,
+// is not a global object and does not have interceptors.
+static void GenerateDictionaryLoad(MacroAssembler* masm,
+                                   Label* miss,
+                                   Register elements,
+                                   Register name,
+                                   Register result,
+                                   Register scratch1,
+                                   Register scratch2) {
+  // Main use of the scratch registers.
+  // scratch1: Used as temporary and to hold the capacity of the property
+  //           dictionary.
+  // scratch2: Used as temporary.
+  Label done;
+
+  // Probe the dictionary.
+  GenerateStringDictionaryProbes(masm,
+                                 miss,
+                                 &done,
+                                 elements,
+                                 name,
+                                 scratch1,
+                                 scratch2);
+
+  // If probing finds an entry check that the value is a normal
+  // property.
+  __ bind(&done);  // scratch2 == elements + 4 * index
+  const int kElementsStartOffset = StringDictionary::kHeaderSize +
+      StringDictionary::kElementsStartIndex * kPointerSize;
+  const int kDetailsOffset = kElementsStartOffset + 2 * kPointerSize;
+  __ ldr(scratch1, FieldMemOperand(scratch2, kDetailsOffset));
+  __ tst(scratch1, Operand(PropertyDetails::TypeField::mask() << kSmiTagSize));
   __ b(ne, miss);
 
   // Get the value at the masked, scaled index and return.
   __ ldr(result,
-         FieldMemOperand(scratch3, kElementsStartOffset + 1 * kPointerSize));
+         FieldMemOperand(scratch2, kElementsStartOffset + 1 * kPointerSize));
+}
+
+
+// Helper function used from StoreIC::GenerateNormal.
+//
+// elements: Property dictionary. It is not clobbered if a jump to the miss
+//           label is done.
+// name:     Property name. It is not clobbered if a jump to the miss label is
+//           done.
+// value:    The value to store.
+// The two scratch registers need to be different from elements, name and
+// value.
+// The generated code assumes that the receiver has slow properties,
+// is not a global object and does not have interceptors.
+static void GenerateDictionaryStore(MacroAssembler* masm,
+                                    Label* miss,
+                                    Register elements,
+                                    Register name,
+                                    Register value,
+                                    Register scratch1,
+                                    Register scratch2) {
+  // Main use of the scratch registers.
+  // scratch1: Used as temporary and to hold the capacity of the property
+  //           dictionary.
+  // scratch2: Used as temporary.
+  Label done;
+
+  // Probe the dictionary.
+  GenerateStringDictionaryProbes(masm,
+                                 miss,
+                                 &done,
+                                 elements,
+                                 name,
+                                 scratch1,
+                                 scratch2);
+
+  // If probing finds an entry in the dictionary check that the value
+  // is a normal property that is not read only.
+  __ bind(&done);  // scratch2 == elements + 4 * index
+  const int kElementsStartOffset = StringDictionary::kHeaderSize +
+      StringDictionary::kElementsStartIndex * kPointerSize;
+  const int kDetailsOffset = kElementsStartOffset + 2 * kPointerSize;
+  const int kTypeAndReadOnlyMask
+      = (PropertyDetails::TypeField::mask() |
+         PropertyDetails::AttributesField::encode(READ_ONLY)) << kSmiTagSize;
+  __ ldr(scratch1, FieldMemOperand(scratch2, kDetailsOffset));
+  __ tst(scratch1, Operand(kTypeAndReadOnlyMask));
+  __ b(ne, miss);
+
+  // Store the value at the masked, scaled index and return.
+  const int kValueOffset = kElementsStartOffset + kPointerSize;
+  __ add(scratch2, scratch2, Operand(kValueOffset - kHeapObjectTag));
+  __ str(value, MemOperand(scratch2));
+
+  // Update the write barrier. Make sure not to clobber the value.
+  __ mov(scratch1, value);
+  __ RecordWrite(elements, scratch2, scratch1);
 }
 
 
@@ -310,6 +416,7 @@
                                            Register receiver,
                                            Register scratch1,
                                            Register scratch2,
+                                           int interceptor_bit,
                                            Label* slow) {
   // Check that the object isn't a smi.
   __ BranchOnSmi(receiver, slow);
@@ -317,8 +424,9 @@
   __ ldr(scratch1, FieldMemOperand(receiver, HeapObject::kMapOffset));
   // Check bit field.
   __ ldrb(scratch2, FieldMemOperand(scratch1, Map::kBitFieldOffset));
-  __ tst(scratch2, Operand(KeyedLoadIC::kSlowCaseBitFieldMask));
-  __ b(ne, slow);
+  __ tst(scratch2,
+         Operand((1 << Map::kIsAccessCheckNeeded) | (1 << interceptor_bit)));
+  __ b(nz, slow);
   // Check that the object is some kind of JS object EXCEPT JS Value type.
   // In the case that the object is a value-wrapper object,
   // we enter the runtime system to make sure that indexing into string
@@ -502,13 +610,11 @@
 }
 
 
-static void GenerateNormalHelper(MacroAssembler* masm,
-                                 int argc,
-                                 bool is_global_object,
-                                 Label* miss,
-                                 Register scratch) {
-  // Search dictionary - put result in register r1.
-  GenerateDictionaryLoad(masm, miss, r1, r2, r1, r0, r3, r4, CHECK_DICTIONARY);
+static void GenerateFunctionTailCall(MacroAssembler* masm,
+                                     int argc,
+                                     Label* miss,
+                                     Register scratch) {
+  // r1: function
 
   // Check that the value isn't a smi.
   __ tst(r1, Operand(kSmiTagMask));
@@ -518,13 +624,6 @@
   __ CompareObjectType(r1, scratch, scratch, JS_FUNCTION_TYPE);
   __ b(ne, miss);
 
-  // Patch the receiver with the global proxy if necessary.
-  if (is_global_object) {
-    __ ldr(r0, MemOperand(sp, argc * kPointerSize));
-    __ ldr(r0, FieldMemOperand(r0, GlobalObject::kGlobalReceiverOffset));
-    __ str(r0, MemOperand(sp, argc * kPointerSize));
-  }
-
   // Invoke the function.
   ParameterCount actual(argc);
   __ InvokeFunction(r1, actual, JUMP_FUNCTION);
@@ -536,53 +635,18 @@
   //  -- r2    : name
   //  -- lr    : return address
   // -----------------------------------
-  Label miss, global_object, non_global_object;
+  Label miss;
 
   // Get the receiver of the function from the stack into r1.
   __ ldr(r1, MemOperand(sp, argc * kPointerSize));
 
-  // Check that the receiver isn't a smi.
-  __ tst(r1, Operand(kSmiTagMask));
-  __ b(eq, &miss);
+  GenerateStringDictionaryReceiverCheck(masm, r1, r0, r3, r4, &miss);
 
-  // Check that the receiver is a valid JS object.  Put the map in r3.
-  __ CompareObjectType(r1, r3, r0, FIRST_JS_OBJECT_TYPE);
-  __ b(lt, &miss);
+  // r0: elements
+  // Search the dictionary - put result in register r1.
+  GenerateDictionaryLoad(masm, &miss, r0, r2, r1, r3, r4);
 
-  // If this assert fails, we have to check upper bound too.
-  ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
-
-  // Check for access to global object.
-  __ cmp(r0, Operand(JS_GLOBAL_OBJECT_TYPE));
-  __ b(eq, &global_object);
-  __ cmp(r0, Operand(JS_BUILTINS_OBJECT_TYPE));
-  __ b(ne, &non_global_object);
-
-  // Accessing global object: Load and invoke.
-  __ bind(&global_object);
-  // Check that the global object does not require access checks.
-  __ ldrb(r3, FieldMemOperand(r3, Map::kBitFieldOffset));
-  __ tst(r3, Operand(1 << Map::kIsAccessCheckNeeded));
-  __ b(ne, &miss);
-  GenerateNormalHelper(masm, argc, true, &miss, r4);
-
-  // Accessing non-global object: Check for access to global proxy.
-  Label global_proxy, invoke;
-  __ bind(&non_global_object);
-  __ cmp(r0, Operand(JS_GLOBAL_PROXY_TYPE));
-  __ b(eq, &global_proxy);
-  // Check that the non-global, non-global-proxy object does not
-  // require access checks.
-  __ ldrb(r3, FieldMemOperand(r3, Map::kBitFieldOffset));
-  __ tst(r3, Operand(1 << Map::kIsAccessCheckNeeded));
-  __ b(ne, &miss);
-  __ bind(&invoke);
-  GenerateNormalHelper(masm, argc, false, &miss, r4);
-
-  // Global object access: Check access rights.
-  __ bind(&global_proxy);
-  __ CheckAccessGlobalProxy(r1, r0, &miss);
-  __ b(&invoke);
+  GenerateFunctionTailCall(masm, argc, &miss, r4);
 
   __ bind(&miss);
 }
@@ -594,6 +658,12 @@
   //  -- lr    : return address
   // -----------------------------------
 
+  if (id == IC::kCallIC_Miss) {
+    __ IncrementCounter(&Counters::call_miss, 1, r3, r4);
+  } else {
+    __ IncrementCounter(&Counters::keyed_call_miss, 1, r3, r4);
+  }
+
   // Get the receiver of the function from the stack.
   __ ldr(r3, MemOperand(sp, argc * kPointerSize));
 
@@ -614,23 +684,26 @@
   __ LeaveInternalFrame();
 
   // Check if the receiver is a global object of some sort.
-  Label invoke, global;
-  __ ldr(r2, MemOperand(sp, argc * kPointerSize));  // receiver
-  __ tst(r2, Operand(kSmiTagMask));
-  __ b(eq, &invoke);
-  __ CompareObjectType(r2, r3, r3, JS_GLOBAL_OBJECT_TYPE);
-  __ b(eq, &global);
-  __ cmp(r3, Operand(JS_BUILTINS_OBJECT_TYPE));
-  __ b(ne, &invoke);
+  // This can happen only for regular CallIC but not KeyedCallIC.
+  if (id == IC::kCallIC_Miss) {
+    Label invoke, global;
+    __ ldr(r2, MemOperand(sp, argc * kPointerSize));  // receiver
+    __ tst(r2, Operand(kSmiTagMask));
+    __ b(eq, &invoke);
+    __ CompareObjectType(r2, r3, r3, JS_GLOBAL_OBJECT_TYPE);
+    __ b(eq, &global);
+    __ cmp(r3, Operand(JS_BUILTINS_OBJECT_TYPE));
+    __ b(ne, &invoke);
 
-  // Patch the receiver on the stack.
-  __ bind(&global);
-  __ ldr(r2, FieldMemOperand(r2, GlobalObject::kGlobalReceiverOffset));
-  __ str(r2, MemOperand(sp, argc * kPointerSize));
+    // Patch the receiver on the stack.
+    __ bind(&global);
+    __ ldr(r2, FieldMemOperand(r2, GlobalObject::kGlobalReceiverOffset));
+    __ str(r2, MemOperand(sp, argc * kPointerSize));
+    __ bind(&invoke);
+  }
 
   // Invoke the function.
   ParameterCount actual(argc);
-  __ bind(&invoke);
   __ InvokeFunction(r1, actual, JUMP_FUNCTION);
 }
 
@@ -698,7 +771,8 @@
   // Now the key is known to be a smi. This place is also jumped to from below
   // where a numeric string is converted to a smi.
 
-  GenerateKeyedLoadReceiverCheck(masm, r1, r0, r3, &slow_call);
+  GenerateKeyedLoadReceiverCheck(
+      masm, r1, r0, r3, Map::kHasIndexedInterceptor, &slow_call);
 
   GenerateFastArrayLoad(
       masm, r1, r2, r4, r3, r0, r1, &check_number_dictionary, &slow_load);
@@ -708,14 +782,7 @@
   // receiver in r1 is not used after this point.
   // r2: key
   // r1: function
-
-  // Check that the value in r1 is a JSFunction.
-  __ BranchOnSmi(r1, &slow_call);
-  __ CompareObjectType(r1, r0, r0, JS_FUNCTION_TYPE);
-  __ b(ne, &slow_call);
-  // Invoke the function.
-  ParameterCount actual(argc);
-  __ InvokeFunction(r1, actual, JUMP_FUNCTION);
+  GenerateFunctionTailCall(masm, argc, &slow_call, r0);
 
   __ bind(&check_number_dictionary);
   // r2: key
@@ -751,16 +818,16 @@
   // If the receiver is a regular JS object with slow properties then do
   // a quick inline probe of the receiver's dictionary.
   // Otherwise do the monomorphic cache probe.
-  GenerateKeyedLoadReceiverCheck(masm, r1, r0, r3, &lookup_monomorphic_cache);
+  GenerateKeyedLoadReceiverCheck(
+      masm, r1, r0, r3, Map::kHasNamedInterceptor, &lookup_monomorphic_cache);
 
-  __ ldr(r3, FieldMemOperand(r1, JSObject::kPropertiesOffset));
-  __ ldr(r3, FieldMemOperand(r3, HeapObject::kMapOffset));
+  __ ldr(r0, FieldMemOperand(r1, JSObject::kPropertiesOffset));
+  __ ldr(r3, FieldMemOperand(r0, HeapObject::kMapOffset));
   __ LoadRoot(ip, Heap::kHashTableMapRootIndex);
   __ cmp(r3, ip);
   __ b(ne, &lookup_monomorphic_cache);
 
-  GenerateDictionaryLoad(
-      masm, &slow_load, r1, r2, r1, r0, r3, r4, DICTIONARY_CHECK_DONE);
+  GenerateDictionaryLoad(masm, &slow_load, r0, r2, r1, r3, r4);
   __ IncrementCounter(&Counters::keyed_call_generic_lookup_dict, 1, r0, r3);
   __ jmp(&do_call);
 
@@ -826,36 +893,14 @@
   //  -- r0    : receiver
   //  -- sp[0] : receiver
   // -----------------------------------
-  Label miss, probe, global;
+  Label miss;
 
-  // Check that the receiver isn't a smi.
-  __ tst(r0, Operand(kSmiTagMask));
-  __ b(eq, &miss);
+  GenerateStringDictionaryReceiverCheck(masm, r0, r1, r3, r4, &miss);
 
-  // Check that the receiver is a valid JS object.  Put the map in r3.
-  __ CompareObjectType(r0, r3, r1, FIRST_JS_OBJECT_TYPE);
-  __ b(lt, &miss);
-  // If this assert fails, we have to check upper bound too.
-  ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
-
-  // Check for access to global object (unlikely).
-  __ cmp(r1, Operand(JS_GLOBAL_PROXY_TYPE));
-  __ b(eq, &global);
-
-  // Check for non-global object that requires access check.
-  __ ldrb(r3, FieldMemOperand(r3, Map::kBitFieldOffset));
-  __ tst(r3, Operand(1 << Map::kIsAccessCheckNeeded));
-  __ b(ne, &miss);
-
-  __ bind(&probe);
-  GenerateDictionaryLoad(masm, &miss, r0, r2, r0, r1, r3, r4, CHECK_DICTIONARY);
+  // r1: elements
+  GenerateDictionaryLoad(masm, &miss, r1, r2, r0, r3, r4);
   __ Ret();
 
-  // Global object access: Check access rights.
-  __ bind(&global);
-  __ CheckAccessGlobalProxy(r0, r1, &miss);
-  __ b(&probe);
-
   // Cache miss: Jump to runtime.
   __ bind(&miss);
   GenerateMiss(masm);
@@ -870,6 +915,8 @@
   //  -- sp[0] : receiver
   // -----------------------------------
 
+  __ IncrementCounter(&Counters::load_miss, 1, r3, r4);
+
   __ mov(r3, r0);
   __ Push(r3, r2);
 
@@ -963,7 +1010,7 @@
   // Patch the map check.
   Address ldr_map_instr_address =
       inline_end_address -
-      (CodeGenerator::kInlinedKeyedLoadInstructionsAfterPatch *
+      (CodeGenerator::GetInlinedKeyedLoadInstructionsAfterPatch() *
       Assembler::kInstrSize);
   Assembler::set_target_address_at(ldr_map_instr_address,
                                    reinterpret_cast<Address>(map));
@@ -1013,6 +1060,8 @@
   //  -- r1     : receiver
   // -----------------------------------
 
+  __ IncrementCounter(&Counters::keyed_load_miss, 1, r3, r4);
+
   __ Push(r1, r0);
 
   ExternalReference ref = ExternalReference(IC_Utility(kKeyedLoadIC_Miss));
@@ -1045,14 +1094,15 @@
   Register key = r0;
   Register receiver = r1;
 
-  GenerateKeyedLoadReceiverCheck(masm, receiver, r2, r3, &slow);
-
   // Check that the key is a smi.
   __ BranchOnNotSmi(key, &check_string);
   __ bind(&index_smi);
   // Now the key is known to be a smi. This place is also jumped to from below
   // where a numeric string is converted to a smi.
 
+  GenerateKeyedLoadReceiverCheck(
+      masm, receiver, r2, r3, Map::kHasIndexedInterceptor, &slow);
+
   GenerateFastArrayLoad(
       masm, receiver, key, r4, r3, r2, r0, &check_pixel_array, &slow);
   __ IncrementCounter(&Counters::keyed_load_generic_smi, 1, r2, r3);
@@ -1095,12 +1145,15 @@
   __ bind(&check_string);
   GenerateKeyStringCheck(masm, key, r2, r3, &index_string, &slow);
 
+  GenerateKeyedLoadReceiverCheck(
+      masm, receiver, r2, r3, Map::kHasNamedInterceptor, &slow);
+
   // If the receiver is a fast-case object, check the keyed lookup
   // cache. Otherwise probe the dictionary.
   __ ldr(r3, FieldMemOperand(r1, JSObject::kPropertiesOffset));
-  __ ldr(r3, FieldMemOperand(r3, HeapObject::kMapOffset));
+  __ ldr(r4, FieldMemOperand(r3, HeapObject::kMapOffset));
   __ LoadRoot(ip, Heap::kHashTableMapRootIndex);
-  __ cmp(r3, ip);
+  __ cmp(r4, ip);
   __ b(eq, &probe_dictionary);
 
   // Load the map of the receiver, compute the keyed lookup cache hash
@@ -1148,9 +1201,14 @@
   // Do a quick inline probe of the receiver's dictionary, if it
   // exists.
   __ bind(&probe_dictionary);
+  // r1: receiver
+  // r0: key
+  // r3: elements
+  __ ldr(r2, FieldMemOperand(r1, HeapObject::kMapOffset));
+  __ ldrb(r2, FieldMemOperand(r2, Map::kInstanceTypeOffset));
+  GenerateGlobalInstanceTypeCheck(masm, r2, &slow);
   // Load the property to r0.
-  GenerateDictionaryLoad(
-      masm, &slow, r1, r0, r0, r2, r3, r4, DICTIONARY_CHECK_DONE);
+  GenerateDictionaryLoad(masm, &slow, r3, r0, r0, r2, r4);
   __ IncrementCounter(&Counters::keyed_load_generic_symbol, 1, r2, r3);
   __ Ret();
 
@@ -1744,7 +1802,7 @@
     // Count leading zeros.
     // Gets the wrong answer for 0, but we already checked for that case above.
     Register zeros = scratch2;
-    __ CountLeadingZeros(ival, scratch1, zeros);
+    __ CountLeadingZeros(zeros, ival, scratch1);
 
     // Compute exponent and or it into the exponent register.
     __ rsb(scratch1,
@@ -2160,6 +2218,27 @@
 }
 
 
+void StoreIC::GenerateNormal(MacroAssembler* masm) {
+  // ----------- S t a t e -------------
+  //  -- r0    : value
+  //  -- r1    : receiver
+  //  -- r2    : name
+  //  -- lr    : return address
+  // -----------------------------------
+  Label miss;
+
+  GenerateStringDictionaryReceiverCheck(masm, r1, r3, r4, r5, &miss);
+
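+  // r3: elements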
+  GenerateDictionaryStore(masm, &miss, r3, r2, r0, r4, r5);
+  __ IncrementCounter(&Counters::store_normal_hit, 1, r4, r5);
+  __ Ret();
+
+  __ bind(&miss);
+  __ IncrementCounter(&Counters::store_normal_miss, 1, r4, r5);
+  GenerateMiss(masm);
+}
+
+
 #undef __
 
 
diff --git a/src/arm/jump-target-arm.cc b/src/arm/jump-target-arm.cc
index 86198fb..c6eb628 100644
--- a/src/arm/jump-target-arm.cc
+++ b/src/arm/jump-target-arm.cc
@@ -61,9 +61,17 @@
   } else {
     // Clone the current frame to use as the expected one at the target.
     set_entry_frame(cgen()->frame());
+    // Zap the fall-through frame since the jump was unconditional.
     RegisterFile empty;
     cgen()->SetFrame(NULL, &empty);
   }
+  if (entry_label_.is_bound()) {
+    // You can't jump backwards to an already bound label unless you admitted
+    // up front that this was a bidirectional jump target.  Bidirectional jump
+    // targets will zap their type info when bound in case some later virtual
+    // frame with less precise type info branches to them.
+    ASSERT(direction_ != FORWARD_ONLY);
+  }
   __ jmp(&entry_label_);
 }
 
@@ -83,6 +91,13 @@
     // Clone the current frame to use as the expected one at the target.
     set_entry_frame(cgen()->frame());
   }
+  if (entry_label_.is_bound()) {
+    // You can't branch backwards to an already bound label unless you admitted
+    // up front that this was a bidirectional jump target.  Bidirectional jump
+    // targets will zap their type info when bound in case some later virtual
+    // frame with less precise type info branches to them.
+    ASSERT(direction_ != FORWARD_ONLY);
+  }
   __ b(cc, &entry_label_);
   if (cc == al) {
     cgen()->DeleteFrame();
@@ -121,6 +136,7 @@
   ASSERT(!cgen()->has_valid_frame() || cgen()->HasValidEntryRegisters());
 
   if (cgen()->has_valid_frame()) {
+    if (direction_ != FORWARD_ONLY) cgen()->frame()->ForgetTypeInfo();
     // If there is a current frame we can use it on the fall through.
     if (!entry_frame_set_) {
       entry_frame_ = *cgen()->frame();
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index 29e168c..81fc11e 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -310,32 +310,28 @@
 
 
 void MacroAssembler::RecordWriteHelper(Register object,
-                                       Operand offset,
-                                       Register scratch0,
-                                       Register scratch1) {
+                                       Register address,
+                                       Register scratch) {
   if (FLAG_debug_code) {
     // Check that the object is not in new space.
     Label not_in_new_space;
-    InNewSpace(object, scratch1, ne, &not_in_new_space);
+    InNewSpace(object, scratch, ne, &not_in_new_space);
     Abort("new-space object passed to RecordWriteHelper");
     bind(&not_in_new_space);
   }
 
-  // Add offset into the object.
-  add(scratch0, object, offset);
-
   // Calculate page address.
   Bfc(object, 0, kPageSizeBits);
 
   // Calculate region number.
-  Ubfx(scratch0, scratch0, Page::kRegionSizeLog2,
+  Ubfx(address, address, Page::kRegionSizeLog2,
        kPageSizeBits - Page::kRegionSizeLog2);
 
   // Mark region dirty.
-  ldr(scratch1, MemOperand(object, Page::kDirtyFlagOffset));
+  ldr(scratch, MemOperand(object, Page::kDirtyFlagOffset));
   mov(ip, Operand(1));
-  orr(scratch1, scratch1, Operand(ip, LSL, scratch0));
-  str(scratch1, MemOperand(object, Page::kDirtyFlagOffset));
+  orr(scratch, scratch, Operand(ip, LSL, address));
+  str(scratch, MemOperand(object, Page::kDirtyFlagOffset));
 }
 
 
@@ -368,8 +364,11 @@
   // region marks for new space pages.
   InNewSpace(object, scratch0, eq, &done);
 
+  // Add offset into the object.
+  add(scratch0, object, offset);
+
   // Record the actual write.
-  RecordWriteHelper(object, offset, scratch0, scratch1);
+  RecordWriteHelper(object, scratch0, scratch1);
 
   bind(&done);
 
@@ -383,6 +382,38 @@
 }
 
 
+// Will clobber 4 registers: object, address, scratch, ip.  The
+// register 'object' contains a heap object pointer.  The heap object
+// tag is shifted away.
+void MacroAssembler::RecordWrite(Register object,
+                                 Register address,
+                                 Register scratch) {
+  // The compiled code assumes that record write doesn't change the
+  // context register, so we check that none of the clobbered
+  // registers are cp.
+  ASSERT(!object.is(cp) && !address.is(cp) && !scratch.is(cp));
+
+  Label done;
+
+  // First, test that the object is not in the new space.  We cannot set
+  // region marks for new space pages.
+  InNewSpace(object, scratch, eq, &done);
+
+  // Record the actual write.
+  RecordWriteHelper(object, address, scratch);
+
+  bind(&done);
+
+  // Clobber all input registers when running with the debug-code flag
+  // turned on to provoke errors.
+  if (FLAG_debug_code) {
+    mov(object, Operand(BitCast<int32_t>(kZapValue)));
+    mov(address, Operand(BitCast<int32_t>(kZapValue)));
+    mov(scratch, Operand(BitCast<int32_t>(kZapValue)));
+  }
+}
+
+
 void MacroAssembler::Ldrd(Register dst1, Register dst2,
                           const MemOperand& src, Condition cond) {
   ASSERT(src.rm().is(no_reg));
@@ -1369,6 +1400,56 @@
 }
 
 
+void MacroAssembler::ObjectToDoubleVFPRegister(Register object,
+                                               DwVfpRegister result,
+                                               Register scratch1,
+                                               Register scratch2,
+                                               Register heap_number_map,
+                                               SwVfpRegister scratch3,
+                                               Label* not_number,
+                                               ObjectToDoubleFlags flags) {
+  Label done;
+  if ((flags & OBJECT_NOT_SMI) == 0) {
+    Label not_smi;
+    BranchOnNotSmi(object, &not_smi);
+    // Remove smi tag and convert to double.
+    mov(scratch1, Operand(object, ASR, kSmiTagSize));
+    vmov(scratch3, scratch1);
+    vcvt_f64_s32(result, scratch3);
+    b(&done);
+    bind(&not_smi);
+  }
+  // Check for heap number and load double value from it.
+  ldr(scratch1, FieldMemOperand(object, HeapObject::kMapOffset));
+  sub(scratch2, object, Operand(kHeapObjectTag));
+  cmp(scratch1, heap_number_map);
+  b(ne, not_number);
+  if ((flags & AVOID_NANS_AND_INFINITIES) != 0) {
+    // If the exponent is all ones, the number is either a NaN or +/-Infinity.
+    ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset));
+    Sbfx(scratch1,
+         scratch1,
+         HeapNumber::kExponentShift,
+         HeapNumber::kExponentBits);
+    // An all-ones field sign-extends to -1.
+    cmp(scratch1, Operand(-1));
+    b(eq, not_number);
+  }
+  vldr(result, scratch2, HeapNumber::kValueOffset);
+  bind(&done);
+}
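
The AVOID_NANS_AND_INFINITIES path works because an IEEE 754 double is a NaN
or +/-Infinity exactly when its 11 exponent bits are all ones; Sbfx
sign-extends such a field to -1, which the cmp/b(eq) pair then detects. The
same test on the host, as a sketch:

    #include <cstdint>
    #include <cstring>

    // A double is NaN or +/-Infinity iff its exponent field (bits 62-52)
    // is all ones (0x7FF).
    bool IsNanOrInfinity(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));
      return ((bits >> 52) & 0x7FF) == 0x7FF;
    }
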
+
+
+void MacroAssembler::SmiToDoubleVFPRegister(Register smi,
+                                            DwVfpRegister value,
+                                            Register scratch1,
+                                            SwVfpRegister scratch2) {
+  mov(scratch1, Operand(smi, ASR, kSmiTagSize));
+  vmov(scratch2, scratch1);
+  vcvt_f64_s32(value, scratch2);
+}
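
SmiToDoubleVFPRegister leans on the smi tagging scheme: with kSmiTagSize == 1
the integer is stored shifted left by one with a zero tag bit, so one
arithmetic shift right untags it before the vmov/vcvt_f64_s32 conversion. A
host-side sketch under that assumption:

    #include <cstdint>

    // 32-bit smi layout assumed: value << 1, low tag bit zero.
    double SmiToDouble(int32_t tagged_smi) {
      int32_t untagged = tagged_smi >> 1;    // mov scratch1, smi, ASR #1
      return static_cast<double>(untagged);  // vmov + vcvt_f64_s32
    }
    // Example: 7 is tagged as 14, and SmiToDouble(14) == 7.0.
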
+
+
 void MacroAssembler::GetLeastBitsFromSmi(Register dst,
                                          Register src,
                                          int num_least_bits) {
@@ -1548,6 +1629,8 @@
 
 
 void MacroAssembler::Abort(const char* msg) {
+  Label abort_start;
+  bind(&abort_start);
   // We want to pass the msg string like a smi to avoid GC
   // problems; however, msg is not guaranteed to be aligned
   // properly. Instead, we pass an aligned pointer that is
@@ -1571,6 +1654,17 @@
   push(r0);
   CallRuntime(Runtime::kAbort, 2);
   // will not return here
+  if (is_const_pool_blocked()) {
+    // If the calling code cares about the exact number of
+    // instructions generated, we insert padding here to keep the size
+    // of the Abort macro constant.
+    static const int kExpectedAbortInstructions = 10;
+    int abort_instructions = InstructionsGeneratedSince(&abort_start);
+    ASSERT(abort_instructions <= kExpectedAbortInstructions);
+    while (abort_instructions++ < kExpectedAbortInstructions) {
+      nop();
+    }
+  }
 }
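
The padding keeps Abort a constant number of instructions whenever it expands
while the constant pool is blocked, so surrounding code that counts
instructions is not thrown off by debug-mode aborts. The general pattern,
sketched with the ARM nop encoding ("mov r0, r0"):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr uint32_t kArmNop = 0xE1A00000;  // mov r0, r0

    // After emitting a variable-length sequence starting at 'start', pad
    // with nops up to a fixed maximum so the total size is constant.
    void PadToFixedLength(std::vector<uint32_t>* code, size_t start,
                          size_t expected_instructions) {
      size_t emitted = code->size() - start;
      assert(emitted <= expected_instructions);
      while (emitted++ < expected_instructions) code->push_back(kArmNop);
    }
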
 
 
@@ -1673,14 +1767,31 @@
 }
 
 
-void MacroAssembler::CountLeadingZeros(Register source,
-                                       Register scratch,
-                                       Register zeros) {
+void MacroAssembler::AllocateHeapNumberWithValue(Register result,
+                                                 DwVfpRegister value,
+                                                 Register scratch1,
+                                                 Register scratch2,
+                                                 Register heap_number_map,
+                                                 Label* gc_required) {
+  AllocateHeapNumber(result, scratch1, scratch2, heap_number_map, gc_required);
+  sub(scratch1, result, Operand(kHeapObjectTag));
+  vstr(value, scratch1, HeapNumber::kValueOffset);
+}
+
+
+void MacroAssembler::CountLeadingZeros(Register zeros,   // Answer.
+                                       Register source,  // Input.
+                                       Register scratch) {
+  ASSERT(!zeros.is(source) || !source.is(scratch));
+  ASSERT(!zeros.is(scratch));
+  ASSERT(!scratch.is(ip));
+  ASSERT(!source.is(ip));
+  ASSERT(!zeros.is(ip));
 #ifdef CAN_USE_ARMV5_INSTRUCTIONS
   clz(zeros, source);  // This instruction is only supported after ARM5.
 #else
   mov(zeros, Operand(0));
-  mov(scratch, source);
+  Move(scratch, source);
   // Top 16.
   tst(scratch, Operand(0xffff0000));
   add(zeros, zeros, Operand(16), LeaveCC, eq);
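
The hunk ends inside the pre-ARMv5 fallback; the routine goes on to apply the
same halving test for 8, 4, 2 and 1 bits. The complete binary search, as a
host-side sketch that shares the documented quirk of returning 31 rather than
32 for an input of zero:

    #include <cstdint>

    int CountLeadingZerosFallback(uint32_t x) {
      int zeros = 0;
      if ((x & 0xFFFF0000u) == 0) { zeros += 16; x <<= 16; }  // top 16
      if ((x & 0xFF000000u) == 0) { zeros += 8;  x <<= 8; }   // top 8
      if ((x & 0xF0000000u) == 0) { zeros += 4;  x <<= 4; }   // top 4
      if ((x & 0xC0000000u) == 0) { zeros += 2;  x <<= 2; }   // top 2
      if ((x & 0x80000000u) == 0) { zeros += 1; }             // top 1
      return zeros;  // 31, not 32, for x == 0
    }
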
diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h
index e02a6c8..d57c565 100644
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -67,6 +67,17 @@
 };
 
 
+// Flags used for the ObjectToDoubleVFPRegister function.
+enum ObjectToDoubleFlags {
+  // No special flags.
+  NO_OBJECT_TO_DOUBLE_FLAGS = 0,
+  // Object is known to be a non smi.
+  OBJECT_NOT_SMI = 1 << 0,
+  // Don't load NaNs or infinities, branch to the non number case instead.
+  AVOID_NANS_AND_INFINITIES = 1 << 1
+};
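
These are plain enumerators, so OR-ing two of them yields an int; a call site
that wants both behaviours has to cast the combination back. A hypothetical
fragment:

    // Hypothetical call-site fragment (assumes the enum above is in scope).
    ObjectToDoubleFlags flags = static_cast<ObjectToDoubleFlags>(
        OBJECT_NOT_SMI | AVOID_NANS_AND_INFINITIES);
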
+
+
 // MacroAssembler implements a collection of frequently used macros.
 class MacroAssembler: public Assembler {
  public:
@@ -126,22 +137,32 @@
                   Label* branch);
 
 
-  // For the page containing |object| mark the region covering [object+offset]
+  // For the page containing |object| mark the region covering [address]
   // dirty. The object address must be in the first 8K of an allocated page.
   void RecordWriteHelper(Register object,
-                         Operand offset,
-                         Register scratch0,
-                         Register scratch1);
+                         Register address,
+                         Register scratch);
 
-  // For the page containing |object| mark the region covering [object+offset]
-  // dirty. The object address must be in the first 8K of an allocated page.
-  // The 'scratch' registers are used in the implementation and all 3 registers
-  // are clobbered by the operation, as well as the ip register.
+  // For the page containing |object| mark the region covering
+  // [object+offset] dirty. The object address must be in the first 8K
+  // of an allocated page.  The 'scratch' registers are used in the
+  // implementation and all 3 registers are clobbered by the
+  // operation, as well as the ip register. RecordWrite updates the
+  // write barrier even when storing smis.
   void RecordWrite(Register object,
                    Operand offset,
                    Register scratch0,
                    Register scratch1);
 
+  // For the page containing |object| mark the region covering
+  // [address] dirty. The object address must be in the first 8K of an
+  // allocated page.  All 3 registers are clobbered by the operation,
+  // as well as the ip register. RecordWrite updates the write barrier
+  // even when storing smis.
+  void RecordWrite(Register object,
+                   Register address,
+                   Register scratch);
+
   // Push two registers.  Pushes leftmost register first (to highest address).
   void Push(Register src1, Register src2, Condition cond = al) {
     ASSERT(!src1.is(src2));
@@ -381,6 +402,13 @@
                           Register scratch2,
                           Register heap_number_map,
                           Label* gc_required);
+  void AllocateHeapNumberWithValue(Register result,
+                                   DwVfpRegister value,
+                                   Register scratch1,
+                                   Register scratch2,
+                                   Register heap_number_map,
+                                   Label* gc_required);
 
   // ---------------------------------------------------------------------------
   // Support functions.
@@ -469,12 +497,35 @@
                                          Register outHighReg,
                                          Register outLowReg);
 
+  // Load the value of a number object into a VFP double register. If the
+  // object is not a number, a jump to the label not_number is performed and
+  // the VFP double register is left unchanged.
+  void ObjectToDoubleVFPRegister(
+      Register object,
+      DwVfpRegister value,
+      Register scratch1,
+      Register scratch2,
+      Register heap_number_map,
+      SwVfpRegister scratch3,
+      Label* not_number,
+      ObjectToDoubleFlags flags = NO_OBJECT_TO_DOUBLE_FLAGS);
+
+  // Load the value of a smi object into a VFP double register. The register
+  // scratch1 can be the same register as smi, in which case smi will hold the
+  // untagged value afterwards.
+  void SmiToDoubleVFPRegister(Register smi,
+                              DwVfpRegister value,
+                              Register scratch1,
+                              SwVfpRegister scratch2);
+
   // Count leading zeros in a 32-bit word.  On ARMv5 and later it uses the clz
   // instruction.  On pre-ARMv5 hardware this routine gives the wrong answer
-  // for 0 (31 instead of 32).
-  void CountLeadingZeros(Register source,
-                         Register scratch,
-                         Register zeros);
+  // for 0 (31 instead of 32).  Source and scratch can be the same register,
+  // in which case the source is clobbered.  Source and zeros can also be the
+  // same register, in which case scratch must be a different register.
+  void CountLeadingZeros(Register zeros,
+                         Register source,
+                         Register scratch);
 
   // ---------------------------------------------------------------------------
   // Runtime calls
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index 77776c2..f09ce00 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -26,6 +26,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdlib.h>
+#include <math.h>
 #include <cstdarg>
 #include "v8.h"
 
@@ -2262,7 +2263,8 @@
 // Dd = vmul(Dn, Dm)
 // Dd = vdiv(Dn, Dm)
 // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
 void Simulator::DecodeTypeVFP(Instr* instr) {
   ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
   ASSERT(instr->Bits(11, 9) == 0x5);
@@ -2274,7 +2276,14 @@
   if (instr->Bit(4) == 0) {
     if (instr->Opc1Field() == 0x7) {
       // Other data processing instructions
-      if ((instr->Opc2Field() == 0x7) && (instr->Opc3Field() == 0x3)) {
+      if ((instr->Opc2Field() == 0x0) && (instr->Opc3Field() == 0x1)) {
+        // vmov register to register.
+        if (instr->SzField() == 0x1) {
+          set_d_register_from_double(vd, get_double_from_d_register(vm));
+        } else {
+          UNREACHABLE();  // Not used by V8.
+        }
+      } else if ((instr->Opc2Field() == 0x7) && (instr->Opc3Field() == 0x3)) {
         DecodeVCVTBetweenDoubleAndSingle(instr);
       } else if ((instr->Opc2Field() == 0x8) && (instr->Opc3Field() & 0x1)) {
         DecodeVCVTBetweenFloatingPointAndInteger(instr);
@@ -2284,6 +2293,11 @@
       } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                  (instr->Opc3Field() & 0x1)) {
         DecodeVCMP(instr);
+      } else if ((instr->Opc2Field() == 0x1) && (instr->Opc3Field() == 0x3)) {
+        // vsqrt
+        double dm_value = get_double_from_d_register(vm);
+        double dd_value = sqrt(dm_value);
+        set_d_register_from_double(vd, dd_value);
       } else {
         UNREACHABLE();  // Not used by V8.
       }
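
The simulator recognizes vsqrt purely by its Opc2/Opc3 values within the VFP
data-processing space. A sketch of the field test, assuming Opc2Field() reads
bits 19-16 and Opc3Field() bits 7-6 as in the ARM encoding:

    #include <cstdint>

    // Mirrors the simulator's instr->Bits(hi, lo) for field widths < 32.
    inline uint32_t Bits(uint32_t instr, int hi, int lo) {
      return (instr >> lo) & ((1u << (hi - lo + 1)) - 1);
    }

    bool LooksLikeVsqrt(uint32_t instr) {
      return Bits(instr, 19, 16) == 0x1 &&  // Opc2Field() == 0x1
             Bits(instr, 7, 6) == 0x3;      // Opc3Field() == 0x3
    }
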
diff --git a/src/arm/stub-cache-arm.cc b/src/arm/stub-cache-arm.cc
index 3e5ba11..0e649cc 100644
--- a/src/arm/stub-cache-arm.cc
+++ b/src/arm/stub-cache-arm.cc
@@ -741,7 +741,8 @@
                                        Register scratch,
                                        String* name,
                                        int save_at_depth,
-                                       Label* miss) {
+                                       Label* miss,
+                                       Register extra) {
   // Check that the maps haven't changed.
   Register result =
       masm()->CheckMaps(object, object_reg, holder, holder_reg, scratch,
diff --git a/src/arm/virtual-frame-arm.cc b/src/arm/virtual-frame-arm.cc
index 8b90f42..2ddfd0f 100644
--- a/src/arm/virtual-frame-arm.cc
+++ b/src/arm/virtual-frame-arm.cc
@@ -482,6 +482,32 @@
 }
 
 
+void VirtualFrame::SpillAllButCopyTOSToR1() {
+  switch (top_of_stack_state_) {
+    case NO_TOS_REGISTERS:
+      __ ldr(r1, MemOperand(sp, 0));
+      break;
+    case R0_TOS:
+      __ push(r0);
+      __ mov(r1, r0);
+      break;
+    case R1_TOS:
+      __ push(r1);
+      break;
+    case R0_R1_TOS:
+      __ Push(r1, r0);
+      __ mov(r1, r0);
+      break;
+    case R1_R0_TOS:
+      __ Push(r0, r1);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  top_of_stack_state_ = NO_TOS_REGISTERS;
+}
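
The switch enumerates the virtual frame's five top-of-stack states. As a
reading aid, a stand-in for the real enum with each state's register
assignment spelled out (Push(a, b) pushes a first, i.e. deeper):

    // Stand-in for the real top-of-stack state enum, not V8's definition.
    enum TopOfStackStateSketch {
      kNoTosRegisters,  // all stack values are in memory
      kR0Tos,           // top of stack cached in r0
      kR1Tos,           // top of stack cached in r1
      kR0R1Tos,         // top in r0, next value in r1
      kR1R0Tos          // top in r1, next value in r0
    };
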
+
+
 void VirtualFrame::SpillAllButCopyTOSToR1R0() {
   switch (top_of_stack_state_) {
     case NO_TOS_REGISTERS:
@@ -524,6 +550,24 @@
 }
 
 
+Register VirtualFrame::Peek2() {
+  AssertIsNotSpilled();
+  switch (top_of_stack_state_) {
+    case NO_TOS_REGISTERS:
+    case R0_TOS:
+    case R0_R1_TOS:
+      MergeTOSTo(R0_R1_TOS);
+      return r1;
+    case R1_TOS:
+    case R1_R0_TOS:
+      MergeTOSTo(R1_R0_TOS);
+      return r0;
+  }
+  UNREACHABLE();
+  return no_reg;
+}
+
+
 void VirtualFrame::Dup() {
   if (SpilledScope::is_spilled()) {
     __ ldr(ip, MemOperand(sp, 0));
diff --git a/src/arm/virtual-frame-arm.h b/src/arm/virtual-frame-arm.h
index d8dc5c6..8eedf22 100644
--- a/src/arm/virtual-frame-arm.h
+++ b/src/arm/virtual-frame-arm.h
@@ -189,12 +189,15 @@
     return (tos_known_smi_map_ & (~other->tos_known_smi_map_)) == 0;
   }
 
+  inline void ForgetTypeInfo() {
+    tos_known_smi_map_ = 0;
+  }
+
   // Detach a frame from its code generator, perhaps temporarily.  This
   // tells the register allocator that it is free to use frame-internal
   // registers.  Used when the code generator's frame is switched from this
   // one to NULL by an unconditional jump.
   void DetachFromCodeGenerator() {
-    AssertIsSpilled();
   }
 
   // (Re)attach a frame to its code generator.  This informs the register
@@ -202,7 +205,6 @@
   // Used when a code generator's frame is switched from NULL to this one by
   // binding a label.
   void AttachToCodeGenerator() {
-    AssertIsSpilled();
   }
 
   // Emit code for the physical JS entry and exit frame sequences.  After
@@ -330,6 +332,10 @@
   // must be copied to a scratch register before modification.
   Register Peek();
 
+  // Look at the value beneath the top of the stack.  The register returned is
+  // aliased and must be copied to a scratch register before modification.
+  Register Peek2();
+
   // Duplicate the top of stack.
   void Dup();
 
@@ -339,6 +345,9 @@
   // Flushes all registers, but it puts a copy of the top-of-stack in r0.
   void SpillAllButCopyTOSToR0();
 
+  // Flushes all registers, but it puts a copy of the top-of-stack in r1.
+  void SpillAllButCopyTOSToR1();
+
   // Flushes all registers, but it puts a copy of the top-of-stack in r1
   // and the next value on the stack in r0.
   void SpillAllButCopyTOSToR1R0();