2010-04-26: Version 2.2.5

Various performance improvements (especially for ARM and x64).

Fixed a bug in CPU profiling (http://crbug.com/42137).

Fixed a bug with the natives cache.

Fixed two bugs in the ARM code generator that could cause incorrect calculations.

Fixed a bug that could cause incorrect results for shift operations.


git-svn-id: http://v8.googlecode.com/svn/trunk@4498 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc
index b2fe05f..3594aab 100644
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -206,7 +206,7 @@
 
 #ifdef DEBUG
         JumpTarget verified_true;
-        __ cmp(r0, Operand(cp));
+        __ cmp(r0, cp);
         verified_true.Branch(eq);
         __ stop("NewContext: r0 is expected to be the same as cp");
         verified_true.Bind();
@@ -247,29 +247,10 @@
       }
 
       // Store the arguments object.  This must happen after context
-      // initialization because the arguments object may be stored in the
-      // context.
-      if (scope()->arguments() != NULL) {
-        Comment cmnt(masm_, "[ allocate arguments object");
-        ASSERT(scope()->arguments_shadow() != NULL);
-        Variable* arguments = scope()->arguments()->var();
-        Variable* shadow = scope()->arguments_shadow()->var();
-        ASSERT(arguments != NULL && arguments->slot() != NULL);
-        ASSERT(shadow != NULL && shadow->slot() != NULL);
-        ArgumentsAccessStub stub(ArgumentsAccessStub::NEW_OBJECT);
-        __ ldr(r2, frame_->Function());
-        // The receiver is below the arguments, the return address, and the
-        // frame pointer on the stack.
-        const int kReceiverDisplacement = 2 + scope()->num_parameters();
-        __ add(r1, fp, Operand(kReceiverDisplacement * kPointerSize));
-        __ mov(r0, Operand(Smi::FromInt(scope()->num_parameters())));
-        frame_->Adjust(3);
-        __ stm(db_w, sp, r0.bit() | r1.bit() | r2.bit());
-        frame_->CallStub(&stub, 3);
-        frame_->EmitPush(r0);
-        StoreToSlot(arguments->slot(), NOT_CONST_INIT);
-        StoreToSlot(shadow->slot(), NOT_CONST_INIT);
-        frame_->Drop();  // Value is no longer needed.
+      // initialization because the arguments object may be stored in
+      // the context.
+      if (ArgumentsMode() != NO_ARGUMENTS_ALLOCATION) {
+        StoreArgumentsObject(true);
       }
 
       // Initialize ThisFunction reference if present.
@@ -353,37 +334,34 @@
       frame_->CallRuntime(Runtime::kTraceExit, 1);
     }
 
+#ifdef DEBUG
     // Add a label for checking the size of the code used for returning.
     Label check_exit_codesize;
     masm_->bind(&check_exit_codesize);
+#endif
+    // Make sure that the constant pool is not emitted inside the return
+    // sequence.
+    { Assembler::BlockConstPoolScope block_const_pool(masm_);
+      // Tear down the frame which will restore the caller's frame pointer and
+      // the link register.
+      frame_->Exit();
 
-    // Calculate the exact length of the return sequence and make sure that
-    // the constant pool is not emitted inside of the return sequence.
-    int32_t sp_delta = (scope()->num_parameters() + 1) * kPointerSize;
-    int return_sequence_length = Assembler::kJSReturnSequenceLength;
-    if (!masm_->ImmediateFitsAddrMode1Instruction(sp_delta)) {
-      // Additional mov instruction generated.
-      return_sequence_length++;
+      // Here we use masm_-> instead of the __ macro to keep the code coverage
+      // tool from instrumenting this code, since we rely on its exact size.
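+      // Note: the + 1 in sp_delta below covers the receiver slot, which
+      // sits below the parameters on the stack.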
+      int32_t sp_delta = (scope()->num_parameters() + 1) * kPointerSize;
+      masm_->add(sp, sp, Operand(sp_delta));
+      masm_->Jump(lr);
     }
-    masm_->BlockConstPoolFor(return_sequence_length);
 
-    // Tear down the frame which will restore the caller's frame pointer and
-    // the link register.
-    frame_->Exit();
-
-    // Here we use masm_-> instead of the __ macro to avoid the code coverage
-    // tool from instrumenting as we rely on the code size here.
-    masm_->add(sp, sp, Operand(sp_delta));
-    masm_->Jump(lr);
-
+#ifdef DEBUG
     // Check that the size of the code used for returning matches what is
-    // expected by the debugger. The add instruction above is an addressing
-    // mode 1 instruction where there are restrictions on which immediate values
-    // can be encoded in the instruction and which immediate values requires
-    // use of an additional instruction for moving the immediate to a temporary
-    // register.
-    ASSERT_EQ(return_sequence_length,
-              masm_->InstructionsGeneratedSince(&check_exit_codesize));
+    // expected by the debugger. If the sp_delta above cannot be encoded in
+    // the add instruction, the add will generate two instructions.
+    int return_sequence_length =
+        masm_->InstructionsGeneratedSince(&check_exit_codesize);
+    CHECK(return_sequence_length == Assembler::kJSReturnSequenceLength ||
+          return_sequence_length == Assembler::kJSReturnSequenceLength + 1);
+#endif
   }
 
   // Adjust for function-level loop nesting.
@@ -393,6 +371,7 @@
   // Code generation state must be reset.
   ASSERT(!has_cc());
   ASSERT(state_ == NULL);
+  ASSERT(loop_nesting() == 0);
   ASSERT(!function_return_is_shadowed_);
   function_return_.Unuse();
   DeleteFrame();
@@ -606,6 +585,66 @@
 }
 
 
+ArgumentsAllocationMode CodeGenerator::ArgumentsMode() {
+  if (scope()->arguments() == NULL) return NO_ARGUMENTS_ALLOCATION;
+  ASSERT(scope()->arguments_shadow() != NULL);
+  // We don't want to do lazy arguments allocation for functions that
+  // have heap-allocated contexts, because it interferes with the
+  // uninitialized const tracking in the context objects.
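+  // (Variables that are context-allocated, e.g. because a nested closure
+  // captures them, give the scope num_heap_slots() > 0.)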
+  return (scope()->num_heap_slots() > 0)
+      ? EAGER_ARGUMENTS_ALLOCATION
+      : LAZY_ARGUMENTS_ALLOCATION;
+}
+
+
+void CodeGenerator::StoreArgumentsObject(bool initial) {
+  VirtualFrame::SpilledScope spilled_scope(frame_);
+
+  ArgumentsAllocationMode mode = ArgumentsMode();
+  ASSERT(mode != NO_ARGUMENTS_ALLOCATION);
+
+  Comment cmnt(masm_, "[ store arguments object");
+  if (mode == LAZY_ARGUMENTS_ALLOCATION && initial) {
+    // When using lazy arguments allocation, we store the hole value
+    // as a sentinel indicating that the arguments object hasn't been
+    // allocated yet.
+    __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
+    frame_->EmitPush(ip);
+  } else {
+    ArgumentsAccessStub stub(ArgumentsAccessStub::NEW_OBJECT);
+    __ ldr(r2, frame_->Function());
+    // The receiver is below the arguments, the return address, and the
+    // frame pointer on the stack.
+    const int kReceiverDisplacement = 2 + scope()->num_parameters();
+    __ add(r1, fp, Operand(kReceiverDisplacement * kPointerSize));
+    __ mov(r0, Operand(Smi::FromInt(scope()->num_parameters())));
+    frame_->Adjust(3);
+    __ stm(db_w, sp, r0.bit() | r1.bit() | r2.bit());
+    frame_->CallStub(&stub, 3);
+    frame_->EmitPush(r0);
+  }
+
+  Variable* arguments = scope()->arguments()->var();
+  Variable* shadow = scope()->arguments_shadow()->var();
+  ASSERT(arguments != NULL && arguments->slot() != NULL);
+  ASSERT(shadow != NULL && shadow->slot() != NULL);
+  JumpTarget done;
+  if (mode == LAZY_ARGUMENTS_ALLOCATION && !initial) {
+    // We have to skip storing into the arguments slot if it has
+    // already been written to. This can happen if a function
+    // has a local variable named 'arguments'.
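+    // Hypothetical JS example of such shadowing:
+    //   function f() { var arguments = 42; return arguments; }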
+    LoadFromSlot(scope()->arguments()->var()->slot(), NOT_INSIDE_TYPEOF);
+    frame_->EmitPop(r0);
+    __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
+    __ cmp(r0, ip);
+    done.Branch(ne);
+  }
+  StoreToSlot(arguments->slot(), NOT_CONST_INIT);
+  if (mode == LAZY_ARGUMENTS_ALLOCATION) done.Bind();
+  StoreToSlot(shadow->slot(), NOT_CONST_INIT);
+}
+
+
 void CodeGenerator::LoadTypeofExpression(Expression* expr) {
   // Special handling of identifiers as subexpressions of typeof.
   VirtualFrame::SpilledScope spilled_scope(frame_);
@@ -622,7 +661,7 @@
   } else if (variable != NULL && variable->slot() != NULL) {
     // For a variable that rewrites to a slot, we signal it is the immediate
     // subexpression of a typeof.
-    LoadFromSlot(variable->slot(), INSIDE_TYPEOF);
+    LoadFromSlotCheckForArguments(variable->slot(), INSIDE_TYPEOF);
     frame_->SpillAll();
   } else {
     // Anything else can be handled normally.
@@ -1466,6 +1505,188 @@
 }
 
 
+void CodeGenerator::CallApplyLazy(Expression* applicand,
+                                  Expression* receiver,
+                                  VariableProxy* arguments,
+                                  int position) {
+  // An optimized implementation of expressions of the form
+  // x.apply(y, arguments).
+  // If the arguments object of the scope has not been allocated,
+  // and x.apply is Function.prototype.apply, this optimization
+  // just copies y and the arguments of the current function on the
+  // stack, as receiver and arguments, and calls x.
+  // In the implementation comments, we call x the applicand
+  // and y the receiver.
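+  // A hypothetical JS example of the pattern being optimized, with
+  // 'target' as a placeholder name:
+  //   function f() { return target.apply(this, arguments); }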
+  VirtualFrame::SpilledScope spilled_scope(frame_);
+
+  ASSERT(ArgumentsMode() == LAZY_ARGUMENTS_ALLOCATION);
+  ASSERT(arguments->IsArguments());
+
+  // Load applicand.apply onto the stack. This will usually
+  // give us a megamorphic load site. Not super, but it works.
+  LoadAndSpill(applicand);
+  Handle<String> name = Factory::LookupAsciiSymbol("apply");
+  __ mov(r2, Operand(name));
+  frame_->CallLoadIC(RelocInfo::CODE_TARGET);
+  frame_->EmitPush(r0);
+
+  // Load the receiver and the existing arguments object onto the
+  // expression stack. Avoid allocating the arguments object here.
+  LoadAndSpill(receiver);
+  LoadFromSlot(scope()->arguments()->var()->slot(), NOT_INSIDE_TYPEOF);
+
+  // Emit the source position information after having loaded the
+  // receiver and the arguments.
+  CodeForSourcePosition(position);
+  // Contents of the stack at this point:
+  //   sp[0]: arguments object of the current function or the hole.
+  //   sp[1]: receiver
+  //   sp[2]: applicand.apply
+  //   sp[3]: applicand.
+
+  // Check if the arguments object has been lazily allocated
+  // already. If so, just use that instead of copying the arguments
+  // from the stack. This also deals with cases where a local variable
+  // named 'arguments' has been introduced.
+  __ ldr(r0, MemOperand(sp, 0));
+
+  Label slow, done;
+  __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
+  __ cmp(ip, r0);
+  __ b(ne, &slow);
+
+  Label build_args;
+  // Get rid of the arguments object probe.
+  frame_->Drop();
+  // Stack now has 3 elements on it.
+  // Contents of stack at this point:
+  //   sp[0]: receiver
+  //   sp[1]: applicand.apply
+  //   sp[2]: applicand.
+
+  // Check that the receiver really is a JavaScript object.
+  __ ldr(r0, MemOperand(sp, 0));
+  __ BranchOnSmi(r0, &build_args);
+  // We allow all JSObjects including JSFunctions.  As long as
+  // JS_FUNCTION_TYPE is the last instance type and it is right
+  // after LAST_JS_OBJECT_TYPE, we do not have to check the upper
+  // bound.
+  ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
+  ASSERT(JS_FUNCTION_TYPE == LAST_JS_OBJECT_TYPE + 1);
+  __ CompareObjectType(r0, r1, r2, FIRST_JS_OBJECT_TYPE);
+  __ b(lt, &build_args);
+
+  // Check that applicand.apply is Function.prototype.apply.
+  __ ldr(r0, MemOperand(sp, kPointerSize));
+  __ BranchOnSmi(r0, &build_args);
+  __ CompareObjectType(r0, r1, r2, JS_FUNCTION_TYPE);
+  __ b(ne, &build_args);
+  __ ldr(r0, FieldMemOperand(r0, JSFunction::kSharedFunctionInfoOffset));
+  Handle<Code> apply_code(Builtins::builtin(Builtins::FunctionApply));
+  __ ldr(r1, FieldMemOperand(r0, SharedFunctionInfo::kCodeOffset));
+  __ cmp(r1, Operand(apply_code));
+  __ b(ne, &build_args);
+
+  // Check that applicand is a function.
+  __ ldr(r1, MemOperand(sp, 2 * kPointerSize));
+  __ BranchOnSmi(r1, &build_args);
+  __ CompareObjectType(r1, r2, r3, JS_FUNCTION_TYPE);
+  __ b(ne, &build_args);
+
+  // Copy the arguments to this function possibly from the
+  // adaptor frame below it.
+  Label invoke, adapted;
+  __ ldr(r2, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
+  __ ldr(r3, MemOperand(r2, StandardFrameConstants::kContextOffset));
+  __ cmp(r3, Operand(Smi::FromInt(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ b(eq, &adapted);
+
+  // No arguments adaptor frame. Copy fixed number of arguments.
+  __ mov(r0, Operand(scope()->num_parameters()));
+  for (int i = 0; i < scope()->num_parameters(); i++) {
+    __ ldr(r2, frame_->ParameterAt(i));
+    __ push(r2);
+  }
+  __ jmp(&invoke);
+
+  // Arguments adaptor frame present. Copy arguments from there, but
+  // limit how many we copy to avoid stack overflows.
+  __ bind(&adapted);
+  static const uint32_t kArgumentsLimit = 1 * KB;
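+  // kArgumentsLimit bounds the argument count, not a byte size: calls
+  // with more than 1024 arguments fall back to build_args, which
+  // allocates a real arguments object.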
+  __ ldr(r0, MemOperand(r2, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ mov(r0, Operand(r0, LSR, kSmiTagSize));
+  __ mov(r3, r0);
+  __ cmp(r0, Operand(kArgumentsLimit));
+  __ b(gt, &build_args);
+
+  // Loop through the arguments pushing them onto the execution
+  // stack. We don't inform the virtual frame of the push, so we don't
+  // have to worry about getting rid of the elements from the virtual
+  // frame.
+  Label loop;
+  // r3 is a small non-negative integer, due to the test above.
+  __ cmp(r3, Operand(0));
+  __ b(eq, &invoke);
+  // Compute the address of the first argument.
+  __ add(r2, r2, Operand(r3, LSL, kPointerSizeLog2));
+  __ add(r2, r2, Operand(kPointerSize));
+  __ bind(&loop);
+  // Post-decrement argument address by kPointerSize on each iteration.
+  __ ldr(r4, MemOperand(r2, kPointerSize, NegPostIndex));
+  __ push(r4);
+  __ sub(r3, r3, Operand(1), SetCC);
+  __ b(gt, &loop);
+
+  // Invoke the function.
+  __ bind(&invoke);
+  ParameterCount actual(r0);
+  __ InvokeFunction(r1, actual, CALL_FUNCTION);
+  // Drop applicand.apply and applicand from the stack, and push
+  // the result of the function call, but leave the spilled frame
+  // unchanged, with 3 elements, so it is correct when we compile the
+  // slow-case code.
+  __ add(sp, sp, Operand(2 * kPointerSize));
+  __ push(r0);
+  // Stack now has 1 element:
+  //   sp[0]: result
+  __ jmp(&done);
+
+  // Slow-case: Allocate the arguments object since we know it isn't
+  // there, and fall through to the slow case where we call
+  // applicand.apply.
+  __ bind(&build_args);
+  // Stack now has 3 elements, because we jumped here from a point where:
+  //   sp[0]: receiver
+  //   sp[1]: applicand.apply
+  //   sp[2]: applicand.
+  StoreArgumentsObject(false);
+
+  // Stack and frame now have 4 elements.
+  __ bind(&slow);
+
+  // Generic computation of x.apply(y, args) with no special optimization.
+  // Flip applicand.apply and applicand on the stack, so
+  // applicand looks like the receiver of the applicand.apply call.
+  // Then process it as a normal function call.
+  __ ldr(r0, MemOperand(sp, 3 * kPointerSize));
+  __ ldr(r1, MemOperand(sp, 2 * kPointerSize));
+  __ str(r0, MemOperand(sp, 2 * kPointerSize));
+  __ str(r1, MemOperand(sp, 3 * kPointerSize));
+
+  CallFunctionStub call_function(2, NOT_IN_LOOP, NO_CALL_FUNCTION_FLAGS);
+  frame_->CallStub(&call_function, 3);
+  // The function and its two arguments have been dropped.
+  frame_->Drop();  // Drop the receiver as well.
+  frame_->EmitPush(r0);
+  // Stack now has 1 element:
+  //   sp[0]: result
+  __ bind(&done);
+
+  // Restore the context register after a call.
+  __ ldr(cp, frame_->Context());
+}
+
+
 void CodeGenerator::Branch(bool if_true, JumpTarget* target) {
   VirtualFrame::SpilledScope spilled_scope(frame_);
   ASSERT(has_cc());
@@ -1771,7 +1992,7 @@
   }
 #ifdef DEBUG
   JumpTarget verified_true;
-  __ cmp(r0, Operand(cp));
+  __ cmp(r0, cp);
   verified_true.Branch(eq);
   __ stop("PushContext: r0 is expected to be the same as cp");
   verified_true.Bind();
@@ -2248,7 +2469,7 @@
 
   __ ldr(r0, frame_->ElementAt(0));  // load the current count
   __ ldr(r1, frame_->ElementAt(1));  // load the length
-  __ cmp(r0, Operand(r1));  // compare to the array length
+  __ cmp(r0, r1);  // compare to the array length
   node->break_target()->Branch(hs);
 
   __ ldr(r0, frame_->ElementAt(0));
@@ -2802,6 +3023,34 @@
 }
 
 
+void CodeGenerator::LoadFromSlotCheckForArguments(Slot* slot,
+                                                  TypeofState state) {
+  LoadFromSlot(slot, state);
+
+  // Bail out quickly if we're not using lazy arguments allocation.
+  if (ArgumentsMode() != LAZY_ARGUMENTS_ALLOCATION) return;
+
+  // ... or if the slot isn't a non-parameter arguments slot.
+  if (slot->type() == Slot::PARAMETER || !slot->is_arguments()) return;
+
+  VirtualFrame::SpilledScope spilled_scope(frame_);
+
+  // Load the value on top of the stack into r0, but leave it on the
+  // stack.
+  __ ldr(r0, MemOperand(sp, 0));
+
+  // If the loaded value is the sentinel that indicates that we
+  // haven't allocated the arguments object yet, we need to do it now.
+  JumpTarget exit;
+  __ LoadRoot(ip, Heap::kTheHoleValueRootIndex);
+  __ cmp(r0, ip);
+  exit.Branch(ne);
+  frame_->Drop();
+  StoreArgumentsObject(false);
+  exit.Bind();
+}
+
+
 void CodeGenerator::StoreToSlot(Slot* slot, InitState init_state) {
   ASSERT(slot != NULL);
   if (slot->type() == Slot::LOOKUP) {
@@ -2940,20 +3189,13 @@
     __ bind(&fast);
   }
 
-  // All extension objects were empty and it is safe to use a global
-  // load IC call.
-  Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
   // Load the global object.
   LoadGlobal();
-  // Setup the name register.
+  // Setup the name register and call load IC.
   __ mov(r2, Operand(slot->var()->name()));
-  // Call IC stub.
-  if (typeof_state == INSIDE_TYPEOF) {
-    frame_->CallCodeObject(ic, RelocInfo::CODE_TARGET, 0);
-  } else {
-    frame_->CallCodeObject(ic, RelocInfo::CODE_TARGET_CONTEXT, 0);
-  }
-
+  frame_->CallLoadIC(typeof_state == INSIDE_TYPEOF
+                     ? RelocInfo::CODE_TARGET
+                     : RelocInfo::CODE_TARGET_CONTEXT);
   // Drop the global object. The result is in r0.
   frame_->Drop();
 }
@@ -2964,7 +3206,7 @@
   int original_height = frame_->height();
 #endif
   Comment cmnt(masm_, "[ Slot");
-  LoadFromSlot(node, NOT_INSIDE_TYPEOF);
+  LoadFromSlotCheckForArguments(node, NOT_INSIDE_TYPEOF);
   ASSERT(frame_->height() == original_height + 1);
 }
 
@@ -3422,21 +3664,37 @@
       // JavaScript example: 'object.foo(1, 2, 3)' or 'map["key"](1, 2, 3)'
       // ------------------------------------------------------------------
 
-      LoadAndSpill(property->obj());  // Receiver.
-      // Load the arguments.
-      int arg_count = args->length();
-      for (int i = 0; i < arg_count; i++) {
-        LoadAndSpill(args->at(i));
-      }
+      Handle<String> name = Handle<String>::cast(literal->handle());
 
-      // Set the name register and call the IC initialization code.
-      __ mov(r2, Operand(literal->handle()));
-      InLoopFlag in_loop = loop_nesting() > 0 ? IN_LOOP : NOT_IN_LOOP;
-      Handle<Code> stub = ComputeCallInitialize(arg_count, in_loop);
-      CodeForSourcePosition(node->position());
-      frame_->CallCodeObject(stub, RelocInfo::CODE_TARGET, arg_count + 1);
-      __ ldr(cp, frame_->Context());
-      frame_->EmitPush(r0);
+      if (ArgumentsMode() == LAZY_ARGUMENTS_ALLOCATION &&
+          name->IsEqualTo(CStrVector("apply")) &&
+          args->length() == 2 &&
+          args->at(1)->AsVariableProxy() != NULL &&
+          args->at(1)->AsVariableProxy()->IsArguments()) {
+        // Use the optimized Function.prototype.apply path that avoids
+        // allocating the lazily allocated arguments object.
+        CallApplyLazy(property->obj(),
+                      args->at(0),
+                      args->at(1)->AsVariableProxy(),
+                      node->position());
+
+      } else {
+        LoadAndSpill(property->obj());  // Receiver.
+        // Load the arguments.
+        int arg_count = args->length();
+        for (int i = 0; i < arg_count; i++) {
+          LoadAndSpill(args->at(i));
+        }
+
+        // Set the name register and call the IC initialization code.
+        __ mov(r2, Operand(name));
+        InLoopFlag in_loop = loop_nesting() > 0 ? IN_LOOP : NOT_IN_LOOP;
+        Handle<Code> stub = ComputeCallInitialize(arg_count, in_loop);
+        CodeForSourcePosition(node->position());
+        frame_->CallCodeObject(stub, RelocInfo::CODE_TARGET, arg_count + 1);
+        __ ldr(cp, frame_->Context());
+        frame_->EmitPush(r0);
+      }
 
     } else {
       // -------------------------------------------
@@ -3974,19 +4232,49 @@
   Label slow_allocate_heapnumber;
   Label heapnumber_allocated;
 
-  __ AllocateHeapNumber(r0, r1, r2, &slow_allocate_heapnumber);
+  __ AllocateHeapNumber(r4, r1, r2, &slow_allocate_heapnumber);
   __ jmp(&heapnumber_allocated);
 
   __ bind(&slow_allocate_heapnumber);
+  // To allocate a heap number and ensure that it is not a smi, we
+  // call the runtime function NumberUnaryMinus on 0, returning the double
+  // -0.0. A new, distinct heap number is returned each time.
   __ mov(r0, Operand(Smi::FromInt(0)));
   __ push(r0);
   __ CallRuntime(Runtime::kNumberUnaryMinus, 1);
+  __ mov(r4, Operand(r0));
 
   __ bind(&heapnumber_allocated);
-  __ PrepareCallCFunction(1, r1);
-  __ CallCFunction(
-      ExternalReference::fill_heap_number_with_random_function(), 1);
-  frame_->EmitPush(r0);
+
+  // Convert 32 random bits in r0 to 0.(32 random bits) in a double
+  // by computing:
+  // (1.(20 0s)(32 random bits) x 2^20) - (1.0 x 2^20).
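+  // With r denoting the 32 random bits, the first double equals
+  // 2^20 + r * 2^-32 and the second is exactly 2^20, so the difference
+  // is r * 2^-32, a value in [0, 1).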
+  if (CpuFeatures::IsSupported(VFP3)) {
+    __ PrepareCallCFunction(0, r1);
+    __ CallCFunction(ExternalReference::random_uint32_function(), 0);
+
+    CpuFeatures::Scope scope(VFP3);
+    // 0x41300000 is the top half of 1.0 x 2^20 as a double.
+    // Create this constant using mov/orr to avoid PC relative load.
+    __ mov(r1, Operand(0x41000000));
+    __ orr(r1, r1, Operand(0x300000));
+    // Move 0x41300000xxxxxxxx (x = random bits) to VFP.
+    __ vmov(d7, r0, r1);
+    // Move 0x4130000000000000 to VFP.
+    __ mov(r0, Operand(0));
+    __ vmov(d8, r0, r1);
+    // Subtract and store the result in the heap number.
+    __ vsub(d7, d7, d8);
+    __ sub(r0, r4, Operand(kHeapObjectTag));
+    __ vstr(d7, r0, HeapNumber::kValueOffset);
+    frame_->EmitPush(r4);
+  } else {
+    __ mov(r0, Operand(r4));
+    __ PrepareCallCFunction(1, r1);
+    __ CallCFunction(
+        ExternalReference::fill_heap_number_with_random_function(), 1);
+    frame_->EmitPush(r0);
+  }
 }
 
 
@@ -4172,18 +4460,20 @@
     frame_->EmitPush(r0);
     return;
   }
-  Handle<FixedArray> cache_obj(
-      FixedArray::cast(jsfunction_result_caches->get(cache_id)));
 
   Load(args->at(1));
   frame_->EmitPop(r2);
 
+  __ ldr(r1, ContextOperand(cp, Context::GLOBAL_INDEX));
+  __ ldr(r1, FieldMemOperand(r1, GlobalObject::kGlobalContextOffset));
+  __ ldr(r1, ContextOperand(r1, Context::JSFUNCTION_RESULT_CACHES_INDEX));
+  __ ldr(r1, FieldMemOperand(r1, FixedArray::OffsetOfElementAt(cache_id)));
+
   DeferredSearchCache* deferred = new DeferredSearchCache(r0, r1, r2);
 
   const int kFingerOffset =
       FixedArray::OffsetOfElementAt(JSFunctionResultCache::kFingerIndex);
   ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
-  __ mov(r1, Operand(cache_obj));
   __ ldr(r0, FieldMemOperand(r1, kFingerOffset));
   // r0 now holds finger offset as a smi.
   __ add(r3, r1, Operand(FixedArray::kHeaderSize - kHeapObjectTag));
@@ -4255,7 +4545,7 @@
   LoadAndSpill(args->at(1));
   frame_->EmitPop(r0);
   frame_->EmitPop(r1);
-  __ cmp(r0, Operand(r1));
+  __ cmp(r0, r1);
   cc_reg_ = eq;
 }
 
@@ -4935,6 +5225,97 @@
 }
 
 
+class DeferredReferenceGetNamedValue: public DeferredCode {
+ public:
+  explicit DeferredReferenceGetNamedValue(Handle<String> name) : name_(name) {
+    set_comment("[ DeferredReferenceGetNamedValue");
+  }
+
+  virtual void BeforeGenerate();
+  virtual void Generate();
+  virtual void AfterGenerate();
+
+ private:
+  Handle<String> name_;
+};
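+// Note: BeforeGenerate/AfterGenerate block the constant pool around the
+// deferred code so that no pool entries are emitted between the IC call
+// and the marker nop that follows it.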
+
+
+void DeferredReferenceGetNamedValue::BeforeGenerate() {
+  __ StartBlockConstPool();
+}
+
+
+void DeferredReferenceGetNamedValue::Generate() {
+  __ IncrementCounter(&Counters::named_load_inline_miss, 1, r1, r2);
+  // Setup the name register and call load IC.
+  __ mov(r2, Operand(name_));
+  Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
+  __ Call(ic, RelocInfo::CODE_TARGET);
+  // The call must be followed by a nop(1) instruction to indicate that
+  // the in-object property load has been inlined.
+  __ nop(NAMED_PROPERTY_LOAD_INLINED);
+}
+
+
+void DeferredReferenceGetNamedValue::AfterGenerate() {
+  __ EndBlockConstPool();
+}
+
+
+void CodeGenerator::EmitNamedLoad(Handle<String> name, bool is_contextual) {
+  if (is_contextual || scope()->is_global_scope() || loop_nesting() == 0) {
+    Comment cmnt(masm(), "[ Load from named Property");
+    // Setup the name register and call load IC.
+    __ mov(r2, Operand(name));
+    frame_->CallLoadIC(is_contextual
+                       ? RelocInfo::CODE_TARGET_CONTEXT
+                       : RelocInfo::CODE_TARGET);
+  } else {
+    // Inline the inobject property case.
+    Comment cmnt(masm(), "[ Inlined named property load");
+
+    DeferredReferenceGetNamedValue* deferred =
+        new DeferredReferenceGetNamedValue(name);
+
+    // The following instructions are the inlined load of an in-object property.
+    // Parts of this code are patched, so the exact instructions generated
+    // need to be fixed. Therefore the constant pool is blocked while
+    // generating this code.
+#ifdef DEBUG
+    int kInlinedNamedLoadInstructions = 8;
+    Label check_inlined_codesize;
+    masm_->bind(&check_inlined_codesize);
+#endif
+    { Assembler::BlockConstPoolScope block_const_pool(masm_);
+      // Load the receiver from the stack.
+      __ ldr(r1, MemOperand(sp, 0));
+
+      // Check that the receiver is a heap object.
+      __ tst(r1, Operand(kSmiTagMask));
+      deferred->Branch(eq);
+
+      // Check the map. The null map used below is patched by the inline cache
+      // code.
+      __ ldr(r2, FieldMemOperand(r1, HeapObject::kMapOffset));
+      __ mov(r3, Operand(Factory::null_value()));
+      __ cmp(r2, r3);
+      deferred->Branch(ne);
+
+      // Initially use an invalid index. The index will be patched by the
+      // inline cache code.
+      __ ldr(r0, MemOperand(r1, 0));
+    }
+
+    // Make sure that the expected number of instructions is generated.
+    ASSERT_EQ(kInlinedNamedLoadInstructions,
+              masm_->InstructionsGeneratedSince(&check_inlined_codesize));
+
+    __ IncrementCounter(&Counters::named_load_inline, 1, r1, r2);
+    deferred->BindExit();
+  }
+}
+
+
 void CodeGenerator::EmitKeyedLoad(bool is_global) {
   Comment cmnt(masm_, "[ Load from keyed Property");
   Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
@@ -4986,24 +5367,16 @@
       Comment cmnt(masm, "[ Load from Slot");
       Slot* slot = expression_->AsVariableProxy()->AsVariable()->slot();
       ASSERT(slot != NULL);
-      cgen_->LoadFromSlot(slot, NOT_INSIDE_TYPEOF);
+      cgen_->LoadFromSlotCheckForArguments(slot, NOT_INSIDE_TYPEOF);
       break;
     }
 
     case NAMED: {
-      VirtualFrame* frame = cgen_->frame();
-      Comment cmnt(masm, "[ Load from named Property");
-      Handle<String> name(GetName());
       Variable* var = expression_->AsVariableProxy()->AsVariable();
-      Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
-      // Setup the name register.
-      __ mov(r2, Operand(name));
-      ASSERT(var == NULL || var->is_global());
-      RelocInfo::Mode rmode = (var == NULL)
-                            ? RelocInfo::CODE_TARGET
-                            : RelocInfo::CODE_TARGET_CONTEXT;
-      frame->CallCodeObject(ic, rmode, 0);
-      frame->EmitPush(r0);
+      bool is_global = var != NULL;
+      ASSERT(!is_global || var->is_global());
+      cgen_->EmitNamedLoad(GetName(), is_global);
+      cgen_->frame()->EmitPush(r0);
       break;
     }
 
@@ -5400,7 +5773,7 @@
   Label not_identical;
   Label heap_number, return_equal;
   Register exp_mask_reg = r5;
-  __ cmp(r0, Operand(r1));
+  __ cmp(r0, r1);
   __ b(ne, &not_identical);
 
   // The two objects are identical.  If we know that one of them isn't NaN then
@@ -5429,7 +5802,7 @@
           __ cmp(r4, Operand(ODDBALL_TYPE));
           __ b(ne, &return_equal);
           __ LoadRoot(r2, Heap::kUndefinedValueRootIndex);
-          __ cmp(r0, Operand(r2));
+          __ cmp(r0, r2);
           __ b(ne, &return_equal);
           if (cc == le) {
             // undefined <= undefined should fail.
@@ -5992,8 +6365,7 @@
     Register lhs,
     Register rhs,
     const Builtins::JavaScript& builtin) {
-  Label slow, slow_pop_2_first, do_the_call;
-  Label r0_is_smi, r1_is_smi, finished_loading_r0, finished_loading_r1;
+  Label slow, slow_reverse, do_the_call;
   bool use_fp_registers = CpuFeatures::IsSupported(VFP3) && Token::MOD != op_;
 
   ASSERT((lhs.is(r0) && rhs.is(r1)) || (lhs.is(r1) && rhs.is(r0)));
@@ -6002,7 +6374,7 @@
     // Smi-smi case (overflow).
     // Since both are Smis there is no heap number to overwrite, so allocate.
     // The new heap number is in r5.  r6 and r7 are scratch.
-    __ AllocateHeapNumber(r5, r6, r7, &slow);
+    __ AllocateHeapNumber(r5, r6, r7, lhs.is(r0) ? &slow_reverse : &slow);
 
     // If we have floating point hardware, inline ADD, SUB, MUL, and DIV,
     // using registers d7 and d6 for the double values.
@@ -6032,11 +6404,15 @@
   // We branch here if at least one of r0 and r1 is not a Smi.
   __ bind(not_smi);
 
+  // After this point we have the left hand side in r1 and the right hand side
+  // in r0.
   if (lhs.is(r0)) {
     __ Swap(r0, r1, ip);
   }
 
   if (ShouldGenerateFPCode()) {
+    Label r0_is_smi, r1_is_smi, finished_loading_r0, finished_loading_r1;
+
     if (runtime_operands_type_ == BinaryOpIC::DEFAULT) {
       switch (op_) {
         case Token::ADD:
@@ -6054,7 +6430,7 @@
     if (mode_ == NO_OVERWRITE) {
       // In the case where there is no chance of an overwritable float we may as
       // well do the allocation immediately while r0 and r1 are untouched.
-    __ AllocateHeapNumber(r5, r6, r7, &slow);
+      __ AllocateHeapNumber(r5, r6, r7, &slow);
     }
 
     // Move r0 to a double in r2-r3.
@@ -6097,11 +6473,22 @@
       __ pop(lr);
     }
 
+    // HEAP_NUMBERS stub is slower than GENERIC on a pair of smis.
+    // r0 is known to be a smi. If r1 is also a smi then switch to GENERIC.
+    Label r1_is_not_smi;
+    if (runtime_operands_type_ == BinaryOpIC::HEAP_NUMBERS) {
+      __ tst(r1, Operand(kSmiTagMask));
+      __ b(ne, &r1_is_not_smi);
+      GenerateTypeTransition(masm);
+      __ jmp(&r1_is_smi);
+    }
+
     __ bind(&finished_loading_r0);
 
     // Move r1 to a double in r0-r1.
     __ tst(r1, Operand(kSmiTagMask));
     __ b(eq, &r1_is_smi);  // It's a Smi so don't check it's a heap number.
+    __ bind(&r1_is_not_smi);
     __ CompareObjectType(r1, r4, r4, HEAP_NUMBER_TYPE);
     __ b(ne, &slow);
     if (mode_ == OVERWRITE_LEFT) {
@@ -6194,6 +6581,14 @@
       __ pop(pc);
     }
   }
+
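+  // The smi-smi overflow path enters at slow_reverse when lhs is in r0:
+  // the registers were not swapped on that path, so swap them here to
+  // match the register assignment the slow case expects.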
+  if (lhs.is(r0)) {
+    __ b(&slow);
+    __ bind(&slow_reverse);
+    __ Swap(r0, r1, ip);
+  }
+
   // We jump to here if something goes wrong (one param is not a number of any
   // sort or new-space allocation fails).
   __ bind(&slow);
@@ -7743,7 +8138,8 @@
   __ ldr(last_match_info_elements,
          FieldMemOperand(r0, JSArray::kElementsOffset));
   __ ldr(r0, FieldMemOperand(last_match_info_elements, HeapObject::kMapOffset));
-  __ cmp(r0, Operand(Factory::fixed_array_map()));
+  __ LoadRoot(ip, Heap::kFixedArrayMapRootIndex);
+  __ cmp(r0, ip);
   __ b(ne, &runtime);
   // Check that the last match info has space for the capture registers and the
   // additional information.