Merge "Fix a DCHECK failure IsResolved() || IsErroneous()."
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index ee72706..3e76d91 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -90,6 +90,12 @@
 # Do you want run-tests without a dex2oat?
 ART_TEST_RUN_TEST_NO_DEX2OAT ?= $(ART_TEST_FULL)
 
+# Do you want run-tests with libartd.so?
+ART_TEST_RUN_TEST_DEBUG ?= true
+
+# Do you want run-tests with libart.so?
+ART_TEST_RUN_TEST_NDEBUG ?= $(ART_TEST_FULL)
+
 # Do you want failed tests to have their artifacts cleaned up?
 ART_TEST_RUN_TEST_ALWAYS_CLEAN ?= true
 
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 81f3297..86f445f 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -54,9 +54,10 @@
   include $(CLEAR_VARS)
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   LOCAL_MODULE_TAGS := optional
-  LOCAL_SRC_FILES := $$(art_source) ../sigchainlib/sigchain.cc
+  LOCAL_SRC_FILES := $$(art_source)
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime $$(art_c_includes)
   LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries)
+  LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := $$(art_executable)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index f782d5c..db7257a 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -332,9 +332,10 @@
     LOCAL_MODULE_TAGS := tests
   endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
-  LOCAL_SRC_FILES := $$(art_gtest_filename) sigchainlib/sigchain.cc
+  LOCAL_SRC_FILES := $$(art_gtest_filename)
   LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
   LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest
+  LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e1b5984..4528688 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1955,6 +1955,19 @@
   context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_);
 }
 
+// Does the runtime for the given InstructionSet provide a generic JNI stub (returned by
+// GetQuickGenericJniStub) for native down calls that aren't compiled by the JNI compiler?
+static bool InstructionSetHasGenericJniStub(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64: return true;
+    default: return false;
+  }
+}
+
 void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
                                    InvokeType invoke_type, uint16_t class_def_idx,
                                    uint32_t method_idx, jobject class_loader,
@@ -1966,13 +1979,14 @@
   if ((access_flags & kAccNative) != 0) {
     // Are we interpreting only and have support for generic JNI down calls?
     if (!compiler_options_->IsCompilationEnabled() &&
-        (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) {
+        InstructionSetHasGenericJniStub(instruction_set_)) {
       // Leaving this empty will trigger the generic JNI version
     } else {
       compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
       CHECK(compiled_method != nullptr);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
+    // Abstract methods don't have code.
   } else {
     MethodReference method_ref(&dex_file, method_idx);
     bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
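
For context, here is a minimal C++ sketch of the dispatch this change enables for native methods (UseGenericJniStub and the simplified enum below are hypothetical stand-ins, not ART's real API): when compilation is disabled and the target ISA has a generic JNI stub, the method is left without compiled code so the runtime installs the stub returned by GetQuickGenericJniStub; otherwise the JNI compiler runs.

// Sketch only; mirrors the native-method branch of CompileMethod above.
enum InstructionSet { kNone, kArm, kArm64, kThumb2, kX86, kX86_64, kMips };

static bool InstructionSetHasGenericJniStub(InstructionSet isa) {
  switch (isa) {
    case kArm:
    case kArm64:
    case kThumb2:
    case kX86:
    case kX86_64: return true;
    default: return false;  // e.g. kMips had no generic JNI stub at this point.
  }
}

// Returns true when the native method may be left without compiled code so the
// runtime falls back to the trampoline returned by GetQuickGenericJniStub.
bool UseGenericJniStub(bool compilation_enabled, InstructionSet isa) {
  return !compilation_enabled && InstructionSetHasGenericJniStub(isa);
}
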
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index f6795ea..3c3aa02 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -176,12 +176,8 @@
   // 4. Write out the end of the quick frames.
   if (is_64_bit_target) {
     __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>());
-    __ StoreImmediateToThread64(Thread::TopOfManagedStackPcOffset<8>(), 0,
-                              mr_conv->InterproceduralScratchRegister());
   } else {
     __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>());
-    __ StoreImmediateToThread32(Thread::TopOfManagedStackPcOffset<4>(), 0,
-                              mr_conv->InterproceduralScratchRegister());
   }
 
   // 5. Move frame down to allow space for out going args.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2648d4d..2f1a092 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -267,6 +267,13 @@
 }
 
 template<typename T>
+void HGraphBuilder::Unop_12x(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
@@ -678,6 +685,11 @@
       break;
     }
 
+    case Instruction::NEG_INT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
     case Instruction::ADD_INT: {
       Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
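
As a rough, self-contained illustration of the Unop_12x helper added above (Builder, Value, and Neg are simplified stand-ins for ART's HGraphBuilder, HInstruction, and HNeg): a 12x-format dex instruction reads one source register (vB), creates a single-input node, and records the result in the destination register (vA).

#include <map>
#include <memory>
#include <vector>

// Simplified stand-ins for HInstruction and HNeg.
struct Value { virtual ~Value() {} };
struct Neg : Value { Value* input; explicit Neg(Value* in) : input(in) {} };

struct Builder {
  std::map<int, Value*> locals;               // dex register -> latest value
  std::vector<std::unique_ptr<Value>> arena;  // owns the created nodes

  template <typename T>
  void Unop_12x(int vreg_a, int vreg_b) {
    Value* first = locals[vreg_b];            // LoadLocal(instruction.VRegB())
    arena.push_back(std::make_unique<T>(first));
    locals[vreg_a] = arena.back().get();      // UpdateLocal(instruction.VRegA())
  }
};

// Usage, matching the NEG_INT case above: Builder b; b.Unop_12x<Neg>(1, 0);
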
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index e68cdb0..90e50ad 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -95,6 +95,9 @@
   bool InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
+  void Unop_12x(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 408e13e..d5cd490 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -473,8 +473,7 @@
       case Location::kRegister : {
         int id = location.reg();
         stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
-        if (current->GetType() == Primitive::kPrimDouble
-            || current->GetType() == Primitive::kPrimLong) {
+        if (current->GetType() == Primitive::kPrimLong) {
           stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
           ++i;
           DCHECK_LT(i, environment_size);
@@ -482,52 +481,55 @@
         break;
       }
 
+      case Location::kFpuRegister : {
+        int id = location.reg();
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+        if (current->GetType() == Primitive::kPrimDouble) {
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
+          ++i;
+          DCHECK_LT(i, environment_size);
+        }
+        break;
+      }
+
       default:
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
   }
 }
 
-size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
-  return first_register_slot_in_slow_path_ + index * GetWordSize();
-}
-
 void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
-  uint32_t count = 0;
+  size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
     if (register_set->ContainsCoreRegister(i)) {
-      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
-      ++count;
-      SaveCoreRegister(Location::StackSlot(stack_offset), i);
       // If the register holds an object, update the stack mask.
       if (locations->RegisterContainsObject(i)) {
         locations->SetStackBit(stack_offset / kVRegSize);
       }
+      stack_offset += SaveCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
-      LOG(FATAL) << "Unimplemented";
+      stack_offset += SaveFloatingPointRegister(stack_offset, i);
     }
   }
 }
 
 void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
   RegisterSet* register_set = locations->GetLiveRegisters();
-  uint32_t count = 0;
+  size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
     if (register_set->ContainsCoreRegister(i)) {
-      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
-      ++count;
-      RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+      stack_offset += RestoreCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
-      LOG(FATAL) << "Unimplemented";
+      stack_offset += RestoreFloatingPointRegister(stack_offset, i);
     }
   }
 }
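
The refactor above drops the fixed GetStackOffsetOfSavedRegister indexing and instead accumulates the offset from each save's return value, so core and floating-point registers of different widths can share one spill area. A minimal sketch of the pattern, using hypothetical names:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical backend: each save reports how many bytes of stack it used,
// e.g. 4 for a 32-bit core register, 8 for a double-width XMM register.
struct Backend {
  size_t SaveCore(size_t offset, uint32_t reg) { (void)offset; (void)reg; return 4; }
  size_t SaveFpu(size_t offset, uint32_t reg) { (void)offset; (void)reg; return 8; }
};

size_t SaveLive(Backend* b, size_t first_slot_offset,
                const std::vector<uint32_t>& core,
                const std::vector<uint32_t>& fpu) {
  size_t stack_offset = first_slot_offset;
  for (uint32_t reg : core) stack_offset += b->SaveCore(stack_offset, reg);
  for (uint32_t reg : fpu) stack_offset += b->SaveFpu(stack_offset, reg);
  // Restore must walk the registers in the same order so offsets line up.
  return stack_offset;
}
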
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7aaf991..220d745 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -110,8 +110,18 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   virtual InstructionSet GetInstructionSet() const = 0;
-  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
-  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+  // Saves the register to the stack. Returns the size taken on the stack.
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
+  // Restores the register from the stack. Returns the size taken on the stack.
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
+  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+    LOG(FATAL) << "Unimplemented";
+    return 0u;
+  }
+  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+    LOG(FATAL) << "Unimplemented";
+    return 0u;
+  }
 
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
 
@@ -145,6 +155,7 @@
   void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
 
   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
+  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
 
  protected:
   CodeGenerator(HGraph* graph,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a2cf670..24b7c2d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -210,12 +210,14 @@
   stream << ArmManagedRegister::FromDRegister(DRegister(reg));
 }
 
-void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex());
+size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreToOffset(kStoreWord, static_cast<Register>(reg_id), SP, stack_index);
+  return kArmWordSize;
 }
 
-void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_location.GetStackIndex());
+size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadFromOffset(kLoadWord, static_cast<Register>(reg_id), SP, stack_index);
+  return kArmWordSize;
 }
 
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
@@ -656,7 +658,7 @@
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
   if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+    locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
@@ -715,10 +717,10 @@
 void LocationsBuilderARM::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)), Location::kDiesAtEntry);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)));
   if (comp->NeedsMaterialization()) {
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
@@ -859,6 +861,26 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARM::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderARM::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
   ret->SetLocations(nullptr);
 }
@@ -1016,16 +1038,57 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+void LocationsBuilderARM::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      DCHECK(in.IsRegister());
+      __ rsbs(out.As<Register>(), in.As<Register>(), ShifterOperand(0));
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      bool dies_at_entry = add->GetResultType() != Primitive::kPrimLong;
-      locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
-      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)), dies_at_entry);
-      locations->SetOut(Location::RequiresRegister());
+      bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong);
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), output_overlaps);
       break;
     }
 
@@ -1088,10 +1151,10 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      bool dies_at_entry = sub->GetResultType() != Primitive::kPrimLong;
-      locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
-      locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)), dies_at_entry);
-      locations->SetOut(Location::RequiresRegister());
+      bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong);
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), output_overlaps);
       break;
     }
 
@@ -1150,9 +1213,9 @@
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:  {
-      locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-      locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry);
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
 
@@ -1261,8 +1324,8 @@
 void LocationsBuilderARM::VisitNot(HNot* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) {
@@ -1274,9 +1337,9 @@
 void LocationsBuilderARM::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
@@ -1332,9 +1395,8 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   bool is_object_type = instruction->GetFieldType() == Primitive::kPrimNot;
-  bool dies_at_entry = !is_object_type;
-  locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
-  locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
   // Temporary registers for the write barrier.
   if (is_object_type) {
     locations->AddTemp(Location::RequiresRegister());
@@ -1394,8 +1456,8 @@
 void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -1481,10 +1543,9 @@
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
@@ -1594,10 +1655,9 @@
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-    locations->SetInAt(
-        1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
-    locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    locations->SetInAt(2, Location::RequiresRegister());
   }
 }
 
@@ -1684,8 +1744,8 @@
 void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
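
A side note on the ARM VisitNeg above: ARM has no single-operand integer negate, so the backend uses reverse-subtract, rsbs out, in, #0, which computes 0 - in. The same operation in C++ terms (a sketch; two's-complement wraparound applies at INT32_MIN):

#include <cstdint>

// What rsbs out, in, #0 computes for neg-int, expressed in C++. The unsigned
// subtraction makes the two's-complement wrap at INT32_MIN explicit.
int32_t NegInt(int32_t in) {
  return static_cast<int32_t>(0u - static_cast<uint32_t>(in));
}
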
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 57b289c..1fe8a7e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -142,8 +142,8 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(HBasicBlock* block) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
-  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 041acdf..2550518 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -182,12 +182,14 @@
   stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
 }
 
-void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
+  return kX86WordSize;
 }
 
-void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
+  return kX86WordSize;
 }
 
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
@@ -588,7 +590,7 @@
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
   if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry);
+    locations->SetInAt(0, Location::Any());
   }
 }
 
@@ -699,8 +701,8 @@
 void LocationsBuilderX86::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -795,6 +797,26 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
   ret->SetLocations(nullptr);
 }
@@ -957,6 +979,47 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+void LocationsBuilderX86::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      DCHECK(in.IsRegister());
+      __ negl(out.As<Register>());
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
 void LocationsBuilderX86::VisitAdd(HAdd* add) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
@@ -1279,9 +1342,9 @@
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
@@ -1350,12 +1413,11 @@
       || (field_type == Primitive::kPrimByte);
   // The register allocator does not support multiple
   // inputs that die at entry with one in a specific register.
-  bool dies_at_entry = !is_object_type && !is_byte_type;
   if (is_byte_type) {
     // Ensure the value is in a byte register.
-    locations->SetInAt(1, Location::RegisterLocation(EAX), dies_at_entry);
+    locations->SetInAt(1, Location::RegisterLocation(EAX));
   } else {
-    locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
+    locations->SetInAt(1, Location::RequiresRegister());
   }
   // Temporary registers for the write barrier.
   if (is_object_type) {
@@ -1431,8 +1493,8 @@
 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -1521,10 +1583,9 @@
 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -1637,16 +1698,13 @@
     // We need the inputs to be different than the output in case of long operation.
     // In case of a byte operation, the register allocator does not support multiple
     // inputs that die at entry with one in a specific register.
-    bool dies_at_entry = value_type != Primitive::kPrimLong && !is_byte_type;
-    locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
-    locations->SetInAt(
-        1, Location::RegisterOrConstant(instruction->InputAt(1)), dies_at_entry);
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
     if (is_byte_type) {
       // Ensure the value is in a byte register.
-      locations->SetInAt(2, Location::ByteRegisterOrConstant(
-          EAX, instruction->InputAt(2)), dies_at_entry);
+      locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
     } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), dies_at_entry);
+      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
   }
 }
@@ -1776,8 +1834,8 @@
 
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   instruction->SetLocations(locations);
 }
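
In contrast to ARM's three-operand rsb, x86's negl is a two-address instruction that negates its operand in place, which is why VisitNeg above pins the output with Location::SameAsFirstInput(). A sketch of the constraint, with simplified stand-in types:

#include <cassert>

enum Reg { EAX, ECX, EDX };  // simplified

struct X86Assembler {
  // Two-address form: negates r in place, destroying the input.
  void negl(Reg r) { (void)r; /* would emit the NEG r/m32 encoding */ }
};

void GenerateNeg(X86Assembler* assembler, Reg out, Reg in) {
  // The register allocator must have honored SameAsFirstInput().
  assert(out == in);
  assembler->negl(out);
}
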
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index db8b9ab..fff91d1 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -144,8 +144,8 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(HBasicBlock* block) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
-  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5fa9305..6174ac6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -191,12 +191,24 @@
   stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
 }
 
-void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
+  return kX86_64WordSize;
 }
 
-void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
-  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  return kX86_64WordSize;
 }
 
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
@@ -489,7 +501,7 @@
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
   if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry);
+    locations->SetInAt(0, Location::Any());
   }
 }
 
@@ -598,8 +610,8 @@
 void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -676,9 +688,9 @@
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
@@ -727,6 +739,26 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
   ret->SetLocations(nullptr);
 }
@@ -933,6 +965,47 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      DCHECK(in.IsRegister());
+      __ negl(out.As<CpuRegister>());
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
@@ -954,7 +1027,7 @@
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::Any());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -991,21 +1064,12 @@
     }
 
     case Primitive::kPrimFloat: {
-      if (second.IsFpuRegister()) {
-        __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
-      } else {
-        __ addss(first.As<XmmRegister>(),
-                 Address(CpuRegister(RSP), second.GetStackIndex()));
-      }
+      __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      if (second.IsFpuRegister()) {
-        __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
-      } else {
-        __ addsd(first.As<XmmRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
-      }
+      __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
 
@@ -1213,9 +1277,8 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = instruction->GetFieldType();
   bool is_object_type = field_type == Primitive::kPrimNot;
-  bool dies_at_entry = !is_object_type;
-  locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
-  locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
   if (is_object_type) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());
@@ -1272,8 +1335,8 @@
 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -1356,10 +1419,10 @@
 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+      1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -1442,10 +1505,30 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
-      UNREACHABLE();
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      XmmRegister out = locations->Out().As<XmmRegister>();
+      if (index.IsConstant()) {
+        __ movss(out, Address(obj,
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
+      } else {
+        __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset));
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      XmmRegister out = locations->Out().As<XmmRegister>();
+      if (index.IsConstant()) {
+        __ movsd(out, Address(obj,
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
+      } else {
+        __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset));
+      }
+      break;
+    }
+
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
@@ -1463,14 +1546,16 @@
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetInAt(
-        1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
-    locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+        1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    locations->SetInAt(2, Location::RequiresRegister());
     if (value_type == Primitive::kPrimLong) {
-      locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+      locations->SetInAt(2, Location::RequiresRegister());
+    } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
+      locations->SetInAt(2, Location::RequiresFpuRegister());
     } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), Location::kDiesAtEntry);
+      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
   }
 }
@@ -1541,6 +1626,7 @@
           __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
                   value.As<CpuRegister>());
         } else {
+          DCHECK(value.IsConstant()) << value;
           __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
@@ -1569,10 +1655,34 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
-      UNREACHABLE();
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        DCHECK(value.IsFpuRegister());
+        __ movss(Address(obj, offset), value.As<XmmRegister>());
+      } else {
+        DCHECK(value.IsFpuRegister());
+        __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+                value.As<XmmRegister>());
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        DCHECK(value.IsFpuRegister());
+        __ movsd(Address(obj, offset), value.As<XmmRegister>());
+      } else {
+        DCHECK(value.IsFpuRegister());
+        __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset),
+                value.As<XmmRegister>());
+      }
+      break;
+    }
+
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
@@ -1582,8 +1692,8 @@
 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
@@ -1706,6 +1816,9 @@
     if (destination.IsRegister()) {
       __ movl(destination.As<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else if (destination.IsFpuRegister()) {
+      __ movss(destination.As<XmmRegister>(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(destination.IsStackSlot());
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1715,8 +1828,10 @@
     if (destination.IsRegister()) {
       __ movq(destination.As<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else if (destination.IsFpuRegister()) {
+      __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
-      DCHECK(destination.IsDoubleStackSlot());
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
@@ -1727,6 +1842,7 @@
       if (destination.IsRegister()) {
         __ movl(destination.As<CpuRegister>(), imm);
       } else {
+        DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else if (constant->IsLongConstant()) {
@@ -1734,14 +1850,42 @@
       if (destination.IsRegister()) {
         __ movq(destination.As<CpuRegister>(), Immediate(value));
       } else {
+        DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), Immediate(value));
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
+    } else if (constant->IsFloatConstant()) {
+      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      if (destination.IsFpuRegister()) {
+        __ movl(CpuRegister(TMP), imm);
+        __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+      } else {
+        DCHECK(destination.IsStackSlot()) << destination;
+        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+      }
     } else {
-      LOG(FATAL) << "Unimplemented constant type";
+      DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
+      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      if (destination.IsFpuRegister()) {
+        __ movq(CpuRegister(TMP), imm);
+        __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+      } else {
+        DCHECK(destination.IsDoubleStackSlot()) << destination;
+        __ movq(CpuRegister(TMP), imm);
+        __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      }
     }
-  } else {
-    LOG(FATAL) << "Unimplemented";
+  } else if (source.IsFpuRegister()) {
+    if (destination.IsFpuRegister()) {
+      __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+    } else if (destination.IsStackSlot()) {
+      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.As<XmmRegister>());
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.As<XmmRegister>());
+    }
   }
 }
 
@@ -1783,6 +1927,18 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movss(Address(CpuRegister(RSP), mem), reg);
+  __ movd(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movsd(Address(CpuRegister(RSP), mem), reg);
+  __ movd(reg, CpuRegister(TMP));
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_.Get(index);
   Location source = move->GetSource();
@@ -1802,8 +1958,20 @@
     Exchange64(destination.As<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
+  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+    __ movd(CpuRegister(TMP), source.As<XmmRegister>());
+    __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>());
+    __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+  } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
+    Exchange32(source.As<XmmRegister>(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
+    Exchange32(destination.As<XmmRegister>(), source.GetStackIndex());
+  } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
+    Exchange64(source.As<XmmRegister>(), destination.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
+    Exchange64(destination.As<XmmRegister>(), source.GetStackIndex());
   } else {
-    LOG(FATAL) << "Unimplemented";
+    LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5ac0189..e04a8d8 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -80,8 +80,10 @@
 
  private:
   void Exchange32(CpuRegister reg, int mem);
+  void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
   void Exchange64(CpuRegister reg, int mem);
+  void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
 
   CodeGeneratorX86_64* const codegen_;
@@ -146,8 +148,10 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(HBasicBlock* block) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
-  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 0b3ad98..10a7e46 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -31,11 +31,19 @@
     for (HInstructionIterator it(block->GetInstructions());
          !it.Done(); it.Advance()) {
       HInstruction* inst = it.Current();
-      // Constant folding: replace `c <- a op b' with a compile-time
-      // evaluation of `a op b' if `a' and `b' are constant.
       if (inst->IsBinaryOperation()) {
+        // Constant folding: replace `op(a, b)' with a constant at
+        // compile time if `a' and `b' are both constants.
         HConstant* constant =
-          inst->AsBinaryOperation()->TryStaticEvaluation(graph_->GetArena());
+            inst->AsBinaryOperation()->TryStaticEvaluation();
+        if (constant != nullptr) {
+          inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant);
+        }
+      } else if (inst->IsUnaryOperation()) {
+        // Constant folding: replace `op(a)' with a constant at compile
+        // time if `a' is a constant.
+        HConstant* constant =
+            inst->AsUnaryOperation()->TryStaticEvaluation();
         if (constant != nullptr) {
           inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant);
         }
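
For reference, unary constant folding via TryStaticEvaluation boils down to the sketch below for HNeg over an int (HIntConstant here is a simplified stand-in for ART's class; the real code allocates the result on the graph's arena):

#include <cstdint>

struct HIntConstant { int32_t value; };  // simplified stand-in

// Returns the folded constant, or nullptr when the input isn't constant.
// `input' plays the role of GetInput()->AsIntConstant() on an HNeg.
HIntConstant* TryStaticEvaluationNeg(HIntConstant* input) {
  if (input == nullptr) return nullptr;  // not a compile-time constant
  // Negate via unsigned arithmetic to avoid signed-overflow UB at INT32_MIN.
  int32_t folded = static_cast<int32_t>(0u - static_cast<uint32_t>(input->value));
  return new HIntConstant{folded};       // ART allocates this on an arena
}
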
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index ec2ff33..09bf2c8 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -72,6 +72,61 @@
 
 
 /**
+ * Tiny two-register program exercising int constant folding on negation.
+ *
+ *                              16-bit
+ *                              offset
+ *                              ------
+ *     v0 <- 1                  0.      const/4 v0, #+1
+ *     v1 <- -v0                1.      neg-int v0, v1
+ *     return v1                2.      return v1
+ */
+TEST(ConstantFolding, IntConstantFoldingNegation) {
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 8 | 1 << 12,
+    Instruction::NEG_INT | 1 << 8 | 0 << 12,
+    Instruction::RETURN | 1 << 8);
+
+  std::string expected_before =
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [5]\n"
+      "  10: SuspendCheck\n"
+      "  11: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  5: Neg(2) [8]\n"
+      "  8: Return(5)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  9: Exit\n";
+
+  // Expected difference after constant folding.
+  diff_t expected_cf_diff = {
+    { "  2: IntConstant [5]\n", "  2: IntConstant\n" },
+    { "  5: Neg(2) [8]\n",      "  12: IntConstant [8]\n" },
+    { "  8: Return(5)\n",       "  8: Return(12)\n" }
+  };
+  std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
+
+  // Check the value of the computed constant.
+  auto check_after_cf = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsIntConstant());
+    ASSERT_EQ(inst->AsIntConstant()->GetValue(), -1);
+  };
+
+  // Expected difference after dead code elimination.
+  diff_t expected_dce_diff = {
+    { "  2: IntConstant\n", removed },
+  };
+  std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
+
+  TestCode(data,
+           expected_before,
+           expected_after_cf,
+           expected_after_dce,
+           check_after_cf);
+}
+
+/**
  * Tiny three-register program exercising int constant folding on addition.
  *
  *                              16-bit
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 459010d..4ed2156 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -120,13 +120,11 @@
     output_<< std::endl;
   }
 
-  void DumpLocation(Location location, Primitive::Type type) {
+  void DumpLocation(Location location) {
     if (location.IsRegister()) {
-      if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) {
-        codegen_.DumpFloatingPointRegister(output_, location.reg());
-      } else {
-        codegen_.DumpCoreRegister(output_, location.reg());
-      }
+      codegen_.DumpCoreRegister(output_, location.reg());
+    } else if (location.IsFpuRegister()) {
+      codegen_.DumpFloatingPointRegister(output_, location.reg());
     } else if (location.IsConstant()) {
       output_ << "constant";
       HConstant* constant = location.GetConstant();
@@ -150,9 +148,9 @@
     output_ << " (";
     for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
       MoveOperands* move = instruction->MoveOperandsAt(i);
-      DumpLocation(move->GetSource(), Primitive::kPrimInt);
+      DumpLocation(move->GetSource());
       output_ << " -> ";
-      DumpLocation(move->GetDestination(), Primitive::kPrimInt);
+      DumpLocation(move->GetDestination());
       if (i + 1 != e) {
         output_ << ", ";
       }
@@ -183,13 +181,13 @@
       if (locations != nullptr) {
         output_ << " ( ";
         for (size_t i = 0; i < instruction->InputCount(); ++i) {
-          DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType());
+          DumpLocation(locations->InAt(i));
           output_ << " ";
         }
         output_ << ")";
         if (locations->Out().IsValid()) {
           output_ << " -> ";
-          DumpLocation(locations->Out(), instruction->GetType());
+          DumpLocation(locations->Out());
         }
       }
       output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index d5f4f90..89c9495 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -73,7 +73,7 @@
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(9u, range->GetEnd());
+  ASSERT_EQ(8u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(1);
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition());
@@ -119,7 +119,7 @@
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
-  ASSERT_EQ(23u, range->GetEnd());
+  ASSERT_EQ(22u, range->GetEnd());
   HBasicBlock* block = graph->GetBlocks().Get(3);
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition());
@@ -193,7 +193,7 @@
   range = interval->GetFirstRange();
   ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(2)->GetLifetimePosition());
   ASSERT_EQ(22u, range->GetStart());
-  ASSERT_EQ(25u, range->GetEnd());
+  ASSERT_EQ(24u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
@@ -263,7 +263,7 @@
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
   ASSERT_EQ(6u, range->GetStart());
-  ASSERT_EQ(27u, range->GetEnd());
+  ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the phi.
@@ -271,7 +271,7 @@
   range = interval->GetFirstRange();
   // Instruction is consumed by the if.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(16u, range->GetEnd());
+  ASSERT_EQ(17u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
@@ -338,7 +338,7 @@
   range = range->GetNext();
   ASSERT_TRUE(range != nullptr);
   ASSERT_EQ(24u, range->GetStart());
-  ASSERT_EQ(27u, range->GetEnd());
+  ASSERT_EQ(26u, range->GetEnd());
 
   // Test for the add instruction.
   HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd();
@@ -410,7 +410,7 @@
   interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(4u, range->GetStart());
-  ASSERT_EQ(29u, range->GetEnd());
+  ASSERT_EQ(28u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the first add.
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 1637484..ed5e260 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -25,16 +25,14 @@
       temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
       environment_(instruction->GetBlock()->GetGraph()->GetArena(),
                    instruction->EnvironmentSize()),
-      dies_at_entry_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
+      output_overlaps_(true),
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
       live_registers_() {
   inputs_.SetSize(instruction->InputCount());
-  dies_at_entry_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
-    dies_at_entry_.Put(i, false);
   }
   environment_.SetSize(instruction->EnvironmentSize());
   for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index dcf70f2..11bcd78 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -34,7 +34,7 @@
  */
 class Location : public ValueObject {
  public:
-  static constexpr bool kDiesAtEntry = true;
+  static constexpr bool kNoOutputOverlap = false;
 
   enum Kind {
     kInvalid = 0,
@@ -373,8 +373,7 @@
 
   LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
 
-  void SetInAt(uint32_t at, Location location, bool dies_at_entry = false) {
-    dies_at_entry_.Put(at, dies_at_entry);
+  void SetInAt(uint32_t at, Location location) {
     inputs_.Put(at, location);
   }
 
@@ -386,7 +385,8 @@
     return inputs_.Size();
   }
 
-  void SetOut(Location location) {
+  void SetOut(Location location, bool overlaps = true) {
+    output_overlaps_ = overlaps;
     output_ = Location(location);
   }
 
@@ -449,23 +449,30 @@
     return &live_registers_;
   }
 
-  bool InputOverlapsWithOutputOrTemp(uint32_t input, bool is_environment) const {
+  bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const {
     if (is_environment) return true;
-    Location location = Out();
-    if (input == 0 && location.IsUnallocated() && location.GetPolicy() == Location::kSameAsFirstInput) {
+    if ((input_index == 0)
+        && output_.IsUnallocated()
+        && (output_.GetPolicy() == Location::kSameAsFirstInput)) {
       return false;
     }
-    if (dies_at_entry_.Get(input)) {
+    if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) {
       return false;
     }
     return true;
   }
 
+  bool OutputOverlapsWithInputs() const {
+    return output_overlaps_;
+  }
+
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
   GrowableArray<Location> environment_;
-  GrowableArray<bool> dies_at_entry_;
+  // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
+  // share the same register as the inputs.
+  bool output_overlaps_;
   Location output_;
   const CallKind call_kind_;
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 10c6014..0505510 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -363,6 +363,25 @@
   Add(&phis_, this, phi);
 }
 
+void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
+  DCHECK_EQ(phi->GetId(), -1);
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);
+  if (cursor->next_ == nullptr) {
+    cursor->next_ = phi;
+    phi->previous_ = cursor;
+    DCHECK(phi->next_ == nullptr);
+  } else {
+    phi->next_ = cursor->next_;
+    phi->previous_ = cursor;
+    cursor->next_ = phi;
+    phi->next_->previous_ = phi;
+  }
+  phi->SetBlock(this);
+  phi->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(phi);
+}
+
 static void Remove(HInstructionList* instruction_list,
                    HBasicBlock* block,
                    HInstruction* instruction) {
@@ -531,6 +550,12 @@
   env_uses_ = nullptr;
 }
 
+void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
+  InputAt(index)->RemoveUser(this, index);
+  SetRawInputAt(index, replacement);
+  replacement->AddUseAt(this, index);
+}
+
 size_t HInstruction::EnvironmentSize() const {
   return HasEnvironment() ? environment_->Size() : 0;
 }
@@ -572,15 +597,26 @@
   }
 }
 
-HConstant* HBinaryOperation::TryStaticEvaluation(ArenaAllocator* allocator) const {
+HConstant* HUnaryOperation::TryStaticEvaluation() const {
+  if (GetInput()->IsIntConstant()) {
+    int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue());
+    return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value);
+  } else if (GetInput()->IsLongConstant()) {
+    LOG(FATAL) << "Static evaluation of long unary operations is not yet implemented.";
+    return nullptr;
+  }
+  return nullptr;
+}
+
+HConstant* HBinaryOperation::TryStaticEvaluation() const {
   if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) {
     int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(),
                              GetRight()->AsIntConstant()->GetValue());
-    return new(allocator) HIntConstant(value);
+    return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value);
   } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) {
     int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(),
                              GetRight()->AsLongConstant()->GetValue());
-    return new(allocator) HLongConstant(value);
+    return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value);
   }
   return nullptr;
 }
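HInstruction::ReplaceInput() above performs three steps: unregister this user
from the old input, relink the input slot, register with the replacement. A
self-contained sketch with simplified stand-in types (not the ART classes):

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct Node {
      std::vector<Node*> inputs;
      std::vector<std::pair<Node*, size_t>> uses;  // (user, input index)

      void ReplaceInput(Node* replacement, size_t index) {
        Node* old_input = inputs[index];
        // 1. RemoveUser: drop this (user, index) entry from the old input.
        auto& u = old_input->uses;
        u.erase(std::remove(u.begin(), u.end(), std::make_pair(this, index)),
                u.end());
        // 2. SetRawInputAt: point the input slot at the replacement.
        inputs[index] = replacement;
        // 3. AddUseAt: register this instruction as a user of the replacement.
        replacement->uses.emplace_back(this, index);
      }
    };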
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7bb71b6..7c933aa 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -399,6 +399,7 @@
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
   void AddPhi(HPhi* phi);
+  void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
   void RemovePhi(HPhi* phi);
 
   bool IsLoopHeader() const {
@@ -503,10 +504,14 @@
   M(Temporary, Instruction)                                             \
   M(SuspendCheck, Instruction)                                          \
   M(Mul, BinaryOperation)                                               \
+  M(Neg, UnaryOperation)                                                \
+  M(FloatConstant, Constant)                                            \
+  M(DoubleConstant, Constant)                                           \
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
   FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
   M(Constant, Instruction)                                              \
+  M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
   M(Invoke, Instruction)
 
@@ -708,6 +713,7 @@
   void SetLocations(LocationSummary* locations) { locations_ = locations; }
 
   void ReplaceWith(HInstruction* instruction);
+  void ReplaceInput(HInstruction* replacement, size_t index);
 
   bool HasOnlyOneUse() const {
     return uses_ != nullptr && uses_->GetTail() == nullptr;
@@ -993,8 +999,8 @@
 
   virtual Primitive::Type GetType() const { return type_; }
 
- private:
-  const Primitive::Type type_;
+ protected:
+  Primitive::Type type_;
 };
 
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
@@ -1086,6 +1092,34 @@
   DISALLOW_COPY_AND_ASSIGN(HIf);
 };
 
+class HUnaryOperation : public HExpression<1> {
+ public:
+  HUnaryOperation(Primitive::Type result_type, HInstruction* input)
+      : HExpression(result_type, SideEffects::None()) {
+    SetRawInputAt(0, input);
+  }
+
+  HInstruction* GetInput() const { return InputAt(0); }
+  Primitive::Type GetResultType() const { return GetType(); }
+
+  virtual bool CanBeMoved() const { return true; }
+  virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
+
+  // Try to statically evaluate `operation` and return a HConstant
+  // containing the result of this evaluation.  If `operation` cannot
+  // be evaluated as a constant, return nullptr.
+  HConstant* TryStaticEvaluation() const;
+
+  // Apply this operation to `x`.
+  virtual int32_t Evaluate(int32_t x) const = 0;
+  virtual int64_t Evaluate(int64_t x) const = 0;
+
+  DECLARE_INSTRUCTION(UnaryOperation);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HUnaryOperation);
+};
+
 class HBinaryOperation : public HExpression<2> {
  public:
   HBinaryOperation(Primitive::Type result_type,
@@ -1104,10 +1138,10 @@
   virtual bool CanBeMoved() const { return true; }
   virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
 
-  // Try to statically evaluate `operation` and return an HConstant
+  // Try to statically evaluate `operation` and return a HConstant
   // containing the result of this evaluation.  If `operation` cannot
   // be evaluated as a constant, return nullptr.
-  HConstant* TryStaticEvaluation(ArenaAllocator* allocator) const;
+  HConstant* TryStaticEvaluation() const;
 
   // Apply this operation to `x` and `y`.
   virtual int32_t Evaluate(int32_t x, int32_t y) const = 0;
@@ -1371,6 +1405,48 @@
   DISALLOW_COPY_AND_ASSIGN(HConstant);
 };
 
+class HFloatConstant : public HConstant {
+ public:
+  explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+
+  float GetValue() const { return value_; }
+
+  virtual bool InstructionDataEquals(HInstruction* other) const {
+    return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
+        bit_cast<float, int32_t>(value_);
+  }
+
+  virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); }
+
+  DECLARE_INSTRUCTION(FloatConstant);
+
+ private:
+  const float value_;
+
+  DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
+};
+
+class HDoubleConstant : public HConstant {
+ public:
+  explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+
+  double GetValue() const { return value_; }
+
+  virtual bool InstructionDataEquals(HInstruction* other) const {
+    return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
+        bit_cast<double, int64_t>(value_);
+  }
+
+  virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); }
+
+  DECLARE_INSTRUCTION(DoubleConstant);
+
+ private:
+  const double value_;
+
+  DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
+};
+
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
 class HIntConstant : public HConstant {
@@ -1518,6 +1594,20 @@
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
 
+class HNeg : public HUnaryOperation {
+ public:
+  explicit HNeg(Primitive::Type result_type, HInstruction* input)
+      : HUnaryOperation(result_type, input) {}
+
+  virtual int32_t Evaluate(int32_t x) const OVERRIDE { return -x; }
+  virtual int64_t Evaluate(int64_t x) const OVERRIDE { return -x; }
+
+  DECLARE_INSTRUCTION(Neg);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNeg);
+};
+
 class HAdd : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
@@ -1750,6 +1840,7 @@
 
   virtual bool CanBeMoved() const { return true; }
   virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
+  void SetType(Primitive::Type type) { type_ = type; }
 
   DECLARE_INSTRUCTION(ArrayGet);
 
@@ -1762,11 +1853,11 @@
   HArraySet(HInstruction* array,
             HInstruction* index,
             HInstruction* value,
-            Primitive::Type component_type,
+            Primitive::Type expected_component_type,
             uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::ChangesSomething()),
         dex_pc_(dex_pc),
-        component_type_(component_type) {
+        expected_component_type_(expected_component_type) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
@@ -1780,13 +1871,24 @@
 
   uint32_t GetDexPc() const { return dex_pc_; }
 
-  Primitive::Type GetComponentType() const { return component_type_; }
+  HInstruction* GetValue() const { return InputAt(2); }
+
+  Primitive::Type GetComponentType() const {
+    // The Dex format does not type floating point index operations. Since
+    // `expected_component_type_` is set during building and may therefore be
+    // wrong, we also check the value type: if it is a floating point type,
+    // we must use that type.
+    Primitive::Type value_type = GetValue()->GetType();
+    return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble))
+        ? value_type
+        : expected_component_type_;
+  }
 
   DECLARE_INSTRUCTION(ArraySet);
 
  private:
   const uint32_t dex_pc_;
-  const Primitive::Type component_type_;
+  const Primitive::Type expected_component_type_;
 
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
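The rule in HArraySet::GetComponentType() distills to a small function; a
sketch with a stand-in type enum (hypothetical, for illustration):

    enum class Type { Int, Long, Float, Double };

    // Trust the stored value's type when it is a floating point type, since
    // the component type recorded at build time may be wrong for fp code.
    Type ComponentType(Type expected, Type value_type) {
      return (value_type == Type::Float || value_type == Type::Double)
          ? value_type
          : expected;
    }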
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 5055a76..fc65f97 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -37,18 +37,21 @@
         handled_(allocator, 0),
         active_(allocator, 0),
         inactive_(allocator, 0),
-        physical_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
+        physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
+        physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
         temp_intervals_(allocator, 4),
         spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         safepoints_(allocator, 0),
         processing_core_registers_(false),
         number_of_registers_(-1),
         registers_array_(nullptr),
-        blocked_registers_(codegen->GetBlockedCoreRegisters()),
+        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
         reserved_out_slots_(0),
         maximum_number_of_live_registers_(0) {
   codegen->SetupBlockedRegisters();
-  physical_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
+  physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
+  physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
   // Always reserve for the current method and the graph's max out registers.
   // TODO: compute it instead.
   reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
@@ -65,8 +68,10 @@
          it.Advance()) {
       HInstruction* current = it.Current();
       if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
-      if (current->GetType() == Primitive::kPrimFloat) return false;
-      if (current->GetType() == Primitive::kPrimDouble) return false;
+      if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble)
+          && instruction_set != kX86_64) {
+        return false;
+      }
     }
   }
   return true;
@@ -93,14 +98,22 @@
 
 void RegisterAllocator::BlockRegister(Location location,
                                       size_t start,
-                                      size_t end,
-                                      Primitive::Type type) {
+                                      size_t end) {
   int reg = location.reg();
-  LiveInterval* interval = physical_register_intervals_.Get(reg);
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_register_intervals_.Get(reg)
+      : physical_fp_register_intervals_.Get(reg);
+  Primitive::Type type = location.IsRegister()
+      ? Primitive::kPrimInt
+      : Primitive::kPrimDouble;
   if (interval == nullptr) {
     interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
-    physical_register_intervals_.Put(reg, interval);
-    inactive_.Add(interval);
+    if (location.IsRegister()) {
+      physical_core_register_intervals_.Put(reg, interval);
+    } else {
+      physical_fp_register_intervals_.Put(reg, interval);
+    }
   }
   DCHECK(interval->GetRegister() == reg);
   interval->AddRange(start, end);
@@ -123,8 +136,17 @@
   registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
   processing_core_registers_ = true;
   unhandled_ = &unhandled_core_intervals_;
+  for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
+    LiveInterval* fixed = physical_core_register_intervals_.Get(i);
+    if (fixed != nullptr) {
+      inactive_.Add(fixed);
+    }
+  }
   LinearScan();
 
+  size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
+  maximum_number_of_live_registers_ = 0;
+
   inactive_.Reset();
   active_.Reset();
   handled_.Reset();
@@ -133,9 +155,14 @@
   registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
   processing_core_registers_ = false;
   unhandled_ = &unhandled_fp_intervals_;
-  // TODO: Enable FP register allocation.
-  DCHECK(unhandled_->IsEmpty());
+  for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
+    LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
+    if (fixed != nullptr) {
+      inactive_.Add(fixed);
+    }
+  }
   LinearScan();
+  maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
 }
 
 void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -148,8 +175,9 @@
   for (size_t i = 0; i < locations->GetTempCount(); ++i) {
     Location temp = locations->GetTemp(i);
     if (temp.IsRegister()) {
-      BlockRegister(temp, position, position + 1, Primitive::kPrimInt);
+      BlockRegister(temp, position, position + 1);
     } else {
+      DCHECK(temp.IsUnallocated());
       LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
       temp_intervals_.Add(interval);
       interval->AddRange(position, position + 1);
@@ -160,10 +188,6 @@
   bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
       && (instruction->GetType() != Primitive::kPrimFloat);
 
-  GrowableArray<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
   if (locations->CanCall()) {
     if (!instruction->IsSuspendCheck()) {
       codegen_->MarkNotLeaf();
@@ -180,7 +204,8 @@
       // maximum before updating locations.
       LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
       interval->AddRange(position, position + 1);
-      unhandled.Add(interval);
+      unhandled_core_intervals_.Add(interval);
+      unhandled_fp_intervals_.Add(interval);
     }
   }
 
@@ -189,21 +214,29 @@
     for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
       BlockRegister(Location::RegisterLocation(i),
                     position,
-                    position + 1,
-                    Primitive::kPrimInt);
+                    position + 1);
+    }
+    for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+      BlockRegister(Location::FpuRegisterLocation(i),
+                    position,
+                    position + 1);
     }
   }
 
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     Location input = locations->InAt(i);
-    if (input.IsRegister()) {
-      BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType());
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
     }
   }
 
   LiveInterval* current = instruction->GetLiveInterval();
   if (current == nullptr) return;
 
+  GrowableArray<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
   DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
   // Some instructions define their output in fixed register/stack slot. We need
   // to ensure we know these locations before doing register allocation. For a
@@ -213,21 +246,24 @@
   //
   // The backwards walking ensures the ranges are ordered on increasing start positions.
   Location output = locations->Out();
-  if (output.IsRegister()) {
+  if (output.IsRegister() || output.IsFpuRegister()) {
     // Shift the interval's start by one to account for the blocked register.
     current->SetFrom(position + 1);
     current->SetRegister(output.reg());
-    BlockRegister(output, position, position + 1, instruction->GetType());
+    BlockRegister(output, position, position + 1);
+  } else if (!locations->OutputOverlapsWithInputs()) {
+    // Shift the interval's start by one to not interfere with the inputs.
+    current->SetFrom(position + 1);
   } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
     current->SetSpillSlot(output.GetStackIndex());
   }
 
   // If needed, add interval to the list of unhandled intervals.
   if (current->HasSpillSlot() || instruction->IsConstant()) {
-    // Split before first register use.
+    // Split just before first register use.
     size_t first_register_use = current->FirstRegisterUse();
     if (first_register_use != kNoLifetime) {
-      LiveInterval* split = Split(current, first_register_use);
+      LiveInterval* split = Split(current, first_register_use - 1);
       // Don't add directly to `unhandled`; it needs to be sorted and the start
       // of this new interval might be after intervals already in the list.
       AddSorted(&unhandled, split);
@@ -278,10 +314,19 @@
     }
   }
 
-  for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) {
-    LiveInterval* fixed = physical_register_intervals_.Get(i);
-    if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) {
-      intervals.Add(fixed);
+  if (processing_core_registers_) {
+    for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
+      LiveInterval* fixed = physical_core_register_intervals_.Get(i);
+      if (fixed != nullptr) {
+        intervals.Add(fixed);
+      }
+    }
+  } else {
+    for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
+      LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
+      if (fixed != nullptr) {
+        intervals.Add(fixed);
+      }
     }
   }
 
@@ -374,10 +419,10 @@
   interval->Dump(stream);
   stream << ": ";
   if (interval->HasRegister()) {
-    if (processing_core_registers_) {
-      codegen_->DumpCoreRegister(stream, interval->GetRegister());
-    } else {
+    if (interval->IsFloatingPoint()) {
       codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+    } else {
+      codegen_->DumpCoreRegister(stream, interval->GetRegister());
     }
   } else {
     stream << "spilled";
@@ -391,6 +436,7 @@
     // (1) Remove interval with the lowest start position from unhandled.
     LiveInterval* current = unhandled_->Pop();
     DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+    DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
     size_t position = current->GetStart();
 
     // (2) Remove currently active intervals that are dead at this position.
@@ -519,10 +565,9 @@
 }
 
 bool RegisterAllocator::IsBlocked(int reg) const {
-  // TODO: This only works for core registers and needs to be adjusted for
-  // floating point registers.
-  DCHECK(processing_core_registers_);
-  return blocked_registers_[reg];
+  return processing_core_registers_
+      ? blocked_core_registers_[reg]
+      : blocked_fp_registers_[reg];
 }
 
 // Find the register that is used the last, and spill the interval
@@ -591,7 +636,9 @@
     // If the first use of that instruction is after the last use of the found
     // register, we split this interval just before its first register use.
     AllocateSpillSlotFor(current);
-    LiveInterval* split = Split(current, first_register_use);
+    LiveInterval* split = Split(current, first_register_use - 1);
+    DCHECK_NE(current, split) << "There are not enough registers available for "
+      << split->GetParent()->GetDefinedBy()->DebugName();
     AddSorted(unhandled_, split);
     return false;
   } else {
@@ -635,6 +682,7 @@
 }
 
 void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) {
+  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
   size_t insert_at = 0;
   for (size_t i = array->Size(); i > 0; --i) {
     LiveInterval* current = array->Get(i - 1);
@@ -723,17 +771,11 @@
   parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
 }
 
-// We create a special marker for inputs moves to differentiate them from
-// moves created during resolution. They must be different instructions
-// because the input moves work on the assumption that the interval moves
-// have been executed.
-static constexpr size_t kInputMoveLifetimePosition = 0;
-static bool IsInputMove(HInstruction* instruction) {
-  return instruction->GetLifetimePosition() == kInputMoveLifetimePosition;
-}
-
 static bool IsValidDestination(Location destination) {
-  return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot();
+  return destination.IsRegister()
+      || destination.IsFpuRegister()
+      || destination.IsStackSlot()
+      || destination.IsDoubleStackSlot();
 }
 
 void RegisterAllocator::AddInputMoveFor(HInstruction* user,
@@ -748,14 +790,14 @@
   HParallelMove* move = nullptr;
   if (previous == nullptr
       || !previous->IsParallelMove()
-      || !IsInputMove(previous)) {
+      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
     move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(kInputMoveLifetimePosition);
+    move->SetLifetimePosition(user->GetLifetimePosition());
     user->GetBlock()->InsertInstructionBefore(move, user);
   } else {
     move = previous->AsParallelMove();
   }
-  DCHECK(IsInputMove(move));
+  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
   move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr));
 }
 
@@ -778,7 +820,7 @@
     move = at->GetNext()->AsParallelMove();
     // This is a parallel move for connecting siblings in the same block. We need
     // to differentiate it from moves for connecting blocks, and from input moves.
-    if (move == nullptr || IsInputMove(move) || move->GetLifetimePosition() > position) {
+    if (move == nullptr || move->GetLifetimePosition() > position) {
       move = new (allocator_) HParallelMove(allocator_);
       move->SetLifetimePosition(position);
       at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -786,12 +828,6 @@
   } else {
     // Move must happen before the instruction.
     HInstruction* previous = at->GetPrevious();
-    if (previous != nullptr && previous->IsParallelMove() && IsInputMove(previous)) {
-      // This is a parallel move for connecting siblings in a same block. We need to
-      // differentiate it with input moves.
-      at = previous;
-      previous = previous->GetPrevious();
-    }
     if (previous == nullptr
         || !previous->IsParallelMove()
         || previous->GetLifetimePosition() != position) {
@@ -889,7 +925,9 @@
   if (current->HasSpillSlot() && current->HasRegister()) {
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
-                    Location::RegisterLocation(interval->GetRegister()),
+                    interval->IsFloatingPoint()
+                        ? Location::FpuRegisterLocation(interval->GetRegister())
+                        : Location::RegisterLocation(interval->GetRegister()),
                     interval->NeedsTwoSpillSlots()
                         ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
@@ -947,6 +985,10 @@
           }
           break;
         }
+        case Location::kFpuRegister: {
+          locations->AddLiveRegister(source);
+          break;
+        }
         case Location::kStackSlot:  // Fall-through
         case Location::kDoubleStackSlot:  // Fall-through
         case Location::kConstant: {
@@ -1110,6 +1152,7 @@
       current = at;
     }
     LocationSummary* locations = at->GetLocations();
+    DCHECK(temp->GetType() == Primitive::kPrimInt);
     locations->SetTempAt(
         temp_index++, Location::RegisterLocation(temp->GetRegister()));
   }
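The allocator now runs linear scan twice, once per register bank, and sums the
per-pass maxima of live registers so the reserved spill area covers both banks.
An outline under those assumptions (RunLinearScanOverBank is a hypothetical
stand-in for one pass, returning placeholder results):

    #include <cstddef>

    size_t RunLinearScanOverBank(bool core) {
      return core ? 3 : 2;  // placeholder maxima, for illustration only
    }

    size_t AllocateBothBanks() {
      size_t max_live = RunLinearScanOverBank(/*core=*/true);
      max_live += RunLinearScanOverBank(/*core=*/false);  // saved + fp pass
      return max_live;
    }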
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 0c3a9b3..b881539 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -94,7 +94,7 @@
   bool IsBlocked(int reg) const;
 
   // Update the interval for the register in `location` to cover [start, end).
-  void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type);
+  void BlockRegister(Location location, size_t start, size_t end);
 
   // Allocate a spill slot for the given interval.
   void AllocateSpillSlotFor(LiveInterval* interval);
@@ -156,7 +156,8 @@
 
   // Fixed intervals for physical registers. Such intervals cover the positions
   // where an instruction requires a specific register.
-  GrowableArray<LiveInterval*> physical_register_intervals_;
+  GrowableArray<LiveInterval*> physical_core_register_intervals_;
+  GrowableArray<LiveInterval*> physical_fp_register_intervals_;
 
   // Intervals for temporaries. Such intervals cover the positions
   // where an instruction requires a temporary.
@@ -179,7 +180,8 @@
   size_t* registers_array_;
 
   // Blocked registers, as decided by the code generator.
-  bool* const blocked_registers_;
+  bool* const blocked_core_registers_;
+  bool* const blocked_fp_registers_;
 
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 7517a6b..2d84a9d 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -348,14 +348,14 @@
   // Split at the next instruction.
   interval = interval->SplitAt(first_add->GetLifetimePosition() + 2);
   // The user of the split is the last add.
-  ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1);
+  ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition());
 
   // Split before the last add.
   LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1);
   // Ensure the current interval has no register use...
   ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime);
   // And the new interval has it for the last add.
-  ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1);
+  ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition());
 }
 
 TEST(RegisterAllocatorTest, DeadPhi) {
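The updated assertions reflect a new convention, illustrated here with
hypothetical lifetime positions: if an instruction at position P needs its
input in a register, FirstRegisterUse() now reports P itself (previously
P - 1), and callers split at P - 1 so the register-carrying interval is
already active when the user begins.

    #include <cstddef>

    constexpr size_t kUserPosition = 20;
    constexpr size_t kFirstRegisterUse = kUserPosition;       // new behavior
    constexpr size_t kSplitPosition = kFirstRegisterUse - 1;  // Split(current, ...)
    static_assert(kSplitPosition == 19, "split lands just before the user");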
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index be2c039..a0cc8a9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -129,8 +129,112 @@
   }
 }
 
+/**
+ * Constants in the Dex format are not typed, so the builder types them as
+ * integers. When building the SSA form, we may find that a constant is used
+ * in floating point operations; we then create a floating-point equivalent
+ * constant to make the operations correctly typed.
+ */
+static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) {
+  // We place the floating point constant next to this constant.
+  HFloatConstant* result = constant->GetNext()->AsFloatConstant();
+  if (result == nullptr) {
+    HGraph* graph = constant->GetBlock()->GetGraph();
+    ArenaAllocator* allocator = graph->GetArena();
+    result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+    constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+  } else {
+    // If there is already a constant with the expected type, we know it is
+    // the floating point equivalent of this constant.
+    DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+  }
+  return result;
+}
+
+/**
+ * Wide constants in the Dex format are not typed, so the builder types them
+ * as longs. When building the SSA form, we may find that a constant is used
+ * in floating point operations; we then create a floating-point equivalent
+ * constant to make the operations correctly typed.
+ */
+static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) {
+  // We place the floating point constant next to this constant.
+  HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
+  if (result == nullptr) {
+    HGraph* graph = constant->GetBlock()->GetGraph();
+    ArenaAllocator* allocator = graph->GetArena();
+    result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+    constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+  } else {
+    // If there is already a constant with the expected type, we know it is
+    // the floating point equivalent of this constant.
+    DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+  }
+  return result;
+}
+
+/**
+ * Because of the Dex format, the same phi may end up being used for both
+ * floating point and non floating point operations. Because we want the
+ * graph to be correctly typed (and thereby avoid moves between floating
+ * point registers and core registers), we need to create a copy of the phi
+ * with a floating point type.
+ */
+static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  // We place the floating point phi next to this phi.
+  HInstruction* next = phi->GetNext();
+  if (next == nullptr
+      || (next->GetType() != Primitive::kPrimDouble && next->GetType() != Primitive::kPrimFloat)) {
+    ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
+    HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
+      // but the type propagation phase will fix it.
+      new_phi->SetRawInputAt(i, phi->InputAt(i));
+    }
+    phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+    return new_phi;
+  } else {
+    // If there is already a phi with the expected type, we know it is the floating
+    // point equivalent of this phi.
+    DCHECK_EQ(next->AsPhi()->GetRegNumber(), phi->GetRegNumber());
+    return next->AsPhi();
+  }
+}
+
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
+                                                     HInstruction* value,
+                                                     Primitive::Type type) {
+  if (value->IsArrayGet()) {
+    // The verifier has checked that values in arrays cannot be used for both
+    // floating point and non-floating point operations. It is therefore safe to just
+    // change the type of the operation.
+    value->AsArrayGet()->SetType(type);
+    return value;
+  } else if (value->IsLongConstant()) {
+    return GetDoubleEquivalent(value->AsLongConstant());
+  } else if (value->IsIntConstant()) {
+    return GetFloatEquivalent(value->AsIntConstant());
+  } else if (value->IsPhi()) {
+    return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type);
+  } else {
+    // For other instructions, we assume the verifier has checked that the dex
+    // format is correctly typed and that the value in a dex register will not be
+    // used for both floating point and non-floating point operations. So the
+    // only case in which an instruction wants a floating point equivalent is an
+    // unused phi that will be removed by the dead phi elimination phase.
+    DCHECK(user->IsPhi());
+    return value;
+  }
+}
+
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  load->ReplaceWith(current_locals_->Get(load->GetLocal()->GetRegNumber()));
+  HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
+  if (load->GetType() != value->GetType()
+      && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
+    // If the operation requests a specific type, we make sure its input is of that type.
+    value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+  }
+  load->ReplaceWith(value);
   load->GetBlock()->RemoveInstruction(load);
 }
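GetFloatEquivalent() reinterprets the int constant's bits as a float via ART's
bit_cast, whose first template argument is the source type. The same
reinterpretation in portable C++, as a sketch:

    #include <cstdint>
    #include <cstring>

    // Equivalent of bit_cast<int32_t, float>(bits): reinterpret the bit
    // pattern, do not value-convert.
    float FloatFromBits(int32_t bits) {
      float f;
      std::memcpy(&f, &bits, sizeof(f));
      return f;
    }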
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 9d8c072..24f5ac5 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -52,6 +52,10 @@
   void VisitStoreLocal(HStoreLocal* store);
   void VisitInstruction(HInstruction* instruction);
 
+  static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
+                                                  HInstruction* instruction,
+                                                  Primitive::Type type);
+
  private:
   // Locals for the current block being visited.
   GrowableArray<HInstruction*>* current_locals_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index f0edc64..1e34670 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -319,7 +319,7 @@
       if (user->IsPhi()) {
         // If the phi has a register, try to use the same.
         Location phi_location = user->GetLiveInterval()->ToLocation();
-        if (phi_location.IsRegister() && free_until[phi_location.reg()] >= use_position) {
+        if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) {
           return phi_location.reg();
         }
         const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
@@ -345,7 +345,7 @@
         // We use the user's lifetime position - 1 (and not `use_position`) because the
         // register is blocked at the beginning of the user.
         size_t position = user->GetLifetimePosition() - 1;
-        if (expected.IsRegister() && free_until[expected.reg()] >= position) {
+        if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) {
           return expected.reg();
         }
       }
@@ -368,7 +368,7 @@
         // If the input dies at the end of the predecessor, we know its register can
         // be reused.
         Location input_location = input_interval.ToLocation();
-        if (input_location.IsRegister()) {
+        if (SameRegisterKind(input_location)) {
           return input_location.reg();
         }
       }
@@ -384,7 +384,7 @@
         // If the input dies at the start of this instruction, we know its register can
         // be reused.
         Location location = input_interval.ToLocation();
-        if (location.IsRegister()) {
+        if (SameRegisterKind(location)) {
           return location.reg();
         }
       }
@@ -393,13 +393,21 @@
   return kNoRegister;
 }
 
+bool LiveInterval::SameRegisterKind(Location other) const {
+  return IsFloatingPoint()
+      ? other.IsFpuRegister()
+      : other.IsRegister();
+}
+
 bool LiveInterval::NeedsTwoSpillSlots() const {
   return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
 }
 
 Location LiveInterval::ToLocation() const {
   if (HasRegister()) {
-    return Location::RegisterLocation(GetRegister());
+    return IsFloatingPoint()
+        ? Location::FpuRegisterLocation(GetRegister())
+        : Location::RegisterLocation(GetRegister());
   } else {
     HInstruction* defined_by = GetParent()->GetDefinedBy();
     if (defined_by->IsConstant()) {
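SameRegisterKind() and ToLocation() both key off the interval's type to pick a
register bank. The rule in miniature (stand-in types, hypothetical):

    enum class PrimType { Int, Long, Float, Double };

    // Floating point intervals live in the fp bank; a register hint is only
    // usable when it names a register in the interval's own bank.
    bool IsFloatingPoint(PrimType t) {
      return t == PrimType::Float || t == PrimType::Double;
    }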
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index e9bd303..8f71848 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -188,10 +188,14 @@
         && (first_use_->GetPosition() < position)) {
       // The user uses the instruction multiple times, and one use dies before the other.
       // We update the use list so that the latter is first.
+      UsePosition* cursor = first_use_;
+      while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) {
+        cursor = cursor->GetNext();
+      }
       DCHECK(first_use_->GetPosition() + 1 == position);
       UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, input_index, is_environment, position, first_use_->GetNext());
-      first_use_->SetNext(new_use);
+          instruction, input_index, is_environment, position, cursor->GetNext());
+      cursor->SetNext(new_use);
       if (first_range_->GetEnd() == first_use_->GetPosition()) {
         first_range_->end_ = position;
       }
@@ -354,6 +358,10 @@
              || (location.GetPolicy() == Location::kSameAsFirstInput
                  && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) {
           return position;
+        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                   || (location.GetPolicy() == Location::kSameAsFirstInput
+                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
+          return position;
         }
       }
     }
@@ -362,12 +370,12 @@
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
-      if (use_position >= position && !use->GetIsEnvironment()) {
+      if (use_position > position && !use->GetIsEnvironment()) {
         Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-          // Return the lifetime just before the user, so that the interval has a register
-          // when entering the user.
-          return use->GetUser()->GetLifetimePosition() - 1;
+        if (location.IsUnallocated()
+            && (location.GetPolicy() == Location::kRequiresRegister
+                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+          return use_position;
         }
       }
       use = use->GetNext();
@@ -498,6 +506,10 @@
   // slots for spilling.
   bool NeedsTwoSpillSlots() const;
 
+  bool IsFloatingPoint() const {
+    return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
+  }
+
   // Converts the location of the interval to a `Location` object.
   Location ToLocation() const;
 
@@ -509,6 +521,9 @@
 
   bool IsTemp() const { return is_temp_; }
 
+  // Returns whether `other` and `this` share the same kind of register.
+  bool SameRegisterKind(Location other) const;
+
  private:
   ArenaAllocator* const allocator_;
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index e02a182..4eda0f3 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -24,18 +24,13 @@
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      if (phi->HasEnvironmentUses()) {
-        // TODO: Do we want to keep that phi alive?
-        worklist_.Add(phi);
-        phi->SetLive();
-        continue;
-      }
       for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
         HUseListNode<HInstruction>* current = it.Current();
         HInstruction* user = current->GetUser();
         if (!user->IsPhi()) {
           worklist_.Add(phi);
           phi->SetLive();
+          break;
         } else {
           phi->SetDead();
         }
@@ -76,6 +71,14 @@
             current->RemoveUser(user, user_node->GetIndex());
           }
         }
+        if (current->HasEnvironmentUses()) {
+          for (HUseIterator<HEnvironment> it(current->GetEnvUses()); !it.Done(); it.Advance()) {
+            HUseListNode<HEnvironment>* user_node = it.Current();
+            HEnvironment* user = user_node->GetUser();
+            user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+            current->RemoveEnvironmentUser(user, user_node->GetIndex());
+          }
+        }
         block->RemovePhi(current->AsPhi());
       }
       current = next;
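After this change the initial marking rule is simply: a phi is live iff it has
at least one non-phi user; environment uses alone no longer keep a phi alive,
and dead phis have their environment slots nulled before removal. The rule in
miniature (stand-in types, hypothetical):

    #include <vector>

    struct Use { bool user_is_phi; };

    // Live iff some real (non-phi) instruction uses the phi.
    bool IsInitiallyLive(const std::vector<Use>& uses) {
      for (const Use& use : uses) {
        if (!use.user_is_phi) return true;
      }
      return false;
    }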
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index a860cb7..3828142 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "ssa_builder.h"
 #include "ssa_type_propagation.h"
 
 #include "nodes.h"
@@ -38,15 +39,31 @@
 
 // Re-compute and update the type of the instruction. Returns
 // whether or not the type was changed.
-static bool UpdateType(HPhi* phi) {
+bool SsaTypePropagation::UpdateType(HPhi* phi) {
   Primitive::Type existing = phi->GetType();
 
-  Primitive::Type new_type = Primitive::kPrimVoid;
+  Primitive::Type new_type = existing;
   for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
     Primitive::Type input_type = phi->InputAt(i)->GetType();
     new_type = MergeTypes(new_type, input_type);
   }
   phi->SetType(new_type);
+
+  if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) {
+    // If the phi is of floating point type, we need to update its inputs to that
+    // type. For inputs that are phis, we need to recompute their types.
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      HInstruction* input = phi->InputAt(i);
+      if (input->GetType() != new_type) {
+        HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
+        phi->ReplaceInput(equivalent, i);
+        if (equivalent->IsPhi()) {
+          AddToWorklist(equivalent->AsPhi());
+        }
+      }
+    }
+  }
+
   return existing != new_type;
 }
 
@@ -63,7 +80,12 @@
       HPhi* phi = it.Current()->AsPhi();
       // Set the initial type for the phi. Use the non back edge input for reaching
       // a fixed point faster.
-      phi->SetType(phi->InputAt(0)->GetType());
+      Primitive::Type phi_type = phi->GetType();
+      // We merge with the existing type, which has been set by the SSA builder.
+      DCHECK(phi_type == Primitive::kPrimVoid
+          || phi_type == Primitive::kPrimFloat
+          || phi_type == Primitive::kPrimDouble);
+      phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType()));
       AddToWorklist(phi);
     }
   } else {
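The builder now pre-types floating point phis, so type propagation seeds a phi
by merging the first input's type with that existing type instead of
overwriting it. A simplified sketch (only the "void means no type yet" rule
from the surrounding code is modeled; the real lattice reconciles more cases):

    enum class PType { Void, Int, Long, Float, Double };

    PType MergeTypes(PType a, PType b) {
      if (a == PType::Void) return b;
      if (b == PType::Void) return a;
      return a;  // simplified: conflicts are resolved by the real lattice
    }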
diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h
index 5f471a9..f4d3d63 100644
--- a/compiler/optimizing/ssa_type_propagation.h
+++ b/compiler/optimizing/ssa_type_propagation.h
@@ -34,6 +34,7 @@
   void ProcessWorklist();
   void AddToWorklist(HPhi* phi);
   void AddDependentInstructionsToWorklist(HPhi* phi);
+  bool UpdateType(HPhi* phi);
 
   HGraph* const graph_;
   GrowableArray<HPhi*> worklist_;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 3742913..5bfa462 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -373,7 +373,7 @@
         }
       } else {
         // This will output the assembly.
-        EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical.";
+        EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical.";
       }
     }
   }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db7151c..f4c9862 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -317,7 +317,7 @@
   EmitOptionalRex32(dst, src);
   EmitUint8(0x0F);
   EmitUint8(0x28);
-  EmitXmmRegisterOperand(src.LowBits(), dst);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
 
@@ -354,7 +354,7 @@
 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitOptionalRex32(dst, src);
+  EmitRex64(dst, src);
   EmitUint8(0x0F);
   EmitUint8(0x6E);
   EmitOperand(dst.LowBits(), Operand(src));
@@ -364,7 +364,7 @@
 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitOptionalRex32(src, dst);
+  EmitRex64(src, dst);
   EmitUint8(0x0F);
   EmitUint8(0x7E);
   EmitOperand(src.LowBits(), Operand(dst));
@@ -1748,6 +1748,10 @@
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
+  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
   uint8_t rex = 0x48 | operand.rex();  // REX.W000
   if (dst.NeedsRex()) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4ffb6b5..7e5859c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -666,6 +666,7 @@
   void EmitRex64(CpuRegister reg);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
+  void EmitRex64(XmmRegister dst, CpuRegister src);
 
   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
@@ -692,7 +693,7 @@
 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
   CHECK_GE(rm, 0);
   CHECK_LT(rm, 8);
-  buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg);
+  buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
 }
 
 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
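The fixed EmitRegisterOperand() builds a register-direct ModR/M byte. For
reference, the standard x86-64 layout (this helper is illustrative, not the
assembler's API; the high bit of each register number travels in the REX
prefix, which is why only the low three bits appear here):

    #include <cstdint>

    // ModR/M: [7:6] mod, [5:3] reg, [2:0] rm; mod == 11b selects
    // register-direct addressing, as in the emitter above.
    constexpr uint8_t ModRM(uint8_t reg_field, uint8_t rm_field) {
      return static_cast<uint8_t>(0xC0 | ((reg_field & 7) << 3) | (rm_field & 7));
    }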
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 69a5fa0..37a0932 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -134,6 +134,32 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
 }
 
+TEST_F(AssemblerX86_64Test, Movaps) {
+  GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM8));
+  DriverStr("movaps %xmm8, %xmm0", "movaps");
+}
+
+TEST_F(AssemblerX86_64Test, Movd) {
+  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::R11));
+  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::R11));
+  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM0));
+  GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM0));
+  GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM8));
+  GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM8));
+  const char* expected =
+    "movd %r11, %xmm0\n"
+    "movd %rax, %xmm0\n"
+    "movd %r11, %xmm8\n"
+    "movd %rax, %xmm8\n"
+    "movd %xmm0, %r11\n"
+    "movd %xmm0, %rax\n"
+    "movd %xmm8, %r11\n"
+    "movd %xmm8, %rax\n";
+  DriverStr(expected, "movd");
+}
+
 TEST_F(AssemblerX86_64Test, Movl) {
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11));
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11));
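One of the encodings the new Movd test expects, worked out by hand from the
standard SSE2 tables (the byte values below are a derivation, not test
output): with EmitRex64, `movd %r11, %xmm0' carries REX.W and thus moves a
full 64-bit GPR.

    // 66       operand-size prefix
    // 49       REX: W=1 (64-bit operand), B=1 (r11 sits in the rm field)
    // 0F 6E    MOVD/MOVQ xmm, r/m
    // C3       ModR/M: mod=11, reg=xmm0 (000), rm=r11 & 7 (011)
    static const unsigned char kMovdR11ToXmm0[] = {0x66, 0x49, 0x0F, 0x6E, 0xC3};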
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 0bab429..0ef20d6 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -24,10 +24,11 @@
 LOCAL_MODULE := dalvikvm
 LOCAL_MODULE_TAGS := optional
 LOCAL_CPP_EXTENSION := cc
-LOCAL_SRC_FILES := dalvikvm.cc ../sigchainlib/sigchain.cc
+LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
 LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libdl liblog libnativehelper
+LOCAL_WHOLE_STATIC_LIBRARIES := libsigchain
 LOCAL_LDFLAGS := -Wl,--version-script,art/sigchainlib/version-script.txt -Wl,--export-dynamic
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common.mk
@@ -51,10 +52,11 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_CLANG := true
 LOCAL_CPP_EXTENSION := cc
-LOCAL_SRC_FILES := dalvikvm.cc ../sigchainlib/sigchain.cc
+LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
 LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libnativehelper
+LOCAL_WHOLE_STATIC_LIBRARIES := libsigchain
 LOCAL_LDFLAGS := -ldl -lpthread
 # Mac OS linker doesn't understand --export-dynamic.
 ifneq ($(HOST_OS),darwin)
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 5220dc3..42bf8fb 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -43,398 +43,98 @@
   }
 };
 
+// Common tests are declared next to the constants.
+#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
+#include "asm_support.h"
 
-TEST_F(ArchTest, ARM) {
+TEST_F(ArchTest, CheckCommonOffsetsAndSizes) {
+  CheckAsmSupportOffsetsAndSizes();
+}
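The idea is that asm_support.h becomes self-checking: a test that defines ADD_TEST_EQ before including it turns every shared constant into an EXPECT_EQ against the C++ source of truth, presumably wrapped by the header into the CheckAsmSupportOffsetsAndSizes() used above. An illustrative fragment (the header's real contents are not part of this diff):

    // Hypothetical asm_support.h excerpt:
    #ifndef ADD_TEST_EQ
    #define ADD_TEST_EQ(x, y)  // expands to nothing outside the tests
    #endif

    #define THREAD_ID_OFFSET 12
    ADD_TEST_EQ(THREAD_ID_OFFSET,
                art::Thread::ThinLockIdOffset<4>().Int32Value())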
+
+// Grab architecture-specific constants.
+namespace arm {
 #include "arch/arm/asm_support_arm.h"
-#undef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
-
-
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, FRAME_SIZE_SAVE_ALL_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for SaveAll";
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsOnly";
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsAndArgs";
-#endif
-
-
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
+static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
 #undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
 #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
-#ifdef HEAP_REFERENCE_SIZE
-#undef HEAP_REFERENCE_SIZE
-#endif
+}
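This block and the four below it repeat one capture-and-#undef idiom, needed because preprocessor macros ignore C++ namespaces: each architecture's header is included inside its own namespace, the macro values are snapshotted into real constants, and the macros are removed before the next header redefines them. The idiom in isolation (generic sketch, not ART code):

    #include <cstddef>

    namespace arch_a {
    #define FRAME_SIZE 176                            // macros are file-scoped
    static constexpr size_t kFrameSize = FRAME_SIZE;  // snapshot into a constant
    #undef FRAME_SIZE                                 // or arch_b would see 176
    }  // namespace arch_a

    namespace arch_b {
    #define FRAME_SIZE 96
    static constexpr size_t kFrameSize = FRAME_SIZE;  // arch_b::kFrameSize == 96
    #undef FRAME_SIZE
    }  // namespace arch_b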
+
+namespace arm64 {
+#include "arch/arm64/asm_support_arm64.h"
+static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+}
+
+namespace mips {
+#include "arch/mips/asm_support_mips.h"
+static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+}
+
+namespace x86 {
+#include "arch/x86/asm_support_x86.h"
+static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+}
+
+namespace x86_64 {
+#include "arch/x86_64/asm_support_x86_64.h"
+static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
+#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+}
+
+// Check that the architecture-specific constants are sound.
+TEST_F(ArchTest, ARM) {
+  CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave);
+  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave);
+  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave);
 }
 
 
 TEST_F(ArchTest, ARM64) {
-#include "arch/arm64/asm_support_arm64.h"
-#undef ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
-
-
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kSaveAll, FRAME_SIZE_SAVE_ALL_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for SaveAll";
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsOnly";
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsAndArgs";
-#endif
-
-
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
-#ifdef HEAP_REFERENCE_SIZE
-#undef HEAP_REFERENCE_SIZE
-#endif
+  CheckFrameSize(InstructionSet::kArm64, Runtime::kSaveAll, arm64::kFrameSizeSaveAllCalleeSave);
+  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave);
+  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs,
+                 arm64::kFrameSizeRefsAndArgsCalleeSave);
 }
 
-
 TEST_F(ArchTest, MIPS) {
-#include "arch/mips/asm_support_mips.h"
-#undef ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
-
-
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, FRAME_SIZE_SAVE_ALL_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for SaveAll";
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsOnly";
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsAndArgs";
-#endif
-
-
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
-#ifdef HEAP_REFERENCE_SIZE
-#undef HEAP_REFERENCE_SIZE
-#endif
+  CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave);
+  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave);
+  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs,
+                 mips::kFrameSizeRefsAndArgsCalleeSave);
 }
 
-
 TEST_F(ArchTest, X86) {
-#include "arch/x86/asm_support_x86.h"
-#undef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
-
-
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, FRAME_SIZE_SAVE_ALL_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for SaveAll";
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsOnly";
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsAndArgs";
-#endif
-
-
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
-#ifdef HEAP_REFERENCE_SIZE
-#undef HEAP_REFERENCE_SIZE
-#endif
+  CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave);
+  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave);
+  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave);
 }
 
-
 TEST_F(ArchTest, X86_64) {
-#include "arch/x86_64/asm_support_x86_64.h"
-#undef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
-
-
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, FRAME_SIZE_SAVE_ALL_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for SaveAll";
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsOnly";
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE);
-#else
-  LOG(WARNING) << "No frame size for RefsAndArgs";
-#endif
-
-
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
-#ifdef HEAP_REFERENCE_SIZE
-#undef HEAP_REFERENCE_SIZE
-#endif
-}
-
-
-// The following tests are all for the running architecture. So we get away
-// with just including it and not undefining it every time.
-
-#if defined(__arm__)
-#include "arch/arm/asm_support_arm.h"
-#elif defined(__aarch64__)
-#include "arch/arm64/asm_support_arm64.h"
-#elif defined(__mips__)
-#include "arch/mips/asm_support_mips.h"
-#elif defined(__i386__)
-#include "arch/x86/asm_support_x86.h"
-#elif defined(__x86_64__)
-#include "arch/x86_64/asm_support_x86_64.h"
-#else
-  // This happens for the host test.
-#ifdef __LP64__
-#include "arch/x86_64/asm_support_x86_64.h"
-#else
-#include "arch/x86/asm_support_x86.h"
-#endif
-#endif
-
-
-TEST_F(ArchTest, ThreadOffsets) {
-  // Ugly hack, change when possible.
-#ifdef __LP64__
-#define POINTER_SIZE 8
-#else
-#define POINTER_SIZE 4
-#endif
-
-#if defined(THREAD_SELF_OFFSET)
-  ThreadOffset<POINTER_SIZE> self_offset = Thread::SelfOffset<POINTER_SIZE>();
-  EXPECT_EQ(self_offset.Int32Value(), THREAD_SELF_OFFSET);
-#else
-  LOG(INFO) << "No Thread Self Offset found.";
-#endif
-
-#if defined(THREAD_CARD_TABLE_OFFSET)
-  ThreadOffset<POINTER_SIZE> card_offset = Thread::CardTableOffset<POINTER_SIZE>();
-  EXPECT_EQ(card_offset.Int32Value(), THREAD_CARD_TABLE_OFFSET);
-#else
-  LOG(INFO) << "No Thread Card Table Offset found.";
-#endif
-
-#if defined(THREAD_EXCEPTION_OFFSET)
-  ThreadOffset<POINTER_SIZE> exc_offset = Thread::ExceptionOffset<POINTER_SIZE>();
-    EXPECT_EQ(exc_offset.Int32Value(), THREAD_EXCEPTION_OFFSET);
-#else
-  LOG(INFO) << "No Thread Exception Offset found.";
-#endif
-
-#if defined(THREAD_ID_OFFSET)
-  ThreadOffset<POINTER_SIZE> id_offset = Thread::ThinLockIdOffset<POINTER_SIZE>();
-  EXPECT_EQ(id_offset.Int32Value(), THREAD_ID_OFFSET);
-#else
-  LOG(INFO) << "No Thread ID Offset found.";
-#endif
-}
-
-
-TEST_F(ArchTest, CalleeSaveMethodOffsets) {
-#if defined(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET)
-  EXPECT_EQ(Runtime::GetCalleeSaveMethodOffset(Runtime::kSaveAll),
-            static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET));
-#else
-  LOG(INFO) << "No Runtime Save-all Offset found.";
-#endif
-
-#if defined(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET)
-  EXPECT_EQ(Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsOnly),
-            static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET));
-#else
-  LOG(INFO) << "No Runtime Refs-only Offset found.";
-#endif
-
-#if defined(RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET)
-  EXPECT_EQ(Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsAndArgs),
-            static_cast<size_t>(RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET));
-#else
-  LOG(INFO) << "No Runtime Refs-and-Args Offset found.";
-#endif
-}
-
-
-TEST_F(ArchTest, HeapReferenceSize) {
-#if defined(HEAP_REFERENCE_SIZE)
-  EXPECT_EQ(sizeof(mirror::HeapReference<mirror::Object>),
-            static_cast<size_t>(HEAP_REFERENCE_SIZE));
-#else
-  LOG(INFO) << "No expected HeapReference Size found.";
-#endif
-}
-
-TEST_F(ArchTest, StackReferenceSize) {
-#if defined(STACK_REFERENCE_SIZE)
-  EXPECT_EQ(sizeof(StackReference<mirror::Object>),
-            static_cast<size_t>(STACK_REFERENCE_SIZE));
-#else
-  LOG(INFO) << "No expected StackReference Size #define found.";
-#endif
+  CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave);
+  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave);
+  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs,
+                 x86_64::kFrameSizeRefsAndArgsCalleeSave);
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index fb6458c..2af636e 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -30,38 +30,92 @@
 .arch armv7-a
 .thumb
 
+// Macro to load the value of Runtime::Current() into rDest, clobbering rTemp. Because the macro
+// uses labels, those labels must be unique; we bind them to the function name in the ENTRY macros.
+.macro RUNTIME_CURRENT name, num, rDest, rTemp
+    .if .Lruntime_current\num\()_used
+         .error
+    .endif
+    .set .Lruntime_current\num\()_used, 1
+    ldr \rDest, .Lgot_\name\()_\num               @ Load offset of the GOT.
+    ldr \rTemp, .Lruntime_instance_\name\()_\num  @ Load GOT offset of Runtime::instance_.
+.Lload_got_\name\()_\num\():
+    add \rDest, pc                                @ Fixup GOT address.
+    ldr \rDest, [\rDest, \rTemp]                  @ Load address of Runtime::instance_.
+    ldr \rDest, [\rDest]                          @ Load Runtime::instance_.
+.endm
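To see why this lands on the GOT: END (below) emits the literal _GLOBAL_OFFSET_TABLE_-(.Lload_got_...+4), and in Thumb state reading pc yields the current instruction's address plus 4, so the add cancels the subtraction exactly. A runnable restatement of the arithmetic (the addresses are made up for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t got_base      = 0x40081000;  // stand-in for _GLOBAL_OFFSET_TABLE_
      const uintptr_t load_got_addr = 0x40000100;  // stand-in for .Lload_got_...
      // The .word literal that END emits for .Lgot_<name>_<num>:
      const uintptr_t literal = got_base - (load_got_addr + 4);
      // "add rDest, pc" executed at .Lload_got (pc reads as address + 4):
      const uintptr_t r_dest = literal + (load_got_addr + 4);
      assert(r_dest == got_base);  // the two following loads then walk the
      return 0;                    // Runtime::instance_ GOT slot.
    }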
+
+// Common ENTRY declaration code for ARM and Thumb; an ENTRY should always be paired with an END.
+// Declares the RUNTIME_CURRENT[123] macros, which can be used within an ENTRY and have their
+// literals generated at END.
+.macro DEF_ENTRY thumb_or_arm, name
+    \thumb_or_arm
+    .type \name, #function
+    .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
+    .global \name
+    // Cache alignment for function entry.
+    .balign 16
+\name:
+    .cfi_startproc
+    .fnstart
+    // Track whether RUNTIME_CURRENT was used.
+    .set .Lruntime_current1_used, 0
+    .set .Lruntime_current2_used, 0
+    .set .Lruntime_current3_used, 0
+    // The RUNTIME_CURRENT macros are bound to the \name argument of DEF_ENTRY to ensure
+    // that label names are unique.
+    .macro RUNTIME_CURRENT1 rDest, rTemp
+        RUNTIME_CURRENT \name, 1, \rDest, \rTemp
+    .endm
+    .macro RUNTIME_CURRENT2 rDest, rTemp
+        RUNTIME_CURRENT \name, 2, \rDest, \rTemp
+    .endm
+    .macro RUNTIME_CURRENT3 rDest, rTemp
+        RUNTIME_CURRENT \name, 3, \rDest, \rTemp
+    .endm
+.endm
+
+// A Thumb2-style ENTRY.
 .macro ENTRY name
-    .thumb_func
-    .type \name, #function
-    .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
-    .global \name
-    /* Cache alignment for function entry */
-    .balign 16
-\name:
-    .cfi_startproc
-    .fnstart
+    DEF_ENTRY .thumb_func, \name
 .endm
 
+// An ARM-style ENTRY.
 .macro ARM_ENTRY name
-    .arm
-    .type \name, #function
-    .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
-    .global \name
-    /* Cache alignment for function entry */
-    .balign 16
-\name:
-    .cfi_startproc
-     /* Ensure we get a sane starting CFA. */
-    .cfi_def_cfa sp,0
-    .fnstart
+    DEF_ENTRY .arm, \name
 .endm
 
+// Terminate an ENTRY and generate GOT references.
 .macro END name
+    // Generate the GOT and Runtime::instance_ offsets used by RUNTIME_CURRENT.
+    .if .Lruntime_current1_used
+        .Lgot_\name\()_1:
+            .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_1+4)
+        .Lruntime_instance_\name\()_1:
+            .word   _ZN3art7Runtime9instance_E(GOT)
+    .endif
+    .if .Lruntime_current2_used
+        .Lgot_\name\()_2:
+            .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_2+4)
+        .Lruntime_instance_\name\()_2:
+            .word   _ZN3art7Runtime9instance_E(GOT)
+    .endif
+    .if .Lruntime_current3_used
+        .Lgot_\name\()_3:
+            .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_3+4)
+        .Lruntime_instance_\name\()_3:
+            .word   _ZN3art7Runtime9instance_E(GOT)
+    .endif
+    // Remove the RUNTIME_CURRENTx macros so they get rebound in the next function entry.
+    .purgem RUNTIME_CURRENT1
+    .purgem RUNTIME_CURRENT2
+    .purgem RUNTIME_CURRENT3
     .fnend
     .cfi_endproc
     .size \name, .-\name
 .endm
 
+// Declare an unimplemented ENTRY that will halt under a debugger (via bkpt).
 .macro UNIMPLEMENTED name
     ENTRY \name
     bkpt
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 330924e..5388cc0 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,21 +19,10 @@
 
 #include "asm_support.h"
 
-// Offset of field Thread::tls32_.state_and_flags verified in InitCpu
-#define THREAD_FLAGS_OFFSET 0
-// Offset of field Thread::tls32_.thin_lock_thread_id verified in InitCpu
-#define THREAD_ID_OFFSET 12
-// Offset of field Thread::tlsPtr_.card_table verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
-// Offset of field Thread::tlsPtr_.exception verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 124
-
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 48
 
-// Expected size of a heap reference
-#define HEAP_REFERENCE_SIZE 4
 // Flag for enabling R4 optimization in arm runtime
 #define ARM_R4_SUSPEND_FLAG
 
diff --git a/runtime/arch/arm/portable_entrypoints_arm.S b/runtime/arch/arm/portable_entrypoints_arm.S
index a34db6c..d37e760 100644
--- a/runtime/arch/arm/portable_entrypoints_arm.S
+++ b/runtime/arch/arm/portable_entrypoints_arm.S
@@ -53,7 +53,7 @@
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
     add    sp, #16                         @ first 4 args are not passed on stack for portable
-    ldr    ip, [r0, #METHOD_PORTABLE_CODE_OFFSET]  @ get pointer to the code
+    ldr    ip, [r0, #MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 3d619be..aae0c94 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -27,8 +27,8 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    push {r4-r11, lr} @ 9 words of callee saves
+.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
     .save {r4-r11, lr}
     .cfi_adjust_cfa_offset 36
     .cfi_rel_offset r4, 0
@@ -40,12 +40,17 @@
     .cfi_rel_offset r10, 24
     .cfi_rel_offset r11, 28
     .cfi_rel_offset lr, 32
-    vpush {s0-s31}
+    vpush {s0-s31}                                @ 32 words (128 bytes) of floats.
     .pad #128
     .cfi_adjust_cfa_offset 128
-    sub sp, #12       @ 3 words of space, bottom word will hold Method*
+    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*.
     .pad #12
     .cfi_adjust_cfa_offset 12
+    RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
+    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 128 + 12)
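The check is the layout arithmetic restated: nine core registers and thirty-two single-precision registers at four bytes each, plus the three-word Method*/alignment block, must equal the 176 bytes asm_support_arm.h publishes. Equivalently:

    static_assert(9 * 4 + 32 * 4 + 3 * 4 == 176,
                  "kSaveAll frame: {r4-r11, lr} + {s0-s31} + 3 words == 176 bytes");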
@@ -57,8 +62,8 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly).
      */
-.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-    push {r5-r8, r10-r11, lr} @ 7 words of callee saves
+.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp1, rTemp2
+    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
     .save {r5-r8, r10-r11, lr}
     .cfi_adjust_cfa_offset 28
     .cfi_rel_offset r5, 0
@@ -68,9 +73,14 @@
     .cfi_rel_offset r10, 16
     .cfi_rel_offset r11, 20
     .cfi_rel_offset lr, 24
-    sub sp, #4                @ bottom word will hold Method*
+    sub sp, #4                                    @ bottom word will hold Method*
     .pad #4
     .cfi_adjust_cfa_offset 4
+    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
+    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
@@ -78,7 +88,7 @@
 #endif
 .endm
 
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
     .cfi_restore r5
@@ -87,10 +97,10 @@
     .cfi_restore r8
     .cfi_restore r10
     .cfi_restore r11
-    .cfi_adjust_cfa_offset -32
+    .cfi_adjust_cfa_offset -FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
 .endm
 
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
     .cfi_restore r5
@@ -99,7 +109,7 @@
     .cfi_restore r8
     .cfi_restore r10
     .cfi_restore r11
-    .cfi_adjust_cfa_offset -32
+    .cfi_adjust_cfa_offset -FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
     bx  lr                   @ return
 .endm
 
@@ -107,8 +117,8 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
      */
-.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    push {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
+    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves
     .save {r1-r3, r5-r8, r10-r11, lr}
     .cfi_rel_offset r1, 0
     .cfi_rel_offset r2, 4
@@ -121,9 +131,15 @@
     .cfi_rel_offset r11, 32
     .cfi_rel_offset lr, 36
     .cfi_adjust_cfa_offset 40
-    sub sp, #8                        @ 2 words of space, bottom word will hold Method*
+    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
     .pad #8
     .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
+    THIS_LOAD_REQUIRES_READ_BARRIER
+     @ rTemp1 is kRefsAndArgs Method*.
+    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
+    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8)
@@ -131,7 +147,29 @@
 #endif
 .endm
 
-.macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves
+    .save {r1-r3, r5-r8, r10-r11, lr}
+    .cfi_rel_offset r1, 0
+    .cfi_rel_offset r2, 4
+    .cfi_rel_offset r3, 8
+    .cfi_rel_offset r5, 12
+    .cfi_rel_offset r6, 16
+    .cfi_rel_offset r7, 20
+    .cfi_rel_offset r8, 24
+    .cfi_rel_offset r10, 28
+    .cfi_rel_offset r11, 32
+    .cfi_rel_offset lr, 36
+    .cfi_adjust_cfa_offset 40
+    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
+    .pad #8
+    .cfi_adjust_cfa_offset 8
+
+    str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+.endm
+
+.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     add  sp, #8                      @ rewind sp
     pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
     .cfi_restore r1
@@ -146,6 +184,7 @@
     .cfi_adjust_cfa_offset -48
 .endm
 
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -165,41 +204,35 @@
 .macro DELIVER_PENDING_EXCEPTION
     .fnend
     .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME           @ save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1    @ save callee saves for throw
     mov    r0, r9                              @ pass Thread::Current
-    mov    r1, sp                              @ pass SP
-    b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*, SP)
+    b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
 
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r0, r1 // save all registers as basis for long jump context
     mov r0, r9                      @ pass Thread::Current
-    mov r1, sp                      @ pass SP
-    b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
 .endm
 
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1, r2  // save all registers as basis for long jump context
     mov r1, r9                      @ pass Thread::Current
-    mov r2, sp                      @ pass SP
-    b   \cxx_name                   @ \cxx_name(Thread*, SP)
-    bkpt
+    b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
 .endm
 
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r2, r3  // save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
-    mov r3, sp                      @ pass SP
-    b   \cxx_name                   @ \cxx_name(Thread*, SP)
-    bkpt
+    b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
 .endm
 
@@ -224,12 +257,11 @@
 .macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    ldr    r1, [sp, #32]                 @ pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
-    mov    r3, sp                        @ pass SP
-    bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
 END \name
 .endm
@@ -237,17 +269,11 @@
 .macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    ldr    r2, [sp, #32]                 @ pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!              @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*, SP)
-    add    sp, #16                       @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
 END \name
 .endm
@@ -255,21 +281,15 @@
 .macro  THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    ldr    r3, [sp, #32]                 @ pass referrer
-    mov    r12, sp                       @ save SP
-    sub    sp, #8                        @ grow frame for alignment with stack args
-    .pad #8
-    .cfi_adjust_cfa_offset 8
-    push   {r9, r12}                     @ pass Thread::Current and SP
-    .save {r9, r12}
-    .cfi_adjust_cfa_offset 8
-    .cfi_rel_offset r9, 0
-    .cfi_rel_offset r12, 4
-    bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*, SP)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12  @ save callee saves in case of GC
+    ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
     \return
 END \name
 .endm
@@ -325,8 +345,8 @@
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  @ save callee saves in case allocation triggers GC
-    ldr    r2, [sp, #48]                  @ pass caller Method*
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
+    ldr    r2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  @ pass caller Method*
     mov    r3, r9                         @ pass Thread::Current
     mov    r12, sp
     str    r12, [sp, #-16]!               @ expand the frame and pass SP
@@ -336,7 +356,7 @@
     add    sp, #16                        @ strip the extra frame
     .cfi_adjust_cfa_offset -16
     mov    r12, r1                        @ save Method*->code_
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
     bx     r12                            @ tail call to target
 1:
@@ -393,7 +413,7 @@
     ldr    r3, [sp, #12]                   @ copy arg value for r3
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
-    ldr    ip, [r0, #METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
+    ldr    ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
@@ -437,10 +457,10 @@
     cbz    r0, .Lslow_lock
 .Lretry_lock:
     ldr    r2, [r9, #THREAD_ID_OFFSET]
-    ldrex  r1, [r0, #LOCK_WORD_OFFSET]
+    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     cbnz   r1, .Lnot_unlocked         @ already thin locked
     @ unlocked case - r2 holds thread id with count of 0
-    strex  r3, r2, [r0, #LOCK_WORD_OFFSET]
+    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     cbnz   r3, .Lstrex_fail           @ store failed, retry
     dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
     bx lr
@@ -456,14 +476,13 @@
     add    r2, r1, #65536             @ increment count in lock word placing in r2 for storing
     lsr    r1, r2, 30                 @ if either of the top two bits are set, we overflowed.
     cbnz   r1, .Lslow_lock            @ if we overflow the count go slow path
-    str    r2, [r0, #LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+    str    r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
     bx lr
 .Lslow_lock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
-    mov    r2, sp                     @ pass SP
-    bl     artLockObjectFromCode      @ (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
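For orientation (the changes above only rename the offset macros and slim the slow-path call), the fast path is a thin lock: the lock word is atomically swapped from zero to the current thread id, the recursion count lives at bit 16 and up (hence the #65536 increment), and any value in the top two bits forces the slow path. A C++ sketch under those bit positions, which are inferred from the assembly rather than quoted from ART's LockWord:

    #include <atomic>
    #include <cstdint>

    // Returns true if the lock was taken inline; false means take the slow path.
    bool ThinLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      uint32_t expected = 0;                               // zero means unlocked
      if (lock_word.compare_exchange_strong(expected, thread_id,
                                            std::memory_order_acquire)) {
        return true;                                       // owner set, count 0
      }
      if ((expected >> 30) != 0) return false;             // inflated or special
      if ((expected & 0xFFFF) != thread_id) return false;  // another thread owns it
      const uint32_t incremented = expected + 65536;       // bump recursion count
      if ((incremented >> 30) != 0) return false;          // count would overflow
      lock_word.store(incremented, std::memory_order_relaxed);  // already owned
      return true;
    }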
@@ -475,7 +494,7 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     cbz    r0, .Lslow_unlock
-    ldr    r1, [r0, #LOCK_WORD_OFFSET]
+    ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     lsr    r2, r1, 30
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
@@ -486,18 +505,18 @@
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked, r3 holds 0
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
-    str    r3, [r0, #LOCK_WORD_OFFSET]
+    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     bx     lr
 .Lrecursive_thin_unlock:
     sub    r1, r1, #65536
-    str    r1, [r0, #LOCK_WORD_OFFSET]
+    str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     bx     lr
 .Lslow_unlock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case exception allocation triggers GC
+    @ save callee saves in case exception allocation triggers GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
     mov    r1, r9                     @ pass Thread::Current
-    mov    r2, sp                     @ pass SP
-    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object
@@ -528,10 +547,9 @@
     pop {r0-r1, lr}
     .cfi_restore r0
     .cfi_restore r1
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
-    mov r3, sp                      @ pass SP
-    b   artThrowClassCastException  @ (Class*, Class*, Thread*, SP)
+    b   artThrowClassCastException  @ (Class*, Class*, Thread*)
     bkpt
 END art_quick_check_cast
 
@@ -548,7 +566,7 @@
 
     .hidden art_quick_aput_obj_with_bound_check
 ENTRY art_quick_aput_obj_with_bound_check
-    ldr r3, [r0, #ARRAY_LENGTH_OFFSET]
+    ldr r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]
     cmp r3, r1
     bhi art_quick_aput_obj
     mov r0, r1
@@ -559,20 +577,20 @@
     .hidden art_quick_aput_obj
 ENTRY art_quick_aput_obj
     cbz r2, .Ldo_aput_null
-    ldr r3, [r0, #CLASS_OFFSET]
-    ldr ip, [r2, #CLASS_OFFSET]
-    ldr r3, [r3, #CLASS_COMPONENT_TYPE_OFFSET]
+    ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
+    ldr ip, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
+    ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
     cmp r3, ip  @ value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
-    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
     lsr r0, r0, #7
     strb r3, [r3, r0]
     blx lr
 .Ldo_aput_null:
-    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
     str r2, [r3, r1, lsl #2]
     blx lr
 .Lcheck_assignability:
@@ -593,7 +611,7 @@
     .cfi_restore r2
     .cfi_restore lr
     .cfi_adjust_cfa_offset -16
-    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
     lsr r0, r0, #7
@@ -606,12 +624,11 @@
     .cfi_restore r2
     .cfi_restore lr
     .cfi_adjust_cfa_offset -16
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3, ip
     mov r1, r2
-    mov r2, r9                   @ pass Thread::Current
-    mov r3, sp                   @ pass SP
-    b artThrowArrayStoreException  @ (Class*, Class*, Thread*, SP)
-    bkpt                         @ unreached
+    mov r2, r9                     @ pass Thread::Current
+    b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
+    bkpt                           @ unreached
 END art_quick_aput_obj
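The sequence above closes with the write barrier: the card index is the object address shifted right by 7, and the byte stored is the low byte of the card table base itself (r3), a bias trick that makes the dirty value nonzero without loading an extra constant. The same barrier in C++, under that reading of the code:

    #include <cstdint>

    // Mark the card covering obj after a reference store into it (sketch).
    void MarkCard(uint8_t* card_table_base, uintptr_t obj) {
      card_table_base[obj >> 7] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table_base));
    }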
 
     /*
@@ -621,12 +638,11 @@
      */
     .extern artInitializeStaticStorageFromCode
 ENTRY art_quick_initialize_static_storage
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME           @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3    @ save callee saves in case of GC
     mov    r2, r9                              @ pass Thread::Current
-    mov    r3, sp                              @ pass SP
-    @ artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*, SP)
+    @ artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*)
     bl     artInitializeStaticStorageFromCode
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_initialize_static_storage
@@ -636,12 +652,11 @@
      */
     .extern artInitializeTypeFromCode
 ENTRY art_quick_initialize_type
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME           @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3    @ save callee saves in case of GC
     mov    r2, r9                              @ pass Thread::Current
-    mov    r3, sp                              @ pass SP
-    @ artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, SP)
+    @ artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
     bl     artInitializeTypeFromCode
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_initialize_type
@@ -652,12 +667,11 @@
      */
     .extern artInitializeTypeAndVerifyAccessFromCode
 ENTRY art_quick_initialize_type_and_verify_access
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME           @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3    @ save callee saves in case of GC
     mov    r2, r9                              @ pass Thread::Current
-    mov    r3, sp                              @ pass SP
-    @ artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Method* referrer, Thread*, SP)
+    @ artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Method* referrer, Thread*)
     bl     artInitializeTypeAndVerifyAccessFromCode
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_initialize_type_and_verify_access
@@ -676,13 +690,12 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    ldr    r1, [sp, #32]                 @ pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
-    mov    r3, sp                        @ pass SP
-    bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*, SP)
+    bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -703,18 +716,12 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    ldr    r2, [sp, #32]                 @ pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!              @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*, SP)
-    add    sp, #16                       @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
+    bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -734,22 +741,17 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12   @ save callee saves in case of GC
     mov    r3, r2                        @ pass one half of wide argument
     mov    r2, r1                        @ pass other half of wide argument
-    ldr    r1, [sp, #32]                 @ pass referrer
-    mov    r12, sp                       @ save SP
-    sub    sp, #8                        @ grow frame for alignment with stack args
-    .pad #8
-    .cfi_adjust_cfa_offset 8
-    push   {r9, r12}                     @ pass Thread::Current and SP
-    .save {r9, r12}
-    .cfi_adjust_cfa_offset 8
-    .cfi_rel_offset r9, 0
-    bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*, SP)
+    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_static
@@ -766,19 +768,18 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     @ save callee saves in case of GC
-    mov    r12, sp                       @ save SP
-    sub    sp, #8                        @ grow frame for alignment with stack args
-    .pad #8
-    .cfi_adjust_cfa_offset 8
-    push   {r9, r12}                     @ pass Thread::Current and SP
-    .save {r9, r12}
-    .cfi_adjust_cfa_offset 8
-    .cfi_rel_offset r9, 0
-    bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Thread*, SP)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
+    ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
+    .pad #12
+    .cfi_adjust_cfa_offset 12
+    str    r12, [sp, #-4]!               @ expand the frame and pass the referrer
+    .pad #4
+    .cfi_adjust_cfa_offset 4
+    bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Method* referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_instance
@@ -791,12 +792,11 @@
      */
     .extern artResolveStringFromCode
 ENTRY art_quick_resolve_string
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    @ artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, SP)
+    @ artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*)
     bl     artResolveStringFromCode
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolve_string
@@ -805,11 +805,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -819,17 +818,11 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
+    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
     bl     \entrypoint
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -844,25 +837,24 @@
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
 #ifdef ARM_R4_SUSPEND_FLAG
-    ldrh    r0, [rSELF, #THREAD_FLAGS_OFFSET]
+    ldrh   r0, [rSELF, #THREAD_FLAGS_OFFSET]
     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
     bx     lr                                 @ return if suspend_count_ == 0
 1:
 #endif
     mov    r0, rSELF
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          @ save callee saves for stack crawl
-    mov    r1, sp
-    bl     artTestSuspendFromCode             @ (Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for GC stack crawl
+    @ TODO: save FPRs to enable access in the debugger?
+    bl     artTestSuspendFromCode             @ (Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          @ save callee saves for stack crawl
-    mov    r1, sp
-    bl     artTestSuspendFromCode             @ (Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for stack crawl
+    bl     artTestSuspendFromCode             @ (Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
     /*
@@ -872,8 +864,7 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    str     r0, [sp, #0]           @ place proxy method at bottom of frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
@@ -881,10 +872,10 @@
     add     sp, #16                @ skip r1-r3, 4 bytes padding.
     .cfi_adjust_cfa_offset -16
     cbnz    r2, 1f                 @ success if no exception is pending
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bx      lr                     @ return on success
 1:
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
@@ -894,25 +885,25 @@
      */
 ENTRY art_quick_imt_conflict_trampoline
     ldr    r0, [sp, #0]            @ load caller Method*
-    ldr    r0, [r0, #METHOD_DEX_CACHE_METHODS_OFFSET]  @ load dex_cache_resolved_methods
-    add    r0, #OBJECT_ARRAY_DATA_OFFSET  @ get starting address of data
+    ldr    r0, [r0, #MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET]  @ load dex_cache_resolved_methods
+    add    r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET  @ get starting address of data
     ldr    r0, [r0, r12, lsl 2]    @ load the target method
     b art_quick_invoke_interface_trampoline
 END art_quick_imt_conflict_trampoline
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
     cbz     r0, 1f                 @ is code pointer null? goto exception
     mov     r12, r0
     ldr  r0, [sp, #0]              @ load resolved method in r0
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     bx      r12                    @ tail-call into actual code
 1:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -920,8 +911,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    str r0, [sp, #0]  // Store native ArtMethod* to bottom of stack.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
 
     // Save rSELF
     mov r11, rSELF
@@ -1008,21 +998,21 @@
     .cfi_def_cfa_register sp
     mov r9, r11
 .Lexception_in_native:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 
 END art_quick_generic_jni_trampoline
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1, r2
     mov     r1, r9                 @ pass Thread::Current
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     add     sp, #16                @ skip r1-r3, 4 bytes padding.
     .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     bx    lr                       @ return on success
 1:
@@ -1035,30 +1025,23 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    str   r0, [sp, #4]     @ preserve r0
-    mov   r12, sp          @ remember sp
-    str   lr, [sp, #-16]!  @ expand the frame and pass LR
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset lr, 0
+    @ Make stack crawlable and clobber r2 and r3 (after they have been saved)
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
+    @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
+    str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
-    mov   r3, r12        @ pass SP
-    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP, LR)
-    add   sp, #16        @ remove out argument and padding from stack
-    .cfi_adjust_cfa_offset -16
+    mov   r3, lr         @ pass LR
+    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
-END art_quick_instrumentation_entry
+@ Deliberate fall-through into art_quick_instrumentation_exit.
     .type art_quick_instrumentation_exit, #function
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
-    .cfi_startproc
-    .fnstart
     mov   lr, #0         @ the link register points here, so clobber it with 0 for later checks
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ set up frame knowing r2 and r3 must be dead on exit
     mov   r12, sp        @ remember bottom of caller's frame
     push  {r0-r1}        @ save return value
     .save {r0-r1}
@@ -1085,7 +1068,7 @@
     add sp, #32          @ remove callee save frame
     .cfi_adjust_cfa_offset -32
     bx    r2             @ return
-END art_quick_instrumentation_exit
+END art_quick_instrumentation_entry
 
     /*
      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
@@ -1093,10 +1076,9 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
     mov    r0, r9         @ Set up args.
-    mov    r1, sp
-    blx    artDeoptimize  @ artDeoptimize(Thread*, SP)
+    blx    artDeoptimize  @ artDeoptimize(Thread*)
 END art_quick_deoptimize
 
     /*
@@ -1219,9 +1201,9 @@
     .cfi_rel_offset r10, 4
     .cfi_rel_offset r11, 8
     .cfi_rel_offset lr, 12
-    ldr   r3, [r0, #STRING_COUNT_OFFSET]
-    ldr   r12, [r0, #STRING_OFFSET_OFFSET]
-    ldr   r0, [r0, #STRING_VALUE_OFFSET]
+    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
+    ldr   r12, [r0, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr   r0, [r0, #MIRROR_STRING_VALUE_OFFSET]
 
     /* Clamp start to [0..count] */
     cmp   r2, #0
@@ -1232,7 +1214,7 @@
     movgt r2, r3
 
     /* Build a pointer to the start of string data */
-    add   r0, #STRING_DATA_OFFSET
+    add   r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
     add   r0, r0, r12, lsl #1
 
     /* Save a copy in r12 to later compute result */
@@ -1341,12 +1323,12 @@
     .cfi_rel_offset r12, 24
     .cfi_rel_offset lr, 28
 
-    ldr    r4, [r2, #STRING_OFFSET_OFFSET]
-    ldr    r9, [r1, #STRING_OFFSET_OFFSET]
-    ldr    r7, [r2, #STRING_COUNT_OFFSET]
-    ldr    r10, [r1, #STRING_COUNT_OFFSET]
-    ldr    r2, [r2, #STRING_VALUE_OFFSET]
-    ldr    r1, [r1, #STRING_VALUE_OFFSET]
+    ldr    r4, [r2, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr    r9, [r1, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
+    ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
+    ldr    r2, [r2, #MIRROR_STRING_VALUE_OFFSET]
+    ldr    r1, [r1, #MIRROR_STRING_VALUE_OFFSET]
 
     /*
      * At this point, we have:
@@ -1368,8 +1350,8 @@
       * Note: data pointers point to previous element so we can use pre-index
       * mode with base writeback.
       */
-     add   r2, #STRING_DATA_OFFSET-2   @ offset to contents[-1]
-     add   r1, #STRING_DATA_OFFSET-2   @ offset to contents[-1]
+     add   r2, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
+     add   r1, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
 
      /*
       * At this point we have:
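The hunks above all follow one pattern: entrypoints that used to receive an explicit SP argument now receive only Thread*, because the SETUP_*_CALLEE_SAVE_FRAME macros record SP in the thread (the arm64 and MIPS hunks below make the store through THREAD_TOP_QUICK_FRAME_OFFSET explicit). A minimal C++ sketch of that idea, using stand-in types rather than ART's real Thread and mirror::ArtMethod:

    #include <cstdio>

    struct ArtMethodStub {};                   // stand-in for mirror::ArtMethod
    struct ThreadStub {
      // The field the stubs write through THREAD_TOP_QUICK_FRAME_OFFSET.
      ArtMethodStub** top_quick_frame = nullptr;
    };

    // Before: extern "C" void artDeoptimize(ThreadStub* self, ArtMethodStub** sp);
    // After: SP is recovered from the thread instead of being passed.
    extern "C" void artDeoptimizeSketch(ThreadStub* self) {
      ArtMethodStub** sp = self->top_quick_frame;
      std::printf("callee-save frame at %p\n", static_cast<void*>(sp));
    }

    int main() {
      ArtMethodStub* frame[1] = {nullptr};     // bottom slot holds the Method*
      ThreadStub self;
      self.top_quick_frame = frame;            // what the stub's final store does
      artDeoptimizeSketch(&self);
    }

This is why the removed "mov r1, sp"-style lines and the trimmed signature comments travel together in every hunk.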
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index a926449..989ecc6 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -19,30 +19,8 @@
 
 #include "asm_support.h"
 
-// Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll] verified in InitCpu
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-// Offset of field Runtime::callee_save_methods_[kRefsOnly] verified in InitCpu
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs] verified in InitCpu
-#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
-
-// Offset of field Thread::suspend_count_
-#define THREAD_FLAGS_OFFSET 0
-// Offset of field Thread::card_table_
-#define THREAD_CARD_TABLE_OFFSET 120
-// Offset of field Thread::exception_
-#define THREAD_EXCEPTION_OFFSET 128
-// Offset of field Thread::thin_lock_thread_id_
-#define THREAD_ID_OFFSET 12
-
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
 
-// Expected size of a heap reference
-#define HEAP_REFERENCE_SIZE 4
-// Expected size of a stack reference
-#define STACK_REFERENCE_SIZE 4
-
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
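The constants deleted here were hand-maintained per architecture and could silently drift from the C++ layout; hoisting them into shared headers lets a single compile-time check per field guard every .S file at once. A sketch of that verification pattern, against a stand-in struct rather than ART's real Thread (the macro name below is illustrative):

    #include <cstddef>
    #include <cstdint>

    struct ThreadSketch {           // stand-in layout, not ART's real Thread
      uint32_t state_and_flags;     // must sit at the offset the .S files use
      uint32_t thin_lock_thread_id;
    };

    #define THREAD_FLAGS_OFFSET_SKETCH 0  // the value shared with the assembler

    static_assert(offsetof(ThreadSketch, state_and_flags) == THREAD_FLAGS_OFFSET_SKETCH,
                  "assembler constant out of sync with C++ layout");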
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ab9bf2d..0fb96d7 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -79,13 +79,16 @@
 
     // Loads appropriate callee-save-method
     str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly).
      */
-.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
@@ -133,11 +136,14 @@
     mov xETR, xSELF
 
     // Loads appropriate callee-save-method
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     // Restore xSELF.
     mov xSELF, xETR
 
@@ -170,7 +176,7 @@
     .cfi_adjust_cfa_offset -96
 .endm
 
-.macro POP_REF_ONLY_CALLEE_SAVE_FRAME
+.macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
     // Restore xSELF as it might be scratched.
     mov xSELF, xETR
     // ETR
@@ -181,13 +187,13 @@
     .cfi_adjust_cfa_offset -96
 .endm
 
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     ret
 .endm
 
 
-.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
     sub sp, sp, #224
     .cfi_adjust_cfa_offset 224
 
@@ -251,7 +257,7 @@
      *
      * TODO This is probably too conservative - saving FP & LR.
      */
-.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
@@ -260,15 +266,26 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsAndArgs].
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
 
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
 
     str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
+.endm
+
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     // Restore xSELF.
     mov xSELF, xETR
 
@@ -340,10 +357,9 @@
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov x0, xSELF
-    mov x1, sp
 
     // Point of no return.
-    b artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*, SP)
+    b artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
     brk 0  // Unreached
 .endm
 
@@ -376,8 +392,7 @@
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov x0, xSELF                     // pass Thread::Current
-    mov x1, sp                        // pass SP
-    b   \cxx_name                     // \cxx_name(Thread*, SP)
+    b   \cxx_name                     // \cxx_name(Thread*)
 END \c_name
 .endm
 
@@ -386,8 +401,7 @@
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
     mov x1, xSELF                     // pass Thread::Current.
-    mov x2, sp                        // pass SP.
-    b   \cxx_name                     // \cxx_name(arg, Thread*, SP).
+    b   \cxx_name                     // \cxx_name(arg, Thread*).
     brk 0
 END \c_name
 .endm
@@ -397,8 +411,7 @@
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
-    mov x3, sp                        // pass SP
-    b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*, SP)
+    b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
     brk 0
 END \c_name
 .endm
@@ -458,7 +471,7 @@
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
@@ -467,7 +480,7 @@
     mov    x4, sp
     bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
     mov    xIP0, x1                       // save Method*->code_
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
     br     xIP0                           // tail call to target
 1:
@@ -551,7 +564,7 @@
 .macro INVOKE_STUB_CALL_AND_RETURN
 
     // load method-> MIRROR_ART_METHOD_QUICK_CODE_OFFSET
-    ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET]
+    ldr x9, [x0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET]
     // Branch to method.
     blr x9
 
@@ -945,7 +958,7 @@
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
     cbz    w0, .Lslow_lock
-    add    x4, x0, #LOCK_WORD_OFFSET  // exclusive load/store had no immediate anymore
+    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
 .Lretry_lock:
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
     ldxr   w1, [x4]
@@ -966,14 +979,13 @@
     add    w2, w1, #65536             // increment count in lock word placing in w2 for storing
     lsr    w1, w2, 30                 // if either of the top two bits are set, we overflowed.
     cbnz   w1, .Lslow_lock            // if we overflow the count go slow path
-    str    w2, [x0, #LOCK_WORD_OFFSET]// no need for stxr as we hold the lock
+    str    w2, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  // no need for stxr as we hold the lock
     ret
 .Lslow_lock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
-    mov    x2, sp                     // pass SP
-    bl     artLockObjectFromCode      // (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     artLockObjectFromCode      // (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
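The fast paths above manipulate the lock word directly. The layout implied by the assembly (and by the runtime's LockWord class): state in bits 31..30, recursion count in bits 29..16 (hence the #65536 increment), owner thread id in bits 15..0. A sketch of the recursive-lock arithmetic only; the first acquisition needs the ldxr/stxr loop shown above:

    #include <cstdint>

    // Bump the thin-lock recursion count, bailing to the slow path on a
    // non-thin state, a foreign owner, or count overflow (top two bits set).
    static inline bool ThinLockRecurseSketch(uint32_t* lock_word, uint32_t thread_id) {
      uint32_t w = *lock_word;
      if ((w >> 30) != 0) return false;              // fat lock or hash code
      if ((w & 0xFFFFu) != thread_id) return false;  // not owned by this thread
      uint32_t bumped = w + 65536u;                  // add #65536: count += 1
      if ((bumped >> 30) != 0) return false;         // count spilled into state bits
      *lock_word = bumped;                           // we hold the lock: plain store
      return true;
    }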
 
@@ -986,7 +998,7 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     cbz    x0, .Lslow_unlock
-    ldr    w1, [x0, #LOCK_WORD_OFFSET]
+    ldr    w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     lsr    w2, w1, 30
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
@@ -997,18 +1009,17 @@
     bpl    .Lrecursive_thin_unlock
     // transition to unlocked, w3 holds 0
     dmb    ish                        // full (LoadStore|StoreStore) memory barrier
-    str    w3, [x0, #LOCK_WORD_OFFSET]
+    str    w3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     ret
 .Lrecursive_thin_unlock:
     sub    w1, w1, #65536
-    str    w1, [x0, #LOCK_WORD_OFFSET]
+    str    w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     ret
 .Lslow_unlock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
-    mov    x2, sp                     // pass SP
-    bl     artUnlockObjectFromCode    // (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
@@ -1058,8 +1069,7 @@
 
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
-    mov x3, sp                        // pass SP
-    b artThrowClassCastException      // (Class*, Class*, Thread*, SP)
+    b artThrowClassCastException      // (Class*, Class*, Thread*)
     brk 0                             // We should not return here...
 END art_quick_check_cast
 
@@ -1082,7 +1092,7 @@
 END art_quick_aput_obj_with_null_and_bound_check
 
 ENTRY art_quick_aput_obj_with_bound_check
-    ldr w3, [x0, #ARRAY_LENGTH_OFFSET]
+    ldr w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]
     cmp w3, w1
     bhi art_quick_aput_obj
     mov x0, x1
@@ -1092,16 +1102,16 @@
 
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    ldr w3, [x0, #CLASS_OFFSET]                          // Heap reference = 32b
+    ldr w3, [x0, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
                                                          // This also zero-extends to x3
-    ldr w4, [x2, #CLASS_OFFSET]                          // Heap reference = 32b
+    ldr w4, [x2, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
                                                          // This also zero-extends to x4
-    ldr w3, [x3, #CLASS_COMPONENT_TYPE_OFFSET]           // Heap reference = 32b
+    ldr w3, [x3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Heap reference = 32b
                                                          // This also zero-extends to x3
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
-    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                          // "Compress" = do nothing
     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
@@ -1109,7 +1119,7 @@
     strb w3, [x3, x0]
     ret
 .Ldo_aput_null:
-    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                          // "Compress" = do nothing
     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
     ret
@@ -1146,7 +1156,7 @@
     add sp, sp, #48
     .cfi_adjust_cfa_offset -48
 
-    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                           // "Compress" = do nothing
     str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
@@ -1168,8 +1178,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov x1, x2                    // Pass value.
     mov x2, xSELF                 // Pass Thread::Current.
-    mov x3, sp                    // Pass SP.
-    b artThrowArrayStoreException // (Object*, Object*, Thread*, SP).
+    b artThrowArrayStoreException // (Object*, Object*, Thread*).
     brk 0                         // Unreached.
 END art_quick_aput_obj
 
@@ -1177,11 +1186,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
-    mov    x3, sp                     // pass SP
-    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -1191,11 +1199,10 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
-    mov    x4, sp                     // pass SP
     bl     \entrypoint
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -1205,12 +1212,11 @@
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     ldr    w1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x2, xSELF                  // pass Thread::Current
-    mov    x3, sp                     // pass SP
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
 END \name
 .endm
@@ -1218,12 +1224,11 @@
 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     ldr    w2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x3, xSELF                  // pass Thread::Current
-    mov    x4, sp                     // pass SP
     bl     \entrypoint
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
 END \name
 .endm
@@ -1231,12 +1236,11 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     ldr    w3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x4, xSELF                  // pass Thread::Current
-    mov    x5, sp                     // pass SP
     bl     \entrypoint
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     \return
 END \name
 .endm
@@ -1287,14 +1291,13 @@
 // This is separated out as the argument order is different.
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     mov    x3, x1                     // Store value
     ldr    w1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x2, x3                     // Put value param
     mov    x3, xSELF                  // pass Thread::Current
-    mov    x4, sp                     // pass SP
     bl     artSet64StaticFromCode
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_set64_static
 
@@ -1320,18 +1323,16 @@
     ret                                       // return if flags == 0
 .Lneed_suspend:
     mov    x0, xSELF
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
-    mov    x1, sp
-    bl     artTestSuspendFromCode             // (Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    bl     artTestSuspendFromCode             // (Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    x0, xSELF
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
-    mov    x1, sp
-    bl     artTestSuspendFromCode             // (Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    bl     artTestSuspendFromCode             // (Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
      /*
@@ -1341,19 +1342,18 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    str     x0, [sp, #0]                // place proxy method at bottom of frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
     // Use xETR as xSELF might be scratched by native function above.
     ldr     x2, [xETR, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // deliver exception if one is pending
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
     ret                                 // return on success
 .Lexception_in_proxy:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
@@ -1363,24 +1363,24 @@
      */
 ENTRY art_quick_imt_conflict_trampoline
     ldr    w0, [sp, #0]                                // load caller Method*
-    ldr    w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
-    add    x0, x0, #OBJECT_ARRAY_DATA_OFFSET           // get starting address of data
+    ldr    w0, [x0, #MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
+    add    x0, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET    // get starting address of data
     ldr    w0, [x0, xIP1, lsl 2]                       // load the target method
     b art_quick_invoke_interface_trampoline
 END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
     mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr w0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     br xIP0
 1:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1439,8 +1439,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
-    str x0, [sp, #0]  // Store native ArtMethod* to bottom of stack.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
 
     // Save SP, so we can have static CFI info.
     mov x28, sp
@@ -1513,7 +1512,7 @@
     cbnz x1, .Lexception_in_native
 
     // Tear down the callee-save frame.
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     // store into fpr, for when it's a fpr return...
     fmov d0, x0
@@ -1523,7 +1522,7 @@
     mov sp, x28
     .cfi_def_cfa_register sp
 .Lexception_in_native:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 
 END art_quick_generic_jni_trampoline
@@ -1535,7 +1534,7 @@
  * x1..x7, d0..d7 = arguments to that method.
  */
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
 
     //  x0 will contain mirror::ArtMethod* method.
     mov x1, xSELF                          // pass Thread::Current
@@ -1545,7 +1544,7 @@
     //                                      mirror::ArtMethod** sp)
     bl   artQuickToInterpreterBridge
 
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
 
     fmov d0, x0
 
@@ -1558,19 +1557,18 @@
 //
     .extern artInstrumentationMethodEntryFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     mov   x20, x0             // Preserve method reference in a callee-save.
 
     mov   x2, xSELF
-    mov   x3, sp
-    mov   x4, xLR
-    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP, LR)
+    mov   x3, xLR
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
 
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
 END art_quick_instrumentation_entry
@@ -1579,7 +1577,7 @@
 ENTRY art_quick_instrumentation_exit
     mov   xLR, #0             // Clobber LR for later checks.
 
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
 
     // We need to save x0 and d0. We could use a callee-save from SETUP_REFS_ONLY, but then
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
@@ -1602,7 +1600,7 @@
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
-    POP_REF_ONLY_CALLEE_SAVE_FRAME
+    POP_REFS_ONLY_CALLEE_SAVE_FRAME
 
     br    xIP0                // Tail-call out.
 END art_quick_instrumentation_exit
@@ -1615,8 +1613,7 @@
 ENTRY art_quick_deoptimize
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov    x0, xSELF          // Pass thread.
-    mov    x1, sp             // Pass SP.
-    bl     artDeoptimize      // artDeoptimize(Thread*, SP)
+    bl     artDeoptimize      // artDeoptimize(Thread*)
     brk 0
 END art_quick_deoptimize
 
@@ -1631,9 +1628,9 @@
      *    w2:   Starting offset in string data
      */
 ENTRY art_quick_indexof
-    ldr   w3, [x0, #STRING_COUNT_OFFSET]
-    ldr   w4, [x0, #STRING_OFFSET_OFFSET]
-    ldr   w0, [x0, #STRING_VALUE_OFFSET] // x0 ?
+    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
+    ldr   w4, [x0, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr   w0, [x0, #MIRROR_STRING_VALUE_OFFSET] // x0 ?
 
     /* Clamp start to [0..count] */
     cmp   w2, #0
@@ -1642,7 +1639,7 @@
     csel  w2, w3, w2, gt
 
     /* Build a pointer to the start of the string data */
-    add   x0, x0, #STRING_DATA_OFFSET
+    add   x0, x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
     add   x0, x0, x4, lsl #1
 
     /* Save a copy to compute result */
@@ -1736,12 +1733,12 @@
     ret
 1:                        // Different string objects.
 
-    ldr    w6, [x2, #STRING_OFFSET_OFFSET]
-    ldr    w5, [x1, #STRING_OFFSET_OFFSET]
-    ldr    w4, [x2, #STRING_COUNT_OFFSET]
-    ldr    w3, [x1, #STRING_COUNT_OFFSET]
-    ldr    w2, [x2, #STRING_VALUE_OFFSET]
-    ldr    w1, [x1, #STRING_VALUE_OFFSET]
+    ldr    w6, [x2, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr    w5, [x1, #MIRROR_STRING_OFFSET_OFFSET]
+    ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
+    ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
+    ldr    w2, [x2, #MIRROR_STRING_VALUE_OFFSET]
+    ldr    w1, [x1, #MIRROR_STRING_VALUE_OFFSET]
 
     /*
      * Now:           CharArray*    Offset   Count
@@ -1761,8 +1758,8 @@
     add x1, x1, w5, sxtw #1
 
     // Add offset in CharArray to array.
-    add x2, x2, #STRING_DATA_OFFSET
-    add x1, x1, #STRING_DATA_OFFSET
+    add x2, x2, #MIRROR_CHAR_ARRAY_DATA_OFFSET
+    add x1, x1, #MIRROR_CHAR_ARRAY_DATA_OFFSET
 
     // TODO: Tune this value.
     // Check for long string, do memcmp16 for them.
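The STRING_* to MIRROR_STRING_* renames in the string intrinsics make explicit that these constants index fields of mirror:: objects. A sketch of the layout the constants describe, with illustrative field order (the authoritative layout is mirror::String in the runtime; at this point its character data lives in a separate char array object):

    #include <cstdint>

    struct MirrorCharArraySketch {
      int32_t length;                // MIRROR_ARRAY_LENGTH_OFFSET
      uint16_t data[1];              // MIRROR_CHAR_ARRAY_DATA_OFFSET points here
    };

    struct MirrorStringSketch {
      MirrorCharArraySketch* value;  // MIRROR_STRING_VALUE_OFFSET
      int32_t offset;                // MIRROR_STRING_OFFSET_OFFSET
      int32_t count;                 // MIRROR_STRING_COUNT_OFFSET
    };

    // The "build a pointer to the start of string data" step of the
    // indexof/compareto fast paths, in C++ form:
    static inline const uint16_t* StringDataSketch(const MirrorStringSketch* s) {
      return s->value->data + s->offset;
    }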
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index d8ec9cd..0d18f1a 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -26,15 +26,31 @@
 // Register holding Thread::Current().
 #define rSELF $s1
 
-
-    /* Cache alignment for function entry */
+     // Declares a function called \name and sets up $gp.
 .macro ENTRY name
     .type \name, %function
     .global \name
+    // Cache alignment for function entry.
     .balign 16
 \name:
     .cfi_startproc
-     /* Ensure we get a sane starting CFA. */
+     // Ensure we get a sane starting CFA.
+    .cfi_def_cfa $sp,0
+    // Load $gp. We expect that ".set noreorder" is in effect.
+    .cpload $t9
+    // Declare a local convenience label to be branched to when $gp is already set up.
+.L\name\()_gp_set:
+.endm
+
+     // Declares a function called \name without setting up $gp.
+.macro ENTRY_NO_GP name
+    .type \name, %function
+    .global \name
+    // Cache alignment for function entry.
+    .balign 16
+\name:
+    .cfi_startproc
+     // Ensure we get a sane starting CFA.
     .cfi_def_cfa $sp,0
 .endm
 
@@ -43,11 +59,6 @@
     .size \name, .-\name
 .endm
 
-    /* Generates $gp for function calls */
-.macro GENERATE_GLOBAL_POINTER
-    .cpload $t9
-.endm
-
 .macro UNIMPLEMENTED name
     ENTRY \name
     break
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 6add93b..5bece18 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,18 +19,8 @@
 
 #include "asm_support.h"
 
-// Offset of field Thread::tls32_.state_and_flags verified in InitCpu
-#define THREAD_FLAGS_OFFSET 0
-// Offset of field Thread::tlsPtr_.card_table verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
-// Offset of field Thread::tlsPtr_.exception verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 124
-
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 64
 
-// Expected size of a heap reference
-#define HEAP_REFERENCE_SIZE 4
-
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S
index e5f4a79..9a79467 100644
--- a/runtime/arch/mips/jni_entrypoints_mips.S
+++ b/runtime/arch/mips/jni_entrypoints_mips.S
@@ -24,7 +24,6 @@
      */
     .extern artFindNativeMethod
 ENTRY art_jni_dlsym_lookup_stub
-    GENERATE_GLOBAL_POINTER
     addiu $sp, $sp, -32          # leave room for $a0, $a1, $a2, $a3, and $ra
     .cfi_adjust_cfa_offset 32
     sw     $ra, 16($sp)
diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S
index 0196edc..aef81af 100644
--- a/runtime/arch/mips/memcmp16_mips.S
+++ b/runtime/arch/mips/memcmp16_mips.S
@@ -20,7 +20,7 @@
 #include "asm_support_mips.S"
 
 // u4 __memcmp16(const u2*, const u2*, size_t);
-ENTRY __memcmp16
+ENTRY_NO_GP __memcmp16
   li  $t0,0
   li  $t1,0
   beqz  $a2,done   /* 0 length string */
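For reference, the contract this hand-written loop implements, as a plain C++ sketch (the diff's prototype says u4; treat the exact return width and signedness here as an assumption — a mismatch returns the difference of the first differing 16-bit pair):

    #include <cstddef>
    #include <cstdint>

    // Reference semantics for __memcmp16: compare count 16-bit units,
    // returning 0 on equality, else the difference of the first mismatch.
    extern "C" int32_t Memcmp16Sketch(const uint16_t* s0, const uint16_t* s1,
                                      size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }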
diff --git a/runtime/arch/mips/portable_entrypoints_mips.S b/runtime/arch/mips/portable_entrypoints_mips.S
index a171a1d..d7e7a8e 100644
--- a/runtime/arch/mips/portable_entrypoints_mips.S
+++ b/runtime/arch/mips/portable_entrypoints_mips.S
@@ -21,7 +21,6 @@
 
     .extern artPortableProxyInvokeHandler
 ENTRY art_portable_proxy_invoke_handler
-    GENERATE_GLOBAL_POINTER
     # Fake callee save ref and args frame set up, note portable doesn't use callee save frames.
     # TODO: just save the registers that are needed in artPortableProxyInvokeHandler.
     addiu  $sp, $sp, -64
@@ -72,7 +71,6 @@
      *   [sp + 20] = result type char
      */
 ENTRY art_portable_invoke_stub
-    GENERATE_GLOBAL_POINTER
     sw    $a0, 0($sp)           # save out a0
     addiu $sp, $sp, -16         # spill s0, s1, fp, ra
     .cfi_adjust_cfa_offset 16
@@ -87,7 +85,7 @@
     move  $fp, $sp              # save sp in fp
     .cfi_def_cfa_register 30
     move  $s1, $a3              # move managed thread pointer into s1
-    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
+    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval. TODO: unused?
     addiu $t0, $a2, 16          # create space for method pointer in frame
     srl   $t0, $t0, 3           # shift the frame size right 3
     sll   $t0, $t0, 3           # shift the frame size left 3 to align to 8 bytes
@@ -100,7 +98,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, METHOD_PORTABLE_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
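The addiu/srl/sll triple in these invoke stubs computes the outgoing frame size: add room for the method pointer, then clear the low three bits. In C++ terms (a sketch; the +16 and the rounding width are read straight off the assembly):

    #include <cstdint>

    // Frame size used by the invoke stubs: argument area plus the method*
    // slot, rounded down to a multiple of 8 by the srl/sll pair.
    static inline uint32_t InvokeStubFrameSizeSketch(uint32_t args_size) {
      return (args_size + 16u) & ~7u;  // addiu +16 ; srl 3 ; sll 3
    }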
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 609c65a..905b867 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -29,7 +29,8 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
-     * callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word padding + 4 open words for args
+     * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word padding + 4 open words for args
+     * Clobbers $t0 and $gp
      */
 .macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     addiu  $sp, $sp, -64
@@ -63,6 +64,12 @@
     sw     $s0, 20($sp)
     .cfi_rel_offset 16, 20
     # 1 word for alignment, 4 open words for args $a0-$a3, bottom will hold Method*
+
+    ld $t0, _ZN3art7Runtime9instance_E
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ld $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
     /*
@@ -71,7 +78,7 @@
      * Does not include rSUSPEND or rSELF
      * callee-save: $s2-$s8 + $gp + $ra, 9 total + 3 words padding + 4 open words for args
      */
-.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     addiu  $sp, $sp, -64
     .cfi_adjust_cfa_offset 64
 
@@ -99,9 +106,15 @@
     sw     $s2, 28($sp)
     .cfi_rel_offset 18, 28
     # 3 words for alignment and extra args, 4 open words for args $a0-$a3, bottom will hold Method*
+
+    ld $t0, _ZN3art7Runtime9instance_E
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ld $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
     .cfi_restore 31
     lw     $s8, 56($sp)
@@ -124,7 +137,7 @@
     .cfi_adjust_cfa_offset -64
 .endm
 
-.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     lw     $ra, 60($sp)
     .cfi_restore 31
     lw     $s8, 56($sp)
@@ -153,7 +166,7 @@
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      */
-.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     addiu  $sp, $sp, -64
     .cfi_adjust_cfa_offset 64
 
@@ -187,9 +200,15 @@
     sw     $a1, 4($sp)
     .cfi_rel_offset 5, 4
     # bottom will hold Method*
+
+    ld $t0, _ZN3art7Runtime9instance_E
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ld $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
     .cfi_restore 31
     lw     $s8, 56($sp)
@@ -224,15 +243,14 @@
      */
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
-    move    $a0, rSELF                   # pass Thread::Current
     la      $t9, artDeliverPendingExceptionFromCode
-    jr      $t9                          # artDeliverPendingExceptionFromCode(Thread*, $sp)
-    move    $a1, $sp                     # pass $sp
+    jr      $t9                          # artDeliverPendingExceptionFromCode(Thread*)
+    move    $a0, rSELF                   # pass Thread::Current
 .endm
 
 .macro RETURN_IF_NO_EXCEPTION
     lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez   $t0, 1f                       # deliver exception if one is pending
     nop
     jr     $ra
@@ -242,7 +260,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez   $v0, 1f                       # success?
     nop
     jr     $ra                           # return on success
@@ -252,7 +270,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     beqz   $v0, 1f                       # success?
     nop
     jr     $ra                           # return on success
@@ -342,12 +360,10 @@
      * the bottom of the thread. On entry r0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    GENERATE_GLOBAL_POINTER
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a1, rSELF                 # pass Thread::Current
     la   $t9, artDeliverExceptionFromCode
-    jr   $t9                        # artDeliverExceptionFromCode(Throwable*, Thread*, $sp)
-    move $a2, $sp                   # pass $sp
+    jr   $t9                        # artDeliverExceptionFromCode(Throwable*, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
 END art_quick_deliver_exception
 
     /*
@@ -355,13 +371,10 @@
      */
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
-    GENERATE_GLOBAL_POINTER
-.Lart_quick_throw_null_pointer_exception_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowNullPointerExceptionFromCode
-    jr   $t9                        # artThrowNullPointerExceptionFromCode(Thread*, $sp)
-    move $a1, $sp                   # pass $sp
+    jr   $t9                        # artThrowNullPointerExceptionFromCode(Thread*)
+    move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
     /*
@@ -369,12 +382,10 @@
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    GENERATE_GLOBAL_POINTER
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowDivZeroFromCode
-    jr   $t9                        # artThrowDivZeroFromCode(Thread*, $sp)
-    move $a1, $sp                   # pass $sp
+    jr   $t9                        # artThrowDivZeroFromCode(Thread*)
+    move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_div_zero
 
     /*
@@ -382,13 +393,10 @@
      */
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
-    GENERATE_GLOBAL_POINTER
-.Lart_quick_throw_array_bounds_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowArrayBoundsFromCode
-    jr   $t9                        # artThrowArrayBoundsFromCode(index, limit, Thread*, $sp)
-    move $a3, $sp                   # pass $sp
+    jr   $t9                        # artThrowArrayBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
     /*
@@ -396,12 +404,10 @@
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    GENERATE_GLOBAL_POINTER
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowStackOverflowFromCode
-    jr   $t9                        # artThrowStackOverflowFromCode(Thread*, $sp)
-    move $a1, $sp                   # pass $sp
+    jr   $t9                        # artThrowStackOverflowFromCode(Thread*)
+    move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_stack_overflow
 
     /*
@@ -409,12 +415,10 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    GENERATE_GLOBAL_POINTER
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a1, rSELF                 # pass Thread::Current
     la   $t9, artThrowNoSuchMethodFromCode
-    jr   $t9                        # artThrowNoSuchMethodFromCode(method_idx, Thread*, $sp)
-    move $a2, $sp                   # pass $sp
+    jr   $t9                        # artThrowNoSuchMethodFromCode(method_idx, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
 END art_quick_throw_no_such_method
 
     /*
@@ -436,9 +440,8 @@
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
-    lw    $a2, 64($sp)                    # pass caller Method*
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    lw    $a2, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE($sp)  # pass caller Method*
     move  $t0, $sp                        # save $sp
     addiu $sp, $sp, -32                   # make space for extra args
     .cfi_adjust_cfa_offset 32
@@ -450,7 +453,7 @@
     .cfi_adjust_cfa_offset -32
     move  $a0, $v0                        # save target Method*
     move  $t9, $v1                        # save $v0->code_
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     beqz  $v0, 1f
     nop
     jr    $t9
@@ -479,7 +482,6 @@
      *   [sp + 20] = shorty
      */
 ENTRY art_quick_invoke_stub
-    GENERATE_GLOBAL_POINTER
     sw    $a0, 0($sp)           # save out a0
     addiu $sp, $sp, -16         # spill s0, s1, fp, ra
     .cfi_adjust_cfa_offset 16
@@ -507,7 +509,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, METHOD_QUICK_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, MIRROR_ART_METHOD_QUICK_CODE_OFFSET($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
@@ -543,12 +545,10 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    lw     $a2, 64($sp)                   # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artHandleFillArrayDataFromCode # (payload offset, Array*, method, Thread*)
     move   $a3, rSELF                     # pass Thread::Current
-    jal    artHandleFillArrayDataFromCode # (payload offset, Array*, method, Thread*, $sp)
-    sw     $sp, 16($sp)                   # pass $sp
     RETURN_IF_ZERO
 END art_quick_handle_fill_data
 
@@ -557,13 +557,11 @@
      */
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
-    GENERATE_GLOBAL_POINTER
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
+    jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
-    jal     artLockObjectFromCode         # (Object* obj, Thread*, $sp)
-    move    $a2, $sp                      # pass $sp
     RETURN_IF_ZERO
 END art_quick_lock_object
 
@@ -572,13 +570,11 @@
      */
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
-    GENERATE_GLOBAL_POINTER
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    jal     artUnlockObjectFromCode   # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
-    jal     artUnlockObjectFromCode   # (Object* obj, Thread*, $sp)
-    move    $a2, $sp                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
@@ -587,7 +583,6 @@
      */
     .extern artThrowClassCastException
 ENTRY art_quick_check_cast
-    GENERATE_GLOBAL_POINTER
     addiu  $sp, $sp, -16
     .cfi_adjust_cfa_offset 16
     sw     $ra, 12($sp)
@@ -609,10 +604,9 @@
     addiu  $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowClassCastException
-    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*, SP)
-    move $a3, $sp                   # pass $sp
+    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
 END art_quick_check_cast
 
     /*
@@ -621,7 +615,6 @@
      * a0 = array, a1 = index, a2 = value
      */
 ENTRY art_quick_aput_obj_with_null_and_bound_check
-    GENERATE_GLOBAL_POINTER
     bnez    $a0, .Lart_quick_aput_obj_with_bound_check_gp_set
     nop
     b .Lart_quick_throw_null_pointer_exception_gp_set
@@ -629,9 +622,7 @@
 END art_quick_aput_obj_with_null_and_bound_check
 
 ENTRY art_quick_aput_obj_with_bound_check
-    GENERATE_GLOBAL_POINTER
-.Lart_quick_aput_obj_with_bound_check_gp_set:
-    lw $t0, ARRAY_LENGTH_OFFSET($a0)
+    lw $t0, MIRROR_ARRAY_LENGTH_OFFSET($a0)
     sltu $t1, $a1, $t0
     bnez $t1, .Lart_quick_aput_obj_gp_set
     nop
@@ -641,19 +632,17 @@
 END art_quick_aput_obj_with_bound_check
 
 ENTRY art_quick_aput_obj
-    GENERATE_GLOBAL_POINTER
-.Lart_quick_aput_obj_gp_set:
     beqz $a2, .Ldo_aput_null
     nop
-    lw $t0, CLASS_OFFSET($a0)
-    lw $t1, CLASS_OFFSET($a2)
-    lw $t0, CLASS_COMPONENT_TYPE_OFFSET($t0)
+    lw $t0, MIRROR_OBJECT_CLASS_OFFSET($a0)
+    lw $t1, MIRROR_OBJECT_CLASS_OFFSET($a2)
+    lw $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($t0)
     bne $t1, $t0, .Lcheck_assignability  # value's type == array's component type - trivial assignability
     nop
 .Ldo_aput:
     sll $a1, $a1, 2
     add $t0, $a0, $a1
-    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
     srl $t1, $a0, 7
     add $t1, $t1, $t0
@@ -663,7 +652,7 @@
 .Ldo_aput_null:
     sll $a1, $a1, 2
     add $t0, $a0, $a1
-    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     jr  $ra
     nop
 .Lcheck_assignability:
@@ -690,10 +679,9 @@
     nop
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a1, $a2
-    move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowArrayStoreException
-    jr   $t9                        # artThrowArrayStoreException(Class*, Class*, Thread*, SP)
-    move $a3, $sp                   # pass $sp
+    jr   $t9                        # artThrowArrayStoreException(Object*, Object*, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
 END art_quick_aput_obj
 
     /*
@@ -703,12 +691,10 @@
      */
     .extern artInitializeStaticStorageFromCode
 ENTRY art_quick_initialize_static_storage
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME            # save callee saves in case of GC
-    move    $a2, rSELF                          # pass Thread::Current
-    # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME            # save callee saves in case of GC
+    # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeStaticStorageFromCode
-    move    $a3, $sp                            # pass $sp
+    move    $a2, rSELF                          # pass Thread::Current
     RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_static_storage
 
@@ -717,12 +703,10 @@
      */
     .extern artInitializeTypeFromCode
 ENTRY art_quick_initialize_type
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
-    move    $a2, rSELF                         # pass Thread::Current
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
+    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeTypeFromCode
-    move    $a3, $sp                           # pass $sp
+    move    $a2, rSELF                         # pass Thread::Current
     RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type
 
@@ -732,12 +716,10 @@
      */
     .extern artInitializeTypeAndVerifyAccessFromCode
 ENTRY art_quick_initialize_type_and_verify_access
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
-    move    $a2, rSELF                         # pass Thread::Current
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
+    # artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Method* referrer, Thread*)
     jal     artInitializeTypeAndVerifyAccessFromCode
-    move    $a3, $sp                           # pass $sp
+    move    $a2, rSELF                         # pass Thread::Current
     RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type_and_verify_access
     /*
@@ -745,12 +727,10 @@
      */
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_boolean_static
     /*
@@ -758,12 +738,10 @@
      */
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_byte_static
 
@@ -772,12 +750,10 @@
      */
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_char_static
     /*
@@ -785,12 +761,10 @@
      */
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_short_static
 
@@ -799,12 +773,10 @@
      */
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_static
 
@@ -813,12 +785,10 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get64_static
 
@@ -827,12 +797,10 @@
      */
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
-    jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*, $sp)
-    move   $a3, $sp                      # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_obj_static
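
The referrer load also switches from the magic literal 64 to the named frame-size constant: the caller's Method* sits in the bottom slot of the caller's frame, immediately above the refs-only callee-save frame that was just pushed, so the correct offset is exactly that frame's size. A minimal illustration of the invariant the old literal relied on (MIPS-specific; the x86 value in asm_support_x86.h below is 32):

    // Sketch: the old "lw $a1, 64($sp)" only worked because the MIPS
    // refs-only callee-save frame happened to be 64 bytes.
    static_assert(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE == 64,
                  "assumed by the old hard-coded referrer offset");
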
 
@@ -841,12 +809,10 @@
      */
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_boolean_instance
     /*
@@ -854,12 +820,10 @@
      */
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_byte_instance
 
@@ -868,12 +832,10 @@
      */
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_char_instance
     /*
@@ -881,12 +843,10 @@
      */
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetShortInstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGetShortInstanceFromCode      # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_short_instance
 
@@ -895,9 +855,8 @@
      */
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    move   $a3, rSELF                    # pass Thread::Current
-    jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
+    jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
+    move   $a3, rSELF                    # pass Thread::Current
@@ -909,9 +868,8 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    move   $a3, rSELF                    # pass Thread::Current
-    jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
+    jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
+    move   $a3, rSELF                    # pass Thread::Current
@@ -923,12 +881,10 @@
      */
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_NO_EXCEPTION
 END art_quick_get_obj_instance
 
@@ -937,12 +893,10 @@
      */
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set8_static
 
@@ -951,12 +905,10 @@
      */
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
-    move   $a3, rSELF                    # pass Thread::Current
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
-    jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*, $sp)
+    jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*)
-    sw     $sp, 16($sp)                  # pass $sp
+    move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_set16_static
 
@@ -965,12 +917,10 @@
      */
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set32_static
 
@@ -979,12 +929,10 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a1, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set64_static
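
Note the argument order for the 64-bit static setter: referrer moves up to $a1 so the wide new_val lands in the aligned $a2/$a3 register pair that o32 requires, which pushes Thread* out of the registers and into the first outgoing stack slot at 16($sp) (stored from the delay slot above). A hedged sketch of the C++ side (the int return feeds RETURN_IF_ZERO; exact types are assumptions):

    extern "C" int artSet64StaticFromCode(uint32_t field_idx,
                                          mirror::ArtMethod* referrer,
                                          uint64_t new_val, Thread* self);
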
 
@@ -993,12 +941,10 @@
      */
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a2, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
-    jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*, $sp)
-    sw     $sp, 16($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set_obj_static
 
@@ -1007,12 +953,10 @@
      */
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet8InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSet8InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set8_instance
 
@@ -1021,12 +965,10 @@
      */
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set16_instance
 
@@ -1035,12 +977,10 @@
      */
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set32_instance
 
@@ -1049,11 +989,11 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $t0, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # load referrer's Method*
+    sw     rSELF, 20($sp)                # pass Thread::Current
+    jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
+    sw     $t0, 16($sp)                  # pass referrer's Method*
     RETURN_IF_ZERO
 END art_quick_set64_instance
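
The 64-bit instance setter is the tightest case: field_idx, the object, and the wide value fill all of $a0-$a3, so both remaining arguments travel on the stack, referrer at 16($sp) (staged through the $t0 scratch register and stored from the delay slot) and Thread* at 20($sp). A hedged sketch of the matching C++ signature:

    extern "C" int artSet64InstanceFromCode(uint32_t field_idx,
                                            mirror::Object* obj, uint64_t new_val,
                                            mirror::ArtMethod* referrer,
                                            Thread* self);
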
 
@@ -1062,12 +1002,10 @@
      */
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
-    lw     $a3, 64($sp)                  # pass referrer's Method*
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case of GC
+    lw     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
-    jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*, $sp)
-    sw     $sp, 20($sp)                  # pass $sp
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
 
@@ -1079,12 +1017,10 @@
      */
     .extern artResolveStringFromCode
 ENTRY art_quick_resolve_string
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, $sp)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*)
     jal     artResolveStringFromCode
-    move    $a3, $sp                  # pass $sp
+    move    $a2, rSELF                # pass Thread::Current
     RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_resolve_string
 
@@ -1093,11 +1029,9 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     jal     \entrypoint
-    move    $a3, $sp                  # pass $sp
+    move    $a2, rSELF                # pass Thread::Current
     \return
 END \name
 .endm
@@ -1105,11 +1039,9 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     jal     \entrypoint
-    sw      $sp, 16($sp)              # pass $sp
+    move    $a3, rSELF                # pass Thread::Current
     \return
 END \name
 .endm
@@ -1122,18 +1054,16 @@
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
-    GENERATE_GLOBAL_POINTER
     lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
     bnez   $a0, 1f
     addi  rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jr     $ra
     nop
 1:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
+    jal    artTestSuspendFromCode              # (Thread*)
     move   $a0, rSELF
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
-    jal    artTestSuspendFromCode             # (Thread*, $sp)
-    move   $a1, $sp
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_test_suspend
 
     /*
@@ -1142,14 +1072,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     sw      $a0, 0($sp)            # place proxy method at bottom of frame
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez    $t0, 1f
     mtc1    $v0, $f0               # place return value to FP return value
     jr      $ra
@@ -1162,32 +1091,30 @@
      * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
      * dex method index.
      */
-ENTRY art_quick_imt_conflict_trampoline
-    GENERATE_GLOBAL_POINTER
+ENTRY_NO_GP art_quick_imt_conflict_trampoline
     lw      $a0, 0($sp)            # load caller Method*
-    lw      $a0, METHOD_DEX_CACHE_METHODS_OFFSET($a0)  # load dex_cache_resolved_methods
+    lw      $a0, MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET($a0)  # load dex_cache_resolved_methods
     sll     $t0, 2                 # convert target method offset to bytes
     add     $a0, $t0               # get address of target method
-    lw      $a0, OBJECT_ARRAY_DATA_OFFSET($a0)  # load the target method
+    lw      $a0, MIRROR_OBJECT_ARRAY_DATA_OFFSET($a0)  # load the target method
     la      $t9, art_quick_invoke_interface_trampoline
     jr      $t9
 END art_quick_imt_conflict_trampoline
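
In the IMT-conflict trampoline, "sll $t0, 2" is the two-operand form of "sll $t0, $t0, 2": the hidden dex method index is scaled by 4, the size of a heap reference, to index into the dex cache's resolved-methods array. In C++ terms, roughly (a sketch; the accessor names follow the era's mirror::ArtMethod API):

    // caller is the Method* loaded from 0($sp); t0 holds the hidden index.
    mirror::ArtMethod* target =
        caller->GetDexCacheResolvedMethods()->Get(t0);  // data + t0 * 4
    // then tail-call art_quick_invoke_interface_trampoline on target.
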
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     beqz    $v0, 1f
     lw      $a0, 0($sp)            # load resolved method to $a0
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jr      $v0                    # tail call to method
     nop
 1:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1195,13 +1122,12 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move    $a1, rSELF             # pass Thread::Current
     jal     artQuickToInterpreterBridge    # (Method* method, Thread*, SP)
     move    $a2, $sp               # pass $sp
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bnez    $t0, 1f
     mtc1    $v0, $f0               # place return value to FP return value
     jr      $ra
@@ -1216,21 +1142,19 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     move     $t0, $sp       # remember bottom of caller's frame
-    addiu    $sp, $sp, -32  # space for args, pad (3 words), arguments (5 words)
+    addiu    $sp, $sp, -32  # space for saved a0, pad (3 words), arguments (4 words)
     .cfi_adjust_cfa_offset 32
     sw       $a0, 28($sp)   # save arg0
-    sw       $ra, 16($sp)   # pass $ra
-    move     $a3, $t0       # pass $sp
-    jal      artInstrumentationMethodEntryFromCode  # (Method*, Object*, Thread*, SP, LR)
+    move     $a3, $ra       # pass $ra
+    jal      artInstrumentationMethodEntryFromCode  # (Method*, Object*, Thread*, LR)
     move     $a2, rSELF     # pass Thread::Current
     move     $t9, $v0       # $t9 holds reference to code
     lw       $a0, 28($sp)   # restore arg0
     addiu    $sp, $sp, 32   # remove args
     .cfi_adjust_cfa_offset -32
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     jalr     $t9            # call method
     nop
 END art_quick_instrumentation_entry
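
With SP gone, the instrumentation entry passes the return address in $a3 instead of on the stack, so the helper's C++ signature loses both the SP parameter and the stacked LR. A hedged sketch (the return value is the code pointer the stub then jumps to via $t9):

    extern "C" const void* artInstrumentationMethodEntryFromCode(
        mirror::ArtMethod* method, mirror::Object* this_object,
        Thread* self, uintptr_t lr);
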
@@ -1239,9 +1163,9 @@
 art_quick_instrumentation_exit:
     .cfi_startproc
     addiu    $t9, $ra, 4    # put current address into $t9 to rebuild $gp
-    GENERATE_GLOBAL_POINTER
+    .cpload  $t9
     move     $ra, $zero     # link register is to here, so clobber with 0 for later checks
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     move     $t0, $sp       # remember bottom of caller's frame
     addiu    $sp, $sp, -48  # save return values and set up args
     .cfi_adjust_cfa_offset 48
@@ -1274,14 +1198,11 @@
      * will long jump to the upcall with a special exception of -1.
      */
     .extern artDeoptimize
-    .extern artEnterInterpreterFromDeoptimize
 ENTRY art_quick_deoptimize
-    GENERATE_GLOBAL_POINTER
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    move     $a0, rSELF     # pass Thread::current
-    jal      artDeoptimize  # artDeoptimize(Thread*, SP)
+    jal      artDeoptimize  # artDeoptimize(Thread*)
                             # Returns caller method's frame size.
-    move     $a1, $sp       # pass $sp
+    move     $a0, rSELF     # pass Thread::Current
 END art_quick_deoptimize
 
     /*
@@ -1294,7 +1215,7 @@
      *   $a1: high word
      *   $a2: shift count
      */
-ENTRY art_quick_shl_long
+ENTRY_NO_GP art_quick_shl_long
     /* shl-long vAA, vBB, vCC */
     sll     $v0, $a0, $a2                    #  rlo<- alo << (shift&31)
     not     $v1, $a2                         #  rhi<- 31-shift  (shift is 5b)
@@ -1318,8 +1239,7 @@
      *   $a1: high word
      *   $a2: shift count
      */
-    .global art_quick_shr_long
-ENTRY art_quick_shr_long
+ENTRY_NO_GP art_quick_shr_long
     sra     $v1, $a1, $a2                    #  rhi<- ahi >> (shift&31)
     srl     $v0, $a0, $a2                    #  rlo<- alo >> (shift&31)
     sra     $a3, $a1, 31                     #  $a3<- sign(ah)
@@ -1344,8 +1264,7 @@
      *   r2: shift count
      */
     /* ushr-long vAA, vBB, vCC */
-    .global art_quick_ushr_long
-ENTRY art_quick_ushr_long
+ENTRY_NO_GP art_quick_ushr_long
     srl     $v1, $a1, $a2                    #  rhi<- ahi >> (shift&31)
     srl     $v0, $a0, $a2                    #  rlo<- alo >> (shift&31)
     not     $a0, $a2                         #  alo<- 31-shift (shift is 5b)
@@ -1358,12 +1277,5 @@
     movn    $v1, $zero, $a2                  #  rhi<- 0 (if shift&0x20)
 END art_quick_ushr_long
 
-ENTRY art_quick_indexof
-    jr $ra
-    nop
-END art_quick_indexof
-
-ENTRY art_quick_string_compareto
-    jr $ra
-    nop
-END art_quick_string_compareto
+UNIMPLEMENTED art_quick_indexof
+UNIMPLEMENTED art_quick_string_compareto
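
The hand-written index-of and string-compare stubs used to silently "jr $ra", returning whatever happened to be in $v0. Routing them through an UNIMPLEMENTED macro presumably makes a missing intrinsic fail loudly instead; a sketch of what such a macro would look like (the actual body lives elsewhere in the MIPS support files and is an assumption here):

    .macro UNIMPLEMENTED name
    ENTRY \name
        break                     # trap rather than silently return garbage
        break
    END \name
    .endm
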
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index c9b9f04..ea586b8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1221,13 +1221,12 @@
   // Use array so we can index into it and use a matrix for expected results
   // Setup: The first half is standard. The second half uses a non-zero offset.
   // TODO: Shared backing arrays.
-  static constexpr size_t kBaseStringCount  = 8;
-  const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab",
+  const char* c[] = { "", "", "a", "aa", "ab",
       "aacaacaacaacaacaac",  // This one's under the default limit to go to __memcmp16.
       "aacaacaacaacaacaacaacaacaacaacaacaac",     // This one's over.
       "aacaacaacaacaacaacaacaacaacaacaacaaca" };  // As is this one. We need a separate one to
                                                   // defeat object-equal optimizations.
-
+  static constexpr size_t kBaseStringCount = arraysize(c);
   static constexpr size_t kStringCount = 2 * kBaseStringCount;
 
   StackHandleScope<kStringCount> hs(self);
@@ -1337,10 +1336,9 @@
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 5;
-  int8_t values[num_values] = { -128, -64, 0, 64, 127 };
+  int8_t values[] = { -128, -64, 0, 64, 127 };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
@@ -1367,10 +1365,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 5;
-  uint8_t values[num_values] = { 0, true, 2, 128, 0xFF };
+  uint8_t values[] = { 0, true, 2, 128, 0xFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -1401,10 +1398,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 5;
-  int8_t values[num_values] = { -128, -64, 0, 64, 127 };
+  int8_t values[] = { -128, -64, 0, 64, 127 };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -1435,10 +1431,9 @@
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 6;
-  uint16_t values[num_values] = { 0, 1, 2, 255, 32768, 0xFFFF };
+  uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
@@ -1464,10 +1459,9 @@
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 6;
-  int16_t values[num_values] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
+  int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
@@ -1494,10 +1488,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 6;
-  uint16_t values[num_values] = { 0, 1, 2, 255, 32768, 0xFFFF };
+  uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -1527,10 +1520,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 6;
-  int16_t values[num_values] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
+  int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -1561,10 +1553,9 @@
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 7;
-  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+  uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
@@ -1592,10 +1583,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  constexpr size_t num_values = 7;
-  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+  uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -1716,10 +1706,9 @@
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
-  constexpr size_t num_values = 8;
-  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+  uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                                values[i],
                                StubTest::GetEntrypoint(self, kQuickSet64Static),
@@ -1746,10 +1735,9 @@
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
-  constexpr size_t num_values = 8;
-  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+  uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
-  for (size_t i = 0; i < num_values; ++i) {
+  for (size_t i = 0; i < arraysize(values); ++i) {
     test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
@@ -2109,10 +2097,10 @@
   // Use array so we can index into it and use a matrix for expected results
   // Setup: The first half is standard. The second half uses a non-zero offset.
   // TODO: Shared backing arrays.
-  static constexpr size_t kStringCount = 7;
-  const char* c_str[kStringCount] = { "", "a", "ba", "cba", "dcba", "edcba", "asdfghjkl" };
-  static constexpr size_t kCharCount = 5;
-  const char c_char[kCharCount] = { 'a', 'b', 'c', 'd', 'e' };
+  const char* c_str[] = { "", "a", "ba", "cba", "dcba", "edcba", "asdfghjkl" };
+  static constexpr size_t kStringCount = arraysize(c_str);
+  const char c_char[] = { 'a', 'b', 'c', 'd', 'e' };
+  static constexpr size_t kCharCount = arraysize(c_char);
 
   StackHandleScope<kStringCount> hs(self);
   Handle<mirror::String> s[kStringCount];
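
The repeated stub_test.cc change is the classic "let the initializer determine the count" cleanup: the element counts can no longer drift out of sync with the initializer lists. arraysize() is the usual template idiom (ART keeps its copy in base/macros.h); a minimal sketch:

    // Yields N for a T[N] at compile time; unlike sizeof(a)/sizeof(a[0]),
    // it refuses to compile when handed a pointer.
    template <typename T, size_t N>
    char (&ArraySizeHelper(T (&array)[N]))[N];
    #define arraysize(array) (sizeof(ArraySizeHelper(array)))
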
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 78b97e5..fea16da 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -164,10 +164,14 @@
     SIZE(\name, 0)
 END_MACRO
 
-MACRO0(SETUP_GOT_NOSAVE)
+MACRO1(SETUP_GOT_NOSAVE, got_reg)
 #ifndef __APPLE__
-    call __x86.get_pc_thunk.bx
-    addl $_GLOBAL_OFFSET_TABLE_, %ebx
+    .ifc RAW_VAR(got_reg, 0), ebx
+      call __x86.get_pc_thunk.bx
+      addl $_GLOBAL_OFFSET_TABLE_, %ebx
+    .else
+      .error "Unknown GOT register \got_reg"
+    .endif
 #endif
 END_MACRO
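
Making the GOT register an explicit parameter documents which register each call site clobbers (and the .error arm rejects anything the thunk cannot handle). The thunk itself is the standard GCC PIC helper, which simply returns its own return address:

    __x86.get_pc_thunk.bx:        // provided by the toolchain environment
        movl (%esp), %ebx         // %ebx := address of the instruction after the call
        ret
    // adding $_GLOBAL_OFFSET_TABLE_ (a PC-relative offset) then yields the GOT base.
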
 
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index c9f5a25..5a88f80 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -19,20 +19,8 @@
 
 #include "asm_support.h"
 
-// Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 156
-// Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
-// Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 124
-// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
-#define THREAD_ID_OFFSET 12
-
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 32
 
-// Expected size of a heap reference
-#define HEAP_REFERENCE_SIZE 4
-
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/portable_entrypoints_x86.S b/runtime/arch/x86/portable_entrypoints_x86.S
index 70c0ae2..a7c4124 100644
--- a/runtime/arch/x86/portable_entrypoints_x86.S
+++ b/runtime/arch/x86/portable_entrypoints_x86.S
@@ -37,7 +37,7 @@
     andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
     subl LITERAL(12), %ebx        // remove space for return address, ebx, and ebp
     subl %ebx, %esp               // reserve stack space for argument array
-    SETUP_GOT_NOSAVE              // reset ebx to GOT table
+    SETUP_GOT_NOSAVE ebx          // reset ebx to GOT table
     lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
     pushl 20(%ebp)                // push size of region to memcpy
     pushl 16(%ebp)                // push arg array as source of memcpy
@@ -46,7 +46,7 @@
     addl LITERAL(12), %esp        // pop arguments to memcpy
     mov 12(%ebp), %eax            // move method pointer into eax
     mov %eax, (%esp)              // push method pointer onto stack
-    call *METHOD_PORTABLE_CODE_OFFSET(%eax) // call the method
+    call *MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index a158e6d..a1fdcf1 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -22,12 +22,21 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
-MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+MACRO2(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
-    subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
-    CFI_ADJUST_CFA_OFFSET(16)
+    subl  MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
+    CFI_ADJUST_CFA_OFFSET(12)
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg, 0)
+    // Load Runtime::instance_ from GOT.
+    movl _ZN3art7Runtime9instance_E@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
+    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    // Push save all callee-save method.
+    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
 #if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 3*4 + 16 + 4)
@@ -39,12 +48,21 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly)
      */
-MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
+MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
-    subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
-    CFI_ADJUST_CFA_OFFSET(16)
+    subl  MACRO_LITERAL(12), %esp  // Grow stack by 3 words.
+    CFI_ADJUST_CFA_OFFSET(12)
+    SETUP_GOT_NOSAVE VAR(got_reg, 0)
+    // Load Runtime::instance_ from GOT.
+    movl _ZN3art7Runtime9instance_E@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
+    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    // Push refs-only callee-save method.
+    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
@@ -53,7 +71,7 @@
 #endif
 END_MACRO
 
-MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to saved values
     CFI_ADJUST_CFA_OFFSET(-16)
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
@@ -65,14 +83,22 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
      */
-MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves
     PUSH esi
     PUSH ebp
     PUSH ebx  // Save args
     PUSH edx
     PUSH ecx
-    PUSH eax   // Align stack, eax will be clobbered by Method*
+    SETUP_GOT_NOSAVE VAR(got_reg, 0)
+    // Load Runtime::instance_ from GOT.
+    movl _ZN3art7Runtime9instance_E@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1)
+    movl (REG_VAR(temp_reg, 1)), REG_VAR(temp_reg, 1)
+    // Push refs-and-args callee-save method.
+    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg, 1))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
@@ -81,7 +107,23 @@
 #endif
 END_MACRO
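
All three setup macros now share one pattern: find the Runtime singleton through the GOT, push the appropriate callee-save Method* as the bottom of the new frame, and record the frame on the Thread. _ZN3art7Runtime9instance_E demangles to art::Runtime::instance_, so in C++ terms the prologue computes roughly (a sketch; the accessor name follows the era's Runtime API):

    Runtime* runtime = art::Runtime::instance_;              // movl ...@GOT + deref
    mirror::ArtMethod* m = runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
    // pushl RUNTIME_..._FRAME_OFFSET(temp) pushes m; the movl to
    // %fs:THREAD_TOP_QUICK_FRAME_OFFSET publishes the frame to the Thread.
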
 
-MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX.
+     */
+MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX)
+    PUSH edi  // Save callee saves
+    PUSH esi
+    PUSH ebp
+    PUSH ebx  // Save args
+    PUSH edx
+    PUSH ecx
+    PUSH eax  // Store the ArtMethod reference at the bottom of the stack.
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+END_MACRO
+
+MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(4), %esp  // Remove padding
     CFI_ADJUST_CFA_OFFSET(-4)
     POP ecx  // Restore args except eax
@@ -97,59 +139,54 @@
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
-    mov %esp, %ecx
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
     // Outgoing argument set up
-    subl  MACRO_LITERAL(8), %esp             // Alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ecx                                 // pass SP
-    pushl %fs:THREAD_SELF_OFFSET             // pass Thread::Current()
+    subl  MACRO_LITERAL(12), %esp              // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
-    int3                                     // unreached
+    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
+    int3                                       // unreached
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %esp, %ecx
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    subl  MACRO_LITERAL(8), %esp  // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ecx                      // pass SP
+    subl  MACRO_LITERAL(12), %esp  // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(Thread*)
     int3                          // unreached
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    PUSH eax                      // alignment padding
-    PUSH ecx                      // pass SP
+    subl  MACRO_LITERAL(8), %esp  // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, Thread*)
     int3                          // unreached
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %esp, %edx
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    PUSH edx                      // pass SP
+    PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, Thread*)
     int3                          // unreached
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
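
The reshuffled padding in these helpers keeps the x86 stack 16-byte aligned at each call: the callee-save frame leaves %esp aligned, so padding plus pushed arguments must total a multiple of 16. Spelled out (each term is 4 bytes):

    // no-arg : 12 pad + Thread*                     = 16
    // one-arg:  8 pad + Thread* + arg1              = 16
    // two-arg:  4 pad + Thread* + arg2 + arg1       = 16
    static_assert(12 + 4 == 16 && 8 + 4 + 4 == 16 && 4 + 4 + 4 + 4 == 16,
                  "exception helpers keep the call site 16-byte aligned");
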
@@ -204,15 +241,7 @@
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    // Set up the callee save frame to conform with Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
-    // return address
-    PUSH edi
-    PUSH esi
-    PUSH ebp
-    PUSH ebx  // Save args
-    PUSH edx
-    PUSH ecx
-    PUSH eax    // <-- callee save Method* to go here
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp  // alignment padding
@@ -224,7 +253,7 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, arg3, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, Thread*, SP)
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(36), %esp  // Pop arguments skip eax
     CFI_ADJUST_CFA_OFFSET(-36)
@@ -275,7 +304,7 @@
     andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
     subl LITERAL(12), %ebx        // remove space for return address, ebx, and ebp
     subl %ebx, %esp               // reserve stack space for argument array
-    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
+    SETUP_GOT_NOSAVE ebx          // clobbers ebx (harmless here)
     lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
     pushl 20(%ebp)                // push size of region to memcpy
     pushl 16(%ebp)                // push arg array as source of memcpy
@@ -287,7 +316,7 @@
     mov 4(%esp), %ecx             // copy arg1 into ecx
     mov 8(%esp), %edx             // copy arg2 into edx
     mov 12(%esp), %ebx            // copy arg3 into ebx
-    call *METHOD_QUICK_CODE_OFFSET(%eax) // call the method
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     CFI_DEF_CFA_REGISTER(esp)
     POP ebx                       // pop ebx
@@ -311,120 +340,127 @@
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp   // push padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH edx                      // pass SP
+    subl MACRO_LITERAL(12), %esp  // push padding
+    CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(Thread*)
     addl MACRO_LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH eax                      // push padding
-    PUSH edx                      // pass SP
+    subl MACRO_LITERAL(8), %esp   // push padding
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, Thread*)
     addl MACRO_LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH edx                      // pass SP
+    PUSH eax                      // push padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, Thread*)
     addl MACRO_LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp  // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, arg3, Thread*, SP)
-    addl MACRO_LITERAL(32), %esp  // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, Thread*)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
-    mov 32(%esp), %ecx            // get referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH edx                      // pass SP
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    PUSH eax                      // push padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass referrer
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)         // cxx_name(arg1, referrer, Thread*, SP)
+    call VAR(cxx_name, 1)         // cxx_name(arg1, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl MACRO_LITERAL(12), %esp  // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
+    // Outgoing argument set up
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %edx  // get referrer
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    // Outgoing argument set up
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, referrer, Thread*, SP)
-    addl MACRO_LITERAL(32), %esp  // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, referrer, Thread*)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    subl LITERAL(12), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ebx                      // pass referrer
+    PUSH edx                      // pass arg3
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, referrer, Thread*)
+    addl LITERAL(32), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    CALL_MACRO(return_macro, 2)   // return or deliver exception
+    END_FUNCTION RAW_VAR(c_name, 0)
+END_MACRO
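
The three *_REF_DOWNCALL macros above share one calling convention: the Java-visible arguments come first, then the referring method, then Thread::Current(), with the explicit SP parameter dropped now that the runtime recovers the frame on its own. A minimal C++ sketch of the signatures this implies (forward declarations stand in for the runtime headers; treat the exact spellings as assumptions):

    // Sketch only: (arg..., referrer, Thread*), no trailing SP.
    #include <cstdint>
    namespace mirror { class ArtMethod; class Object; }
    class Thread;

    extern "C" int32_t artGet32StaticFromCode(uint32_t field_idx,
                                              mirror::ArtMethod* referrer,
                                              Thread* self);
    extern "C" int artSet32InstanceFromCode(uint32_t field_idx,
                                            mirror::Object* obj,
                                            uint32_t new_value,
                                            mirror::ArtMethod* referrer,
                                            Thread* self);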
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
     testl %eax, %eax               // eax == 0 ?
@@ -443,9 +479,8 @@
 END_MACRO
 
 MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
-    mov %fs:THREAD_EXCEPTION_OFFSET, %ebx // get exception field
-    testl %ebx, %ebx               // ebx == 0 ?
-    jnz 1f                         // if ebx != 0 goto 1
+    cmpl MACRO_LITERAL(0),%fs:THREAD_EXCEPTION_OFFSET // exception field == 0 ?
+    jne 1f                         // if exception field != 0 goto 1
     ret                            // return
 1:                                 // deliver exception on current thread
     DELIVER_PENDING_EXCEPTION
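
In C++ terms the rewritten macro is just an exception-pending test that no longer burns a scratch register; a sketch of its control flow using Thread::IsExceptionPending() (DeliverPendingException is a hypothetical stand-in for the DELIVER_PENDING_EXCEPTION macro):

    // Sketch of the macro's control flow, not runtime code.
    if (!self->IsExceptionPending()) {
      return;                        // fast path: plain ret
    }
    DeliverPendingException(self);   // hypothetical; builds a save-all frame and throws
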
@@ -566,7 +601,7 @@
     testl %eax, %eax                      // null check object/eax
     jz   .Lslow_lock
 .Lretry_lock:
-    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx  // ecx := lock word
     test LITERAL(0xC0000000), %ecx        // test the 2 high bits.
     jne  .Lslow_lock                      // slow path if either of the two high bits are set.
     movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
@@ -575,11 +610,11 @@
     // unlocked case - %edx holds thread id with count of 0
     movl %eax, %ecx                       // remember object in case of retry
     xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
-    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%ecx)
+    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
     jnz  .Lcmpxchg_fail                   // cmpxchg failed retry
     ret
 .Lcmpxchg_fail:
-    movl  %ecx, %eax                       // restore eax
+    movl  %ecx, %eax                      // restore eax
     jmp  .Lretry_lock
 .Lalready_thin:
     cmpw %cx, %dx                         // do we hold the lock already?
@@ -587,28 +622,28 @@
     addl LITERAL(65536), %ecx             // increment recursion count
     test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
     jne  .Lslow_lock                      // count overflowed so go slow
-    movl %ecx, LOCK_WORD_OFFSET(%eax)     // update lockword, cmpxchg not necessary as we hold lock
+    // update lockword, cmpxchg not necessary as we hold lock
+    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
     ret
 .Lslow_lock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH eax                      // push padding
-    PUSH edx                      // pass SP
+    subl LITERAL(8), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass object
-    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
+    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
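
The fast path above decodes ART's thin lock word inline: the two high bits flag a fat/inflated state, the low 16 bits hold the owner's thread id, and ownership recursion is counted in units of 65536. A self-contained C++ sketch of that logic, with the layout assumed from the assembly rather than quoted from runtime headers:

    #include <atomic>
    #include <cstdint>

    // Returns true if the lock was taken inline; false means "go slow path"
    // (inflated lock, contention, cmpxchg failure, or recursion overflow).
    bool ThinLockFastPath(std::atomic<uint32_t>* lock_word, uint32_t thread_id) {
      uint32_t lw = lock_word->load(std::memory_order_relaxed);
      if ((lw & 0xC0000000u) != 0) return false;   // high bits set: not thin
      if (lw == 0) {                               // unlocked: try to claim it
        uint32_t expected = 0;
        return lock_word->compare_exchange_strong(expected, thread_id);
      }
      if ((lw & 0xFFFFu) == thread_id) {           // we already own it
        uint32_t bumped = lw + 65536;              // increment recursion count
        if ((bumped & 0xC0000000u) != 0) return false;        // count overflowed
        lock_word->store(bumped, std::memory_order_relaxed);  // no cmpxchg needed
        return true;
      }
      return false;                                // owned by another thread
    }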
 
 DEFINE_FUNCTION art_quick_unlock_object
     testl %eax, %eax                      // null check object/eax
     jz   .Lslow_unlock
-    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx  // ecx := lock word
     movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
     test LITERAL(0xC0000000), %ecx
     jnz  .Lslow_unlock                    // lock word contains a monitor
@@ -616,25 +651,24 @@
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
     jae  .Lrecursive_thin_unlock
-    movl LITERAL(0), LOCK_WORD_OFFSET(%eax)
+    movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
     ret
 .Lrecursive_thin_unlock:
     subl LITERAL(65536), %ecx
-    mov  %ecx, LOCK_WORD_OFFSET(%eax)
+    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
     ret
 .Lslow_unlock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %edx                // remember SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
-    PUSH eax                      // push padding
-    PUSH edx                      // pass SP
+    subl LITERAL(8), %esp         // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass object
-    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp  // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
@@ -663,15 +697,14 @@
     POP ecx
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-12)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %esp, %edx
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
     // Outgoing argument set up
-    PUSH edx                      // pass SP
+    PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
+    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
     int3                          // unreached
 END_FUNCTION art_quick_check_cast
 
@@ -687,7 +720,7 @@
 END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
 
 DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
-    movl ARRAY_LENGTH_OFFSET(%eax), %ebx
+    movl MIRROR_ARRAY_LENGTH_OFFSET(%eax), %ebx
     cmpl %ebx, %ecx
     jb SYMBOL(art_quick_aput_obj)
     mov %ecx, %eax
@@ -698,18 +731,19 @@
 DEFINE_FUNCTION art_quick_aput_obj
     test %edx, %edx              // store of null
     jz .Ldo_aput_null
-    movl CLASS_OFFSET(%eax), %ebx
-    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
-    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+    movl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ebx
+    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+    // value's type == array's component type - trivial assignability
+    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ebx
     jne .Lcheck_assignability
 .Ldo_aput:
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
     shrl LITERAL(7), %eax
     movb %dl, (%edx, %eax)
     ret
 .Ldo_aput_null:
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
     ret
 .Lcheck_assignability:
     PUSH eax                     // save arguments
@@ -717,7 +751,7 @@
     PUSH edx
     subl LITERAL(8), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl CLASS_OFFSET(%edx)     // pass arg2 - type of the value to be stored
+    pushl MIRROR_OBJECT_CLASS_OFFSET(%edx)  // pass arg2 - type of the value to be stored
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                     // pass arg1 - component type of the array
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
@@ -728,7 +762,7 @@
     POP  edx
     POP  ecx
     POP  eax
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
     shrl LITERAL(7), %eax
     movb %dl, (%edx, %eax)
@@ -737,20 +771,19 @@
     POP  edx
     POP  ecx
     POP  eax
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %esp, %ecx
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
-    PUSH ecx                      // pass SP
+    PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass arg2 - value
     PUSH eax                      // pass arg1 - array
-    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
     int3                          // unreached
 END_FUNCTION art_quick_aput_obj
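
Two details of the store path are easy to miss in the assembly: the type check only falls back to artIsAssignableFromCode when the value's exact class differs from the array's component type, and every non-null reference store dirties the card for the array by writing the low byte of the card-table base at base + (address >> 7). A hedged C++ sketch of that write barrier (the shift of 7 and the base-low-byte dirty value are read off the movb %dl trick above, not from runtime headers):

    #include <cstdint>

    // Mark the card covering 'addr' as dirty; 'biased_base' is the card table
    // base as the assembly keeps it in %edx.
    void MarkCard(uint8_t* biased_base, uintptr_t addr) {
      biased_base[addr >> 7] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_base));
    }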
 
 DEFINE_FUNCTION art_quick_memcpy
-    SETUP_GOT_NOSAVE              // clobbers EBX
+    SETUP_GOT_NOSAVE ebx          // clobbers EBX
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
@@ -856,236 +889,76 @@
     ret
 END_FUNCTION art_quick_lushr
 
-DEFINE_FUNCTION art_quick_set8_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    subl LITERAL(8), %esp         // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    mov 32(%ebx), %ebx            // get referrer
-    PUSH ebx                      // pass referrer
-    PUSH edx                      // pass new_val
-    PUSH ecx                      // pass object
-    PUSH eax                      // pass field_idx
-    call PLT_SYMBOL(artSet8InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set8_instance
+ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_set16_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    subl LITERAL(8), %esp         // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    mov 32(%ebx), %ebx            // get referrer
-    PUSH ebx                      // pass referrer
-    PUSH edx                      // pass new_val
-    PUSH ecx                      // pass object
-    PUSH eax                      // pass field_idx
-    call PLT_SYMBOL(artSet16InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set16_instance
+TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_set32_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    subl LITERAL(8), %esp         // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    mov 32(%ebx), %ebx            // get referrer
-    PUSH ebx                      // pass referrer
-    PUSH edx                      // pass new_val
-    PUSH ecx                      // pass object
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set32_instance
+TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
 
+THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
+
+// Call artSet64InstanceFromCode with 4 word-size arguments and the referrer.
 DEFINE_FUNCTION art_quick_set64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
-    PUSH esp                      // pass SP-8
-    addl LITERAL(8), (%esp)       // fix SP on stack by adding 8
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
+    pushl (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+12)(%esp)  // pass referrer
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                      // pass high half of new_val
     PUSH edx                      // pass low half of new_val
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
-    call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
+    call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_instance
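
The signature implied by the call comment, for reference (a sketch, with declarations as in the earlier downcall sketch; the referrer now rides in the fourth slot, fetched from the caller's frame at FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp)):

    extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                            uint64_t new_val,
                                            mirror::ArtMethod* referrer, Thread* self);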
 
-DEFINE_FUNCTION art_quick_set_obj_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    subl LITERAL(8), %esp         // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    mov 32(%ebx), %ebx            // get referrer
-    PUSH ebx                      // pass referrer
-    PUSH edx                      // pass new_val
-    PUSH ecx                      // pass object
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSetObjInstanceFromCode) // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set_obj_instance
-
-TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-
-DEFINE_FUNCTION art_quick_get64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass object
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
-END_FUNCTION art_quick_get64_instance
-
-DEFINE_FUNCTION art_quick_set8_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass new_val
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSet8StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set8_static
-
-DEFINE_FUNCTION art_quick_set16_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass new_val
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSet16StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set16_static
-
-DEFINE_FUNCTION art_quick_set32_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass new_val
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSet32StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set32_static
-
+// Call artSet64StaticFromCode with 3 word-size arguments plus the referrer in the 2nd position
+// so that new_val is aligned on even registers were we to pass arguments in registers.
 DEFINE_FUNCTION art_quick_set64_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    subl LITERAL(8), %esp         // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ebx                      // pass SP
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    mov 32(%ebx), %ebx            // get referrer
     PUSH edx                      // pass high half of new_val
     PUSH ecx                      // pass low half of new_val
     PUSH ebx                      // pass referrer
     PUSH eax                      // pass field_idx
-    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
+    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_static
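
For contrast with the instance variant, the static setter keeps the referrer in the second slot so the 64-bit new_val starts on an even argument position, which matters on targets that pass these arguments in register pairs. Sketch of the implied signature (declarations as above):

    extern "C" int artSet64StaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                          uint64_t new_val, Thread* self);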
 
-DEFINE_FUNCTION art_quick_set_obj_static
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
-    mov %esp, %ebx                // remember SP
-    mov 32(%esp), %edx            // get referrer
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass referrer
-    PUSH ecx                      // pass new_val
-    PUSH eax                      // pass field_idx
-    call SYMBOL(artSetObjStaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    addl LITERAL(32), %esp        // pop arguments
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    RETURN_IF_EAX_ZERO            // return or deliver exception
-END_FUNCTION art_quick_set_obj_static
-
-ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
-
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // save frame and Method*
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1107,15 +980,15 @@
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
     PUSH ecx
     movl 8(%esp), %eax            // load caller Method*
-    movl METHOD_DEX_CACHE_METHODS_OFFSET(%eax), %eax  // load dex_cache_resolved_methods
+    movl MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET(%eax), %eax  // load dex_cache_resolved_methods
     movd %xmm0, %ecx              // get target method index stored in xmm0
-    movl OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax  // load the target method
+    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax  // load the target method
     POP ecx
     jmp SYMBOL(art_quick_invoke_interface_trampoline)
 END_FUNCTION art_quick_imt_conflict_trampoline
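
The trampoline's lookup is compact enough to obscure what it does: the caller's Method* provides the dex-cache resolved-methods array, the target method index arrives smuggled in xmm0, and the fetched method is tail-called through the interface trampoline. A rough C++ rendering (sketch; the accessor names follow the runtime's mirror types and are assumptions here):

    mirror::ArtMethod* LookupImtConflictTarget(mirror::ArtMethod* caller,
                                               uint32_t target_method_index) {
      // MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET: the resolved-methods array.
      mirror::ObjectArray<mirror::ArtMethod>* methods =
          caller->GetDexCacheResolvedMethods();
      // MIRROR_OBJECT_ARRAY_DATA_OFFSET(..., index, 4): 4-byte refs, unchecked.
      return methods->GetWithoutChecks(target_method_index);
    }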
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
     movl %esp, %edi
     PUSH EDI                      // pass SP. do not just PUSH ESP; that messes up unwinding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1136,14 +1009,12 @@
     xchgl 0(%esp),%edi            // restore EDI and place code pointer as only value on stack
     ret                           // tail call into method
 1:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    // This also stores the native ArtMethod reference at the bottom of the stack.
-
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
     movl %esp, %ebp                 // save SP at callee-save frame
     CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
@@ -1151,7 +1022,6 @@
     // (Thread*,  SP)
     //  (esp)    4(esp)   <= C calling convention
     //  fs:...    ebp     <= where they are
-    // Also: PLT, so need GOT in ebx.
 
     subl LITERAL(8), %esp         // Padding for 16B alignment.
     pushl %ebp                    // Pass SP (to ArtMethod).
@@ -1216,12 +1086,12 @@
     movl %ebp, %esp
     CFI_DEF_CFA_REGISTER(esp)
 .Lexception_in_native:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // save frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  ebx, ebx  // save frame
     mov %esp, %edx                // remember SP
     PUSH eax                      // alignment padding
     PUSH edx                      // pass SP
@@ -1234,7 +1104,7 @@
     punpckldq %xmm1, %xmm0
     addl LITERAL(16), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
 
@@ -1242,26 +1112,23 @@
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    movl  %esp, %edx              // Save SP.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
-    subl LITERAL(8), %esp         // Align stack.
-    CFI_ADJUST_CFA_OFFSET(8)
+    subl LITERAL(12), %esp        // Align stack.
+    CFI_ADJUST_CFA_OFFSET(12)
     pushl 40(%esp)                // Pass LR.
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
-    SETUP_GOT_NOSAVE
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
     addl LITERAL(28), %esp        // Pop arguments up to saved Method*.
     movl 28(%esp), %edi           // Restore edi.
     movl %eax, 28(%esp)           // Place code* over edi, just under return pc.
     movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
+    // Place instrumentation exit as return pc. ebx holds the GOT computed on entry.
     movl %ebx, 32(%esp)
-                                  // Place instrumentation exit as return pc.
     movl (%esp), %eax             // Restore eax.
     movl 8(%esp), %ecx            // Restore ecx.
     movl 12(%esp), %edx           // Restore edx.
@@ -1274,7 +1141,7 @@
 
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushl LITERAL(0)              // Push a fake return PC as there will be none on the stack.
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1300,7 +1167,7 @@
     movq (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
     CFI_ADJUST_CFA_OFFSET(-8)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     addl LITERAL(4), %esp         // Remove fake return pc.
     jmp   *%ecx                   // Return.
 END_FUNCTION art_quick_instrumentation_exit
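
The entry/exit pair works by return-address hijacking: artInstrumentationMethodEntryFromCode returns the code pointer to invoke, and before jumping there the entry stub overwrites the saved return pc with art_quick_instrumentation_exit so the method's return is intercepted as well. The helper's post-change signature, as the call comment implies (sketch, declarations as above):

    extern "C" const void* artInstrumentationMethodEntryFromCode(
        mirror::ArtMethod* method, mirror::Object* this_object,
        Thread* self, uintptr_t lr);
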
@@ -1311,14 +1178,12 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     pushl %ebx                    // Fake that we were called.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    mov  %esp, %ecx               // Remember SP.
-    subl LITERAL(8), %esp         // Align stack.
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH ecx                      // Pass SP.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    subl LITERAL(12), %esp        // Align stack.
+    CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artDeoptimize)  // artDeoptimize(Thread*, SP)
+    call SYMBOL(artDeoptimize)    // artDeoptimize(Thread*)
     int3                          // Unreachable.
 END_FUNCTION art_quick_deoptimize
 
@@ -1332,15 +1197,15 @@
 DEFINE_FUNCTION art_quick_string_compareto
     PUSH esi                    // push callee save reg
     PUSH edi                    // push callee save reg
-    mov STRING_COUNT_OFFSET(%eax), %edx
-    mov STRING_COUNT_OFFSET(%ecx), %ebx
-    mov STRING_VALUE_OFFSET(%eax), %esi
-    mov STRING_VALUE_OFFSET(%ecx), %edi
-    mov STRING_OFFSET_OFFSET(%eax), %eax
-    mov STRING_OFFSET_OFFSET(%ecx), %ecx
+    mov MIRROR_STRING_COUNT_OFFSET(%eax), %edx
+    mov MIRROR_STRING_COUNT_OFFSET(%ecx), %ebx
+    mov MIRROR_STRING_VALUE_OFFSET(%eax), %esi
+    mov MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
+    mov MIRROR_STRING_OFFSET_OFFSET(%eax), %eax
+    mov MIRROR_STRING_OFFSET_OFFSET(%ecx), %ecx
     /* Build pointers to the start of string data */
-    lea  STRING_DATA_OFFSET(%esi, %eax, 2), %esi
-    lea  STRING_DATA_OFFSET(%edi, %ecx, 2), %edi
+    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%esi, %eax, 2), %esi
+    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%edi, %ecx, 2), %edi
     /* Calculate min length and count diff */
     mov   %edx, %ecx
     mov   %edx, %eax
@@ -1375,7 +1240,7 @@
 //  eax: address of jmp_buf in TLS
 
 DEFINE_FUNCTION art_nested_signal_return
-    SETUP_GOT_NOSAVE                // sets %ebx for call into PLT
+    SETUP_GOT_NOSAVE ebx            // sets %ebx for call into PLT
     movl LITERAL(1), %ecx
     pushl %ecx                      // second arg to longjmp (1)
     pushl %eax                      // first arg to longjmp (jmp_buf)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 40958dc..eddd172 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,30 +19,8 @@
 
 #include "asm_support.h"
 
-// Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
-
-// Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 192
-// Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
-// Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 128
-// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
-#define THREAD_ID_OFFSET 12
-
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
 
-// Expected size of a heap reference
-#define HEAP_REFERENCE_SIZE 4
-// Expected size of a stack reference
-#define STACK_REFERENCE_SIZE 4
-
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 648a99a..bed7238 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -57,25 +57,25 @@
     PUSH r12  // Callee save.
     PUSH rbp  // Callee save.
     PUSH rbx  // Callee save.
-    // Create space for FPR args, plus padding for alignment
-    subq LITERAL(4 * 8), %rsp
-    CFI_ADJUST_CFA_OFFSET(4 * 8)
+    // Create space for FPR args, plus space for StackReference<ArtMethod>.
+    subq MACRO_LITERAL(4 * 8 + 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
     // Save FPRs.
-    movq %xmm12, 0(%rsp)
-    movq %xmm13, 8(%rsp)
-    movq %xmm14, 16(%rsp)
-    movq %xmm15, 24(%rsp)
-    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
-    CFI_ADJUST_CFA_OFFSET(8)
+    movq %xmm12, 8(%rsp)
+    movq %xmm13, 16(%rsp)
+    movq %xmm14, 24(%rsp)
+    movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
@@ -85,7 +85,7 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly)
      */
-MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -100,9 +100,9 @@
     PUSH r12  // Callee save.
     PUSH rbp  // Callee save.
     PUSH rbx  // Callee save.
-    // Create space for FPR args, plus padding for alignment
-    subq LITERAL(8 + 4*8), %rsp
-    CFI_ADJUST_CFA_OFFSET(8 + 4*8)
+    // Create space for FPR args, plus space for StackReference<ArtMethod>.
+    subq LITERAL(8 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
     // Save FPRs.
     movq %xmm12, 8(%rsp)
     movq %xmm13, 16(%rsp)
@@ -113,16 +113,18 @@
     movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
     movq 8(%rsp), %xmm12
     movq 16(%rsp), %xmm13
     movq 24(%rsp), %xmm14
@@ -142,7 +144,7 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
      */
-MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -162,12 +164,13 @@
     PUSH rbx  // Callee save.
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
-    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
+    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the
+    // StackReference<ArtMethod>.
     subq MACRO_LITERAL(80 + 4 * 8), %rsp
     CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -183,16 +186,54 @@
     movq %xmm15, 104(%rsp)
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
+    // Save callee and GPR args, mixed together to agree with core spills bitmap.
+    PUSH r15  // Callee save.
+    PUSH r14  // Callee save.
+    PUSH r13  // Callee save.
+    PUSH r12  // Callee save.
+    PUSH r9   // Quick arg 5.
+    PUSH r8   // Quick arg 4.
+    PUSH rsi  // Quick arg 1.
+    PUSH rbp  // Callee save.
+    PUSH rbx  // Callee save.
+    PUSH rdx  // Quick arg 2.
+    PUSH rcx  // Quick arg 3.
+    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the
+    // StackReference<ArtMethod>.
+    subq LITERAL(80 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
+    // Save FPRs.
+    movq %xmm0, 16(%rsp)
+    movq %xmm1, 24(%rsp)
+    movq %xmm2, 32(%rsp)
+    movq %xmm3, 40(%rsp)
+    movq %xmm4, 48(%rsp)
+    movq %xmm5, 56(%rsp)
+    movq %xmm6, 64(%rsp)
+    movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
+    // Store ArtMethod* to bottom of stack.
+    movq %rdi, 0(%rsp)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+END_MACRO
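
Every setup macro now ends by publishing rsp through THREAD_TOP_QUICK_FRAME_OFFSET, which is what lets the helpers above shed their SP parameter. In C++ terms the store amounts to something like the following (field names are assumptions sketched from the offset macro, not quoted from runtime headers):

    // Sketch (hypothetical internals): the movq to %gs: above is roughly
    //   self->tlsPtr_.managed_stack.top_quick_frame_ =
    //       reinterpret_cast<StackReference<mirror::ArtMethod>*>(rsp);
    // so runtime helpers recover the quick frame from Thread::Current().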
+
+MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
     // Restore FPRs.
     movq 16(%rsp), %xmm0
     movq 24(%rsp), %xmm1
@@ -229,10 +270,9 @@
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
-    // (Thread*, SP) setup
+    // (Thread*) setup
     movq %gs:THREAD_SELF_OFFSET, %rdi
-    movq %rsp, %rsi
-    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
+    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
     UNREACHABLE
 END_MACRO
 
@@ -240,9 +280,8 @@
     DEFINE_FUNCTION VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
-    movq %rsp, %rsi                    // pass SP
     movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
+    call VAR(cxx_name, 1)     // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -251,9 +290,8 @@
     DEFINE_FUNCTION VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
-    movq %rsp, %rdx                    // pass SP
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
+    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -262,9 +300,8 @@
     DEFINE_FUNCTION VAR(c_name, 0)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
-    movq %rsp, %rcx                    // pass SP
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
+    call VAR(cxx_name, 1)     // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -321,7 +358,7 @@
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
@@ -333,7 +370,7 @@
                                                            // save the code pointer
     movq %rax, %rdi
     movq %rdx, %rax
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     testq %rdi, %rdi
     jz 1f
@@ -481,7 +518,7 @@
     LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
     LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
 .Lgpr_setup_finished:
-    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
     movq %rbp, %rsp               // Restore stack pointer.
     CFI_DEF_CFA_REGISTER(rsp)
     POP r9                        // Pop r9 - shorty*.
@@ -564,7 +601,7 @@
     LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
     LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
 .Lgpr_setup_finished2:
-    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
     movq %rbp, %rsp               // Restore stack pointer.
     CFI_DEF_CFA_REGISTER(rsp)
     POP r9                        // Pop r9 - shorty*.
@@ -639,88 +676,81 @@
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
-    movq %rsp, %rsi                   // pass SP
-    movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
-    call VAR(cxx_name, 1)         // cxx_name(Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)       // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
+    call VAR(cxx_name, 1)                // cxx_name(Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    CALL_MACRO(return_macro, 2)          // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
-    movq %rsp, %rdx                    // pass SP
-    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name, 1)          // cxx_name(arg0, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
+    call VAR(cxx_name, 1)                // cxx_name(arg0, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    CALL_MACRO(return_macro, 2)          // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
     // Outgoing argument set up
-    movq %rsp, %rcx                    // pass SP
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name, 1)          // cxx_name(arg0, arg1, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro, 2)       // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
+    call VAR(cxx_name, 1)                // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    CALL_MACRO(return_macro, 2)          // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
-    movq %rsp, %r8                     // pass SP
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    call VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
+    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)         // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %esi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0 is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    movl 8(%rsp), %esi                  // pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+                                        // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
+    call VAR(cxx_name, 1)               // cxx_name(arg0, referrer, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
     CALL_MACRO(return_macro, 2)
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %edx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0 and arg1 are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    movl 8(%rsp), %edx                  // pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+                                        // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
+    call VAR(cxx_name, 1)               // (arg0, arg1, referrer, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
     CALL_MACRO(return_macro, 2)
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %ecx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movl 8(%rsp), %ecx                  // pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    movq %rsp, %r9                     // pass SP
-    call VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, referrer, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)         // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
@@ -864,7 +894,7 @@
     testl %edi, %edi                      // Null check object/rdi.
     jz   .Lslow_lock
 .Lretry_lock:
-    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word.
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
     test LITERAL(0xC0000000), %ecx        // Test the 2 high bits.
     jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
@@ -872,7 +902,7 @@
     jnz  .Lalready_thin                   // Lock word contains a thin lock.
     // unlocked case - %edx holds thread id with count of 0
     xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
-    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%edi)
+    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
     jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
 .Lalready_thin:
@@ -881,21 +911,21 @@
     addl LITERAL(65536), %ecx             // increment recursion count
     test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
     jne  .Lslow_lock                      // count overflowed so go slow
-    movl %ecx, LOCK_WORD_OFFSET(%edi)     // update lockword, cmpxchg not necessary as we hold lock
+    // update lockword, cmpxchg not necessary as we hold lock
+    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
     ret
 .Lslow_lock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
-    movq %rsp, %rdx                       // pass SP
-    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
 DEFINE_FUNCTION art_quick_unlock_object
     testl %edi, %edi                      // null check object/edi
     jz   .Lslow_unlock
-    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
     test LITERAL(0xC0000000), %ecx
     jnz  .Lslow_unlock                    // lock word contains a monitor
@@ -903,18 +933,17 @@
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
     jae  .Lrecursive_thin_unlock
-    movl LITERAL(0), LOCK_WORD_OFFSET(%edi)
+    movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
     ret
 .Lrecursive_thin_unlock:
     subl LITERAL(65536), %ecx
-    mov  %ecx, LOCK_WORD_OFFSET(%edi)
+    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
     ret
 .Lslow_unlock:
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
-    movq %rsp, %rdx                       // pass SP
-    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
@@ -935,9 +964,8 @@
     POP rsi                           // Pop arguments
     POP rdi
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %rsp, %rcx                    // pass SP
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
+    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
     int3                              // unreached
 END_FUNCTION art_quick_check_cast
 
@@ -969,8 +997,8 @@
     int3
     int3
 #else
-    movl ARRAY_LENGTH_OFFSET(%edi), %ecx
-//  movl ARRAY_LENGTH_OFFSET(%rdi), %ecx      // This zero-extends, so value(%rcx)=value(%ecx)
+    movl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %ecx
+//  movl MIRROR_ARRAY_LENGTH_OFFSET(%rdi), %ecx  // This zero-extends, so value(%rcx)=value(%ecx)
     cmpl %ecx, %esi
     jb art_quick_aput_obj
     mov %esi, %edi
@@ -986,24 +1014,24 @@
     testl %edx, %edx                // store of null
 //  test %rdx, %rdx
     jz .Ldo_aput_null
-    movl CLASS_OFFSET(%edi), %ecx
-//  movq CLASS_OFFSET(%rdi), %rcx
-    movl CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
-//  movq CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
-    cmpl CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
-//  cmpq CLASS_OFFSET(%rdx), %rcx
+    movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %ecx
+//  movq MIRROR_OBJECT_CLASS_OFFSET(%rdi), %rcx
+    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
+//  movq MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
+    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
+//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
     jne .Lcheck_assignability
 .Ldo_aput:
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
     shrl LITERAL(7), %edi
 //  shrl LITERAL(7), %rdi
     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
     ret
 .Ldo_aput_null:
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     ret
 .Lcheck_assignability:
     // Save arguments.
@@ -1015,7 +1043,7 @@
     SETUP_FP_CALLEE_SAVE_FRAME
 
                                   // "Uncompress" = do nothing, as already zero-extended on load.
-    movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
+    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
     movq %rcx, %rdi               // Pass arg1 = array's component type.
 
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
@@ -1032,8 +1060,8 @@
     POP  rsi
     POP  rdi
 
-    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
     shrl LITERAL(7), %edi
 //  shrl LITERAL(7), %rdi
@@ -1052,12 +1080,10 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
 
     // Outgoing argument set up.
-    movq %rsp, %rcx                         // Pass arg 4 = SP.
     movq %rdx, %rsi                         // Pass arg 2 = value.
-    movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
+    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                             // Pass arg 1 = array.
-
-    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
     int3                          // unreached
 END_FUNCTION art_quick_aput_obj
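
The write-barrier tail above (the shrl by 7 followed by a byte store) is the card-marking scheme; in rough C++ terms (an illustration, not ART's CardTable API):

    #include <cstdint>

    // Card mark performed after a reference store: one card covers 128
    // bytes (hence the shift by 7), and the byte written is the low byte
    // of the card-table base itself (%dl above), which spares loading a
    // separate "dirty" constant.
    void MarkCard(uint8_t* card_table_base, uintptr_t object_address) {
      uint8_t dirty = static_cast<uint8_t>(
          reinterpret_cast<uintptr_t>(card_table_base));
      card_table_base[object_address >> 7] = dirty;
    }
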
 
@@ -1079,7 +1105,7 @@
 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
-THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
 
 TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
@@ -1105,55 +1131,25 @@
 
 // This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
-    movq %rsi, %rdx                    // pass new_val
-    movl 8(%rsp), %esi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
+    movq %rsi, %rdx                      // pass new_val
+    movl 8(%rsp), %esi                   // pass referrer
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+                                         // field_idx is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
+    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_EAX_ZERO                   // return or deliver exception
 END_FUNCTION art_quick_set64_static
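
The shuffle exists because the wide new_val arrives in %rsi under the managed calling convention, while the C entrypoint, whose declaration this patch updates later in quick_field_entrypoints.cc, takes the referrer second. A self-contained restatement of that target signature:

    #include <cstdint>

    namespace art {
    class Thread;
    namespace mirror { class ArtMethod; }
    }  // namespace art

    // Target of the call above (restated from this patch's
    // quick_field_entrypoints.cc hunk; the forward declarations are added
    // here only to keep the snippet self-contained):
    extern "C" int artSet64StaticFromCode(uint32_t field_idx,
                                          art::mirror::ArtMethod* referrer,
                                          uint64_t new_value,
                                          art::Thread* self);
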
 
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
-    // callee save frame.
-    PUSH r15  // Callee save.
-    PUSH r14  // Callee save.
-    PUSH r13  // Callee save.
-    PUSH r12  // Callee save.
-    PUSH r9   // Quick arg 5.
-    PUSH r8   // Quick arg 4.
-    PUSH rsi  // Quick arg 1.
-    PUSH rbp  // Callee save.
-    PUSH rbx  // Callee save.
-    PUSH rdx  // Quick arg 2.
-    PUSH rcx  // Quick arg 3.
-    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80 + 4*8), %rsp
-    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
-    // Save FPRs.
-    movq %xmm0, 16(%rsp)
-    movq %xmm1, 24(%rsp)
-    movq %xmm2, 32(%rsp)
-    movq %xmm3, 40(%rsp)
-    movq %xmm4, 48(%rsp)
-    movq %xmm5, 56(%rsp)
-    movq %xmm6, 64(%rsp)
-    movq %xmm7, 72(%rsp)
-    movq %xmm12, 80(%rsp)
-    movq %xmm13, 88(%rsp)
-    movq %xmm14, 96(%rsp)
-    movq %xmm15, 104(%rsp)
-    // Store proxy method to bottom of stack.
-    movq %rdi, 0(%rsp)
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread::Current().
-    movq %rsp, %rcx                    // Pass SP.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+
+    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
+    movq %rsp, %rcx                         // Pass SP.
     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
-    movq %rax, %xmm0                   // Copy return value in case of float returns.
-    addq LITERAL(168 + 4*8), %rsp            // Pop arguments.
+    movq %rax, %xmm0                        // Copy return value in case of float returns.
+    addq LITERAL(168 + 4*8), %rsp           // Pop arguments.
     CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
@@ -1168,20 +1164,20 @@
     int3
 #else
     movl 8(%rsp), %edi            // load caller Method*
-    movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
-    movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
+    movl MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
+    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
     jmp art_quick_invoke_interface_trampoline
 #endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rdx
     movq %rsp, %rcx
     call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
     movq %rax, %r10               // Remember returned code pointer in R10.
     movq (%rsp), %rdi             // Load called method into RDI.
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     testq %r10, %r10              // If code pointer is NULL goto deliver pending exception.
     jz 1f
     jmp *%r10                     // Tail call into method.
@@ -1267,37 +1263,11 @@
      * Called to do a generic JNI down-call
      */
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    // Save callee and GPR args, mixed together to agree with core spills bitmap.
-    PUSH r15  // Callee save.
-    PUSH r14  // Callee save.
-    PUSH r13  // Callee save.
-    PUSH r12  // Callee save.
-    PUSH r9   // Quick arg 5.
-    PUSH r8   // Quick arg 4.
-    PUSH rsi  // Quick arg 1.
-    PUSH rbp  // Callee save.
-    PUSH rbx  // Callee save.
-    PUSH rdx  // Quick arg 2.
-    PUSH rcx  // Quick arg 3.
-    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80 + 4*8), %rsp
-    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
-    // Save FPRs.
-    movq %xmm0, 16(%rsp)
-    movq %xmm1, 24(%rsp)
-    movq %xmm2, 32(%rsp)
-    movq %xmm3, 40(%rsp)
-    movq %xmm4, 48(%rsp)
-    movq %xmm5, 56(%rsp)
-    movq %xmm6, 64(%rsp)
-    movq %xmm7, 72(%rsp)
-    movq %xmm12, 80(%rsp)
-    movq %xmm13, 88(%rsp)
-    movq %xmm14, 96(%rsp)
-    movq %xmm15, 104(%rsp)
-    movq %rdi, 0(%rsp)              // Store native ArtMethod* to bottom of stack.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+
     movq %rsp, %rbp                 // save SP at (old) callee-save frame
     CFI_DEF_CFA_REGISTER(rbp)
+
     //
     // reserve a lot of space
     //
@@ -1454,11 +1424,11 @@
      * RSI, RDX, RCX, R8, R9 are arguments to that method.
      */
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
     movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
     movq %rsp, %rdx                        // RDX := sp
     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
     movq %rax, %xmm0                   // Place return value also into floating point return value.
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
@@ -1471,15 +1441,14 @@
     int3
     int3
 #else
-    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq %rsp, %rcx                     // Pass SP.
     movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.
 
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
 
                                   // %rax = result of call.
     movq %r12, %rdi               // Reload method pointer.
@@ -1487,7 +1456,7 @@
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
     movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
 
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
 
     jmp *%rax                     // Tail call to intended method.
 #endif  // __APPLE__
@@ -1496,7 +1465,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
 
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
 
     // We need to save rax and xmm0. We could use a callee-save from SETUP_REFS_ONLY, but then
     // we would need to fully restore it. As there are a good number of callee-save registers, it
@@ -1536,9 +1505,8 @@
     pushq %rsi                     // Fake that we were called. Use hidden arg.
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                    // Stack should be aligned now.
-    movq %rsp, %rsi                           // Pass SP.
     movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
-    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*, SP)
+    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
     int3                           // Unreachable.
 END_FUNCTION art_quick_deoptimize
 
@@ -1551,15 +1519,15 @@
      *    rsi:   comp string object (known non-null)
      */
 DEFINE_FUNCTION art_quick_string_compareto
-    movl STRING_COUNT_OFFSET(%edi), %r8d
-    movl STRING_COUNT_OFFSET(%esi), %r9d
-    movl STRING_VALUE_OFFSET(%edi), %r10d
-    movl STRING_VALUE_OFFSET(%esi), %r11d
-    movl STRING_OFFSET_OFFSET(%edi), %eax
-    movl STRING_OFFSET_OFFSET(%esi), %ecx
+    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
+    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
+    movl MIRROR_STRING_VALUE_OFFSET(%edi), %r10d
+    movl MIRROR_STRING_VALUE_OFFSET(%esi), %r11d
+    movl MIRROR_STRING_OFFSET_OFFSET(%edi), %eax
+    movl MIRROR_STRING_OFFSET_OFFSET(%esi), %ecx
     /* Build pointers to the start of string data */
-    leal STRING_DATA_OFFSET(%r10d, %eax, 2), %esi
-    leal STRING_DATA_OFFSET(%r11d, %ecx, 2), %edi
+    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r10d, %eax, 2), %esi
+    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r11d, %ecx, 2), %edi
     /* Calculate min length and count diff */
     movl  %r8d, %ecx
     movl  %r8d, %eax
@@ -1605,5 +1573,3 @@
     call PLT_SYMBOL(longjmp)
     int3                            // won't get here
 END_FUNCTION art_nested_signal_return
-
-
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
index 6dff2b4..553b656 100644
--- a/runtime/arch/x86_64/thread_x86_64.cc
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -49,29 +49,16 @@
 
   // Sanity check that reads from %gs point to this Thread*.
   Thread* self_check;
-  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<8>().Int32Value());
   __asm__ __volatile__("movq %%gs:(%1), %0"
       : "=r"(self_check)  // output
       : "r"(THREAD_SELF_OFFSET)  // input
       :);  // clobber
   CHECK_EQ(self_check, this);
-
-  // Sanity check other offsets.
-  CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
-           Runtime::GetCalleeSaveMethodOffset(Runtime::kSaveAll));
-  CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
-           Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsOnly));
-  CHECK_EQ(static_cast<size_t>(RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
-           Runtime::GetCalleeSaveMethodOffset(Runtime::kRefsAndArgs));
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<8>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
   // Sanity check that reads from %gs point to this Thread*.
   Thread* self_check;
-  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<8>().Int32Value());
   __asm__ __volatile__("movq %%gs:(%1), %0"
       : "=r"(self_check)  // output
       : "r"(THREAD_SELF_OFFSET)  // input
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 62f3593..26df045 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -17,56 +17,147 @@
 #ifndef ART_RUNTIME_ASM_SUPPORT_H_
 #define ART_RUNTIME_ASM_SUPPORT_H_
 
+#if defined(__cplusplus)
+#include "mirror/art_method.h"
+#include "mirror/class.h"
+#include "mirror/string.h"
+#include "runtime.h"
+#include "thread.h"
+#endif
+
 #include "read_barrier_c.h"
 
-// Value loaded into rSUSPEND for quick. When this value is counted down to zero we do a suspend
-// check.
-#define SUSPEND_CHECK_INTERVAL (1000)
+#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
+// In quick code for ARM, ARM64 and MIPS we make poor use of registers and perform frequent
+// suspend checks on loop back edges. The SUSPEND_CHECK_INTERVAL constant is loaded into a
+// register at the point of an up-call or after handling a suspend check. It reduces the number of
+// loads of the TLS suspend check value by the given amount (turning it into a decrement and compare
+// of a register). This increases the time for a thread to respond to requests from GC and the
+// debugger, damaging GC performance and creating other unwanted artifacts. For example, this count
+// has the effect of making loops and Java code look cold in profilers, since the point at which
+// the count is reset determines where samples will occur. Reducing the count as much as possible
+// improves profiler accuracy in tools like traceview.
+// TODO: get a compiler that can do a proper job of loop optimization and remove this.
+#define SUSPEND_CHECK_INTERVAL 1000
+#endif
+
+#if defined(__cplusplus)
+
+#ifndef ADD_TEST_EQ  // Allow includers to replace with their own.
+#define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
+#endif
+
+static inline void CheckAsmSupportOffsetsAndSizes() {
+#else
+#define ADD_TEST_EQ(x, y)
+#endif
+
+// Size of a stack reference to a heap object.
+#define STACK_REFERENCE_SIZE 4
+ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
+
+// Note: loads of these callee save methods require read barriers.
+// Offset of field Runtime::callee_save_methods_[kSaveAll]
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
+ADD_TEST_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
+            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
+
+// Offset of field Runtime::callee_save_methods_[kRefsOnly]
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET __SIZEOF_POINTER__
+ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
+            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
+
+// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
+#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
+            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
+
+// Offset of field Thread::tls32_.state_and_flags.
+#define THREAD_FLAGS_OFFSET 0
+ADD_TEST_EQ(THREAD_FLAGS_OFFSET,
+            art::Thread::ThreadFlagsOffset<__SIZEOF_POINTER__>().Int32Value())
+
+// Offset of field Thread::tls32_.thin_lock_thread_id.
+#define THREAD_ID_OFFSET 12
+ADD_TEST_EQ(THREAD_ID_OFFSET,
+            art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
+
+// Offset of field Thread::tlsPtr_.card_table.
+#define THREAD_CARD_TABLE_OFFSET 120
+ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
+            art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
+
+// Offset of field Thread::tlsPtr_.exception.
+#define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_EXCEPTION_OFFSET,
+            art::Thread::ExceptionOffset<__SIZEOF_POINTER__>().Int32Value())
+
+// Offset of field Thread::tlsPtr_.managed_stack.top_quick_frame_.
+#define THREAD_TOP_QUICK_FRAME_OFFSET (THREAD_CARD_TABLE_OFFSET + (3 * __SIZEOF_POINTER__))
+ADD_TEST_EQ(THREAD_TOP_QUICK_FRAME_OFFSET,
+            art::Thread::TopOfManagedStackOffset<__SIZEOF_POINTER__>().Int32Value())
+
+// Offset of field Thread::tlsPtr_.self.
+#define THREAD_SELF_OFFSET (THREAD_CARD_TABLE_OFFSET + (8 * __SIZEOF_POINTER__))
+ADD_TEST_EQ(THREAD_SELF_OFFSET,
+            art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offsets within java.lang.Object.
-#define CLASS_OFFSET 0
-#define LOCK_WORD_OFFSET 4
+#define MIRROR_OBJECT_CLASS_OFFSET 0
+ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
+#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
+ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
 
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-
-// Offsets within java.lang.Class.
-#define CLASS_COMPONENT_TYPE_OFFSET 12
-
-// Array offsets.
-#define ARRAY_LENGTH_OFFSET 8
-#define OBJECT_ARRAY_DATA_OFFSET 12
-
-// Offsets within java.lang.String.
-#define STRING_VALUE_OFFSET 8
-#define STRING_COUNT_OFFSET 12
-#define STRING_OFFSET_OFFSET 20
-#define STRING_DATA_OFFSET 12
-
-// Offsets within java.lang.Method.
-#define METHOD_DEX_CACHE_METHODS_OFFSET 12
-#define METHOD_PORTABLE_CODE_OFFSET 40
-#define METHOD_QUICK_CODE_OFFSET 48
-
+#if defined(USE_BAKER_OR_BROOKS_READ_BARRIER)
+#define MIRROR_OBJECT_HEADER_SIZE 16
 #else
+#define MIRROR_OBJECT_HEADER_SIZE 8
+#endif
+ADD_TEST_EQ(static_cast<size_t>(MIRROR_OBJECT_HEADER_SIZE), sizeof(art::mirror::Object))
 
 // Offsets within java.lang.Class.
-#define CLASS_COMPONENT_TYPE_OFFSET 20
+#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
+            art::mirror::Class::ComponentTypeOffset().Int32Value())
 
 // Array offsets.
-#define ARRAY_LENGTH_OFFSET 16
-#define OBJECT_ARRAY_DATA_OFFSET 20
+#define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
+ADD_TEST_EQ(MIRROR_ARRAY_LENGTH_OFFSET, art::mirror::Array::LengthOffset().Int32Value())
+
+#define MIRROR_CHAR_ARRAY_DATA_OFFSET   (4 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CHAR_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value())
+
+#define MIRROR_OBJECT_ARRAY_DATA_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_OBJECT_ARRAY_DATA_OFFSET,
+    art::mirror::Array::DataOffset(
+        sizeof(art::mirror::HeapReference<art::mirror::Object>)).Int32Value())
 
 // Offsets within java.lang.String.
-#define STRING_VALUE_OFFSET 16
-#define STRING_COUNT_OFFSET 20
-#define STRING_OFFSET_OFFSET 28
-#define STRING_DATA_OFFSET 20
+#define MIRROR_STRING_VALUE_OFFSET  MIRROR_OBJECT_HEADER_SIZE
+ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
-// Offsets within java.lang.Method.
-#define METHOD_DEX_CACHE_METHODS_OFFSET 20
-#define METHOD_PORTABLE_CODE_OFFSET 48
-#define METHOD_QUICK_CODE_OFFSET 56
+#define MIRROR_STRING_COUNT_OFFSET  (4 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_STRING_COUNT_OFFSET, art::mirror::String::CountOffset().Int32Value())
 
+#define MIRROR_STRING_OFFSET_OFFSET (12 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_STRING_OFFSET_OFFSET, art::mirror::String::OffsetOffset().Int32Value())
+
+// Offsets within java.lang.reflect.ArtMethod.
+#define MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
+            art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
+
+#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET     (32 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET,
+            art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value())
+
+#define MIRROR_ART_METHOD_QUICK_CODE_OFFSET        (40 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET,
+            art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())
+
+#if defined(__cplusplus)
+}  // End of CheckAsmSupportOffsetsAndSizes.
 #endif
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
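
The ADD_TEST_EQ device above lets one header serve both the assembler (bare #defines) and C++ (every constant checked against the real layout). A self-contained miniature of the same pattern, with a hypothetical Widget type and a renamed macro so it cannot collide with the real one; asm_support.h uses CHECK_EQ where plain assert stands in here:

    #if defined(__cplusplus)
    #include <cassert>
    #include <cstddef>

    struct Widget {
      long pad;   // 8 bytes on LP64
      int value;  // expected at offset 8
    };
    #define ADD_TEST_EQ_SKETCH(x, y) assert((x) == (y));
    static inline void CheckWidgetOffsets() {
    #else
    #define ADD_TEST_EQ_SKETCH(x, y)
    #endif

    #define WIDGET_VALUE_OFFSET 8
    ADD_TEST_EQ_SKETCH(WIDGET_VALUE_OFFSET, static_cast<int>(offsetof(Widget, value)))

    #if defined(__cplusplus)
    }  // End of CheckWidgetOffsets.
    #endif
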
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 1a78d72..8c2293f 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -84,8 +84,12 @@
         case DexRegisterMap::kInRegister:
           CHECK_NE(register_mask & dex_register_map.GetValue(reg), 0u);
           break;
+        case DexRegisterMap::kInFpuRegister:
+          // In an FPU register, which should never hold a reference.
+          CHECK(false);
+          break;
         case DexRegisterMap::kConstant:
-          CHECK_EQ(dex_register_map.GetValue(0), 0);
+          CHECK_EQ(dex_register_map.GetValue(reg), 0);
           break;
       }
     }
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index e728f7d..49357ad 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,9 +18,8 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "base/mutex.h"
-#include "gc_root-inl.h"
 #include "instruction_set.h"
-#include "runtime-inl.h"
+#include "runtime.h"
 #include "thread-inl.h"
 
 // Specific frame size code is in architecture-specific files. We include this to compile-time
@@ -36,16 +35,41 @@
 class ArtMethod;
 }  // namespace mirror
 
-// Place a special frame at the TOS that will save the callee saves for the given type.
-static inline void FinishCalleeSaveFrameSetup(Thread* self, StackReference<mirror::ArtMethod>* sp,
-                                              Runtime::CalleeSaveType type)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // Be aware the store below may well stomp on an incoming argument.
-  Locks::mutator_lock_->AssertSharedHeld(self);
-  sp->Assign(Runtime::Current()->GetCalleeSaveMethod(type));
-  self->SetTopOfStack(sp, 0);
-  self->VerifyStack();
-}
+class ScopedQuickEntrypointChecks {
+ public:
+  explicit ScopedQuickEntrypointChecks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : self_(self) {
+    if (kIsDebugBuild) {
+      TestsOnEntry();
+    }
+  }
+
+  ScopedQuickEntrypointChecks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : self_(kIsDebugBuild ? Thread::Current() : nullptr) {
+    if (kIsDebugBuild) {
+      TestsOnEntry();
+    }
+  }
+
+  ~ScopedQuickEntrypointChecks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (kIsDebugBuild) {
+      TestsOnExit();
+    }
+  }
+
+ private:
+  void TestsOnEntry() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Locks::mutator_lock_->AssertSharedHeld(self_);
+    self_->VerifyStack();
+  }
+
+  void TestsOnExit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Locks::mutator_lock_->AssertSharedHeld(self_);
+    self_->VerifyStack();
+  }
+
+  Thread* const self_;
+};
 
 static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, Runtime::CalleeSaveType type) {
   // In C++11, a constexpr function body must be a single return statement.
@@ -71,7 +95,8 @@
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
-static constexpr size_t GetCalleeSavePCOffset(InstructionSet isa, Runtime::CalleeSaveType type) {
+static constexpr size_t GetCalleeSaveReturnPcOffset(InstructionSet isa,
+                                                    Runtime::CalleeSaveType type) {
   return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa);
 }
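
ScopedQuickEntrypointChecks replaces the manual FinishCalleeSaveFrameSetup calls with an RAII guard; the shape, reduced to a standalone sketch (kIsDebugBuildSketch stands in for ART's compile-time kIsDebugBuild constant):

    constexpr bool kIsDebugBuildSketch = true;  // stand-in for art::kIsDebugBuild

    class ScopedChecksSketch {
     public:
      ScopedChecksSketch() { if (kIsDebugBuildSketch) Validate(); }
      ~ScopedChecksSketch() { if (kIsDebugBuildSketch) Validate(); }

     private:
      static void Validate() {
        // Assert invariants, as TestsOnEntry/TestsOnExit do above (mutator
        // lock held, managed stack well formed). Empty here to keep the
        // sketch standalone.
      }
    };

    int SomeEntrypoint() {
      ScopedChecksSketch sqec;  // checks run now, on entry
      return 0;                 // and again here, when sqec is destroyed
    }
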
 
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index d8da463..a2869ec 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -32,6 +32,7 @@
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
     mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
@@ -53,13 +54,12 @@
       } \
     } \
   } \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
-    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
       size_t byte_count = klass->GetObjectSize(); \
@@ -80,13 +80,12 @@
       } \
     } \
   } \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
-    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
     byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -105,45 +104,39 @@
       return obj; \
     } \
   } \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
                                                       allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \
-    mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, method, component_count, self, \
                                                               allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   return AllocArrayFromCode<true, instrumented_bool>(type_idx, method, component_count, self, \
                                                      allocator_type); \
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   if (!instrumented_bool) { \
     return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false, allocator_type); \
   } else { \
@@ -151,10 +144,9 @@
   } \
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    StackReference<mirror::ArtMethod>* sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  ScopedQuickEntrypointChecks sqec(self); \
   if (!instrumented_bool) { \
     return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true, allocator_type); \
   } else { \
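
For reference, the kUseTlabFastPath branches threaded through this macro amount to a bump-pointer allocation. An illustrative standalone version follows (hypothetical TLAB layout, not ART's Thread fields; alignment assumed to be a power of two):

    #include <cstddef>
    #include <cstdint>

    struct TlabSketch {
      uint8_t* pos;  // next free byte
      uint8_t* end;  // end of the thread-local buffer
    };

    // Mirrors the shape of the fast path: round the size up (as RoundUp
    // with BumpPointerSpace::kAlignment does), bump the pointer on success,
    // and return nullptr to signal "take the slow path".
    void* TlabAlloc(TlabSketch* tlab, size_t byte_count, size_t alignment) {
      byte_count = (byte_count + alignment - 1) & ~(alignment - 1);
      if (static_cast<size_t>(tlab->end - tlab->pos) < byte_count) {
        return nullptr;
      }
      uint8_t* obj = tlab->pos;
      tlab->pos += byte_count;
      return obj;
    }
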
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index f9f62c2..14ab320 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -27,9 +27,8 @@
 
 namespace art {
 
-extern "C" void artDeoptimize(Thread* self, StackReference<mirror::ArtMethod>* sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+extern "C" void artDeoptimize(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   self->SetException(ThrowLocation(), Thread::GetDeoptimizationException());
   self->QuickDeliverException();
 }
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 704db05..2e7c8ba 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -27,42 +27,39 @@
 
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
                                                              mirror::ArtMethod* referrer,
-                                                             Thread* self,
-                                                             StackReference<mirror::ArtMethod>* sp)
+                                                             Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   return ResolveVerifyAndClinit(type_idx, referrer, self, true, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
                                                     mirror::ArtMethod* referrer,
-                                                    Thread* self,
-                                                    StackReference<mirror::ArtMethod>* sp)
+                                                    Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   return ResolveVerifyAndClinit(type_idx, referrer, self, false, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-    mirror::ArtMethod* referrer,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                                                                   mirror::ArtMethod* referrer,
+                                                                   Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   return ResolveVerifyAndClinit(type_idx, referrer, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(mirror::ArtMethod* referrer,
                                                     int32_t string_idx,
-                                                    Thread* self,
-                                                    StackReference<mirror::ArtMethod>* sp)
+                                                    Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   return ResolveStringFromCode(referrer, string_idx);
 }
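
The three class entrypoints above differ only in the two trailing booleans they forward to ResolveVerifyAndClinit; the flag meanings below are inferred from the call-site comments, not quoted from its declaration:

    // artInitializeStaticStorageFromCode       -> ResolveVerifyAndClinit(..., true,  false)
    // artInitializeTypeFromCode                -> ResolveVerifyAndClinit(..., false, false)
    // artInitializeTypeAndVerifyAccessFromCode -> ResolveVerifyAndClinit(..., false, true)
    // (4th argument: may initialize static storage; 5th: verify caller access)
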
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index b89c015..7326fcf 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -25,295 +25,284 @@
 
 namespace art {
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
-                                           mirror::ArtMethod* referrer,
-                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                           Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetByte(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetByte(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
-                                               mirror::ArtMethod* referrer,
-                                               Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                               Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetBoolean(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetBoolean(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
-                                             mirror::ArtMethod* referrer,
-                                             Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+                                             Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetShort(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetShort(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx,
                                              mirror::ArtMethod* referrer,
-                                             Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                             Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetChar(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetChar(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
                                            mirror::ArtMethod* referrer,
-                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                           Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int32_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->Get32(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int32_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->Get32(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint64_t artGet64StaticFromCode(uint32_t field_idx,
                                            mirror::ArtMethod* referrer,
-                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                           Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->Get64(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveRead, true>(field_idx, referrer, self, sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->Get64(field->GetDeclaringClass());
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" mirror::Object* artGetObjStaticFromCode(uint32_t field_idx,
                                                    mirror::ArtMethod* referrer,
-                                                   Thread* self,
-                                                   StackReference<mirror::ArtMethod>* sp)
+                                                   Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
                                           sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
                                                     sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
-  return NULL;  // Will throw exception by checking with Thread::Current
+  return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             mirror::ArtMethod* referrer, Thread* self,
-                                             StackReference<mirror::ArtMethod>* sp)
+                                             mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetByte(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->GetByte(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                 mirror::ArtMethod* referrer, Thread* self,
-                                                 StackReference<mirror::ArtMethod>* sp)
+                                                 mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetBoolean(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->GetBoolean(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               mirror::ArtMethod* referrer, Thread* self,
-                                               StackReference<mirror::ArtMethod>* sp)
+                                               mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetShort(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->GetShort(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               mirror::ArtMethod* referrer, Thread* self,
-                                               StackReference<mirror::ArtMethod>* sp)
+                                               mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetChar(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->GetChar(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             mirror::ArtMethod* referrer, Thread* self,
-                                             StackReference<mirror::ArtMethod>* sp)
+                                             mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int32_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get32(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int32_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->Get32(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             mirror::ArtMethod* referrer, Thread* self,
-                                             StackReference<mirror::ArtMethod>* sp)
+                                             mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int64_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get64(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
                                                          sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->Get64(obj);
     }
   }
-  return 0;  // Will throw exception by checking with Thread::Current
+  return 0;  // Will throw exception by checking with Thread::Current.
 }
 
 extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                                      mirror::ArtMethod* referrer,
-                                                     Thread* self,
-                                                     StackReference<mirror::ArtMethod>* sp)
+                                                     Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
                                           sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetObj(obj);
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, self,
                                                       sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, true);
     } else {
       return field->GetObj(obj);
     }
   }
-  return NULL;  // Will throw exception by checking with Thread::Current
+  return nullptr;  // Will throw exception by checking with Thread::Current.
 }
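
Every accessor in this file now follows the same two-tier shape; a standalone skeleton (FindFast and FindSlow are stand-ins for FindFieldFast and FindFieldFromCode, stubbed so the snippet compiles):

    #include <cstdint>

    struct FieldSketch {
      int64_t Get64() const { return 0; }
    };

    FieldSketch* FindFast(uint32_t) { return nullptr; }  // lock-free, may miss
    FieldSketch* FindSlow(uint32_t) { return nullptr; }  // resolves, may set a pending exception

    int64_t Get64StaticSketch(uint32_t field_idx) {
      FieldSketch* field = FindFast(field_idx);  // 1) already-resolved fast path
      if (field != nullptr) {
        return field->Get64();
      }
      field = FindSlow(field_idx);               // 2) full resolution, may throw
      if (field != nullptr) {
        return field->Get64();
      }
      return 0;  // caller checks Thread::Current for the pending exception
    }
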
 
 extern "C" int artSet8StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                     mirror::ArtMethod* referrer, Thread* self,
-                                     StackReference<mirror::ArtMethod>* sp)
+                                     mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimBoolean) {
@@ -324,9 +313,8 @@
     }
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int8_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimBoolean) {
@@ -341,12 +329,12 @@
 }
 
 extern "C" int artSet16StaticFromCode(uint32_t field_idx, uint16_t new_value,
-                                      mirror::ArtMethod* referrer, Thread* self,
-                                      StackReference<mirror::ArtMethod>* sp)
+                                      mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimChar) {
@@ -357,9 +345,8 @@
     }
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int16_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimChar) {
@@ -374,19 +361,18 @@
 }
 
 extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      mirror::ArtMethod* referrer, Thread* self,
-                                      StackReference<mirror::ArtMethod>* sp)
+                                      mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int32_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set32<false>(field->GetDeclaringClass(), new_value);
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int32_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set32<false>(field->GetDeclaringClass(), new_value);
     return 0;  // success
@@ -395,19 +381,18 @@
 }
 
 extern "C" int artSet64StaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                      uint64_t new_value, Thread* self,
-                                      StackReference<mirror::ArtMethod>* sp)
+                                      uint64_t new_value, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(field->GetDeclaringClass(), new_value);
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(field->GetDeclaringClass(), new_value);
     return 0;  // success
@@ -416,22 +401,21 @@
 }
 
 extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       mirror::ArtMethod* referrer, Thread* self,
-                                       StackReference<mirror::ArtMethod>* sp)
+                                       mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     if (LIKELY(!field->IsPrimitiveType())) {
       // Compiled code can't use transactional mode.
       field->SetObj<false>(field->GetDeclaringClass(), new_value);
       return 0;  // success
     }
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
                                                      sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
+  if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(field->GetDeclaringClass(), new_value);
     return 0;  // success
@@ -440,12 +424,12 @@
 }
 
 extern "C" int artSet8InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                       mirror::ArtMethod* referrer, Thread* self,
-                                       StackReference<mirror::ArtMethod>* sp)
+                                       mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int8_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimBoolean) {
@@ -456,15 +440,14 @@
     }
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   {
     StackHandleScope<1> hs(self);
     HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
     field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                             sizeof(int8_t));
   }
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, false);
     } else {
@@ -482,12 +465,12 @@
 }
 
 extern "C" int artSet16InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        mirror::ArtMethod* referrer, Thread* self,
-                                        StackReference<mirror::ArtMethod>* sp)
+                                        mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int16_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
     if (type == Primitive::kPrimChar) {
@@ -498,15 +481,14 @@
     }
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   {
     StackHandleScope<1> hs(self);
     HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
     field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                             sizeof(int16_t));
   }
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, false);
     } else {
@@ -525,25 +507,24 @@
 }
 
 extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        mirror::ArtMethod* referrer, Thread* self,
-                                        StackReference<mirror::ArtMethod>* sp)
+                                        mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int32_t));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set32<false>(obj, new_value);
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   {
     StackHandleScope<1> hs(self);
     HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
     field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                             sizeof(int32_t));
   }
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, false);
     } else {
@@ -556,25 +537,20 @@
 }
 
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
-                                        Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                        mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  constexpr size_t frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsOnly);
-  mirror::ArtMethod* referrer =
-      reinterpret_cast<StackReference<mirror::ArtMethod>*>(
-          reinterpret_cast<uint8_t*>(sp) + frame_size)->AsMirrorPtr();
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
-  if (LIKELY(field != NULL  && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  sp->Assign(Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly));
-  self->SetTopOfStack(sp, 0);
   field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                           sizeof(int64_t));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, false);
     } else {
@@ -588,21 +564,20 @@
 
 extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         mirror::ArtMethod* referrer, Thread* self,
-                                         StackReference<mirror::ArtMethod>* sp)
+                                         mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL && obj != NULL)) {
+  if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(obj, new_value);
     return 0;  // success
   }
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
                                                        sizeof(mirror::HeapReference<mirror::Object>));
-  if (LIKELY(field != NULL)) {
-    if (UNLIKELY(obj == NULL)) {
+  if (LIKELY(field != nullptr)) {
+    if (UNLIKELY(obj == nullptr)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       ThrowNullPointerExceptionForFieldAccess(throw_location, field, false);
     } else {
diff --git a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
index 06bbabc..e336543 100644
--- a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
@@ -25,10 +25,9 @@
  * Handle fill array data by copying appropriate part of dex file into array.
  */
 extern "C" int artHandleFillArrayDataFromCode(uint32_t payload_offset, mirror::Array* array,
-                                              mirror::ArtMethod* method, Thread* self,
-                                              StackReference<mirror::ArtMethod>* sp)
+                                              mirror::ArtMethod* method, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   const uint16_t* const insns = method->GetCodeItem()->insns_;
   const Instruction::ArrayDataPayload* payload =
       reinterpret_cast<const Instruction::ArrayDataPayload*>(insns + payload_offset);
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index bb0e5e3..6b3e9dc 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -28,10 +28,9 @@
 extern "C" const void* artInstrumentationMethodEntryFromCode(mirror::ArtMethod* method,
                                                              mirror::Object* this_object,
                                                              Thread* self,
-                                                             StackReference<mirror::ArtMethod>* sp,
                                                              uintptr_t lr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
+  ScopedQuickEntrypointChecks sqec(self);
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   const void* result;
   if (instrumentation->IsDeoptimized(method)) {
@@ -52,23 +51,19 @@
                                                               uint64_t gpr_result,
                                                               uint64_t fpr_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // TODO: use FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly) not the hand inlined below.
-  //       We use the hand inline version to ensure the return_pc is assigned before verifying the
-  //       stack.
-  // Be aware the store below may well stomp on an incoming argument.
-  Locks::mutator_lock_->AssertSharedHeld(self);
-  Runtime* runtime = Runtime::Current();
-  sp->Assign(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly));
-  uint32_t return_pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly);
+  // Compute address of return PC and sanity check that it currently holds 0.
+  uint32_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
-  self->SetTopOfStack(sp, 0);
-  self->VerifyStack();
+
+  // Pop the frame, filling in the return pc. In both cases the low half of the return value is
+  // zero; the high half holds the return address when deoptimization shouldn't be performed, or
+  // the address of the deoptimization entry point when it should be.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
       self, return_pc, gpr_result, fpr_result);
-  self->VerifyStack();
   return return_or_deoptimize_pc;
 }
 
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 92c0841..8ceac97 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -20,12 +20,11 @@
 
 namespace art {
 
-extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self,
-                                     StackReference<mirror::ArtMethod>* sp)
+extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  if (UNLIKELY(obj == NULL)) {
+  ScopedQuickEntrypointChecks sqec(self);
+  if (UNLIKELY(obj == nullptr)) {
     ThrowLocation throw_location(self->GetCurrentLocationForThrow());
     ThrowNullPointerException(&throw_location,
                               "Null reference used for synchronization (monitor-enter)");
@@ -43,12 +42,11 @@
   }
 }
 
-extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self,
-                                       StackReference<mirror::ArtMethod>* sp)
+extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  if (UNLIKELY(obj == NULL)) {
+  ScopedQuickEntrypointChecks sqec(self);
+  if (UNLIKELY(obj == nullptr)) {
     ThrowLocation throw_location(self->GetCurrentLocationForThrow());
     ThrowNullPointerException(&throw_location,
                               "Null reference used for synchronization (monitor-exit)");
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index ea75fb6..87e0c6e 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -19,10 +19,9 @@
 
 namespace art {
 
-extern "C" void artTestSuspendFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+extern "C" void artTestSuspendFromCode(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when suspend count check value is 0 and thread->suspend_count_ != 0
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  ScopedQuickEntrypointChecks sqec(self);
   self->CheckSuspend();
 }
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 13decc8..25df40b 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -24,16 +24,14 @@
 namespace art {
 
 // Deliver an exception that's pending on the thread, helping set up a callee save frame on the way.
-extern "C" void artDeliverPendingExceptionFromCode(Thread* thread,
-                                                   StackReference<mirror::ArtMethod>* sp)
+extern "C" void artDeliverPendingExceptionFromCode(Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
-  thread->QuickDeliverException();
+  ScopedQuickEntrypointChecks sqec(self);
+  self->QuickDeliverException();
 }
 
 // Called by generated call to throw an exception.
-extern "C" void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self,
-                                            StackReference<mirror::ArtMethod>* sp)
+extern "C" void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   /*
    * exception may be NULL, in which case this routine should
@@ -42,9 +40,9 @@
   * and threw an NPE if NULL.  This routine is responsible for setting
    * exception_ in thread and delivering the exception.
    */
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-  if (exception == NULL) {
+  if (exception == nullptr) {
     self->ThrowNewException(throw_location, "Ljava/lang/NullPointerException;",
                             "throw with null exception");
   } else {
@@ -54,10 +52,9 @@
 }
 
 // Called by generated call to throw a NPE exception.
-extern "C" void artThrowNullPointerExceptionFromCode(Thread* self,
-                                                     StackReference<mirror::ArtMethod>* sp)
+extern "C" void artThrowNullPointerExceptionFromCode(Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   ThrowNullPointerExceptionFromDexPC(throw_location);
@@ -66,52 +63,50 @@
 }
 
 // Called by generated call to throw an arithmetic divide by zero exception.
-extern "C" void artThrowDivZeroFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" void artThrowDivZeroFromCode(Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   ThrowArithmeticExceptionDivideByZero();
   self->QuickDeliverException();
 }
 
 // Called by generated call to throw an array index out of bounds exception.
-extern "C" void artThrowArrayBoundsFromCode(int index, int length, Thread* self,
-                                            StackReference<mirror::ArtMethod>*sp)
+extern "C" void artThrowArrayBoundsFromCode(int index, int length, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   ThrowArrayIndexOutOfBoundsException(index, length);
   self->QuickDeliverException();
 }
 
-extern "C" void artThrowStackOverflowFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" void artThrowStackOverflowFromCode(Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
   ThrowStackOverflowError(self);
   self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
-extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self,
-                                             StackReference<mirror::ArtMethod>* sp)
+extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   ThrowNoSuchMethodError(method_idx);
   self->QuickDeliverException();
 }
 
 extern "C" void artThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type,
-                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                           Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
-  CHECK(!dest_type->IsAssignableFrom(src_type));
+  ScopedQuickEntrypointChecks sqec(self);
+  DCHECK(!dest_type->IsAssignableFrom(src_type));
   ThrowClassCastException(dest_type, src_type);
   self->QuickDeliverException();
 }
 
 extern "C" void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
-                                            Thread* self, StackReference<mirror::ArtMethod>* sp)
+                                            Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  ScopedQuickEntrypointChecks sqec(self);
   ThrowArrayStoreException(value->GetClass(), array->GetClass());
   self->QuickDeliverException();
 }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 224756b..af341bb 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -466,7 +466,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Ensure we don't get thread suspension until the object arguments are safely in the shadow
   // frame.
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
+  ScopedQuickEntrypointChecks sqec(self);
 
   if (method->IsAbstract()) {
     ThrowAbstractMethodError(method);
@@ -593,7 +593,6 @@
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
   DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method);
-  self->SetTopOfStack(sp, 0);
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
             Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
       << PrettyMethod(proxy_method);
@@ -678,7 +677,7 @@
                                                     Thread* self,
                                                     StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
+  ScopedQuickEntrypointChecks sqec(self);
   // Start new JNI local reference state
   JNIEnvExt* env = self->GetJniEnv();
   ScopedObjectAccessUnchecked soa(env);
@@ -950,7 +949,7 @@
 
   virtual ~BuildNativeCallFrameStateMachine() {}
 
-  bool HavePointerGpr() {
+  bool HavePointerGpr() const {
     return gpr_index_ > 0;
   }
 
@@ -965,7 +964,7 @@
     }
   }
 
-  bool HaveHandleScopeGpr() {
+  bool HaveHandleScopeGpr() const {
     return gpr_index_ > 0;
   }
 
@@ -981,7 +980,7 @@
     }
   }
 
-  bool HaveIntGpr() {
+  bool HaveIntGpr() const {
     return gpr_index_ > 0;
   }
 
@@ -996,17 +995,17 @@
     }
   }
 
-  bool HaveLongGpr() {
+  bool HaveLongGpr() const {
     return gpr_index_ >= kRegistersNeededForLong + (LongGprNeedsPadding() ? 1 : 0);
   }
 
-  bool LongGprNeedsPadding() {
+  bool LongGprNeedsPadding() const {
     return kRegistersNeededForLong > 1 &&     // only pad when using multiple registers
         kAlignLongOnStack &&                  // and when it needs alignment
         (gpr_index_ & 1) == 1;                // counter is odd, see constructor
   }
 
-  bool LongStackNeedsPadding() {
+  bool LongStackNeedsPadding() const {
     return kRegistersNeededForLong > 1 &&     // only pad when using multiple registers
         kAlignLongOnStack &&                  // and when it needs 8B alignment
         (stack_entries_ & 1) == 1;            // counter is odd
@@ -1042,7 +1041,7 @@
     }
   }
 
-  bool HaveFloatFpr() {
+  bool HaveFloatFpr() const {
     return fpr_index_ > 0;
   }
 
@@ -1077,17 +1076,17 @@
     }
   }
 
-  bool HaveDoubleFpr() {
+  bool HaveDoubleFpr() const {
     return fpr_index_ >= kRegistersNeededForDouble + (DoubleFprNeedsPadding() ? 1 : 0);
   }
 
-  bool DoubleFprNeedsPadding() {
+  bool DoubleFprNeedsPadding() const {
     return kRegistersNeededForDouble > 1 &&     // only pad when using multiple registers
         kAlignDoubleOnStack &&                  // and when it needs alignment
         (fpr_index_ & 1) == 1;                  // counter is odd, see constructor
   }
 
-  bool DoubleStackNeedsPadding() {
+  bool DoubleStackNeedsPadding() const {
     return kRegistersNeededForDouble > 1 &&     // only pad when using multiple registers
         kAlignDoubleOnStack &&                  // and when it needs 8B alignment
         (stack_entries_ & 1) == 1;              // counter is odd
@@ -1122,15 +1121,15 @@
     }
   }
 
-  uint32_t getStackEntries() {
+  uint32_t GetStackEntries() const {
     return stack_entries_;
   }
 
-  uint32_t getNumberOfUsedGprs() {
+  uint32_t GetNumberOfUsedGprs() const {
     return kNumNativeGprArgs - gpr_index_;
   }
 
-  uint32_t getNumberOfUsedFprs() {
+  uint32_t GetNumberOfUsedFprs() const {
     return kNumNativeFprArgs - fpr_index_;
   }
 
@@ -1155,7 +1154,7 @@
   uint32_t fpr_index_;      // Number of free FPRs
   uint32_t stack_entries_;  // Stack entries are in multiples of 32b, as floats are usually not
                             // extended
-  T* delegate_;             // What Push implementation gets called
+  T* const delegate_;       // Which Push implementation gets called
 };
 
 // Computes the sizes of register stacks and call stack area. Handling of references can be extended
@@ -1169,18 +1168,19 @@
 
   virtual ~ComputeNativeCallFrameSize() {}
 
-  uint32_t GetStackSize() {
+  uint32_t GetStackSize() const {
     return num_stack_entries_ * sizeof(uintptr_t);
   }
 
-  uint8_t* LayoutCallStack(uint8_t* sp8) {
+  uint8_t* LayoutCallStack(uint8_t* sp8) const {
     sp8 -= GetStackSize();
     // Align by kStackAlignment.
     sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
     return sp8;
   }
 
-  uint8_t* LayoutCallRegisterStacks(uint8_t* sp8, uintptr_t** start_gpr, uint32_t** start_fpr) {
+  uint8_t* LayoutCallRegisterStacks(uint8_t* sp8, uintptr_t** start_gpr, uint32_t** start_fpr)
+      const {
     // Assumption is OK right now, as we have soft-float arm
     size_t fregs = BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeFprArgs;
     sp8 -= fregs * sizeof(uintptr_t);
@@ -1192,7 +1192,7 @@
   }
 
   uint8_t* LayoutNativeCall(uint8_t* sp8, uintptr_t** start_stack, uintptr_t** start_gpr,
-                            uint32_t** start_fpr) {
+                            uint32_t** start_fpr) const {
     // Native call stack.
     sp8 = LayoutCallStack(sp8);
     *start_stack = reinterpret_cast<uintptr_t*>(sp8);
@@ -1216,6 +1216,7 @@
       Primitive::Type cur_type_ = Primitive::GetType(shorty[i]);
       switch (cur_type_) {
         case Primitive::kPrimNot:
+          // TODO: fix abuse of mirror types.
           sm.AdvanceHandleScope(
               reinterpret_cast<mirror::Object*>(0x12345678));
           break;
@@ -1241,7 +1242,7 @@
       }
     }
 
-    num_stack_entries_ = sm.getStackEntries();
+    num_stack_entries_ = sm.GetStackEntries();
   }
 
   void PushGpr(uintptr_t /* val */) {
@@ -1311,7 +1312,7 @@
   }
 
   // Adds space for the cookie. Note: may leave stack unaligned.
-  void LayoutCookie(uint8_t** sp) {
+  void LayoutCookie(uint8_t** sp) const {
     // Reference cookie and padding
     *sp -= 8;
   }
@@ -1458,11 +1459,11 @@
     return handle_scope_->GetHandle(0).GetReference();
   }
 
-  jobject GetFirstHandleScopeJObject() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jobject GetFirstHandleScopeJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return handle_scope_->GetHandle(0).ToJObject();
   }
 
-  void* GetBottomOfUsedArea() {
+  void* GetBottomOfUsedArea() const {
     return bottom_of_used_area_;
   }
 
@@ -1609,13 +1610,13 @@
   uint32_t shorty_len = 0;
   const char* shorty = called->GetShorty(&shorty_len);
 
-  // Run the visitor.
+  // Run the visitor and update sp.
   BuildGenericJniFrameVisitor visitor(self, called->IsStatic(), shorty, shorty_len, &sp);
   visitor.VisitArguments();
   visitor.FinalizeHandleScope(self);
 
   // Fix up managed-stack things in Thread.
-  self->SetTopOfStack(sp, 0);
+  self->SetTopOfStack(sp);
 
   self->VerifyStack();
 
@@ -1744,10 +1745,11 @@
 static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
                                      Thread* self, StackReference<mirror::ArtMethod>* sp) {
+  ScopedQuickEntrypointChecks sqec(self);
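+  // The caller must have placed the RefsAndArgs callee-save method at the top of the stack.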
+  DCHECK_EQ(sp->AsMirrorPtr(), Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
                                              type);
   if (UNLIKELY(method == nullptr)) {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
     uint32_t shorty_len;
     const char* shorty = dex_file->GetMethodShorty(dex_file->GetMethodId(method_idx), &shorty_len);
@@ -1852,21 +1854,20 @@
                                                       Thread* self,
                                                       StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   mirror::ArtMethod* method;
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     method = this_object->GetClass()->FindVirtualMethodForInterface(interface_method);
     if (UNLIKELY(method == NULL)) {
-      FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
       return GetTwoWordFailureValue();  // Failure.
     }
   } else {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
     DCHECK(interface_method == Runtime::Current()->GetResolutionMethod());
 
     // Find the caller PC.
-    constexpr size_t pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs);
+    constexpr size_t pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsAndArgs);
     uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) + pc_offset);
 
     // Map the caller PC to a dex PC.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 41af88e..a9af754 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -98,11 +98,11 @@
   // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
   // sizeof(void*), which is wrong when the target bitwidth is not the same as the host's.
   CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
-                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs));
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsAndArgs));
   CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
-                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly));
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly));
   CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
-                GetCalleeSavePCOffset(kRuntimeISA, Runtime::kSaveAll));
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAll));
 }
 
 }  // namespace art
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 1714134..1365cd4 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -43,7 +43,7 @@
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle"))));
     my_klass_ = class_linker_->FindClass(soa.Self(), "LExceptionHandle;", class_loader);
-    ASSERT_TRUE(my_klass_ != NULL);
+    ASSERT_TRUE(my_klass_ != nullptr);
     Handle<mirror::Class> klass(hs.NewHandle(my_klass_));
     class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
     my_klass_ = klass.Get();
@@ -93,12 +93,12 @@
     const uint8_t* code_ptr = &fake_header_code_and_maps_[mapping_table_offset];
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
-    ASSERT_TRUE(method_f_ != NULL);
+    ASSERT_TRUE(method_f_ != nullptr);
     method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
     method_f_->SetNativeGcMap(&fake_gc_map_[0]);
 
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
-    ASSERT_TRUE(method_g_ != NULL);
+    ASSERT_TRUE(method_g_ != nullptr);
     method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
     method_g_->SetNativeGcMap(&fake_gc_map_[0]);
   }
@@ -122,7 +122,7 @@
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = dex_->GetCodeItem(method_f_->GetCodeItemOffset());
 
-  ASSERT_TRUE(code_item != NULL);
+  ASSERT_TRUE(code_item != nullptr);
 
   ASSERT_EQ(2u, code_item->tries_size_);
   ASSERT_NE(0u, code_item->insns_size_in_code_units_);
@@ -163,14 +163,30 @@
   ScopedObjectAccess soa(env);
 
   std::vector<uintptr_t> fake_stack;
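+  // Register a SaveAll callee-save method with the runtime so that the fake frame pushed
+  // below is recognized during the stack walk.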
+  Runtime* r = Runtime::Current();
+  r->SetInstructionSet(kRuntimeISA);
+  mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(Runtime::kSaveAll);
+  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
+  QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+
   ASSERT_EQ(kStackAlignment, 16U);
   // ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
 
   if (!kUsePortableCompiler) {
-    // Create two fake stack frames with mapping data created in SetUp. We map offset 3 in the code
-    // to dex pc 3.
+    // Create three fake stack frames with mapping data created in SetUp. We map offset 3 in the
+    // code to dex pc 3.
     const uint32_t dex_pc = 3;
 
+    // Create the stack frame for the callee save method, expected by the runtime.
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(save_method));
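+    // Zero-fill the body of the frame; together with the method pointer above and the
+    // return pc below this accounts for FrameSizeInBytes() worth of entries.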
+    for (size_t i = 0; i < frame_info.FrameSizeInBytes() - 2 * sizeof(uintptr_t);
+         i += sizeof(uintptr_t)) {
+      fake_stack.push_back(0);
+    }
+
+    fake_stack.push_back(method_g_->ToNativeQuickPc(dex_pc));  // return pc
+
     // Create/push fake 16byte stack frame for method g
     fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
     fake_stack.push_back(0);
@@ -183,7 +199,7 @@
     fake_stack.push_back(0);
     fake_stack.push_back(0xEBAD6070);  // return pc
 
-    // Pull Method* of NULL to terminate the trace
+    // Push Method* of NULL to terminate the trace
     fake_stack.push_back(0);
 
     // Push null values which will become null incoming arguments.
@@ -192,9 +208,7 @@
     fake_stack.push_back(0);
 
     // Set up the thread to appear as if we called out of method_g_ at dex pc 3
-    thread->SetTopOfStack(
-        reinterpret_cast<StackReference<mirror::ArtMethod>*>(&fake_stack[0]),
-        method_g_->ToNativeQuickPc(dex_pc));  // return pc
+    thread->SetTopOfStack(reinterpret_cast<StackReference<mirror::ArtMethod>*>(&fake_stack[0]));
   } else {
     // Create/push fake 20-byte shadow frame for method g
     fake_stack.push_back(0);
@@ -215,33 +229,35 @@
   }
 
   jobject internal = thread->CreateInternalStackTrace<false>(soa);
-  ASSERT_TRUE(internal != NULL);
+  ASSERT_TRUE(internal != nullptr);
   jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal);
-  ASSERT_TRUE(ste_array != NULL);
+  ASSERT_TRUE(ste_array != nullptr);
   mirror::ObjectArray<mirror::StackTraceElement>* trace_array =
       soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array);
 
-  ASSERT_TRUE(trace_array != NULL);
-  ASSERT_TRUE(trace_array->Get(0) != NULL);
+  ASSERT_TRUE(trace_array != nullptr);
+  ASSERT_TRUE(trace_array->Get(0) != nullptr);
   EXPECT_STREQ("ExceptionHandle",
                trace_array->Get(0)->GetDeclaringClass()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("ExceptionHandle.java", trace_array->Get(0)->GetFileName()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("ExceptionHandle.java",
+               trace_array->Get(0)->GetFileName()->ToModifiedUtf8().c_str());
   EXPECT_STREQ("g", trace_array->Get(0)->GetMethodName()->ToModifiedUtf8().c_str());
   EXPECT_EQ(37, trace_array->Get(0)->GetLineNumber());
 
-  ASSERT_TRUE(trace_array->Get(1) != NULL);
+  ASSERT_TRUE(trace_array->Get(1) != nullptr);
   EXPECT_STREQ("ExceptionHandle",
                trace_array->Get(1)->GetDeclaringClass()->ToModifiedUtf8().c_str());
-  EXPECT_STREQ("ExceptionHandle.java", trace_array->Get(1)->GetFileName()->ToModifiedUtf8().c_str());
+  EXPECT_STREQ("ExceptionHandle.java",
+               trace_array->Get(1)->GetFileName()->ToModifiedUtf8().c_str());
   EXPECT_STREQ("f", trace_array->Get(1)->GetMethodName()->ToModifiedUtf8().c_str());
   EXPECT_EQ(22, trace_array->Get(1)->GetLineNumber());
 
-#if !defined(ART_USE_PORTABLE_COMPILER)
-  thread->SetTopOfStack(NULL, 0);  // Disarm the assertion that no code is running when we detach.
-#else
-  thread->PopShadowFrame();
-  thread->PopShadowFrame();
-#endif
+  if (!kUsePortableCompiler) {
+    thread->SetTopOfStack(nullptr);  // Disarm the assertion that no code is running when we detach.
+  } else {
+    thread->PopShadowFrame();
+    thread->PopShadowFrame();
+  }
 }
 
 }  // namespace art
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 62e0609..4ae929b 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -400,7 +400,7 @@
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     StackReference<mirror::ArtMethod>* frame =
         reinterpret_cast<StackReference<mirror::ArtMethod>*>(sp);
-    self->SetTopOfStack(frame, 0);  // Since we don't necessarily have a dex pc, pass in 0.
+    self->SetTopOfStack(frame);
 #ifdef TEST_NESTED_SIGNAL
     // To test the nested signal handler we raise a signal here.  This will cause the
     // nested signal handler to be called and perform a longjmp back to the setjmp
diff --git a/runtime/handle.h b/runtime/handle.h
index addb663..6af3220 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -20,6 +20,7 @@
 #include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/value_object.h"
 #include "stack.h"
 
 namespace art {
@@ -33,7 +34,7 @@
 // a raw pointer. Handles are generally allocated within HandleScopes. Handle is a super-class
 // of MutableHandle and doesn't support assignment operations.
 template<class T>
-class Handle {
+class Handle : public ValueObject {
  public:
   Handle() : reference_(nullptr) {
   }
@@ -58,7 +59,7 @@
   }
 
   ALWAYS_INLINE T* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return reference_->AsMirrorPtr();
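+    // reference_ now holds a StackReference<mirror::Object>; down_cast restores the static type.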
+    return down_cast<T*>(reference_->AsMirrorPtr());
   }
 
   ALWAYS_INLINE jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -70,25 +71,25 @@
   }
 
  protected:
-  StackReference<T>* reference_;
-
   template<typename S>
   explicit Handle(StackReference<S>* reference)
-      : reference_(reinterpret_cast<StackReference<T>*>(reference)) {
+      : reference_(reference) {
   }
   template<typename S>
   explicit Handle(const Handle<S>& handle)
-      : reference_(reinterpret_cast<StackReference<T>*>(handle.reference_)) {
+      : reference_(handle.reference_) {
   }
 
-  StackReference<T>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+  StackReference<mirror::Object>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
     return reference_;
   }
-  ALWAYS_INLINE const StackReference<T>* GetReference() const
+  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return reference_;
   }
 
+  StackReference<mirror::Object>* reference_;
+
  private:
   friend class BuildGenericJniFrameVisitor;
   template<class S> friend class Handle;
@@ -121,8 +122,8 @@
   }
 
   ALWAYS_INLINE T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    StackReference<T>* ref = Handle<T>::GetReference();
-    T* const old = ref->AsMirrorPtr();
+    StackReference<mirror::Object>* ref = Handle<T>::GetReference();
+    T* old = down_cast<T*>(ref->AsMirrorPtr());
     ref->Assign(reference);
     return old;
   }
@@ -132,7 +133,6 @@
       : Handle<T>(handle) {
   }
 
- protected:
   template<typename S>
   explicit MutableHandle(StackReference<S>* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : Handle<T>(reference) {
@@ -153,7 +153,7 @@
   }
 
  private:
-  StackReference<T> null_ref_;
+  StackReference<mirror::Object> null_ref_;
 };
 
 }  // namespace art
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index f795e38..13c939f 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -159,7 +159,7 @@
   }
 
  private:
-  T** obj_;
+  T** const obj_;
 };
 
 // Scoped handle storage of a fixed size that is usually stack allocated.
@@ -169,19 +169,28 @@
   explicit StackHandleScope(Thread* self);
   ~StackHandleScope();
 
-  // Currently unused, using this GetReference instead of the one in HandleScope is preferred to
-  // avoid compiler optimizations incorrectly optimizing out of bound array accesses.
-  // TODO: Remove this when it is un-necessary.
-  ALWAYS_INLINE mirror::Object* GetReference(size_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, kNumReferences);
-    return GetReferences()[i].AsMirrorPtr();
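+  // Store the object in the next free slot and return a typed handle to it.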
+  template<class T>
+  MutableHandle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReference(pos_, object);
+    MutableHandle<T> h(GetHandle<T>(pos_));
+    pos_++;
+    return h;
   }
 
-  ALWAYS_INLINE MutableHandle<mirror::Object> GetHandle(size_t i)
+  template<class T>
+  HandleWrapper<T> NewHandleWrapper(T** object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReference(pos_, *object);
+    MutableHandle<T> h(GetHandle<T>(pos_));
+    pos_++;
+    return HandleWrapper<T>(object, h);
+  }
+
+ private:
+  template<class T>
+  ALWAYS_INLINE MutableHandle<T> GetHandle(size_t i)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, kNumReferences);
-    return MutableHandle<mirror::Object>(&GetReferences()[i]);
+    return MutableHandle<T>(&GetReferences()[i]);
   }
 
   ALWAYS_INLINE void SetReference(size_t i, mirror::Object* object)
@@ -190,23 +199,6 @@
     GetReferences()[i].Assign(object);
   }
 
-  template<class T>
-  MutableHandle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetReference(pos_, object);
-    MutableHandle<T> h(GetHandle(pos_));
-    pos_++;
-    return h;
-  }
-
-  template<class T>
-  HandleWrapper<T> NewHandleWrapper(T** object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetReference(pos_, *object);
-    MutableHandle<T> h(GetHandle(pos_));
-    pos_++;
-    return HandleWrapper<T>(object, h);
-  }
-
- private:
   // Reference storage needs to be first as expected by the HandleScope layout.
   StackReference<mirror::Object> storage_[kNumReferences];
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 6c6058f..adbece0 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1016,6 +1016,7 @@
   // Set return PC and check the sanity of the stack.
   *return_pc = instrumentation_frame.return_pc_;
   CheckStackDepth(self, instrumentation_frame, 0);
+  self->VerifyStack();
 
   mirror::ArtMethod* method = instrumentation_frame.method_;
   uint32_t length;
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 5c72e55..9584d15 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -315,12 +315,12 @@
 
 bool ArtMethod::IsEntrypointInterpreter() {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* oat_quick_code = class_linker->GetOatMethodQuickCodeFor(this);
-  const void* oat_portable_code = class_linker->GetOatMethodPortableCodeFor(this);
   if (!IsPortableCompiled()) {  // Quick.
+    const void* oat_quick_code = class_linker->GetOatMethodQuickCodeFor(this);
     return oat_quick_code == nullptr ||
         oat_quick_code != GetEntryPointFromQuickCompiledCode();
   } else {  // Portable.
+    const void* oat_portable_code = class_linker->GetOatMethodPortableCodeFor(this);
     return oat_portable_code == nullptr ||
         oat_portable_code != GetEntryPointFromPortableCompiledCode();
   }
@@ -414,7 +414,7 @@
         // stack. Continue execution in the interpreter.
         self->ClearException();
         ShadowFrame* shadow_frame = self->GetAndClearDeoptimizationShadowFrame(result);
-        self->SetTopOfStack(nullptr, 0);
+        self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
         interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
       }
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 1dbfe5d..3b92012 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -334,7 +334,9 @@
 
   ALWAYS_INLINE static const void* EntryPointToCodePointer(const void* entry_point) {
     uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
-    code &= ~0x1;  // TODO: Make this Thumb2 specific.
+    // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
+    //       least 2 byte aligned.
+    code &= ~0x1;
     return reinterpret_cast<const void*>(code);
   }
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index a2a0626..a0aaa9e 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -80,26 +80,6 @@
   EXPECT_EQ(kObjectHeaderSize, sizeof(Object));
 }
 
-// Keep the assembly code constats in sync.
-TEST_F(ObjectTest, AsmConstants) {
-  EXPECT_EQ(CLASS_OFFSET, Object::ClassOffset().Int32Value());
-  EXPECT_EQ(LOCK_WORD_OFFSET, Object::MonitorOffset().Int32Value());
-
-  EXPECT_EQ(CLASS_COMPONENT_TYPE_OFFSET, Class::ComponentTypeOffset().Int32Value());
-
-  EXPECT_EQ(ARRAY_LENGTH_OFFSET, Array::LengthOffset().Int32Value());
-  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(HeapReference<Object>)).Int32Value());
-
-  EXPECT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
-  EXPECT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
-  EXPECT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
-  EXPECT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
-
-  EXPECT_EQ(METHOD_DEX_CACHE_METHODS_OFFSET, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-  EXPECT_EQ(METHOD_PORTABLE_CODE_OFFSET, ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value());
-  EXPECT_EQ(METHOD_QUICK_CODE_OFFSET, ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
-}
-
 TEST_F(ObjectTest, IsInSamePackage) {
   // Matches
   EXPECT_TRUE(Class::IsInSamePackage("Ljava/lang/Object;", "Ljava/lang/Class;"));
diff --git a/runtime/oat.cc b/runtime/oat.cc
index a237bf6..0d7fb01 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '4', '2', '\0' };
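+// Note: the version is bumped because the quick entrypoint ABI and managed stack layout changed.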
+const uint8_t OatHeader::kOatVersion[] = { '0', '4', '3', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index adf0994..c16e9ed 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -43,8 +43,9 @@
 #include "arch/x86/registers_x86.h"
 #include "arch/x86_64/quick_method_frame_info_x86_64.h"
 #include "arch/x86_64/registers_x86_64.h"
-#include "base/unix_file/fd_file.h"
+#include "asm_support.h"
 #include "atomic.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
 #include "elf_file.h"
@@ -146,6 +147,7 @@
       implicit_null_checks_(false),
       implicit_so_checks_(false),
       implicit_suspend_checks_(false) {
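+  // Verify that the constants in asm_support.h match the C++ offsets and sizes they mirror.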
+  CheckAsmSupportOffsetsAndSizes();
 }
 
 Runtime::~Runtime() {
@@ -170,9 +172,6 @@
     BackgroundMethodSamplingProfiler::Shutdown();
   }
 
-  // Shutdown the fault manager if it was initialized.
-  fault_manager.Shutdown();
-
   Trace::Shutdown();
 
   // Make sure to let the GC complete if it is running.
@@ -185,6 +184,10 @@
 
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
   delete thread_list_;
+
+  // Shutdown the fault manager if it was initialized.
+  fault_manager.Shutdown();
+
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
@@ -848,14 +851,14 @@
   // Pre-allocate an OutOfMemoryError for the double-OOME case.
   self->ThrowNewException(ThrowLocation(), "Ljava/lang/OutOfMemoryError;",
                           "OutOfMemoryError thrown while trying to throw OutOfMemoryError; "
-                          "no stack available");
+                          "no stack trace available");
   pre_allocated_OutOfMemoryError_ = GcRoot<mirror::Throwable>(self->GetException(NULL));
   self->ClearException();
 
   // Pre-allocate a NoClassDefFoundError for the common case of failing to find a system class
   // ahead of checking the application's class loader.
   self->ThrowNewException(ThrowLocation(), "Ljava/lang/NoClassDefFoundError;",
-                          "Class not found using the boot class loader; no stack available");
+                          "Class not found using the boot class loader; no stack trace available");
   pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException(NULL));
   self->ClearException();
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index b4e85e2..0cdc984 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -526,7 +526,7 @@
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
-    cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
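+    // The runtime no longer records a pc for the top quick frame; start at 0 and recover
+    // subsequent pcs from return addresses as the walk proceeds.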
+    cur_quick_frame_pc_ = 0;
 
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
diff --git a/runtime/stack.h b/runtime/stack.h
index 25e50a1..2f8df61 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -360,7 +360,7 @@
 class PACKED(4) ManagedStack {
  public:
   ManagedStack()
-      : link_(NULL), top_shadow_frame_(NULL), top_quick_frame_(NULL), top_quick_frame_pc_(0) {}
+      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
 
   void PushManagedStackFragment(ManagedStack* fragment) {
     // Copy this top fragment into given fragment.
@@ -386,29 +386,16 @@
   }
 
   void SetTopQuickFrame(StackReference<mirror::ArtMethod>* top) {
-    DCHECK(top_shadow_frame_ == NULL);
+    DCHECK(top_shadow_frame_ == nullptr);
     top_quick_frame_ = top;
   }
 
-  uintptr_t GetTopQuickFramePc() const {
-    return top_quick_frame_pc_;
-  }
-
-  void SetTopQuickFramePc(uintptr_t pc) {
-    DCHECK(top_shadow_frame_ == NULL);
-    top_quick_frame_pc_ = pc;
-  }
-
   static size_t TopQuickFrameOffset() {
     return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
   }
 
-  static size_t TopQuickFramePcOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_pc_);
-  }
-
   ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame) {
-    DCHECK(top_quick_frame_ == NULL);
+    DCHECK(top_quick_frame_ == nullptr);
     ShadowFrame* old_frame = top_shadow_frame_;
     top_shadow_frame_ = new_top_frame;
     new_top_frame->SetLink(old_frame);
@@ -416,8 +403,8 @@
   }
 
   ShadowFrame* PopShadowFrame() {
-    DCHECK(top_quick_frame_ == NULL);
-    CHECK(top_shadow_frame_ != NULL);
+    DCHECK(top_quick_frame_ == nullptr);
+    CHECK(top_shadow_frame_ != nullptr);
     ShadowFrame* frame = top_shadow_frame_;
     top_shadow_frame_ = frame->GetLink();
     return frame;
@@ -428,7 +415,7 @@
   }
 
   void SetTopShadowFrame(ShadowFrame* top) {
-    DCHECK(top_quick_frame_ == NULL);
+    DCHECK(top_quick_frame_ == nullptr);
     top_shadow_frame_ = top;
   }
 
@@ -441,10 +428,9 @@
   bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
 
  private:
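+  // top_quick_frame_ is placed first so that it sits at offset zero, keeping the offset
+  // computation trivial for the assembly stubs that store to it via Thread offsets.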
+  StackReference<mirror::ArtMethod>* top_quick_frame_;
   ManagedStack* link_;
   ShadowFrame* top_shadow_frame_;
-  StackReference<mirror::ArtMethod>* top_quick_frame_;
-  uintptr_t top_quick_frame_pc_;
 };
 
 class StackVisitor {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 9b49d31..b1c46a9 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -88,6 +88,7 @@
     kNone,
     kInStack,
     kInRegister,
+    kInFpuRegister,
     kConstant
   };
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index efe27ee..83c4e03 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1812,7 +1812,6 @@
   DO_THREAD_OFFSET(StackEndOffset<ptr_size>(), "stack_end")
   DO_THREAD_OFFSET(ThinLockIdOffset<ptr_size>(), "thin_lock_thread_id")
   DO_THREAD_OFFSET(TopOfManagedStackOffset<ptr_size>(), "top_quick_frame_method")
-  DO_THREAD_OFFSET(TopOfManagedStackPcOffset<ptr_size>(), "top_quick_frame_pc")
   DO_THREAD_OFFSET(TopShadowFrameOffset<ptr_size>(), "top_shadow_frame")
   DO_THREAD_OFFSET(TopHandleScopeOffset<ptr_size>(), "top_handle_scope")
   DO_THREAD_OFFSET(ThreadSuspendTriggerOffset<ptr_size>(), "suspend_trigger")
diff --git a/runtime/thread.h b/runtime/thread.h
index 32ed758..694dbda 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -365,9 +365,8 @@
 
   ThrowLocation GetCurrentLocationForThrow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetTopOfStack(StackReference<mirror::ArtMethod>* top_method, uintptr_t pc) {
+  void SetTopOfStack(StackReference<mirror::ArtMethod>* top_method) {
     tlsPtr_.managed_stack.SetTopQuickFrame(top_method);
-    tlsPtr_.managed_stack.SetTopQuickFramePc(pc);
   }
 
   void SetTopOfShadowStack(ShadowFrame* top) {
@@ -637,13 +636,6 @@
         ManagedStack::TopQuickFrameOffset());
   }
 
-  template<size_t pointer_size>
-  static ThreadOffset<pointer_size> TopOfManagedStackPcOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(
-        OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
-        ManagedStack::TopQuickFramePcOffset());
-  }
-
   const ManagedStack* GetManagedStack() const {
     return &tlsPtr_.managed_stack;
   }
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index e52adfc..b7ff360 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -30,6 +30,18 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common_build.mk
 include $(BUILD_SHARED_LIBRARY)
 
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+LOCAL_SRC_FILES := sigchain.cc
+LOCAL_CLANG = $(ART_TARGET_CLANG)
+LOCAL_MODULE:= libsigchain
+LOCAL_SHARED_LIBRARIES := liblog
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common_build.mk
+include $(BUILD_STATIC_LIBRARY)
+
 # Build host library.
 include $(CLEAR_VARS)
 LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
@@ -43,3 +55,17 @@
 LOCAL_LDLIBS = -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_SRC_FILES := sigchain.cc
+LOCAL_MODULE:= libsigchain
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_LDLIBS = -ldl
+LOCAL_MULTILIB := both
+include external/libcxx/libcxx.mk
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/test/089-many-methods/check b/test/089-many-methods/check
new file mode 100755
index 0000000..ec6733d
--- /dev/null
+++ b/test/089-many-methods/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Strip build error debug messages, as they are environment-specific.
+sed -e '/^Failed to build/d' -e '/^Non-canonical tmpdir/d' "$2" > "$2.tmp"
+
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
\ No newline at end of file
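Note: run-test runs its check command as ./$check_cmd "$expected" "$output" (visible in the test/run-test hunk at the end of this change), so in the script above $1 is the expected-output file and $2 is the actual output, and the script's exit status decides pass/fail.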
diff --git a/test/410-floats/src/Main.java b/test/410-floats/src/Main.java
index d8d6fac..2300457 100644
--- a/test/410-floats/src/Main.java
+++ b/test/410-floats/src/Main.java
@@ -17,9 +17,10 @@
 public class Main {
   public static void main(String[] args) {
     assertEquals(4.2f, returnFloat());
-    float[] a = new float[1];
+    float[] a = new float[2];
     a[0] = 42.2f;
-    assertEquals(42.2f, returnFloat(a));
+    a[1] = 3.2f;
+    assertEquals(45.4f, returnFloat(a));
 
     assertEquals(4.4, returnDouble());
     double[] b = new double[1];
@@ -36,6 +37,9 @@
     assertEquals(3.1, invokeTakeADouble(3.1));
     assertEquals(12.7, invokeTakeThreeDouble(3.1, 4.4, 5.2));
     assertEquals(12.7f, invokeTakeThreeFloat(3.1f, 4.4f, 5.2f));
+
+    testArrayOperations(new float[2], 0, 1.2f, 3.4f);
+    testArrayOperations(new double[2], 0, 4.1, 7.6);
   }
 
   public static float invokeReturnFloat() {
@@ -51,7 +55,7 @@
   }
 
   public static float returnFloat(float[] a) {
-    return a[0];
+    return a[0] + a[1];
   }
 
   public static double returnDouble() {
@@ -94,6 +98,34 @@
     return takeThreeFloat(a, b, c);
   }
 
+  // Test simple operations on a float array to ensure the register allocator works
+  // properly.
+  public static void testArrayOperations(float[] a, int index, float value1, float value2) {
+    a[0] = value1;
+    a[1] = value2;
+    assertEquals(value1 + value2, a[0] + a[1]);
+    a[0] = 0.0f;
+    a[1] = 0.0f;
+    assertEquals(0.0f, a[0] + a[1]);
+    a[index] = value1;
+    a[index + 1] = value2;
+    assertEquals(value1 + value2, a[0] + a[1]);
+  }
+
+  // Test simple operations on a double array to ensure the register allocator works
+  // properly.
+  public static void testArrayOperations(double[] a, int index, double value1, double value2) {
+    a[0] = value1;
+    a[1] = value2;
+    assertEquals(value1 + value2, a[0] + a[1]);
+    a[0] = 0.0;
+    a[1] = 0.0;
+    assertEquals(0.0, a[0] + a[1]);
+    a[index] = value1;
+    a[index + 1] = value2;
+    assertEquals(value1 + value2, a[0] + a[1]);
+  }
+
   public static void assertEquals(float expected, float actual) {
     if (expected != actual) {
       throw new AssertionError("Expected " + expected + " got " + actual);
diff --git a/test/411-optimizing-arith/src/Main.java b/test/411-optimizing-arith/src/Main.java
index 74c47a6..2b3ba33 100644
--- a/test/411-optimizing-arith/src/Main.java
+++ b/test/411-optimizing-arith/src/Main.java
@@ -33,6 +33,7 @@
 
   public static void main(String[] args) {
     mul();
+    neg();
   }
 
   public static void mul() {
@@ -51,6 +52,34 @@
     expectEquals(36L, $opt$Mul(-12L, -3L));
     expectEquals(33L, $opt$Mul(1L, 3L) * 11);
     expectEquals(240518168583L, $opt$Mul(34359738369L, 7L)); // (2^35 + 1) * 7
+
+    $opt$InplaceNegOne(1);
+  }
+
+  public static void neg() {
+    expectEquals(-1, $opt$Neg(1));
+    expectEquals(1, $opt$Neg(-1));
+    expectEquals(0, $opt$Neg(0));
+    expectEquals(51, $opt$Neg(-51));
+    expectEquals(-51, $opt$Neg(51));
+    expectEquals(2147483647, $opt$Neg(-2147483647));  // (2^31 - 1)
+    expectEquals(-2147483647, $opt$Neg(2147483647));  // -(2^31 - 1)
+    // From the Java Language Specification, Java SE 7 Edition:
+    // http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.15.4
+    //
+    //   For integer values, negation is the same as subtraction from
+    //   zero.  The Java programming language uses two's-complement
+    //   representation for integers, and the range of two's-complement
+    //   values is not symmetric, so negation of the maximum negative
+    //   int or long results in that same maximum negative number.
+    //   Overflow occurs in this case, but no exception is thrown.
+    //   For all integer values x, -x equals (~x)+1.
+    expectEquals(-2147483648, $opt$Neg(-2147483648)); // -(2^31)
+  }
+
+  public static void $opt$InplaceNegOne(int a) {
+    a = -a;
+    expectEquals(-1, a);
   }
 
   static int $opt$Mul(int a, int b) {
@@ -61,4 +90,7 @@
     return a * b;
   }
 
+  static int $opt$Neg(int a) {
+    return -a;
+  }
 }
diff --git a/test/413-regalloc-regression/expected.txt b/test/413-regalloc-regression/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/413-regalloc-regression/expected.txt
diff --git a/test/413-regalloc-regression/info.txt b/test/413-regalloc-regression/info.txt
new file mode 100644
index 0000000..c706c1d
--- /dev/null
+++ b/test/413-regalloc-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for the linear scan register allocator, which used to
+fail to compile removeElementAt on x86.
diff --git a/test/413-regalloc-regression/src/Main.java b/test/413-regalloc-regression/src/Main.java
new file mode 100644
index 0000000..3e649f8
--- /dev/null
+++ b/test/413-regalloc-regression/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  private Object[] data;
+  private int size;
+
+  public Main() {
+    data = new Object[4];
+    size = 0;
+  }
+
+  public void removeElementAt(int index) {
+    for (int i = index; i < size - 1; i++) {
+      data[i] = data[i + 1];
+    }
+    data[--size] = null;
+  }
+
+  public static void main(String[] args) {
+    Main main = new Main();
+    main.size++;
+    main.removeElementAt(0);
+    if (main.size != 0) {
+      throw new Error("Unexpected size");
+    }
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e066a38..97afd00 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -115,6 +115,13 @@
 ifeq ($(ART_TEST_RUN_TEST_NO_IMAGE),true)
   IMAGE_TYPES += no-image
 endif
+RUN_TYPES :=
+ifeq ($(ART_TEST_RUN_TEST_DEBUG),true)
+  RUN_TYPES += debug
+endif
+ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
+  RUN_TYPES += ndebug
+endif
 ADDRESS_SIZES_TARGET := $(ART_PHONY_TEST_TARGET_SUFFIX) $(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 ADDRESS_SIZES_HOST := $(ART_PHONY_TEST_HOST_SUFFIX) $(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 ALL_ADDRESS_SIZES := 64 32
@@ -122,21 +129,22 @@
 # List all run-test names, with numbered arguments matching the comment above.
 define all-run-test-names
   $(foreach target, $(1), \
-    $(foreach prebuild, $(2), \
-      $(foreach compiler, $(3), \
-        $(foreach relocate, $(4), \
-          $(foreach trace, $(5), \
-            $(foreach gc, $(6), \
-              $(foreach jni, $(7), \
-                $(foreach image, $(8), \
-                  $(foreach test, $(9), \
-                    $(foreach address_size, $(10), \
-                      test-art-$(target)-run-test-$(prebuild)-$(compiler)-$(relocate)-$(trace)-$(gc)-$(jni)-$(image)-$(test)$(address_size) \
-                  ))))))))))
+    $(foreach run-type, $(2), \
+      $(foreach prebuild, $(3), \
+        $(foreach compiler, $(4), \
+          $(foreach relocate, $(5), \
+            $(foreach trace, $(6), \
+              $(foreach gc, $(7), \
+                $(foreach jni, $(8), \
+                  $(foreach image, $(9), \
+                    $(foreach test, $(10), \
+                      $(foreach address_size, $(11), \
+                        test-art-$(target)-run-test-$(run-type)-$(prebuild)-$(compiler)-$(relocate)-$(trace)-$(gc)-$(jni)-$(image)-$(test)$(address_size) \
+                    )))))))))))
 endef  # all-run-test-names
 
 # To generate a full list of tests:
-# $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
+# $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
 #        $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
 #        $(TEST_ART_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 
@@ -152,7 +160,7 @@
 
  # Disable timing-sensitive tests on "dist" builds.
 ifdef dist_goal
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
@@ -162,7 +170,7 @@
 TEST_ART_BROKEN_RUN_TESTS := \
   004-ThreadStress
 
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES), $(TEST_ART_BROKEN_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 
@@ -173,7 +181,7 @@
   116-nodex2oat
 
 ifneq (,$(filter prebuild,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),prebuild, \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),prebuild, \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES), $(TEST_ART_BROKEN_PREBUILD_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
@@ -184,7 +192,7 @@
   117-nopatchoat
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),no-prebuild, \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES), $(TEST_ART_BROKEN_NO_PREBUILD_TESTS), $(ALL_ADDRESS_SIZES))
 endif
@@ -197,7 +205,7 @@
   117-nopatchoat
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES), no-relocate,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES), $(TEST_ART_BROKEN_NO_RELOCATE_TESTS), $(ALL_ADDRESS_SIZES))
 endif
@@ -210,7 +218,7 @@
   114-ParallelGC
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \
       $(IMAGE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
@@ -218,7 +226,7 @@
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 # 115-native-bridge setup is complicated. Need to implement it correctly for the target.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(PREBUILD_TYPES),$(COMPILER_TYPES), \
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
     $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),115-native-bridge, \
     $(ALL_ADDRESS_SIZES))
 
@@ -232,26 +240,49 @@
   119-noimage-patchoat
 
 ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),no-dex2oat, \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \
       $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
       $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 
 ifneq (,$(filter no-image,$(IMAGE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
       $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 ifneq (,$(filter relocate-no-patchoat,$(RELOCATE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(PREBUILD_TYPES), \
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES), relocate-no-patchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS :=
 
+# The following tests use libarttest.so, which is linked against libartd.so, so they
+# will not work when libart.so is loaded instead.
+# TODO: Find a way to run these tests in ndebug mode.
+TEST_ART_BROKEN_NDEBUG_TESTS := \
+  004-JniTest \
+  004-ReferenceMap \
+  004-SignalTest \
+  004-StackWalk \
+  004-UnsafeTest \
+  115-native-bridge \
+  116-nodex2oat \
+  117-nopatchoat \
+  118-noimage-dex2oat \
+  119-noimage-patchoat \
+
+ifneq (,$(filter ndebug,$(RUN_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+      $(TEST_ART_BROKEN_NDEBUG_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_NDEBUG_TESTS :=
+
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \
@@ -281,6 +312,9 @@
 $(foreach target, $(TARGET_TYPES), \
   $(foreach address_size, $(ALL_ADDRESS_SIZES), \
     $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=)))
+$(foreach target, $(TARGET_TYPES), \
+  $(foreach run_type, $(RUN_TYPES), \
+    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=)))
 
 # We need dex2oat and dalvikvm on the target as well as the core image.
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(2ND_TARGET_CORE_IMG_OUT)
@@ -314,10 +348,10 @@
 endif
 
 # Create a rule to build and run a test following the form:
-# test-art-{1: host or target}-run-test-{2: prebuild no-prebuild no-dex2oat}-
-#    {3: interpreter default optimizing}-{4: relocate no-relocate relocate-no-patchoat}-
-#    {5: trace or no-trace}-{6: gcstress gcverify cms}-{7: forcecopy checkjni jni}-
-#    {8: no-image image}-{9: test name}{10: 32 or 64}
+# test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
+#    {4: interpreter default optimizing}-{5: relocate no-relocate relocate-no-patchoat}-
+#    {6: trace or no-trace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
+#    {9: no-image image}-{10: test name}{11: 32 or 64}
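+# For example, with hypothetical test name 001-HelloWorld, one fully expanded
+# rule name would be:
+#   test-art-host-run-test-debug-prebuild-optimizing-relocate-trace-gcverify-checkjni-image-001-HelloWorld32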
 define define-test-art-run-test
   run_test_options :=
   prereq_rule :=
@@ -340,119 +374,129 @@
       $$(error found $(1) expected $(TARGET_TYPES))
     endif
   endif
-  ifeq ($(2),prebuild)
+  ifeq ($(2),debug)
+    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUG_RULES
+  else
+    ifeq ($(2),ndebug)
+      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELEASE_RULES
+      run_test_options += -O
+    else
+      $$(error found $(2) expected $(RUN_TYPES))
+    endif
+  endif
+  ifeq ($(3),prebuild)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PREBUILD_RULES
     run_test_options += --prebuild
   else
-    ifeq ($(2),no-prebuild)
+    ifeq ($(3),no-prebuild)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_PREBUILD_RULES
       run_test_options += --no-prebuild
     else
-      ifeq ($(2),no-dex2oat)
+      ifeq ($(3),no-dex2oat)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_DEX2OAT_RULES
         run_test_options += --no-prebuild --no-dex2oat
       else
-        $$(error found $(2) expected $(PREBUILD_TYPES))
+        $$(error found $(3) expected $(PREBUILD_TYPES))
       endif
     endif
   endif
-  ifeq ($(3),optimizing)
+  ifeq ($(4),optimizing)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES
     run_test_options += -Xcompiler-option --compiler-backend=Optimizing
   else
-    ifeq ($(3),interpreter)
+    ifeq ($(4),interpreter)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
       run_test_options += --interpreter
     else
-      ifeq ($(3),default)
+      ifeq ($(4),default)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEFAULT_RULES
       else
-        $$(error found $(3) expected $(COMPILER_TYPES))
+        $$(error found $(4) expected $(COMPILER_TYPES))
       endif
     endif
   endif
-  ifeq ($(4),relocate)
+  ifeq ($(5),relocate)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_RULES
     run_test_options += --relocate
   else
-    ifeq ($(4),no-relocate)
+    ifeq ($(5),no-relocate)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES
       run_test_options += --no-relocate
     else
-      ifeq ($(4),relocate-no-patchoat)
+      ifeq ($(5),relocate-no-patchoat)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES
         run_test_options += --relocate --no-patchoat
       else
-        $$(error found $(4) expected $(RELOCATE_TYPES))
+        $$(error found $(5) expected $(RELOCATE_TYPES))
       endif
     endif
   endif
-  ifeq ($(5),trace)
+  ifeq ($(6),trace)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
     run_test_options += --trace
   else
-    ifeq ($(5),no-trace)
+    ifeq ($(6),no-trace)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES
     else
-      $$(error found $(5) expected $(TRACE_TYPES))
+      $$(error found $(6) expected $(TRACE_TYPES))
     endif
   endif
-  ifeq ($(6),gcverify)
+  ifeq ($(7),gcverify)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCVERIFY_RULES
     run_test_options += --gcverify
   else
-    ifeq ($(6),gcstress)
+    ifeq ($(7),gcstress)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCSTRESS_RULES
       run_test_options += --gcstress
     else
-      ifeq ($(6),cms)
+      ifeq ($(7),cms)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CMS_RULES
       else
-        $$(error found $(6) expected $(GC_TYPES))
+        $$(error found $(7) expected $(GC_TYPES))
       endif
     endif
   endif
-  ifeq ($(7),forcecopy)
+  ifeq ($(8),forcecopy)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_FORCECOPY_RULES
     run_test_options += --runtime-option -Xjniopts:forcecopy
     ifneq ($$(ART_TEST_JNI_FORCECOPY),true)
       skip_test := true
     endif
   else
-    ifeq ($(7),checkjni)
+    ifeq ($(8),checkjni)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CHECKJNI_RULES
       run_test_options += --runtime-option -Xcheck:jni
     else
-      ifeq ($(7),jni)
+      ifeq ($(8),jni)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JNI_RULES
       else
-        $$(error found $(7) expected $(JNI_TYPES))
+        $$(error found $(8) expected $(JNI_TYPES))
       endif
     endif
   endif
-  ifeq ($(8),no-image)
+  ifeq ($(9),no-image)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
     run_test_options += --no-image
   else
-    ifeq ($(8),image)
+    ifeq ($(9),image)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES
     else
-      $$(error found $(8) expected $(IMAGE_TYPES))
+      $$(error found $(9) expected $(IMAGE_TYPES))
     endif
   endif
-  # $(9) is the test name
-  test_groups += ART_RUN_TEST_$$(uc_host_or_target)_$(call name-to-var,$(9))_RULES
-  ifeq ($(10),64)
+  # $(10) is the test name
+  test_groups += ART_RUN_TEST_$$(uc_host_or_target)_$(call name-to-var,$(10))_RULES
+  ifeq ($(11),64)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_64_RULES
     run_test_options += --64
   else
-    ifeq ($(10),32)
+    ifeq ($(11),32)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_32_RULES
     else
-      $$(error found $(10) expected $(ALL_ADDRESS_SIZES))
+      $$(error found $(11) expected $(ALL_ADDRESS_SIZES))
     endif
   endif
-  run_test_rule_name := test-art-$(1)-run-test-$(2)-$(3)-$(4)-$(5)-$(6)-$(7)-$(8)-$(9)$(10)
+  run_test_rule_name := test-art-$(1)-run-test-$(2)-$(3)-$(4)-$(5)-$(6)-$(7)-$(8)-$(9)-$(10)$(11)
   run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \
       $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
@@ -462,7 +506,7 @@
 	  DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
-	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(9) \
+	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(10) \
 	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
 	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
@@ -480,16 +524,17 @@
 
 $(foreach target, $(TARGET_TYPES), \
   $(foreach test, $(TEST_ART_RUN_TESTS), \
-    $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), \
-      $(foreach prebuild, $(PREBUILD_TYPES), \
-        $(foreach compiler, $(COMPILER_TYPES), \
-          $(foreach relocate, $(RELOCATE_TYPES), \
-            $(foreach trace, $(TRACE_TYPES), \
-              $(foreach gc, $(GC_TYPES), \
-                $(foreach jni, $(JNI_TYPES), \
-                  $(foreach image, $(IMAGE_TYPES), \
-                    $(eval $(call define-test-art-run-test,$(target),$(prebuild),$(compiler),$(relocate),$(trace),$(gc),$(jni),$(image),$(test),$(address_size))) \
-                ))))))))))
+    $(foreach run_type, $(RUN_TYPES), \
+      $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), \
+        $(foreach prebuild, $(PREBUILD_TYPES), \
+          $(foreach compiler, $(COMPILER_TYPES), \
+            $(foreach relocate, $(RELOCATE_TYPES), \
+              $(foreach trace, $(TRACE_TYPES), \
+                $(foreach gc, $(GC_TYPES), \
+                  $(foreach jni, $(JNI_TYPES), \
+                    $(foreach image, $(IMAGE_TYPES), \
+                      $(eval $(call define-test-art-run-test,$(target),$(run_type),$(prebuild),$(compiler),$(relocate),$(trace),$(gc),$(jni),$(image),$(test),$(address_size))) \
+                  )))))))))))
 define-test-art-run-test :=
 
 # Define a phony rule whose purpose is to test its prerequisites.
@@ -509,6 +554,9 @@
   $(foreach prebuild, $(PREBUILD_TYPES), $(eval \
     $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(prebuild),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES)))))
 $(foreach target, $(TARGET_TYPES), \
+  $(foreach run-type, $(RUN_TYPES), $(eval \
+    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(run-type),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run-type))_RULES)))))
+$(foreach target, $(TARGET_TYPES), \
   $(foreach compiler, $(COMPILER_TYPES), $(eval \
     $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(compiler),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES)))))
 $(foreach target, $(TARGET_TYPES), \
@@ -562,6 +610,9 @@
 $(foreach target, $(TARGET_TYPES), \
   $(foreach address_size, $(ALL_ADDRESS_SIZES), \
     $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=)))
+$(foreach target, $(TARGET_TYPES), \
+  $(foreach run_type, $(RUN_TYPES), \
+    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=)))
 define-test-art-run-test-group :=
 TARGET_TYPES :=
 PREBUILD_TYPES :=
@@ -574,6 +625,7 @@
 ADDRESS_SIZES_TARGET :=
 ADDRESS_SIZES_HOST :=
 ALL_ADDRESS_SIZES :=
+RUN_TYPES :=
 
 include $(LOCAL_PATH)/Android.libarttest.mk
 include art/test/Android.libnativebridgetest.mk
diff --git a/test/run-test b/test/run-test
index 62c701f..36288d7 100755
--- a/test/run-test
+++ b/test/run-test
@@ -526,9 +526,9 @@
         "./${run}" $run_args "$@" >"$output" 2>&1
     else
         cp "$build_output" "$output"
-        echo "Failed to build in tmpdir=${tmp_dir} from oldwd=${oldwd} and cwd=`pwd`"
-        echo "Non-canonical tmpdir was ${noncanonical_tmp_dir}"
-        echo "build exit status: $build_exit" >>"$output"
+        echo "Failed to build in tmpdir=${tmp_dir} from oldwd=${oldwd} and cwd=`pwd`" >> "$output"
+        echo "Non-canonical tmpdir was ${noncanonical_tmp_dir}" >> "$output"
+        echo "build exit status: $build_exit" >> "$output"
     fi
     ./$check_cmd "$expected" "$output"
     if [ "$?" = "0" ]; then