Merge "Fix error with remembered sets creation."
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 265e8d2..9814cb4 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -45,7 +45,6 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturn(kFPReg);
@@ -89,7 +88,6 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       {
         ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod);
         RegStorage r_tgt = CallHelperSetup(helper_offset);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c7e289d..3f54798 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -407,10 +407,17 @@
   { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
 
-  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" },
-  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" },
-  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" },
-  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" },
+  { kX86Fild32M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M",  "[!0r,!1d]" },
+  { kX86Fild64M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M",  "[!0r,!1d]" },
+  { kX86Fld32M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M",   "[!0r,!1d]" },
+  { kX86Fld64M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M",   "[!0r,!1d]" },
+  { kX86Fstp32M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" },
+  { kX86Fstp64M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" },
+  { kX86Fst32M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0,                { 0x0,  0,    0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M",  "[!0r,!1d]" },
+  { kX86Fst64M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0,                { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r,!1d]" },
+  { kX86Fprem,    kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xD9, 0,    0xF8, 0,    0, 0, 0, 0, false }, "Fprem64",  "" },
+  { kX86Fucompp,  kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xDA, 0,    0xE9, 0,    0, 0, 0, 0, false }, "Fucompp",  "" },
+  { kX86Fstsw16R, kNullary, NO_OPERAND,                                         { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
 
   EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
   { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 3540843..d874aaa 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -148,6 +148,7 @@
                         RegLocation rl_src2);
   void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                        RegLocation rl_src2);
+  void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double);
   void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                 RegLocation rl_src2);
   void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 61623d0..458f9c6 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -48,16 +48,7 @@
       break;
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
-      FlushAllRegs();   // Send everything to home location
-      if (cu_->target64) {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
-                                                false);
-      } else {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
-                                                false);
-      }
-      rl_result = GetReturn(kFPReg);
-      StoreValue(rl_dest, rl_result);
+      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
       return;
     case Instruction::NEG_FLOAT:
       GenNegFloat(rl_dest, rl_src1);
@@ -110,16 +101,7 @@
       break;
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
-      FlushAllRegs();   // Send everything to home location
-      if (cu_->target64) {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
-                                                false);
-      } else {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
-                                                false);
-      }
-      rl_result = GetReturnWide(kFPReg);
-      StoreValueWide(rl_dest, rl_result);
+      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
       return;
     case Instruction::NEG_DOUBLE:
       GenNegDouble(rl_dest, rl_src1);
@@ -356,6 +338,110 @@
   }
 }
 
+void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) {
+  // Compute offsets to the source and destination VRs on stack.
+  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
+  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
+  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
+
+  // Update the in-register state of sources.
+  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
+  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);
+
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
+  // If the source is in physical register, then put it in its location on stack.
+  if (rl_src1.location == kLocPhysReg) {
+    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);
+
+    if (reg_info != nullptr && reg_info->IsTemp()) {
+      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
+      FlushSpecificReg(reg_info);
+      // ResetDef to prevent NullifyRange from removing stores.
+      ResetDef(rl_src1.reg);
+    } else {
+      // It must have been register promoted if it is not a temp but is still in physical
+      // register. Since we need it to be in memory to convert, we place it there now.
+      StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32);
+    }
+  }
+
+  if (rl_src2.location == kLocPhysReg) {
+    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
+    if (reg_info != nullptr && reg_info->IsTemp()) {
+      FlushSpecificReg(reg_info);
+      ResetDef(rl_src2.reg);
+    } else {
+      StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32);
+    }
+  }
+
+  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;
+
+  // Push the source virtual registers onto the x87 stack.
+  LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+                             src2_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, is_double /* is64bit */);
+
+  LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+                             src1_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, is_double /* is64bit */);
+
+  FlushReg(rs_rAX);
+  Clobber(rs_rAX);
+  LockTemp(rs_rAX);
+
+  LIR* retry = NewLIR0(kPseudoTargetLabel);
+
+  // Divide ST(0) by ST(1) and place result to ST(0).
+  NewLIR0(kX86Fprem);
+
+  // Move FPU status word to AX.
+  NewLIR0(kX86Fstsw16R);
+
+  // Check if reduction is complete.
+  OpRegImm(kOpAnd, rs_rAX, 0x400);
+
+  // If no then continue to compute remainder.
+  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  branch->target = retry;
+
+  FreeTemp(rs_rAX);
+
+  // Now store result in the destination VR's stack location.
+  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
+  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
+  LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
+  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);
+
+  // Pop ST(1) and ST(0).
+  NewLIR0(kX86Fucompp);
+
+  /*
+   * The result is in a physical register if it was in a temp or was register
+   * promoted. For that reason it is enough to check if it is in physical
+   * register. If it is, then we must do all of the bookkeeping necessary to
+   * invalidate temp (if needed) and load in promoted register (if needed).
+   * If the result's location is in memory, then we do not need to do anything
+   * more since the fstp has already placed the correct value in memory.
+   */
+  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
+      UpdateLocTyped(rl_dest, kFPReg);
+  if (rl_result.location == kLocPhysReg) {
+    rl_result = EvalLoc(rl_dest, kFPReg, true);
+    if (is_double) {
+      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
+      StoreFinalValueWide(rl_dest, rl_result);
+    } else {
+      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
+      StoreFinalValue(rl_dest, rl_result);
+    }
+  }
+}
+
 void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2) {
   bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
@@ -502,5 +588,4 @@
 }
 
 
-
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f1b5811..28b9dca 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -572,8 +572,15 @@
   kX86PsllqRI,                  // left shift of floating point registers 64 bits x 2
   kX86Fild32M,                  // push 32-bit integer on x87 stack
   kX86Fild64M,                  // push 64-bit integer on x87 stack
+  kX86Fld32M,                   // push float on x87 stack
+  kX86Fld64M,                   // push double on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
   kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
+  kX86Fst32M,                   // do 32-bit store
+  kX86Fst64M,                   // do 64-bit store
+  kX86Fprem,                    // remainder from dividing of two floating point values
+  kX86Fucompp,                  // compare floating point values and pop x87 fp stack twice
+  kX86Fstsw16R,                 // store FPU status word
   Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
   kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 6a337b3..96ffc93 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -25,14 +25,14 @@
 namespace art {
 namespace arm {
 
-static const uint32_t gZero = 0;
+static constexpr uint32_t gZero = 0;
 
 void ArmContext::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfSRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[PC] = &pc_;
@@ -69,31 +69,46 @@
   }
 }
 
-void ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
+bool ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  DCHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ArmContext::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+  DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void ArmContext::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[R0] = const_cast<uint32_t*>(&gZero);
   gprs_[R1] = const_cast<uint32_t*>(&gZero);
-  gprs_[R2] = NULL;
-  gprs_[R3] = NULL;
+  gprs_[R2] = nullptr;
+  gprs_[R3] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
 
 void ArmContext::DoLongJump() {
-  uintptr_t gprs[16];
-  uint32_t fprs[32];
+  uintptr_t gprs[kNumberOfCoreRegisters];
+  uint32_t fprs[kNumberOfSRegisters];
   for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : ArmContext::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : ArmContext::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfSRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : ArmContext::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : ArmContext::kBadFprBase + i;
   }
   DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
   art_quick_do_long_jump(gprs, fprs);
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 2ccce8d..e894f16 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -32,31 +32,53 @@
 
   virtual ~ArmContext() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
-    SetGPR(PC, new_pc);
+  void SetPC(uintptr_t new_pc) OVERRIDE {
+    bool success = SetGPR(PC, new_pc);
+    CHECK(success) << "Failed to set PC register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, initialized to NULL or the specific registers below.
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 09e8b59..3eb92c8 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -28,14 +28,14 @@
 namespace art {
 namespace arm64 {
 
-static const uint64_t gZero = 0;
+static constexpr uint64_t gZero = 0;
 
 void Arm64Context::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[LR] = &pc_;
@@ -73,73 +73,88 @@
   }
 }
 
-void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  DCHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool Arm64Context::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+  DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void Arm64Context::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[X0] = const_cast<uint64_t*>(&gZero);
-  gprs_[X1] = NULL;
-  gprs_[X2] = NULL;
-  gprs_[X3] = NULL;
-  gprs_[X4] = NULL;
-  gprs_[X5] = NULL;
-  gprs_[X6] = NULL;
-  gprs_[X7] = NULL;
-  gprs_[X8] = NULL;
-  gprs_[X9] = NULL;
-  gprs_[X10] = NULL;
-  gprs_[X11] = NULL;
-  gprs_[X12] = NULL;
-  gprs_[X13] = NULL;
-  gprs_[X14] = NULL;
-  gprs_[X15] = NULL;
+  gprs_[X1] = nullptr;
+  gprs_[X2] = nullptr;
+  gprs_[X3] = nullptr;
+  gprs_[X4] = nullptr;
+  gprs_[X5] = nullptr;
+  gprs_[X6] = nullptr;
+  gprs_[X7] = nullptr;
+  gprs_[X8] = nullptr;
+  gprs_[X9] = nullptr;
+  gprs_[X10] = nullptr;
+  gprs_[X11] = nullptr;
+  gprs_[X12] = nullptr;
+  gprs_[X13] = nullptr;
+  gprs_[X14] = nullptr;
+  gprs_[X15] = nullptr;
 
   // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
 
-  fprs_[D0] = NULL;
-  fprs_[D1] = NULL;
-  fprs_[D2] = NULL;
-  fprs_[D3] = NULL;
-  fprs_[D4] = NULL;
-  fprs_[D5] = NULL;
-  fprs_[D6] = NULL;
-  fprs_[D7] = NULL;
+  fprs_[D0] = nullptr;
+  fprs_[D1] = nullptr;
+  fprs_[D2] = nullptr;
+  fprs_[D3] = nullptr;
+  fprs_[D4] = nullptr;
+  fprs_[D5] = nullptr;
+  fprs_[D6] = nullptr;
+  fprs_[D7] = nullptr;
 
-  fprs_[D16] = NULL;
-  fprs_[D17] = NULL;
-  fprs_[D18] = NULL;
-  fprs_[D19] = NULL;
-  fprs_[D20] = NULL;
-  fprs_[D21] = NULL;
-  fprs_[D22] = NULL;
-  fprs_[D23] = NULL;
-  fprs_[D24] = NULL;
-  fprs_[D25] = NULL;
-  fprs_[D26] = NULL;
-  fprs_[D27] = NULL;
-  fprs_[D28] = NULL;
-  fprs_[D29] = NULL;
-  fprs_[D30] = NULL;
-  fprs_[D31] = NULL;
+  fprs_[D16] = nullptr;
+  fprs_[D17] = nullptr;
+  fprs_[D18] = nullptr;
+  fprs_[D19] = nullptr;
+  fprs_[D20] = nullptr;
+  fprs_[D21] = nullptr;
+  fprs_[D22] = nullptr;
+  fprs_[D23] = nullptr;
+  fprs_[D24] = nullptr;
+  fprs_[D25] = nullptr;
+  fprs_[D26] = nullptr;
+  fprs_[D27] = nullptr;
+  fprs_[D28] = nullptr;
+  fprs_[D29] = nullptr;
+  fprs_[D30] = nullptr;
+  fprs_[D31] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
 
 void Arm64Context::DoLongJump() {
   uint64_t gprs[32];
-  uint64_t fprs[32];
+  uint64_t fprs[kNumberOfDRegisters];
 
   // Do not use kNumberOfCoreRegisters, as this is with the distinction of SP and XZR
   for (size_t i = 0; i < 32; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : Arm64Context::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : Arm64Context::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : Arm64Context::kBadGprBase + i;
   }
   DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
   art_quick_do_long_jump(gprs, fprs);
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index d40e291..1f69869 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -32,31 +32,53 @@
 
   ~Arm64Context() {}
 
-  void Reset();
+  void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE;
 
-  void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  void SetPC(uintptr_t new_lr) {
-    SetGPR(LR, new_lr);
+  void SetPC(uintptr_t new_lr) OVERRIDE {
+    bool success = SetGPR(LR, new_lr);
+    CHECK(success) << "Failed to set LR register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  void SetGPR(uint32_t reg, uintptr_t value);
-  void SmashCallerSaves();
-  void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, initialized to NULL or the specific registers below.
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 84ee778..cbb2c27 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -72,17 +72,14 @@
 extern "C" void art_quick_lock_object(void*);
 extern "C" void art_quick_unlock_object(void*);
 
-// Math entrypoints.
-extern int32_t CmpgDouble(double a, double b);
-extern int32_t CmplDouble(double a, double b);
-extern int32_t CmpgFloat(float a, float b);
-extern int32_t CmplFloat(float a, float b);
-
 // Single-precision FP arithmetics.
-extern "C" float fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
+extern "C" float art_quick_fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
 
 // Double-precision FP arithmetics.
-extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+extern "C" double art_quick_fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+
+// Memcpy
+extern "C" void* art_quick_memcpy(void* __restrict, const void* __restrict, size_t);
 
 // Intrinsic entrypoints.
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
@@ -175,31 +172,31 @@
   qpoints->pUnlockObject = art_quick_unlock_object;
 
   // Math
-  // TODO NULL entrypoints not needed for ARM64 - generate inline.
-  qpoints->pCmpgDouble = CmpgDouble;
-  qpoints->pCmpgFloat = CmpgFloat;
-  qpoints->pCmplDouble = CmplDouble;
-  qpoints->pCmplFloat = CmplFloat;
-  qpoints->pFmod = fmod;
-  qpoints->pL2d = NULL;
-  qpoints->pFmodf = fmodf;
-  qpoints->pL2f = NULL;
-  qpoints->pD2iz = NULL;
-  qpoints->pF2iz = NULL;
-  qpoints->pIdivmod = NULL;
-  qpoints->pD2l = NULL;
-  qpoints->pF2l = NULL;
-  qpoints->pLdiv = NULL;
-  qpoints->pLmod = NULL;
-  qpoints->pLmul = NULL;
-  qpoints->pShlLong = NULL;
-  qpoints->pShrLong = NULL;
-  qpoints->pUshrLong = NULL;
+  // TODO nullptr entrypoints not needed for ARM64 - generate inline.
+  qpoints->pCmpgDouble = nullptr;
+  qpoints->pCmpgFloat = nullptr;
+  qpoints->pCmplDouble = nullptr;
+  qpoints->pCmplFloat = nullptr;
+  qpoints->pFmod = art_quick_fmod;
+  qpoints->pL2d = nullptr;
+  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pL2f = nullptr;
+  qpoints->pD2iz = nullptr;
+  qpoints->pF2iz = nullptr;
+  qpoints->pIdivmod = nullptr;
+  qpoints->pD2l = nullptr;
+  qpoints->pF2l = nullptr;
+  qpoints->pLdiv = nullptr;
+  qpoints->pLmod = nullptr;
+  qpoints->pLmul = nullptr;
+  qpoints->pShlLong = nullptr;
+  qpoints->pShrLong = nullptr;
+  qpoints->pUshrLong = nullptr;
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
-  qpoints->pMemcpy = memcpy;
+  qpoints->pMemcpy = art_quick_memcpy;
 
   // Invocation
   qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d704788..4ede453 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1862,3 +1862,22 @@
     csel x0, x0, x1, ne          // x0 := x0 != 0 ? x0 : x1
     ret
 END art_quick_string_compareto
+
+// Macro to facilitate adding new entrypoints which call to native function directly.
+// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS.
+// But we might introduce more differences.
+.macro NATIVE_DOWNCALL name, entrypoint
+    .extern \entrypoint
+ENTRY \name
+    sub    sp, sp, #16
+    stp    xSELF, xLR, [sp]
+    bl     \entrypoint
+    ldp    xSELF, xLR, [sp]
+    add    sp, sp, #16
+    ret
+END \name
+.endm
+
+NATIVE_DOWNCALL art_quick_fmod fmod
+NATIVE_DOWNCALL art_quick_fmodf fmodf
+NATIVE_DOWNCALL art_quick_memcpy memcpy
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index f7b7835..20a84dd 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -38,30 +38,40 @@
   // Re-initializes the registers for context re-use.
   virtual void Reset() = 0;
 
-  // Read values from callee saves in the given frame. The frame also holds
+  // Reads values from callee saves in the given frame. The frame also holds
   // the method that holds the layout.
   virtual void FillCalleeSaves(const StackVisitor& fr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  // Set the stack pointer value
+  // Sets the stack pointer value.
   virtual void SetSP(uintptr_t new_sp) = 0;
 
-  // Set the program counter value
+  // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
   // Gets the given GPRs address.
   virtual uintptr_t* GetGPRAddress(uint32_t reg) = 0;
 
-  // Read the given GPR
-  virtual uintptr_t GetGPR(uint32_t reg) = 0;
+  // Reads the given GPR. Returns true if we successfully read the register and
+  // set its value into 'val', returns false otherwise.
+  virtual bool GetGPR(uint32_t reg, uintptr_t* val) = 0;
 
-  // Set the given GPR.
-  virtual void SetGPR(uint32_t reg, uintptr_t value) = 0;
+  // Sets the given GPR. Returns true if we successfully write the given value
+  // into the register, returns false otherwise.
+  virtual bool SetGPR(uint32_t reg, uintptr_t value) = 0;
 
-  // Smash the caller save registers. If we're throwing, we don't want to return bogus values.
+  // Reads the given FPR. Returns true if we successfully read the register and
+  // set its value into 'val', returns false otherwise.
+  virtual bool GetFPR(uint32_t reg, uintptr_t* val) = 0;
+
+  // Sets the given FPR. Returns true if we successfully write the given value
+  // into the register, returns false otherwise.
+  virtual bool SetFPR(uint32_t reg, uintptr_t value) = 0;
+
+  // Smashes the caller save registers. If we're throwing, we don't want to return bogus values.
   virtual void SmashCallerSaves() = 0;
 
-  // Switch execution of the executing context to this context
+  // Switches execution of the executing context to this context
   virtual void DoLongJump() = 0;
 
  protected:
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index ad28891..789dbbb 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -24,14 +24,14 @@
 namespace art {
 namespace mips {
 
-static const uint32_t gZero = 0;
+static constexpr uint32_t gZero = 0;
 
 void MipsContext::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfFRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[RA] = &ra_;
@@ -68,20 +68,35 @@
   }
 }
 
-void MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
+bool MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   CHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool MipsContext::SetFPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+  CHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void MipsContext::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[V0] = const_cast<uint32_t*>(&gZero);
   gprs_[V1] = const_cast<uint32_t*>(&gZero);
-  gprs_[A1] = NULL;
-  gprs_[A2] = NULL;
-  gprs_[A3] = NULL;
+  gprs_[A1] = nullptr;
+  gprs_[A2] = nullptr;
+  gprs_[A3] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
@@ -90,10 +105,10 @@
   uintptr_t gprs[kNumberOfCoreRegisters];
   uint32_t fprs[kNumberOfFRegisters];
   for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : MipsContext::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : MipsContext::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfFRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : MipsContext::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : MipsContext::kBadGprBase + i;
   }
   art_quick_do_long_jump(gprs, fprs);
 }
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index d5f27ae..f2ee335 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -31,31 +31,53 @@
   }
   virtual ~MipsContext() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
-    SetGPR(RA, new_pc);
+  void SetPC(uintptr_t new_pc) OVERRIDE {
+    bool success = SetGPR(RA, new_pc);
+    CHECK(success) << "Failed to set RA register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to registers in the stack, initialized to NULL except for the special cases below.
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 8c98d91..37049cf 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -24,11 +24,11 @@
 namespace art {
 namespace x86 {
 
-static const uintptr_t gZero = 0;
+static constexpr uintptr_t gZero = 0;
 
 void X86Context::Reset() {
   for (size_t  i = 0; i < kNumberOfCpuRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   gprs_[ESP] = &esp_;
   // Initialize registers with easy to spot debug values.
@@ -57,15 +57,19 @@
   // This needs to be 0 because we want a null/zero return value.
   gprs_[EAX] = const_cast<uintptr_t*>(&gZero);
   gprs_[EDX] = const_cast<uintptr_t*>(&gZero);
-  gprs_[ECX] = NULL;
-  gprs_[EBX] = NULL;
+  gprs_[ECX] = nullptr;
+  gprs_[EBX] = nullptr;
 }
 
-void X86Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool X86Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
   CHECK_NE(gprs_[reg], &gZero);
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void X86Context::DoLongJump() {
@@ -74,7 +78,7 @@
   // the top for the stack pointer that doesn't get popped in a pop-all.
   volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
-    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86Context::kBadGprBase + i;
+    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86Context::kBadGprBase + i;
   }
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - kWordSize;
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index 1c51026..a350b25 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -31,32 +31,49 @@
   }
   virtual ~X86Context() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(ESP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(ESP, new_sp);
+    CHECK(success) << "Failed to set ESP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
+  void SetPC(uintptr_t new_pc) OVERRIDE {
     eip_ = new_pc;
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    LOG(FATAL) << "Floating-point registers are all caller save in X86";
+    return false;
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE {
+    LOG(FATAL) << "Floating-point registers are all caller save in X86";
+    return false;
+  }
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, floating point registers are all caller save. Values are
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c30dca1..a85e250 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -69,8 +69,6 @@
 extern "C" void art_quick_unlock_object(void*);
 
 // Math entrypoints.
-extern "C" double art_quick_fmod(double, double);
-extern "C" float art_quick_fmodf(float, float);
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
@@ -175,9 +173,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  qpoints->pFmod = art_quick_fmod;
+  // qpoints->pFmod = NULL;  // Not needed on x86.
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  qpoints->pFmodf = art_quick_fmodf;
+  // qpoints->pFmodf = NULL;  // Not needed on x86.
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 28e4dd6..ecd8ce6 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -734,35 +734,6 @@
 
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
-DEFINE_FUNCTION art_quick_fmod
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass arg4 b.hi
-    PUSH edx                      // pass arg3 b.lo
-    PUSH ecx                      // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    SETUP_GOT_NOSAVE              // clobbers EBX
-    call PLT_SYMBOL(fmod)         // (jdouble a, jdouble b)
-    fstpl (%esp)                  // pop return value off fp stack
-    movsd (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(28), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-28)
-    ret
-END_FUNCTION art_quick_fmod
-
-DEFINE_FUNCTION art_quick_fmodf
-    PUSH eax                      // alignment padding
-    PUSH ecx                      // pass arg2 b
-    PUSH eax                      // pass arg1 a
-    SETUP_GOT_NOSAVE              // clobbers EBX
-    call PLT_SYMBOL(fmodf)        // (jfloat a, jfloat b)
-    fstps (%esp)                  // pop return value off fp stack
-    movss (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(12), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-12)
-    ret
-END_FUNCTION art_quick_fmodf
-
 DEFINE_FUNCTION art_quick_d2l
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass arg2 a.hi
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 810ef94..0ccbd27 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -24,7 +24,7 @@
 namespace art {
 namespace x86_64 {
 
-static const uintptr_t gZero = 0;
+static constexpr uintptr_t gZero = 0;
 
 void X86_64Context::Reset() {
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
@@ -80,11 +80,26 @@
   gprs_[R11] = nullptr;
 }
 
-void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
   CHECK_NE(gprs_[reg], &gZero);
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+  CHECK_NE(fprs_[reg], &gZero);
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void X86_64Context::DoLongJump() {
@@ -93,7 +108,7 @@
   // the top for the stack pointer that doesn't get popped in a pop-all.
   volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
-    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86_64Context::kBadGprBase + i;
+    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i;
   }
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 055df61..902c3b9 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -31,32 +31,52 @@
   }
   virtual ~X86_64Context() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(RSP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(RSP, new_sp);
+    CHECK(success) << "Failed to set RSP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
+  void SetPC(uintptr_t new_pc) OVERRIDE {
     rip_ = new_pc;
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations. Values are initialized to NULL or the special registers below.
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2612417..92aabee 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -174,9 +174,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  qpoints->pFmod = fmod;
+  // qpoints->pFmod = NULL;  // Not needed on x86.
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  qpoints->pFmodf = fmodf;
+  // qpoints->pFmodf = NULL;  // Not needed on x86.
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 50e9624..f19c353 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2346,100 +2346,125 @@
         return false;
       }
       uint16_t reg = DemangleSlot(slot_, m);
-
+      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
       switch (tag_) {
-      case JDWP::JT_BOOLEAN:
-        {
+        case JDWP::JT_BOOLEAN: {
           CHECK_EQ(width_, 1U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
-          JDWP::Set1(buf_+1, intVal != 0);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
+            JDWP::Set1(buf_+1, intVal != 0);
+          } else {
+            VLOG(jdwp) << "failed to get boolean local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_BYTE:
-        {
+        case JDWP::JT_BYTE: {
           CHECK_EQ(width_, 1U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
-          JDWP::Set1(buf_+1, intVal);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
+            JDWP::Set1(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get byte local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_SHORT:
-      case JDWP::JT_CHAR:
-        {
+        case JDWP::JT_SHORT:
+        case JDWP::JT_CHAR: {
           CHECK_EQ(width_, 2U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
-          JDWP::Set2BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_INT:
-        {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get int local " << reg << " = " << intVal;
-          JDWP::Set4BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_FLOAT:
-        {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal = GetVReg(m, reg, kFloatVReg);
-          VLOG(jdwp) << "get int/float local " << reg << " = " << intVal;
-          JDWP::Set4BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_ARRAY:
-        {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
-          VLOG(jdwp) << "get array local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-            LOG(FATAL) << "Register " << reg << " expected to hold array: " << o;
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
+            JDWP::Set2BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get short/char local " << reg;
+            error_ = kFailureErrorCode;
           }
-          JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          break;
         }
-        break;
-      case JDWP::JT_CLASS_LOADER:
-      case JDWP::JT_CLASS_OBJECT:
-      case JDWP::JT_OBJECT:
-      case JDWP::JT_STRING:
-      case JDWP::JT_THREAD:
-      case JDWP::JT_THREAD_GROUP:
-        {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
-          VLOG(jdwp) << "get object local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-            LOG(FATAL) << "Register " << reg << " expected to hold object: " << o;
+        case JDWP::JT_INT: {
+          CHECK_EQ(width_, 4U);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get int local " << reg << " = " << intVal;
+            JDWP::Set4BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get int local " << reg;
+            error_ = kFailureErrorCode;
           }
-          tag_ = TagFromObject(soa_, o);
-          JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          break;
         }
-        break;
-      case JDWP::JT_DOUBLE:
-        {
+        case JDWP::JT_FLOAT: {
+          CHECK_EQ(width_, 4U);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kFloatVReg, &intVal)) {
+            VLOG(jdwp) << "get float local " << reg << " = " << intVal;
+            JDWP::Set4BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get float local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
+        }
+        case JDWP::JT_ARRAY:
+        case JDWP::JT_CLASS_LOADER:
+        case JDWP::JT_CLASS_OBJECT:
+        case JDWP::JT_OBJECT:
+        case JDWP::JT_STRING:
+        case JDWP::JT_THREAD:
+        case JDWP::JT_THREAD_GROUP: {
+          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
+          uint32_t intVal;
+          if (GetVReg(m, reg, kReferenceVReg, &intVal)) {
+            mirror::Object* o = reinterpret_cast<mirror::Object*>(intVal);
+            VLOG(jdwp) << "get " << tag_ << " object local " << reg << " = " << o;
+            if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+              LOG(FATAL) << "Register " << reg << " expected to hold " << tag_ << " object: " << o;
+            }
+            tag_ = TagFromObject(soa_, o);
+            JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          } else {
+            VLOG(jdwp) << "failed to get " << tag_ << " object local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
+        }
+        case JDWP::JT_DOUBLE: {
           CHECK_EQ(width_, 8U);
-          uint32_t lo = GetVReg(m, reg, kDoubleLoVReg);
-          uint64_t hi = GetVReg(m, reg + 1, kDoubleHiVReg);
-          uint64_t longVal = (hi << 32) | lo;
-          VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal;
-          JDWP::Set8BE(buf_+1, longVal);
+          uint32_t lo;
+          uint32_t hi;
+          if (GetVReg(m, reg, kDoubleLoVReg, &lo) && GetVReg(m, reg + 1, kDoubleHiVReg, &hi)) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "get double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            JDWP::Set8BE(buf_+1, longVal);
+          } else {
+            VLOG(jdwp) << "failed to get double local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_LONG:
-        {
+        case JDWP::JT_LONG: {
           CHECK_EQ(width_, 8U);
-          uint32_t lo = GetVReg(m, reg, kLongLoVReg);
-          uint64_t hi = GetVReg(m, reg + 1, kLongHiVReg);
-          uint64_t longVal = (hi << 32) | lo;
-          VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal;
-          JDWP::Set8BE(buf_+1, longVal);
+          uint32_t lo;
+          uint32_t hi;
+          if (GetVReg(m, reg, kLongLoVReg, &lo) && GetVReg(m, reg + 1, kLongHiVReg, &hi)) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "get long local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            JDWP::Set8BE(buf_+1, longVal);
+          } else {
+            VLOG(jdwp) << "failed to get long local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      default:
-        LOG(FATAL) << "Unknown tag " << tag_;
-        break;
+        default:
+          LOG(FATAL) << "Unknown tag " << tag_;
+          break;
       }
 
       // Prepend tag, which may have been updated.
@@ -2495,48 +2520,89 @@
         return false;
       }
       uint16_t reg = DemangleSlot(slot_, m);
-
+      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
       switch (tag_) {
         case JDWP::JT_BOOLEAN:
         case JDWP::JT_BYTE:
           CHECK_EQ(width_, 1U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set boolean/byte local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_SHORT:
         case JDWP::JT_CHAR:
           CHECK_EQ(width_, 2U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set short/char local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_INT:
           CHECK_EQ(width_, 4U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set int local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_FLOAT:
           CHECK_EQ(width_, 4U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg)) {
+            VLOG(jdwp) << "failed to set float local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_ARRAY:
+        case JDWP::JT_CLASS_LOADER:
+        case JDWP::JT_CLASS_OBJECT:
         case JDWP::JT_OBJECT:
         case JDWP::JT_STRING:
-        {
+        case JDWP::JT_THREAD:
+        case JDWP::JT_THREAD_GROUP: {
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = gRegistry->Get<mirror::Object*>(static_cast<JDWP::ObjectId>(value_));
           if (o == ObjectRegistry::kInvalidObject) {
-            UNIMPLEMENTED(FATAL) << "return an error code when given an invalid object to store";
+            VLOG(jdwp) << tag_ << " object " << o << " is an invalid object";
+            error_ = JDWP::ERR_INVALID_OBJECT;
+          } else if (!SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+                              kReferenceVReg)) {
+            VLOG(jdwp) << "failed to set " << tag_ << " object local " << reg << " = " << o;
+            error_ = kFailureErrorCode;
           }
-          SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)), kReferenceVReg);
+          break;
         }
-        break;
-        case JDWP::JT_DOUBLE:
+        case JDWP::JT_DOUBLE: {
           CHECK_EQ(width_, 8U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kDoubleLoVReg);
-          SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kDoubleHiVReg);
+          const uint32_t lo = static_cast<uint32_t>(value_);
+          const uint32_t hi = static_cast<uint32_t>(value_ >> 32);
+          bool success = SetVReg(m, reg, lo, kDoubleLoVReg);
+          success &= SetVReg(m, reg + 1, hi, kDoubleHiVReg);
+          if (!success) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "failed to set double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            error_ = kFailureErrorCode;
+          }
           break;
-        case JDWP::JT_LONG:
+        }
+        case JDWP::JT_LONG: {
           CHECK_EQ(width_, 8U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kLongLoVReg);
-          SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kLongHiVReg);
+          const uint32_t lo = static_cast<uint32_t>(value_);
+          const uint32_t hi = static_cast<uint32_t>(value_ >> 32);
+          bool success = SetVReg(m, reg, lo, kLongLoVReg);
+          success &= SetVReg(m, reg + 1, hi, kLongHiVReg);
+          if (!success) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "failed to set double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            error_ = kFailureErrorCode;
+          }
           break;
+        }
         default:
           LOG(FATAL) << "Unknown tag " << tag_;
           break;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 890036b..c72913a 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -176,7 +176,7 @@
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true, &timings_, clear_soft_references_, &IsMarkedCallback, &MarkObjectCallback,
+      true, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback,
       &ProcessMarkStackCallback, this);
 }
 
@@ -374,6 +374,10 @@
   reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
 }
 
+bool MarkSweep::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) {
+  return reinterpret_cast<MarkSweep*>(arg)->IsMarked(ref->AsMirrorPtr());
+}
+
 class MarkSweepMarkObjectSlowPath {
  public:
   explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
@@ -1170,11 +1174,11 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref) {
-  DCHECK(klass != nullptr);
   if (kCountJavaLangRefs) {
     ++reference_count_;
   }
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, &HeapReferenceMarkedCallback,
+                                                         this);
 }
 
 class MarkObjectVisitor {
@@ -1270,6 +1274,7 @@
 
 inline bool MarkSweep::IsMarked(const Object* object) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  DCHECK(object != nullptr);
   if (immune_region_.ContainsObject(object)) {
     return true;
   }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index a0a0dd8..a44d8a1 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -178,6 +178,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index e5bb1cc..badf8b3 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -164,7 +164,7 @@
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      false, &timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+      false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback,
       &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
@@ -649,6 +649,22 @@
   Runtime::Current()->VisitRoots(MarkRootCallback, this);
 }
 
+bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
+                                            void* arg) {
+  mirror::Object* obj = object->AsMirrorPtr();
+  mirror::Object* new_obj =
+      reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(obj);
+  if (new_obj == nullptr) {
+    return false;
+  }
+  if (new_obj != obj) {
+    // Write barrier is not necessary since it still points to the same object, just at a different
+    // address.
+    object->Assign(new_obj);
+  }
+  return true;
+}
+
 mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
@@ -698,7 +714,7 @@
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
-                                                         MarkedForwardingAddressCallback, this);
+                                                         &HeapReferenceMarkedCallback, this);
 }
 
 class SemiSpaceMarkObjectVisitor {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index a95abe4..bff0847 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -162,6 +162,10 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 8e1132d..5cde451 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1413,7 +1413,7 @@
   ChangeCollector(collector_type);
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
-  reference_processor_.EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences(self);
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
@@ -1816,7 +1816,7 @@
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
   // Enqueue cleared references.
-  reference_processor_.EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences(self);
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(collector);
   const size_t duration = collector->GetDurationNs();
@@ -1842,7 +1842,7 @@
                      << ((i != pause_times.size() - 1) ? "," : "");
     }
     LOG(INFO) << gc_cause << " " << collector->GetName()
-              << " GC freed "  <<  collector->GetFreedObjects() << "("
+              << " GC freed "  << collector->GetFreedObjects() << "("
               << PrettySize(collector->GetFreedBytes()) << ") AllocSpace objects, "
               << collector->GetFreedLargeObjects() << "("
               << PrettySize(collector->GetFreedLargeObjectBytes()) << ") LOS objects, "
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 7988af7..3ff9889 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -44,36 +44,35 @@
 
 mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) {
   mirror::Object* const referent = reference->GetReferent();
-  if (LIKELY(!slow_path_enabled_)) {
+  // If the referent is null then it is already cleared, we can just return null since there is no
+  // scenario where it becomes non-null during the reference processing phase.
+  if (LIKELY(!slow_path_enabled_) || referent == nullptr) {
     return referent;
   }
-  // Another fast path, the referent is cleared, we can just return null since there is no scenario
-  // where it becomes non-null.
-  if (referent == nullptr) {
-    return nullptr;
-  }
   MutexLock mu(self, lock_);
   while (slow_path_enabled_) {
-    mirror::Object* const referent = reference->GetReferent();
-    // If the referent became cleared, return it.
-    if (referent == nullptr) {
+    mirror::HeapReference<mirror::Object>* const referent_addr =
+        reference->GetReferentReferenceAddr();
+    // If the referent became cleared, return it. Don't need barrier since thread roots can't get
+    // updated until after we leave the function due to holding the mutator lock.
+    if (referent_addr->AsMirrorPtr() == nullptr) {
       return nullptr;
     }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
-    // it to the mutator as long as the GC is not preserving references. If the GC is
-    IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
+    // it to the mutator as long as the GC is not preserving references.
+    IsHeapReferenceMarkedCallback* const is_marked_callback =
+        process_references_args_.is_marked_callback_;
     if (LIKELY(is_marked_callback != nullptr)) {
-      mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
       // If it's null it means not marked, but it could become marked if the referent is reachable
       // by finalizer referents. So we can not return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
       // in the heap causing corruption since this field would get swept.
-      if (obj != nullptr) {
+      if (is_marked_callback(referent_addr, process_references_args_.arg_)) {
         if (!preserving_references_ ||
            (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
-          return obj;
+          return referent_addr->AsMirrorPtr();
         }
       }
     }
@@ -82,10 +81,14 @@
   return reference->GetReferent();
 }
 
-mirror::Object* ReferenceProcessor::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+bool ReferenceProcessor::PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj,
+                                                       void* arg) {
   auto* const args = reinterpret_cast<ProcessReferencesArgs*>(arg);
-  // TODO: Not preserve all soft references.
-  return args->mark_callback_(obj, args->arg_);
+  // TODO: Add smarter logic for preserving soft references.
+  mirror::Object* new_obj = args->mark_callback_(obj->AsMirrorPtr(), args->arg_);
+  DCHECK(new_obj != nullptr);
+  obj->Assign(new_obj);
+  return true;
 }
 
 void ReferenceProcessor::StartPreservingReferences(Thread* self) {
@@ -103,7 +106,7 @@
 // Process reference class instances and schedule finalizations.
 void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings,
                                            bool clear_soft_references,
-                                           IsMarkedCallback* is_marked_callback,
+                                           IsHeapReferenceMarkedCallback* is_marked_callback,
                                            MarkObjectCallback* mark_object_callback,
                                            ProcessMarkStackCallback* process_mark_stack_callback,
                                            void* arg) {
@@ -132,8 +135,8 @@
     }
   }
   // Clear all remaining soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   {
     TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" :
         "(Paused)EnqueueFinalizerReferences", timings);
@@ -141,7 +144,7 @@
       StartPreservingReferences(self);
     }
     // Preserve all white objects with finalize methods and schedule them for finalization.
-    finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+    finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, is_marked_callback,
                                                           mark_object_callback, arg);
     process_mark_stack_callback(arg);
     if (concurrent) {
@@ -149,10 +152,10 @@
     }
   }
   // Clear all finalizer referent reachable soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   // Clear all phantom references with white referents.
-  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   // At this point all reference queues other than the cleared references should be empty.
   DCHECK(soft_reference_queue_.IsEmpty());
   DCHECK(weak_reference_queue_.IsEmpty());
@@ -176,39 +179,33 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                                                IsMarkedCallback is_marked_callback, void* arg) {
+                                                IsHeapReferenceMarkedCallback* is_marked_callback,
+                                                void* arg) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
+  DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
-  mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-  if (referent != nullptr) {
-    mirror::Object* forward_address = is_marked_callback(referent, arg);
-    // Null means that the object is not currently marked.
-    if (forward_address == nullptr) {
-      Thread* self = Thread::Current();
-      // TODO: Remove these locks, and use atomic stacks for storing references?
-      // We need to check that the references haven't already been enqueued since we can end up
-      // scanning the same reference multiple times due to dirty cards.
-      if (klass->IsSoftReferenceClass()) {
-        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsWeakReferenceClass()) {
-        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsFinalizerReferenceClass()) {
-        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsPhantomReferenceClass()) {
-        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else {
-        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
-                   << klass->GetAccessFlags();
-      }
-    } else if (referent != forward_address) {
-      // Referent is already marked and we need to update it.
-      ref->SetReferent<false>(forward_address);
+  mirror::HeapReference<mirror::Object>* referent = ref->GetReferentReferenceAddr();
+  if (referent->AsMirrorPtr() != nullptr && !is_marked_callback(referent, arg)) {
+    Thread* self = Thread::Current();
+    // TODO: Remove these locks, and use atomic stacks for storing references?
+    // We need to check that the references haven't already been enqueued since we can end up
+    // scanning the same reference multiple times due to dirty cards.
+    if (klass->IsSoftReferenceClass()) {
+      soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsWeakReferenceClass()) {
+      weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsFinalizerReferenceClass()) {
+      finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsPhantomReferenceClass()) {
+      phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else {
+      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                 << klass->GetAccessFlags();
     }
   }
 }
 
-void ReferenceProcessor::EnqueueClearedReferences() {
-  Thread* self = Thread::Current();
+void ReferenceProcessor::EnqueueClearedReferences(Thread* self) {
   Locks::mutator_lock_->AssertNotHeld(self);
   if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index f082a9e..ff7da52 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -40,9 +40,10 @@
 class ReferenceProcessor {
  public:
   explicit ReferenceProcessor();
-  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  static bool PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references,
-                         IsMarkedCallback* is_marked_callback,
+                         IsHeapReferenceMarkedCallback* is_marked_callback,
                          MarkObjectCallback* mark_object_callback,
                          ProcessMarkStackCallback* process_mark_stack_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -54,21 +55,21 @@
   // Decode the referent, may block if references are being processed.
   mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
-  void EnqueueClearedReferences() LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void EnqueueClearedReferences(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_);
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                              IsMarkedCallback is_marked_callback, void* arg)
+                              IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   class ProcessReferencesArgs {
    public:
-    ProcessReferencesArgs(IsMarkedCallback* is_marked_callback,
+    ProcessReferencesArgs(IsHeapReferenceMarkedCallback* is_marked_callback,
                           MarkObjectCallback* mark_callback, void* arg)
         : is_marked_callback_(is_marked_callback), mark_callback_(mark_callback), arg_(arg) {
     }
 
     // The is marked callback is null when the args aren't set up.
-    IsMarkedCallback* is_marked_callback_;
+    IsHeapReferenceMarkedCallback* is_marked_callback_;
     MarkObjectCallback* mark_callback_;
     void* arg_;
   };
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 3910c29..19476e6 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -26,8 +26,7 @@
 namespace gc {
 
 ReferenceQueue::ReferenceQueue()
-    : lock_("reference queue lock"),
-      list_(nullptr) {
+    : lock_("reference queue lock"), list_(nullptr) {
 }
 
 void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
@@ -104,76 +103,61 @@
   }
 }
 
-void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references,
-                                          IsMarkedCallback* preserve_callback,
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references,
+                                          IsHeapReferenceMarkedCallback* preserve_callback,
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address == nullptr) {
-        // Referent is white, clear it.
-        if (Runtime::Current()->IsActiveTransaction()) {
-          ref->ClearReferent<true>();
-        } else {
-          ref->ClearReferent<false>();
-        }
-        if (ref->IsEnqueuable()) {
-          cleared_references.EnqueuePendingReference(ref);
-        }
-      } else if (referent != forward_address) {
-        // Object moved, need to updated the referent.
-        ref->SetReferent<false>(forward_address);
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr && !preserve_callback(referent_addr, arg)) {
+      // Referent is white, clear it.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        ref->ClearReferent<true>();
+      } else {
+        ref->ClearReferent<false>();
+      }
+      if (ref->IsEnqueuable()) {
+        cleared_references->EnqueuePendingReference(ref);
       }
     }
   }
 }
 
-void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                                IsMarkedCallback* is_marked_callback,
+void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+                                                IsHeapReferenceMarkedCallback* is_marked_callback,
                                                 MarkObjectCallback* mark_object_callback,
                                                 void* arg) {
   while (!IsEmpty()) {
     mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = is_marked_callback(referent, arg);
-      // If the referent isn't marked, mark it and update the
-      if (forward_address == nullptr) {
-        forward_address = mark_object_callback(referent, arg);
-        // If the referent is non-null the reference must queuable.
-        DCHECK(ref->IsEnqueuable());
-        // Move the updated referent to the zombie field.
-        if (Runtime::Current()->IsActiveTransaction()) {
-          ref->SetZombie<true>(forward_address);
-          ref->ClearReferent<true>();
-        } else {
-          ref->SetZombie<false>(forward_address);
-          ref->ClearReferent<false>();
-        }
-        cleared_references.EnqueueReference(ref);
-      } else if (referent != forward_address) {
-        ref->SetReferent<false>(forward_address);
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr && !is_marked_callback(referent_addr, arg)) {
+      mirror::Object* forward_address = mark_object_callback(referent_addr->AsMirrorPtr(), arg);
+      // If the referent is non-null the reference must queuable.
+      DCHECK(ref->IsEnqueuable());
+      // Move the updated referent to the zombie field.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        ref->SetZombie<true>(forward_address);
+        ref->ClearReferent<true>();
+      } else {
+        ref->SetZombie<false>(forward_address);
+        ref->ClearReferent<false>();
       }
+      cleared_references->EnqueueReference(ref);
     }
   }
 }
 
-void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback,
-                                                void* arg) {
+void ReferenceQueue::ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback,
+                                           void* arg) {
   if (UNLIKELY(IsEmpty())) {
     return;
   }
   mirror::Reference* const head = list_;
   mirror::Reference* ref = head;
   do {
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address != nullptr && forward_address != referent) {
-        ref->SetReferent<false>(forward_address);
-      }
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr) {
+      UNUSED(preserve_callback(referent_addr, arg));
     }
     ref = ref->GetPendingNext();
   } while (LIKELY(ref != head));
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 1d8cc1a..8ef0d20 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -58,23 +58,22 @@
   mirror::Reference* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
   // zombie field, and the referent field is cleared.
-  void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                  IsMarkedCallback* is_marked_callback,
+  void EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+                                  IsHeapReferenceMarkedCallback* is_marked_callback,
                                   MarkObjectCallback* mark_object_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
+  void ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
-  void ClearWhiteReferences(ReferenceQueue& cleared_references,
-                            IsMarkedCallback* is_marked_callback,
-                            void* arg)
+  void ClearWhiteReferences(ReferenceQueue* cleared_references,
+                            IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) const
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsEmpty() const {
     return list_ == nullptr;
   }
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 0b6e759..9c9d87b 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -21,6 +21,13 @@
 
 namespace art {
 
+namespace gc {
+
+class ReferenceProcessor;
+class ReferenceQueue;
+
+}  // namespace gc
+
 struct ReferenceOffsets;
 struct FinalizerReferenceOffsets;
 
@@ -41,7 +48,6 @@
   static MemberOffset ReferentOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Reference, referent_);
   }
-
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Object* GetReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldObjectVolatile<Object, kDefaultVerifyFlags, kReadBarrierOption>(
@@ -55,7 +61,6 @@
   void ClearReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
-
   // Volatile read/write is not necessary since the java pending next is only accessed from
   // the java threads for cleared references. Once these cleared references have a null referent,
   // we never end up reading their pending next from the GC again.
@@ -76,6 +81,11 @@
   bool IsEnqueuable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  // Note: This avoids a read barrier, it should only be used by the GC.
+  HeapReference<Object>* GetReferentReferenceAddr() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObjectReferenceAddr<kDefaultVerifyFlags>(ReferentOffset());
+  }
+
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Reference> pending_next_;  // Note this is Java volatile:
   HeapReference<Object> queue_;  // Note this is Java volatile:
@@ -83,6 +93,8 @@
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
   friend struct art::ReferenceOffsets;  // for verifying offset information
+  friend class gc::ReferenceProcessor;
+  friend class gc::ReferenceQueue;
   DISALLOW_IMPLICIT_CONSTRUCTORS(Reference);
 };
 
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index dd8ce16..d8c1c40 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -70,6 +70,11 @@
 // address the object (if the object didn't move, returns the object input parameter).
 typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
     __attribute__((warn_unused_result));
+
+// Returns true if the object in the heap reference is marked, if it is marked and has moved the
+// callback updates the heap reference contain the new value.
+typedef bool (IsHeapReferenceMarkedCallback)(mirror::HeapReference<mirror::Object>* object,
+    void* arg) __attribute__((warn_unused_result));
 typedef void (ProcessMarkStackCallback)(void* arg);
 
 }  // namespace art
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 7e922c5..132ac3e 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -142,7 +142,8 @@
   return GetMethod()->NativePcOffset(cur_quick_frame_pc_);
 }
 
-uint32_t StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const {
+bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind,
+                           uint32_t* val) const {
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
@@ -155,19 +156,30 @@
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-      return GetGPR(vmap_table.ComputeRegister(spill_mask, vmap_offset, kind));
+      uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      uintptr_t ptr_val;
+      bool success = false;
+      if (is_float) {
+        success = GetFPR(reg, &ptr_val);
+      } else {
+        success = GetGPR(reg, &ptr_val);
+      }
+      *val = ptr_val;
+      return success;
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
-      return *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
+      *val = *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                           frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      return true;
     }
   } else {
-    return cur_shadow_frame_->GetVReg(vreg);
+    *val = cur_shadow_frame_->GetVReg(vreg);
+    return true;
   }
 }
 
-void StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
+bool StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
                            VRegKind kind) {
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably write registers without a context.
@@ -181,8 +193,12 @@
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-      const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kReferenceVReg);
-      SetGPR(reg, new_value);
+      const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      if (is_float) {
+        return SetFPR(reg, new_value);
+      } else {
+        return SetGPR(reg, new_value);
+      }
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
@@ -190,9 +206,11 @@
                                  frame_info.FrameSizeInBytes(), vreg, kRuntimeISA);
       byte* vreg_addr = reinterpret_cast<byte*>(GetCurrentQuickFrame()) + offset;
       *reinterpret_cast<uint32_t*>(vreg_addr) = new_value;
+      return true;
     }
   } else {
-    return cur_shadow_frame_->SetVReg(vreg, new_value);
+    cur_shadow_frame_->SetVReg(vreg, new_value);
+    return true;
   }
 }
 
@@ -201,14 +219,24 @@
   return context_->GetGPRAddress(reg);
 }
 
-uintptr_t StackVisitor::GetGPR(uint32_t reg) const {
+bool StackVisitor::GetGPR(uint32_t reg, uintptr_t* val) const {
   DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->GetGPR(reg);
+  return context_->GetGPR(reg, val);
 }
 
-void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
+bool StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  context_->SetGPR(reg, value);
+  return context_->SetGPR(reg, value);
+}
+
+bool StackVisitor::GetFPR(uint32_t reg, uintptr_t* val) const {
+  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
+  return context_->GetFPR(reg, val);
+}
+
+bool StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
+  return context_->SetFPR(reg, value);
 }
 
 uintptr_t StackVisitor::GetReturnPc() const {
diff --git a/runtime/stack.h b/runtime/stack.h
index 1991115..9402cdd 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -561,15 +561,21 @@
   bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const
+  bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
+  uint32_t GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t val;
+    bool success = GetVReg(m, vreg, kind, &val);
+    CHECK(success) << "Failed to read vreg " << vreg << " of kind " << kind;
+    return val;
+  }
+
+  bool SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uintptr_t* GetGPRAddress(uint32_t reg) const;
-  uintptr_t GetGPR(uint32_t reg) const;
-  void SetGPR(uint32_t reg, uintptr_t value);
 
   // This is a fast-path for getting/setting values in a quick frame.
   uint32_t* GetVRegAddr(StackReference<mirror::ArtMethod>* cur_quick_frame,
@@ -700,6 +706,11 @@
   StackVisitor(Thread* thread, Context* context, size_t num_frames)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool GetGPR(uint32_t reg, uintptr_t* val) const;
+  bool SetGPR(uint32_t reg, uintptr_t value);
+  bool GetFPR(uint32_t reg, uintptr_t* val) const;
+  bool SetFPR(uint32_t reg, uintptr_t value);
+
   instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(uint32_t depth) const;
 
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2f4e805..c9c3bba 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3006,6 +3006,164 @@
   return res_method;
 }
 
+template <class T>
+mirror::ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator(T* it, const Instruction* inst,
+                                                                    MethodType method_type,
+                                                                    bool is_range,
+                                                                    mirror::ArtMethod* res_method) {
+  // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
+  // match the call to the signature. Also, we might be calling through an abstract method
+  // definition (which doesn't have register count values).
+  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
+  /* caught by static verifier */
+  DCHECK(is_range || expected_args <= 5);
+  if (expected_args > code_item_->outs_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
+        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
+    return nullptr;
+  }
+
+  uint32_t arg[5];
+  if (!is_range) {
+    inst->GetVarArgs(arg);
+  }
+  uint32_t sig_registers = 0;
+
+  /*
+   * Check the "this" argument, which must be an instance of the class that declared the method.
+   * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a
+   * rigorous check here (which is okay since we have to do it at runtime).
+   */
+  if (method_type != METHOD_STATIC) {
+    const RegType& actual_arg_type = work_line_->GetInvocationThis(inst, is_range);
+    if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
+      CHECK(have_pending_hard_failure_);
+      return nullptr;
+    }
+    if (actual_arg_type.IsUninitializedReference()) {
+      if (res_method) {
+        if (!res_method->IsConstructor()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
+          return nullptr;
+        }
+      } else {
+        // Check whether the name of the called method is "<init>"
+        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+        if (strcmp(dex_file_->GetMethodName(dex_file_->GetMethodId(method_idx)), "init") != 0) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
+          return nullptr;
+        }
+      }
+    }
+    if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
+      const RegType* res_method_class;
+      if (res_method != nullptr) {
+        mirror::Class* klass = res_method->GetDeclaringClass();
+        res_method_class = &reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
+                                                 klass->CannotBeAssignedFromOtherTypes());
+      } else {
+        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+        const uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
+        res_method_class = &reg_types_.FromDescriptor(class_loader_->Get(),
+                                                      dex_file_->StringByTypeIdx(class_idx),
+                                                      false);
+      }
+      if (!res_method_class->IsAssignableFrom(actual_arg_type)) {
+        Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
+            VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
+                << "' not instance of '" << *res_method_class << "'";
+        // Continue on soft failures. We need to find possible hard failures to avoid problems in
+        // the compiler.
+        if (have_pending_hard_failure_) {
+          return nullptr;
+        }
+      }
+    }
+    sig_registers = 1;
+  }
+
+  for ( ; it->HasNext(); it->Next()) {
+    if (sig_registers >= expected_args) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
+          " arguments, found " << sig_registers << " or more.";
+      return nullptr;
+    }
+
+    const char* param_descriptor = it->GetDescriptor();
+
+    if (param_descriptor == nullptr) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation because of missing signature "
+          "component";
+      return nullptr;
+    }
+
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), param_descriptor,
+                                                        false);
+    uint32_t get_reg = is_range ? inst->VRegC_3rc() + static_cast<uint32_t>(sig_registers) :
+        arg[sig_registers];
+    if (reg_type.IsIntegralTypes()) {
+      const RegType& src_type = work_line_->GetRegisterType(get_reg);
+      if (!src_type.IsIntegralTypes()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register v" << get_reg << " has type " << src_type
+            << " but expected " << reg_type;
+        return res_method;
+      }
+    } else if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
+      // Continue on soft failures. We need to find possible hard failures to avoid problems in the
+      // compiler.
+      if (have_pending_hard_failure_) {
+        return res_method;
+      }
+    }
+    sig_registers += reg_type.IsLongOrDoubleTypes() ?  2 : 1;
+  }
+  if (expected_args != sig_registers) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << expected_args <<
+        " arguments, found " << sig_registers;
+    return nullptr;
+  }
+  return res_method;
+}
+
+void MethodVerifier::VerifyInvocationArgsUnresolvedMethod(const Instruction* inst,
+                                                          MethodType method_type,
+                                                          bool is_range) {
+  // As the method may not have been resolved, make this static check against what we expect.
+  // The main reason for this code block is to fail hard when we find an illegal use, e.g.,
+  // wrong number of arguments or wrong primitive types, even if the method could not be resolved.
+  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  DexFileParameterIterator it(*dex_file_,
+                              dex_file_->GetProtoId(dex_file_->GetMethodId(method_idx).proto_idx_));
+  VerifyInvocationArgsFromIterator<DexFileParameterIterator>(&it, inst, method_type, is_range,
+                                                             nullptr);
+}
+
+class MethodParamListDescriptorIterator {
+ public:
+  explicit MethodParamListDescriptorIterator(mirror::ArtMethod* res_method) :
+      res_method_(res_method), pos_(0), params_(res_method->GetParameterTypeList()),
+      params_size_(params_ == nullptr ? 0 : params_->Size()) {
+  }
+
+  bool HasNext() {
+    return pos_ < params_size_;
+  }
+
+  void Next() {
+    ++pos_;
+  }
+
+  const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return res_method_->GetTypeDescriptorFromTypeIdx(params_->GetTypeItem(pos_).type_idx_);
+  }
+
+ private:
+  mirror::ArtMethod* res_method_;
+  size_t pos_;
+  const DexFile::TypeList* params_;
+  const size_t params_size_;
+};
+
 mirror::ArtMethod* MethodVerifier::VerifyInvocationArgs(const Instruction* inst,
                                                              MethodType method_type,
                                                              bool is_range,
@@ -3014,28 +3172,13 @@
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  // As the method may not have been resolved, make this static check against what we expect.
-  const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-  uint32_t shorty_idx = dex_file_->GetProtoId(method_id.proto_idx_).shorty_idx_;
-  uint32_t shorty_len;
-  const char* descriptor = dex_file_->StringDataAndUtf16LengthByIdx(shorty_idx, &shorty_len);
-  int32_t sig_registers = method_type == METHOD_STATIC ? 0 : 1;
-  for (size_t i = 1; i < shorty_len; i++) {
-    if (descriptor[i] == 'J' || descriptor[i] == 'D') {
-      sig_registers += 2;
-    } else {
-      sig_registers++;
-    }
-  }
-  if (inst->VRegA() != sig_registers) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
-        " arguments, found " << sig_registers;
-    return nullptr;
-  }
-
   mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == NULL) {  // error or class is unresolved
-    return NULL;
+    // Check what we can statically.
+    if (!have_pending_hard_failure_) {
+      VerifyInvocationArgsUnresolvedMethod(inst, method_type, is_range);
+    }
+    return nullptr;
   }
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
@@ -3047,7 +3190,7 @@
       Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
                                    << PrettyMethod(dex_method_idx_, *dex_file_)
                                    << " to super " << PrettyMethod(res_method);
-      return NULL;
+      return nullptr;
     }
     mirror::Class* super_klass = super.GetClass();
     if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) {
@@ -3056,95 +3199,14 @@
                                    << " to super " << super
                                    << "." << res_method->GetName()
                                    << res_method->GetSignature();
-      return NULL;
+      return nullptr;
     }
   }
-  // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
-  // match the call to the signature. Also, we might be calling through an abstract method
-  // definition (which doesn't have register count values).
-  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
-  /* caught by static verifier */
-  DCHECK(is_range || expected_args <= 5);
-  if (expected_args > code_item_->outs_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
-        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
-    return NULL;
-  }
 
-  /*
-   * Check the "this" argument, which must be an instance of the class that declared the method.
-   * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a
-   * rigorous check here (which is okay since we have to do it at runtime).
-   */
-  size_t actual_args = 0;
-  if (!res_method->IsStatic()) {
-    const RegType& actual_arg_type = work_line_->GetInvocationThis(inst, is_range);
-    if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
-      return NULL;
-    }
-    if (actual_arg_type.IsUninitializedReference() && !res_method->IsConstructor()) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
-      return NULL;
-    }
-    if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
-      mirror::Class* klass = res_method->GetDeclaringClass();
-      const RegType& res_method_class =
-          reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
-                               klass->CannotBeAssignedFromOtherTypes());
-      if (!res_method_class.IsAssignableFrom(actual_arg_type)) {
-        Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
-            VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
-            << "' not instance of '" << res_method_class << "'";
-        return NULL;
-      }
-    }
-    actual_args++;
-  }
-  /*
-   * Process the target method's signature. This signature may or may not
-   * have been verified, so we can't assume it's properly formed.
-   */
-  const DexFile::TypeList* params = res_method->GetParameterTypeList();
-  size_t params_size = params == NULL ? 0 : params->Size();
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetVarArgs(arg);
-  }
-  for (size_t param_index = 0; param_index < params_size; param_index++) {
-    if (actual_args >= expected_args) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invalid call to '" << PrettyMethod(res_method)
-          << "'. Expected " << expected_args << " arguments, processing argument " << actual_args
-          << " (where longs/doubles count twice).";
-      return NULL;
-    }
-    const char* descriptor =
-        res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
-    if (descriptor == NULL) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
-          << " missing signature component";
-      return NULL;
-    }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
-    uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
-    if (reg_type.IsIntegralTypes()) {
-      const RegType& src_type = work_line_->GetRegisterType(get_reg);
-      if (!src_type.IsIntegralTypes()) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register v" << get_reg << " has type " << src_type
-                                          << " but expected " << reg_type;
-        return res_method;
-      }
-    } else if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
-      return res_method;
-    }
-    actual_args = reg_type.IsLongOrDoubleTypes() ? actual_args + 2 : actual_args + 1;
-  }
-  if (actual_args != expected_args) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
-        << " expected " << expected_args << " arguments, found " << actual_args;
-    return NULL;
-  } else {
-    return res_method;
-  }
+  // Process the target method's signature. This signature may or may not
+  MethodParamListDescriptorIterator it(res_method);
+  return VerifyInvocationArgsFromIterator<MethodParamListDescriptorIterator>(&it, inst, method_type,
+                                                                             is_range, res_method);
 }
 
 mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst,
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 451c9e2..b6d5b35 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -565,6 +565,18 @@
                                           bool is_range, bool is_super)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Similar checks to the above, but on the proto. Will be used when the method cannot be
+  // resolved.
+  void VerifyInvocationArgsUnresolvedMethod(const Instruction* inst, MethodType method_type,
+                                            bool is_range)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <class T>
+  mirror::ArtMethod* VerifyInvocationArgsFromIterator(T* it, const Instruction* inst,
+                                                      MethodType method_type, bool is_range,
+                                                      mirror::ArtMethod* res_method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::ArtMethod* GetQuickInvokedMethod(const Instruction* inst,
                                            RegisterLine* reg_line,
                                            bool is_range)
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
index 5d6d16a..f672974 100755
--- a/test/etc/host-run-test-jar
+++ b/test/etc/host-run-test-jar
@@ -30,6 +30,9 @@
             exit 1
         fi
         LIB="$1"
+        if [ `uname` = "Darwin" ]; then
+            LIB=${LIB/%so/dylib}
+        fi
         shift
     elif [ "x$1" = "x--boot" ]; then
         shift
@@ -110,10 +113,16 @@
 fi
 
 if [ "$GDB" = "y" ]; then
-    gdb=gdb
-    gdbargs="--args $exe"
-    # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line.
-    # gdbargs="--annotate=3 $gdbargs"
+    if [ `uname` = "Darwin" ]; then
+        gdb=lldb
+        gdbargs="-- $exe"
+        exe=
+    else
+        gdb=gdb
+        gdbargs="--args $exe"
+        # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line.
+        # gdbargs="--annotate=3 $gdbargs"
+    fi
 fi
 
 if [ "$INTERPRETER" = "y" ]; then