Merge "x86_64: Enable core.oat/boot.oat compilation"
diff --git a/Android.mk b/Android.mk
index f7f65ac..593ee04 100644
--- a/Android.mk
+++ b/Android.mk
@@ -189,6 +189,13 @@
 test-art-host-oat: test-art-host-oat-default test-art-host-oat-interpreter
 	@echo test-art-host-oat PASSED
 
+FAILING_OPTIMIZING_MESSAGE := failed with the optimizing compiler. If the test passes \
+  with Quick and the interpreter, it is probably just a bug in the optimizing compiler. Please \
+  add the test name to the FAILING_OPTIMIZING_TESTS Makefile variable in art/Android.mk, \
+  and file a bug.
+
+# Placeholder for failing tests on the optimizing compiler.
+
 define declare-test-art-host-run-test
 .PHONY: test-art-host-run-test-default-$(1)
 test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
@@ -197,6 +204,14 @@
 
 TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1)
 
+.PHONY: test-art-host-run-test-optimizing-$(1)
+test-art-host-run-test-optimizing-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test -Xcompiler-option --compiler-backend=Optimizing $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host $(1) \
+	|| (echo -e "\x1b[31;01mTest $(1) $(FAILING_OPTIMIZING_MESSAGE)\x1b[0m" && exit 1)
+	@echo test-art-host-run-test-optimizing-$(1) PASSED
+
+TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS += test-art-host-run-test-optimizing-$(1)
+
 .PHONY: test-art-host-run-test-interpreter-$(1)
 test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
 	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host --interpreter $(1)
@@ -205,7 +220,7 @@
 TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1)
 
 .PHONY: test-art-host-run-test-$(1)
-test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1)
+test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1) test-art-host-run-test-optimizing-$(1)
 
 endef
 
@@ -215,12 +230,21 @@
 test-art-host-run-test-default: $(TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS)
 	@echo test-art-host-run-test-default PASSED
 
+FAILING_OPTIMIZING_TESTS :=
+$(foreach test, $(FAILING_OPTIMIZING_TESTS), \
+	$(eval TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS := $(filter-out test-art-host-run-test-optimizing-$(test), $(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS))))
+
+.PHONY: test-art-host-run-test-optimizing
+test-art-host-run-test-optimizing: $(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS)
+	$(foreach test, $(FAILING_OPTIMIZING_TESTS), $(info Optimizing compiler has skipped $(test)))
+	@echo test-art-host-run-test-optimizing PASSED
+
 .PHONY: test-art-host-run-test-interpreter
 test-art-host-run-test-interpreter: $(TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS)
 	@echo test-art-host-run-test-interpreter PASSED
 
 .PHONY: test-art-host-run-test
-test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter
+test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter test-art-host-run-test-optimizing
 	@echo test-art-host-run-test PASSED
 
 ########################################################################
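Usage note: the new Makefile targets above can be exercised on their own. From the top of an AOSP tree with the usual build environment set up, a typical invocation looks like:

    # Run every host run-test against the Optimizing backend; tests listed in
    # FAILING_OPTIMIZING_TESTS are filtered out and reported as skipped.
    make test-art-host-run-test-optimizing

Individual tests remain reachable through the per-test test-art-host-run-test-optimizing-$(test) targets defined above.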
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd..590c767 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -190,7 +190,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow path to reduce exception-related metadata.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -261,7 +261,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow path to reduce exception-related metadata.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -356,13 +356,13 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                            (static_cast<size_t>(frame_size_) <
-                            Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -381,7 +381,7 @@
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing.  This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
@@ -401,7 +401,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c1ce03d..3f32c51 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -219,7 +219,8 @@
   kA64First = 0,
   kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
   kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Add4RRre,      // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
   kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
@@ -328,7 +329,8 @@
   kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
   kA64Stlxr3wrX,     // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0].
   kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Sub4RRre,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index c5bd005..2a8da24 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -115,6 +115,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
                  "add", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRre), SF_VARIANTS(0x0b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "add", "!0r, !1r, !2r!3e", kFixupNone),
     // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
     //   fixups and contains information to identify the adr label.
     ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
@@ -558,6 +562,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
                  "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRre), SF_VARIANTS(0x4b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3e", kFixupNone),
     ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
                  kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748ef..1df576b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -95,8 +95,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
-  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -141,7 +140,6 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
   LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
@@ -150,8 +148,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
@@ -213,7 +210,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails it's handled by the slow path to reduce exception-related metadata.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -261,7 +258,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails it's handled by the slow path to reduce exception-related metadata.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -337,19 +334,19 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                              (static_cast<size_t>(frame_size_) <
-                              Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
-  const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+        Thread::kStackOverflowSignalReservedBytes;
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
@@ -382,7 +379,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
@@ -412,7 +409,7 @@
         // Branch to throw target if there is not enough room.
         OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
         OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
       } else {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index b1b83f0..f1270ec 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -87,7 +87,9 @@
                           OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                               RegStorage r_src, OpSize size) OVERRIDE;
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
+    LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
+                           int offset, int check_value, LIR* target) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg);
@@ -239,6 +241,8 @@
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
+    LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                           A64RegExtEncodings ext, uint8_t amount);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 265e8d2..9814cb4 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -45,7 +45,6 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturn(kFPReg);
@@ -89,7 +88,6 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       {
         ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod);
         RegStorage r_tgt = CallHelperSetup(helper_offset);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2c6b11d..2ac4adb 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -160,6 +160,19 @@
   return branch;
 }
 
+LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
+                                     RegStorage base_reg, int offset, int check_value,
+                                     LIR* target) {
+  // It is possible that the temp register is 64-bit (an ArgReg or RefReg).
+  // Always compare the 32-bit value, no matter what temp_reg is.
+  if (temp_reg.Is64Bit()) {
+    temp_reg = As32BitReg(temp_reg);
+  }
+  Load32Disp(base_reg, offset, temp_reg);
+  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
+  return branch;
+}
+
 LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
   bool dest_is_fp = r_dest.IsFloat();
   bool src_is_fp = r_src.IsFloat();
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index fba368a..06e1cda 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -1163,7 +1163,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 2254b8b..672aa88 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -99,7 +99,8 @@
 
   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
   if (data_target == NULL) {
-    data_target = AddWordData(&literal_list_, value);
+    // Wide, as we need 8B alignment.
+    data_target = AddWideData(&literal_list_, value, 0);
   }
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
@@ -643,6 +644,44 @@
   }
 }
 
+LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpAdd:
+      opcode = kA64Add4RRre;
+      break;
+    case kOpSub:
+      opcode = kA64Sub4RRre;
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented opcode: " << op;
+      break;
+  }
+  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
+
+  if (r_dest.Is64Bit()) {
+    CHECK(r_src1.Is64Bit());
+
+    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
+    // Note: this does not follow the AArch64 specification; it is our own encoding convention.
+    if (!r_src2.Is64Bit()) {
+      r_src2 = As64BitReg(r_src2);
+    }
+  } else {
+    CHECK(!r_src1.Is64Bit());
+    CHECK(!r_src2.Is64Bit());
+  }
+
+  // Sanity checks.
+  //    1) Amount is in the range 0..4
+  CHECK_LE(amount, 4);
+
+  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
+                 EncodeExtend(ext, amount));
+}
+
 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
@@ -660,6 +699,7 @@
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  int info = 0;
 
   switch (op) {
     case kOpLsl: {
@@ -692,7 +732,8 @@
         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
         log_imm = -1;
-        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
+        alt_opcode = (neg) ? kA64Add4RRre : kA64Sub4RRre;
+        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
       }
       break;
     // case kOpRsub:
@@ -734,8 +775,8 @@
   if (log_imm >= 0) {
     return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
-    RegStorage r_scratch = AllocTemp();
-    if (IS_WIDE(wide)) {
+    RegStorage r_scratch;
+    if (is_wide) {
       r_scratch = AllocTempWide();
       LoadConstantWide(r_scratch, value);
     } else {
@@ -743,7 +784,7 @@
       LoadConstant(r_scratch, value);
     }
     if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
     else
       res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
     FreeTemp(r_scratch);
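Note on the extended-register forms introduced above: "add x1, x1, w3, sxtw" sign-extends the 32-bit displacement in w3 before the 64-bit add, which the old shifted-register form could not express. A minimal C++ sketch of the semantic difference (illustrative only, not ART code):

    #include <cstdint>

    // What the new kA64Add4RRre encoding computes with an sxtw extend:
    uint64_t add_sxtw(uint64_t base, uint32_t disp) {
      return base + static_cast<int64_t>(static_cast<int32_t>(disp));  // sign-extend first
    }

    // What the old "add x1, x1, x3" computed; a negative 32-bit displacement
    // with stale upper bits in x3 produces the wrong target address.
    uint64_t add_plain(uint64_t base, uint64_t disp) {
      return base + disp;
    }

This is why the switch-table code in call_arm64.cc can drop its TODOs.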
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 6397208..3f9379c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -292,10 +292,14 @@
   return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
 }
 
-bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) {
+bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
-  return it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0;
+  bool res = (it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0);
+  if (res && intrinsic != nullptr) {
+    *intrinsic = it->second;
+  }
+  return res;
 }
 
 bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) {
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index c03f89c..70693c2 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -67,7 +67,7 @@
     /**
      * Check whether a particular method index corresponds to an intrinsic function.
      */
-    bool IsIntrinsic(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+    bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_);
 
     /**
      * Generate code for an intrinsic function invocation.
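A sketch of the two call shapes the widened IsIntrinsic signature supports; the implementation above only copies into the out-parameter when it is non-null, so boolean-only callers pass nullptr (the inliner pointer below is assumed to come from the method inliner map; names are illustrative):

    InlineMethod method;
    if (inliner->IsIntrinsic(method_index, &method)) {
      // method.opcode now identifies which intrinsic was matched.
    }
    bool is_intrinsic = inliner->IsIntrinsic(method_index, nullptr);  // old-style query

The x86 backend uses the first shape in AnalyzeInvokeStatic further down in this change.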
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3b99421..e36b592 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -173,7 +173,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
-  if (Runtime::Current()->ExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
@@ -188,7 +188,7 @@
 }
 
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -197,13 +197,13 @@
 }
 
 void Mir2Lir::MarkPossibleStackOverflowException() {
-  if (!Runtime::Current()->ExplicitStackOverflowChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
     MarkSafepointPC(last_lir_insn_);
   }
 }
 
 void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -2171,7 +2171,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       return;
     }
@@ -2191,7 +2191,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       OpUnconditionalBranch(target);
       return;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 641579f..b3fac77 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -977,7 +977,7 @@
                            type, skip_this);
 
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
@@ -1204,7 +1204,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c734202..e53105f 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 9155677..f70087d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
+#include "instruction_set.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -206,6 +207,36 @@
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
+// Size of a frame that we consider unconditionally large. Anything at least this large
+// always gets a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that we consider small. Any leaf method with a frame smaller than this
+// should run without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it is at least kLargeFrameSize, or at least a quarter
+// of the per-ISA reserved stack-overflow space.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+               kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+               kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+               kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+               kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+               kSmallFrameSize_is_not_a_small_frame_x86_64);
+
 class Mir2Lir : public Backend {
   public:
     /*
@@ -953,8 +984,8 @@
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     virtual bool GenInlinedAbsLong(CallInfo* info);
-    bool GenInlinedAbsFloat(CallInfo* info);
-    bool GenInlinedAbsDouble(CallInfo* info);
+    virtual bool GenInlinedAbsFloat(CallInfo* info);
+    virtual bool GenInlinedAbsDouble(CallInfo* info);
     bool GenInlinedFloatCvt(CallInfo* info);
     bool GenInlinedDoubleCvt(CallInfo* info);
     virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
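To make the frame-size rule concrete, a self-contained sketch of the predicate added above; the 8 KB reservation is an assumed example value, the real per-ISA numbers come from GetStackOverflowReservedBytes:

    #include <cstddef>

    constexpr size_t kLargeFrameSizeExample = 2 * 1024;  // 2 KB, as in mir_to_lir.h

    constexpr bool IsLargeFrameExample(size_t size, size_t reserved_bytes) {
      return size >= kLargeFrameSizeExample || size >= reserved_bytes / 4;
    }

    // With an assumed 8 KB reservation, a 1 KB leaf frame may skip the check...
    static_assert(!IsLargeFrameExample(1 * 1024, 8 * 1024), "small frame stays small");
    // ...while a 2 KB frame always gets one.
    static_assert(IsLargeFrameExample(2 * 1024, 8 * 1024), "large frame is large");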
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c7e289d..3f54798 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -407,10 +407,17 @@
   { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" },
 
-  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" },
-  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" },
-  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" },
-  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" },
+  { kX86Fild32M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M",  "[!0r,!1d]" },
+  { kX86Fild64M,  kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M",  "[!0r,!1d]" },
+  { kX86Fld32M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M",   "[!0r,!1d]" },
+  { kX86Fld64M,   kMem,     IS_LOAD    | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M",   "[!0r,!1d]" },
+  { kX86Fstp32M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" },
+  { kX86Fstp64M,  kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" },
+  { kX86Fst32M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0,                { 0x0,  0,    0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M",  "[!0r,!1d]" },
+  { kX86Fst64M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0,                { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r,!1d]" },
+  { kX86Fprem,    kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xD9, 0,    0xF8, 0,    0, 0, 0, 0, false }, "Fprem64",  "" },
+  { kX86Fucompp,  kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xDA, 0,    0xE9, 0,    0, 0, 0, 0, false }, "Fucompp",  "" },
+  { kX86Fstsw16R, kNullary, NO_OPERAND,                                         { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
 
   EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
   { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab2..28195ab 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 3540843..d482e58 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -148,12 +148,15 @@
                         RegLocation rl_src2);
   void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                        RegLocation rl_src2);
+  void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double);
   void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                 RegLocation rl_src2);
   void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
   bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
   bool GenInlinedSqrt(CallInfo* info);
+  bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+  bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
   bool GenInlinedPeek(CallInfo* info, OpSize size);
   bool GenInlinedPoke(CallInfo* info, OpSize size);
   void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
@@ -795,6 +798,14 @@
    */
   void AnalyzeDoubleUse(RegLocation rl_use);
 
+  /*
+   * @brief Analyze one invoke-static MIR instruction.
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir);
+
   bool Gen64Bit() const  { return gen64bit_; }
 
   // Information derived from analysis of MIR
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 61623d0..20bb7bf 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -48,16 +48,7 @@
       break;
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
-      FlushAllRegs();   // Send everything to home location
-      if (cu_->target64) {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
-                                                false);
-      } else {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
-                                                false);
-      }
-      rl_result = GetReturn(kFPReg);
-      StoreValue(rl_dest, rl_result);
+      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
       return;
     case Instruction::NEG_FLOAT:
       GenNegFloat(rl_dest, rl_src1);
@@ -110,16 +101,7 @@
       break;
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
-      FlushAllRegs();   // Send everything to home location
-      if (cu_->target64) {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
-                                                false);
-      } else {
-        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
-                                                false);
-      }
-      rl_result = GetReturnWide(kFPReg);
-      StoreValueWide(rl_dest, rl_result);
+      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
       return;
     case Instruction::NEG_DOUBLE:
       GenNegDouble(rl_dest, rl_src1);
@@ -356,6 +338,110 @@
   }
 }
 
+void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) {
+  // Compute offsets to the source and destination VRs on stack.
+  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
+  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
+  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
+
+  // Update the in-register state of sources.
+  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
+  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);
+
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
+  // If the source is in a physical register, then put it in its location on the stack.
+  if (rl_src1.location == kLocPhysReg) {
+    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);
+
+    if (reg_info != nullptr && reg_info->IsTemp()) {
+      // Calling FlushSpecificReg because it will only write back the VR if it is dirty.
+      FlushSpecificReg(reg_info);
+      // ResetDef to prevent NullifyRange from removing stores.
+      ResetDef(rl_src1.reg);
+    } else {
+      // It must have been register promoted if it is not a temp but is still in a physical
+      // register. Since we need the value in memory, we place it there now.
+      StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32);
+    }
+  }
+
+  if (rl_src2.location == kLocPhysReg) {
+    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
+    if (reg_info != nullptr && reg_info->IsTemp()) {
+      FlushSpecificReg(reg_info);
+      ResetDef(rl_src2.reg);
+    } else {
+      StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32);
+    }
+  }
+
+  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;
+
+  // Push the source virtual registers onto the x87 stack.
+  LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+                             src2_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, is_double /* is64bit */);
+
+  LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+                             src1_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, is_double /* is64bit */);
+
+  FlushReg(rs_rAX);
+  Clobber(rs_rAX);
+  LockTemp(rs_rAX);
+
+  LIR* retry = NewLIR0(kPseudoTargetLabel);
+
+  // Compute the partial remainder of ST(0) / ST(1) and place the result in ST(0).
+  NewLIR0(kX86Fprem);
+
+  // Move FPU status word to AX.
+  NewLIR0(kX86Fstsw16R);
+
+  // Check whether the reduction is complete.
+  OpRegImm(kOpAnd, rs_rAX, 0x400);
+
+  // If not, continue computing the remainder.
+  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  branch->target = retry;
+
+  FreeTemp(rs_rAX);
+
+  // Now store result in the destination VR's stack location.
+  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
+  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
+  LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
+  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);
+
+  // Pop ST(1) and ST(0).
+  NewLIR0(kX86Fucompp);
+
+  /*
+   * The result is in a physical register if it was in a temp or was register
+   * promoted. For that reason it is enough to check whether it is in a physical
+   * register. If it is, we must do all of the bookkeeping necessary to
+   * invalidate the temp (if needed) and load it into the promoted register (if
+   * needed). If the result's location is in memory, we do not need to do anything
+   * more, since the fst above has already placed the correct value in memory.
+   */
+  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
+      UpdateLocTyped(rl_dest, kFPReg);
+  if (rl_result.location == kLocPhysReg) {
+    rl_result = EvalLoc(rl_dest, kFPReg, true);
+    if (is_double) {
+      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
+      StoreFinalValueWide(rl_dest, rl_result);
+    } else {
+      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
+      StoreFinalValue(rl_dest, rl_result);
+    }
+  }
+}
+
 void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2) {
   bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
@@ -501,6 +587,107 @@
   return true;
 }
 
+bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  // Get the argument
+  RegLocation rl_src = info->args[0];
 
+  // Get the inlined intrinsic target virtual register
+  RegLocation rl_dest = InlineTarget(info);
+
+  // Get the virtual register number
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  if (rl_dest.s_reg_low == INVALID_SREG) {
+    // Result is unused, the code is dead. Inlining successful, no code generated.
+    return true;
+  }
+  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+
+  // If the argument is the same as the inlined intrinsic target.
+  if (v_src_reg == v_dst_reg) {
+    rl_src = UpdateLoc(rl_src);
+
+    // If the argument is in a physical register.
+    if (rl_src.location == kLocPhysReg) {
+      rl_src = LoadValue(rl_src, kCoreReg);
+      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
+      StoreValue(rl_dest, rl_src);
+      return true;
+    }
+    // The argument is in memory.
+    DCHECK((rl_src.location == kLocDalvikFrame) ||
+           (rl_src.location == kLocCompilerTemp));
+
+    // Operate directly into memory.
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement, 0x7fffffff);
+    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
+    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
+    return true;
+  } else {
+    rl_src = LoadValue(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
+    StoreValue(rl_dest, rl_result);
+    return true;
+  }
+}
+
+bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_dest = InlineTargetWide(info);
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  if (rl_dest.s_reg_low == INVALID_SREG) {
+    // Result is unused, the code is dead. Inlining successful, no code generated.
+    return true;
+  }
+  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+  rl_src = UpdateLocWide(rl_src);
+
+  // If the argument is in a physical XMM register.
+  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
+    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+    if (rl_result.reg != rl_src.reg) {
+      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+    } else {
+      RegStorage sign_mask = AllocTempDouble();
+      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
+      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
+      FreeTemp(sign_mask);
+    }
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  } else if (v_src_reg == v_dst_reg) {
+    // The argument is the same as the inlined intrinsic target.
+    // If the argument is in a physical register.
+    if (rl_src.location == kLocPhysReg) {
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
+      StoreValueWide(rl_dest, rl_src);
+      return true;
+    }
+    // The argument is in memory.
+    DCHECK((rl_src.location == kLocDalvikFrame) ||
+           (rl_src.location == kLocCompilerTemp));
+
+    // Operate directly into memory.
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement + HIWORD_OFFSET, 0x7fffffff);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit */);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is_64bit */);
+    return true;
+  } else {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegCopyWide(rl_result.reg, rl_src.reg);
+    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  }
+}
 
 }  // namespace art
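Two notes on the code above, for reference. The GenRemFP retry loop keys off bit 10 (C2) of the x87 status word, which fprem leaves set while the argument reduction is still incomplete, hence the AND against 0x400. And the inlined Math.abs is a pure bit trick: clearing the IEEE-754 sign bit with an AND against 0x7fffffff (float) or 0x7fffffffffffffff (double). A minimal C++ equivalent of the double case (illustrative only, not ART code):

    #include <cstdint>
    #include <cstring>

    double abs_via_mask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));  // safe type pun
      bits &= 0x7fffffffffffffffULL;         // drop the sign bit, keep exponent and mantissa
      std::memcpy(&x, &bits, sizeof(x));
      return x;
    }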
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 483d8cf..078dd5a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -441,6 +441,31 @@
   Clobber(rs_rCX);
   Clobber(rs_rDX);
   Clobber(rs_rBX);
+
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
+
+  if (Gen64Bit()) {
+    Clobber(rs_r8);
+    Clobber(rs_r9);
+    Clobber(rs_r10);
+    Clobber(rs_r11);
+
+    Clobber(rs_fr8);
+    Clobber(rs_fr9);
+    Clobber(rs_fr10);
+    Clobber(rs_fr11);
+    Clobber(rs_fr12);
+    Clobber(rs_fr13);
+    Clobber(rs_fr14);
+    Clobber(rs_fr15);
+  }
 }
 
 RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -2177,7 +2202,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index b93e3e8..46e877f 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -18,6 +18,8 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/dataflow_iterator-inl.h"
 #include "x86_lir.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 
 namespace art {
 
@@ -953,6 +955,9 @@
     case Instruction::PACKED_SWITCH:
       store_method_addr_ = true;
       break;
+    case Instruction::INVOKE_STATIC:
+      AnalyzeInvokeStatic(opcode, bb, mir);
+      break;
     default:
       // Other instructions are not interesting yet.
       break;
@@ -1020,4 +1025,22 @@
   DCHECK(CheckCorePoolSanity());
   return loc;
 }
+
+void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
+  uint32_t index = mir->dalvikInsn.vB;
+  if (!(mir->optimization_flags & MIR_INLINED)) {
+    DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
+    InlineMethod method;
+    if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+        ->IsIntrinsic(index, &method)) {
+      switch (method.opcode) {
+        case kIntrinsicAbsDouble:
+          store_method_addr_ = true;
+          break;
+        default:
+          break;
+      }
+    }
+  }
+}
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f1b5811..28b9dca 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -572,8 +572,15 @@
   kX86PsllqRI,                  // left shift of floating point registers 64 bits x 2
   kX86Fild32M,                  // push 32-bit integer on x87 stack
   kX86Fild64M,                  // push 64-bit integer on x87 stack
+  kX86Fld32M,                   // push float on x87 stack
+  kX86Fld64M,                   // push double on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
   kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
+  kX86Fst32M,                   // do 32-bit store
+  kX86Fst64M,                   // do 64-bit store
+  kX86Fprem,                    // remainder from dividing two floating point values
+  kX86Fucompp,                  // compare floating point values and pop x87 fp stack twice
+  kX86Fstsw16R,                 // store FPU status word
   Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
   kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5d1c5da..fb3341b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -53,7 +53,10 @@
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
     generate_gdb_information_(false),
     top_k_profile_threshold_(kDefaultTopKProfileThreshold),
-    include_debug_symbols_(kDefaultIncludeDebugSymbols)
+    include_debug_symbols_(kDefaultIncludeDebugSymbols),
+    explicit_null_checks_(true),
+    explicit_so_checks_(true),
+    explicit_suspend_checks_(true)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -67,7 +70,10 @@
                   size_t num_dex_methods_threshold,
                   bool generate_gdb_information,
                   double top_k_profile_threshold,
-                  bool include_debug_symbols
+                  bool include_debug_symbols,
+                  bool explicit_null_checks,
+                  bool explicit_so_checks,
+                  bool explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -80,7 +86,10 @@
     num_dex_methods_threshold_(num_dex_methods_threshold),
     generate_gdb_information_(generate_gdb_information),
     top_k_profile_threshold_(top_k_profile_threshold),
-    include_debug_symbols_(include_debug_symbols)
+    include_debug_symbols_(include_debug_symbols),
+    explicit_null_checks_(explicit_null_checks),
+    explicit_so_checks_(explicit_so_checks),
+    explicit_suspend_checks_(explicit_suspend_checks)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -147,6 +156,30 @@
     return include_debug_symbols_;
   }
 
+  bool GetExplicitNullChecks() const {
+    return explicit_null_checks_;
+  }
+
+  void SetExplicitNullChecks(bool new_val) {
+    explicit_null_checks_ = new_val;
+  }
+
+  bool GetExplicitStackOverflowChecks() const {
+    return explicit_so_checks_;
+  }
+
+  void SetExplicitStackOverflowChecks(bool new_val) {
+    explicit_so_checks_ = new_val;
+  }
+
+  bool GetExplicitSuspendChecks() const {
+    return explicit_suspend_checks_;
+  }
+
+  void SetExplicitSuspendChecks(bool new_val) {
+    explicit_suspend_checks_ = new_val;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -166,6 +199,9 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool include_debug_symbols_;
+  bool explicit_null_checks_;
+  bool explicit_so_checks_;
+  bool explicit_suspend_checks_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 7ffab69..b6b5313 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -744,6 +744,19 @@
   *parsed_value = value;
 }
 
+void CheckExplicitCheckOptions(InstructionSet isa, bool* explicit_null_checks,
+                               bool* explicit_so_checks, bool* explicit_suspend_checks) {
+  switch (isa) {
+    case kArm:
+      break;  // All checks implemented, leave as is.
+
+    default:  // No checks implemented, reset all to explicit checks.
+      *explicit_null_checks = true;
+      *explicit_so_checks = true;
+      *explicit_suspend_checks = true;
+  }
+}
+
 static int dex2oat(int argc, char** argv) {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
@@ -825,6 +838,11 @@
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
 
+  bool explicit_null_checks = true;
+  bool explicit_so_checks = true;
+  bool explicit_suspend_checks = true;
+  bool has_explicit_checks_options = false;
+
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
     const bool log_options = false;
@@ -998,6 +1016,31 @@
     } else if (option.starts_with("--dump-cfg-passes=")) {
       std::string dump_passes = option.substr(strlen("--dump-cfg-passes=")).data();
       PassDriverMEOpts::SetDumpPassList(dump_passes);
+    } else if (option.starts_with("--implicit-checks=")) {
+      std::string checks = option.substr(strlen("--implicit-checks=")).data();
+      std::vector<std::string> checkvec;
+      Split(checks, ',', checkvec);
+      for (auto& str : checkvec) {
+        std::string val = Trim(str);
+        if (val == "none") {
+          explicit_null_checks = true;
+          explicit_so_checks = true;
+          explicit_suspend_checks = true;
+        } else if (val == "null") {
+          explicit_null_checks = false;
+        } else if (val == "suspend") {
+          explicit_suspend_checks = false;
+        } else if (val == "stack") {
+          explicit_so_checks = false;
+        } else if (val == "all") {
+          explicit_null_checks = false;
+          explicit_so_checks = false;
+          explicit_suspend_checks = false;
+        } else {
+          Usage("--implicit-checks passed non-recognized value %s", val.c_str());
+        }
+        has_explicit_checks_options = true;
+      }
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1125,6 +1168,9 @@
     Usage("Unknown --compiler-filter value %s", compiler_filter_string);
   }
 
+  CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
+                            &explicit_suspend_checks);
+
   CompilerOptions compiler_options(compiler_filter,
                                    huge_method_threshold,
                                    large_method_threshold,
@@ -1133,7 +1179,10 @@
                                    num_dex_methods_threshold,
                                    generate_gdb_information,
                                    top_k_profile_threshold,
-                                   include_debug_symbols
+                                   include_debug_symbols,
+                                   explicit_null_checks,
+                                   explicit_so_checks,
+                                   explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
@@ -1204,6 +1253,18 @@
     return EXIT_FAILURE;
   }
   std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
+
+  // TODO: Not sure whether it's a good idea to allow anything other than the runtime's own
+  // settings here at all, as we'll have to throw away the produced code on a mismatch.
+  if (!has_explicit_checks_options) {
+    if (instruction_set == kRuntimeISA) {
+      Runtime* runtime = Runtime::Current();
+      compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks());
+      compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks());
+      compiler_options.SetExplicitSuspendChecks(runtime->ExplicitSuspendChecks());
+    }
+  }
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
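Usage note for the new dex2oat flag, by way of example (other arguments elided):

    # Use implicit null and stack overflow checks; keep suspend checks explicit.
    dex2oat --implicit-checks=null,stack ...

    # Force everything back to explicit checks (the default).
    dex2oat --implicit-checks=none ...

Per CheckExplicitCheckOptions above, the options are reset to explicit checks on non-ARM targets, since only ARM currently implements the implicit variants.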
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c40ae7a..992202a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -54,6 +54,7 @@
 	gc/collector/concurrent_copying.cc \
 	gc/collector/garbage_collector.cc \
 	gc/collector/immune_region.cc \
+	gc/collector/mark_compact.cc \
 	gc/collector/mark_sweep.cc \
 	gc/collector/partial_mark_sweep.cc \
 	gc/collector/semi_space.cc \
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 6a337b3..96ffc93 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -25,14 +25,14 @@
 namespace art {
 namespace arm {
 
-static const uint32_t gZero = 0;
+static constexpr uint32_t gZero = 0;
 
 void ArmContext::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfSRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[PC] = &pc_;
@@ -69,31 +69,46 @@
   }
 }
 
-void ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
+bool ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  DCHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ArmContext::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+  DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since it is never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void ArmContext::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[R0] = const_cast<uint32_t*>(&gZero);
   gprs_[R1] = const_cast<uint32_t*>(&gZero);
-  gprs_[R2] = NULL;
-  gprs_[R3] = NULL;
+  gprs_[R2] = nullptr;
+  gprs_[R3] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
 
 void ArmContext::DoLongJump() {
-  uintptr_t gprs[16];
-  uint32_t fprs[32];
+  uintptr_t gprs[kNumberOfCoreRegisters];
+  uint32_t fprs[kNumberOfSRegisters];
   for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : ArmContext::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : ArmContext::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfSRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : ArmContext::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : ArmContext::kBadFprBase + i;
   }
   DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
   art_quick_do_long_jump(gprs, fprs);
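
A note on the contract change visible in this file: the register setters now report whether the register had a saved location instead of CHECK-failing outright. A sketch of the resulting caller pattern, assuming a Context* as declared in runtime/arch/context.h below:

// The accessors return false when a register has no saved location, so
// callers choose their own failure policy rather than inheriting a CHECK.
void BumpRegisterZero(art::Context* context) {
  uintptr_t value = 0;
  if (context->GetGPR(0u, &value)) {
    // The slot exists; a write to the same slot is expected to succeed.
    bool ok = context->SetGPR(0u, value + 1);
    CHECK(ok) << "register 0 lost its location between read and write";
  }
  // A missing location is no longer fatal at this layer.
}
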
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 2ccce8d..e894f16 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -32,31 +32,53 @@
 
   virtual ~ArmContext() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
-    SetGPR(PC, new_pc);
+  void SetPC(uintptr_t new_pc) OVERRIDE {
+    bool success = SetGPR(PC, new_pc);
+    CHECK(success) << "Failed to set PC register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, initialized to NULL or the specific registers below.
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9..2a82129 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
@@ -59,7 +60,7 @@
   // get the method from the top of the stack.  However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -190,7 +191,7 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
-  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
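
The overflow test in this handler reduces to a single address comparison; a minimal sketch, with the reserved size passed in rather than taken from the per-ISA constant:

#include <cstddef>
#include <cstdint>

// An implicit stack-overflow check probes sp minus the reserved region, so
// a fault at exactly that address is classified as a stack overflow rather
// than an ordinary null/invalid dereference.
bool IsImplicitStackOverflow(uintptr_t sp, uintptr_t fault_addr, size_t reserved_bytes) {
  return fault_addr == sp - reserved_bytes;  // kArmStackOverflowReservedBytes on ARM
}
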
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 09e8b59..3eb92c8 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -28,14 +28,14 @@
 namespace art {
 namespace arm64 {
 
-static const uint64_t gZero = 0;
+static constexpr uint64_t gZero = 0;
 
 void Arm64Context::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[LR] = &pc_;
@@ -73,73 +73,88 @@
   }
 }
 
-void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  DCHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool Arm64Context::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+  DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since it is never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void Arm64Context::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[X0] = const_cast<uint64_t*>(&gZero);
-  gprs_[X1] = NULL;
-  gprs_[X2] = NULL;
-  gprs_[X3] = NULL;
-  gprs_[X4] = NULL;
-  gprs_[X5] = NULL;
-  gprs_[X6] = NULL;
-  gprs_[X7] = NULL;
-  gprs_[X8] = NULL;
-  gprs_[X9] = NULL;
-  gprs_[X10] = NULL;
-  gprs_[X11] = NULL;
-  gprs_[X12] = NULL;
-  gprs_[X13] = NULL;
-  gprs_[X14] = NULL;
-  gprs_[X15] = NULL;
+  gprs_[X1] = nullptr;
+  gprs_[X2] = nullptr;
+  gprs_[X3] = nullptr;
+  gprs_[X4] = nullptr;
+  gprs_[X5] = nullptr;
+  gprs_[X6] = nullptr;
+  gprs_[X7] = nullptr;
+  gprs_[X8] = nullptr;
+  gprs_[X9] = nullptr;
+  gprs_[X10] = nullptr;
+  gprs_[X11] = nullptr;
+  gprs_[X12] = nullptr;
+  gprs_[X13] = nullptr;
+  gprs_[X14] = nullptr;
+  gprs_[X15] = nullptr;
 
   // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
 
-  fprs_[D0] = NULL;
-  fprs_[D1] = NULL;
-  fprs_[D2] = NULL;
-  fprs_[D3] = NULL;
-  fprs_[D4] = NULL;
-  fprs_[D5] = NULL;
-  fprs_[D6] = NULL;
-  fprs_[D7] = NULL;
+  fprs_[D0] = nullptr;
+  fprs_[D1] = nullptr;
+  fprs_[D2] = nullptr;
+  fprs_[D3] = nullptr;
+  fprs_[D4] = nullptr;
+  fprs_[D5] = nullptr;
+  fprs_[D6] = nullptr;
+  fprs_[D7] = nullptr;
 
-  fprs_[D16] = NULL;
-  fprs_[D17] = NULL;
-  fprs_[D18] = NULL;
-  fprs_[D19] = NULL;
-  fprs_[D20] = NULL;
-  fprs_[D21] = NULL;
-  fprs_[D22] = NULL;
-  fprs_[D23] = NULL;
-  fprs_[D24] = NULL;
-  fprs_[D25] = NULL;
-  fprs_[D26] = NULL;
-  fprs_[D27] = NULL;
-  fprs_[D28] = NULL;
-  fprs_[D29] = NULL;
-  fprs_[D30] = NULL;
-  fprs_[D31] = NULL;
+  fprs_[D16] = nullptr;
+  fprs_[D17] = nullptr;
+  fprs_[D18] = nullptr;
+  fprs_[D19] = nullptr;
+  fprs_[D20] = nullptr;
+  fprs_[D21] = nullptr;
+  fprs_[D22] = nullptr;
+  fprs_[D23] = nullptr;
+  fprs_[D24] = nullptr;
+  fprs_[D25] = nullptr;
+  fprs_[D26] = nullptr;
+  fprs_[D27] = nullptr;
+  fprs_[D28] = nullptr;
+  fprs_[D29] = nullptr;
+  fprs_[D30] = nullptr;
+  fprs_[D31] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
 
 void Arm64Context::DoLongJump() {
   uint64_t gprs[32];
-  uint64_t fprs[32];
+  uint64_t fprs[kNumberOfDRegisters];
 
   // Do not use kNumberOfCoreRegisters, as this is with the distinction of SP and XZR
   for (size_t i = 0; i < 32; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : Arm64Context::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : Arm64Context::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : Arm64Context::kBadFprBase + i;
   }
   DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
   art_quick_do_long_jump(gprs, fprs);
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index d40e291..1f69869 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -32,31 +32,53 @@
 
   ~Arm64Context() {}
 
-  void Reset();
+  void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE;
 
-  void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  void SetPC(uintptr_t new_lr) {
-    SetGPR(LR, new_lr);
+  void SetPC(uintptr_t new_lr) OVERRIDE {
+    bool success = SetGPR(LR, new_lr);
+    CHECK(success) << "Failed to set LR register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  void SetGPR(uint32_t reg, uintptr_t value);
-  void SmashCallerSaves();
-  void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, initialized to NULL or the specific registers below.
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 84ee778..cbb2c27 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -72,17 +72,14 @@
 extern "C" void art_quick_lock_object(void*);
 extern "C" void art_quick_unlock_object(void*);
 
-// Math entrypoints.
-extern int32_t CmpgDouble(double a, double b);
-extern int32_t CmplDouble(double a, double b);
-extern int32_t CmpgFloat(float a, float b);
-extern int32_t CmplFloat(float a, float b);
-
 // Single-precision FP arithmetics.
-extern "C" float fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
+extern "C" float art_quick_fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
 
 // Double-precision FP arithmetics.
-extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+extern "C" double art_quick_fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+
+// Memcpy
+extern "C" void* art_quick_memcpy(void* __restrict, const void* __restrict, size_t);
 
 // Intrinsic entrypoints.
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
@@ -175,31 +172,31 @@
   qpoints->pUnlockObject = art_quick_unlock_object;
 
   // Math
-  // TODO NULL entrypoints not needed for ARM64 - generate inline.
-  qpoints->pCmpgDouble = CmpgDouble;
-  qpoints->pCmpgFloat = CmpgFloat;
-  qpoints->pCmplDouble = CmplDouble;
-  qpoints->pCmplFloat = CmplFloat;
-  qpoints->pFmod = fmod;
-  qpoints->pL2d = NULL;
-  qpoints->pFmodf = fmodf;
-  qpoints->pL2f = NULL;
-  qpoints->pD2iz = NULL;
-  qpoints->pF2iz = NULL;
-  qpoints->pIdivmod = NULL;
-  qpoints->pD2l = NULL;
-  qpoints->pF2l = NULL;
-  qpoints->pLdiv = NULL;
-  qpoints->pLmod = NULL;
-  qpoints->pLmul = NULL;
-  qpoints->pShlLong = NULL;
-  qpoints->pShrLong = NULL;
-  qpoints->pUshrLong = NULL;
+  // TODO nullptr entrypoints not needed for ARM64 - generate inline.
+  qpoints->pCmpgDouble = nullptr;
+  qpoints->pCmpgFloat = nullptr;
+  qpoints->pCmplDouble = nullptr;
+  qpoints->pCmplFloat = nullptr;
+  qpoints->pFmod = art_quick_fmod;
+  qpoints->pL2d = nullptr;
+  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pL2f = nullptr;
+  qpoints->pD2iz = nullptr;
+  qpoints->pF2iz = nullptr;
+  qpoints->pIdivmod = nullptr;
+  qpoints->pD2l = nullptr;
+  qpoints->pF2l = nullptr;
+  qpoints->pLdiv = nullptr;
+  qpoints->pLmod = nullptr;
+  qpoints->pLmul = nullptr;
+  qpoints->pShlLong = nullptr;
+  qpoints->pShrLong = nullptr;
+  qpoints->pUshrLong = nullptr;
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
-  qpoints->pMemcpy = memcpy;
+  qpoints->pMemcpy = art_quick_memcpy;
 
   // Invocation
   qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d704788..2e60b93 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1615,14 +1615,14 @@
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
     // easier to have an extra small stack area.
 
-    str x19, [sp, #-16]!      // Save integer result.
+    str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
     str d0,  [sp, #8]         // Save floating-point result.
 
-    mov   x0, xSELF           // Pass Thread.
     add   x1, sp, #16         // Pass SP.
     mov   x2, x0              // Pass integer result.
     fmov  x3, d0              // Pass floating-point result.
+    mov   x0, xSELF           // Pass Thread.
     bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
 
     mov   x9, x0              // Return address from instrumentation call.
@@ -1862,3 +1862,22 @@
     csel x0, x0, x1, ne          // x0 := x0 != 0 ? x0 : x1
     ret
 END art_quick_string_compareto
+
+// Macro to facilitate adding new entrypoints which call to native function directly.
+// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS.
+// But we might introduce more differences.
+.macro NATIVE_DOWNCALL name, entrypoint
+    .extern \entrypoint
+ENTRY \name
+    sub    sp, sp, #16
+    stp    xSELF, xLR, [sp]
+    bl     \entrypoint
+    ldp    xSELF, xLR, [sp]
+    add    sp, sp, #16
+    ret
+END \name
+.endm
+
+NATIVE_DOWNCALL art_quick_fmod fmod
+NATIVE_DOWNCALL art_quick_fmodf fmodf
+NATIVE_DOWNCALL art_quick_memcpy memcpy
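
On the C++ side, each stub produced by NATIVE_DOWNCALL needs only an extern declaration and an entrypoint assignment; a minimal restatement of the pattern already used in entrypoints_init_arm64.cc above:

// The assembly macro above emits art_quick_fmod as a thin AAPCS wrapper
// that preserves xSELF and xLR around the libc call; the C++ side just
// declares the stub and installs it in the quick entrypoint table.
extern "C" double art_quick_fmod(double, double);  // emitted by NATIVE_DOWNCALL art_quick_fmod fmod

void WireFmod(art::QuickEntryPoints* qpoints) {
  qpoints->pFmod = art_quick_fmod;  // managed REM_DOUBLE now reaches fmod via the stub
}
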
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index f7b7835..20a84dd 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -38,30 +38,40 @@
   // Re-initializes the registers for context re-use.
   virtual void Reset() = 0;
 
-  // Read values from callee saves in the given frame. The frame also holds
+  // Reads values from callee saves in the given frame. The frame also holds
   // the method that holds the layout.
   virtual void FillCalleeSaves(const StackVisitor& fr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  // Set the stack pointer value
+  // Sets the stack pointer value.
   virtual void SetSP(uintptr_t new_sp) = 0;
 
-  // Set the program counter value
+  // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
   // Gets the given GPRs address.
   virtual uintptr_t* GetGPRAddress(uint32_t reg) = 0;
 
-  // Read the given GPR
-  virtual uintptr_t GetGPR(uint32_t reg) = 0;
+  // Reads the given GPR. Returns true if we successfully read the register and
+  // set its value into 'val', returns false otherwise.
+  virtual bool GetGPR(uint32_t reg, uintptr_t* val) = 0;
 
-  // Set the given GPR.
-  virtual void SetGPR(uint32_t reg, uintptr_t value) = 0;
+  // Sets the given GPR. Returns true if we successfully write the given value
+  // into the register, returns false otherwise.
+  virtual bool SetGPR(uint32_t reg, uintptr_t value) = 0;
 
-  // Smash the caller save registers. If we're throwing, we don't want to return bogus values.
+  // Reads the given FPR. Returns true if we successfully read the register and
+  // set its value into 'val', returns false otherwise.
+  virtual bool GetFPR(uint32_t reg, uintptr_t* val) = 0;
+
+  // Sets the given FPR. Returns true if we successfully write the given value
+  // into the register, returns false otherwise.
+  virtual bool SetFPR(uint32_t reg, uintptr_t value) = 0;
+
+  // Smashes the caller save registers. If we're throwing, we don't want to return bogus values.
   virtual void SmashCallerSaves() = 0;
 
-  // Switch execution of the executing context to this context
+  // Switches execution of the executing context to this context
   virtual void DoLongJump() = 0;
 
  protected:
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index ad28891..789dbbb 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -24,14 +24,14 @@
 namespace art {
 namespace mips {
 
-static const uint32_t gZero = 0;
+static constexpr uint32_t gZero = 0;
 
 void MipsContext::Reset() {
   for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   for (size_t i = 0; i < kNumberOfFRegisters; i++) {
-    fprs_[i] = NULL;
+    fprs_[i] = nullptr;
   }
   gprs_[SP] = &sp_;
   gprs_[RA] = &ra_;
@@ -68,20 +68,35 @@
   }
 }
 
-void MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
+bool MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
   CHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool MipsContext::SetFPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+  CHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since it is never reset.
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void MipsContext::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
   gprs_[V0] = const_cast<uint32_t*>(&gZero);
   gprs_[V1] = const_cast<uint32_t*>(&gZero);
-  gprs_[A1] = NULL;
-  gprs_[A2] = NULL;
-  gprs_[A3] = NULL;
+  gprs_[A1] = nullptr;
+  gprs_[A2] = nullptr;
+  gprs_[A3] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
@@ -90,10 +105,10 @@
   uintptr_t gprs[kNumberOfCoreRegisters];
   uint32_t fprs[kNumberOfFRegisters];
   for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
-    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : MipsContext::kBadGprBase + i;
+    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : MipsContext::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfFRegisters; ++i) {
-    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : MipsContext::kBadGprBase + i;
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : MipsContext::kBadFprBase + i;
   }
   art_quick_do_long_jump(gprs, fprs);
 }
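
The kBadGprBase/kBadFprBase arithmetic seen in all the DoLongJump implementations is a debugging aid; a small sketch of the convention (the base values are assumptions taken from runtime/arch/context.h, which this patch does not show):

#include <cstddef>
#include <cstdint>

// Unset registers are loaded with a recognizable poison base plus the
// register index, so a crash on such a value reveals both that the
// register was never filled and which register it was.
uintptr_t PoisonFor(bool is_fpr, size_t reg_index) {
  constexpr uintptr_t kBadGprBase = 0xebad6070;  // assumed values; see context.h
  constexpr uintptr_t kBadFprBase = 0xebad8070;
  return (is_fpr ? kBadFprBase : kBadGprBase) + reg_index;
}
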
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index d5f27ae..f2ee335 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -31,31 +31,53 @@
   }
   virtual ~MipsContext() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(SP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(SP, new_sp);
+    CHECK(success) << "Failed to set SP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
-    SetGPR(RA, new_pc);
+  void SetPC(uintptr_t new_pc) OVERRIDE {
+    bool success = SetGPR(RA, new_pc);
+    CHECK(success) << "Failed to set RA register";
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to registers in the stack, initialized to NULL except for the special cases below.
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 8c98d91..37049cf 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -24,11 +24,11 @@
 namespace art {
 namespace x86 {
 
-static const uintptr_t gZero = 0;
+static constexpr uintptr_t gZero = 0;
 
 void X86Context::Reset() {
   for (size_t  i = 0; i < kNumberOfCpuRegisters; i++) {
-    gprs_[i] = NULL;
+    gprs_[i] = nullptr;
   }
   gprs_[ESP] = &esp_;
   // Initialize registers with easy to spot debug values.
@@ -57,15 +57,19 @@
   // This needs to be 0 because we want a null/zero return value.
   gprs_[EAX] = const_cast<uintptr_t*>(&gZero);
   gprs_[EDX] = const_cast<uintptr_t*>(&gZero);
-  gprs_[ECX] = NULL;
-  gprs_[EBX] = NULL;
+  gprs_[ECX] = nullptr;
+  gprs_[EBX] = nullptr;
 }
 
-void X86Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool X86Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
   CHECK_NE(gprs_[reg], &gZero);
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void X86Context::DoLongJump() {
@@ -74,7 +78,7 @@
   // the top for the stack pointer that doesn't get popped in a pop-all.
   volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
-    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86Context::kBadGprBase + i;
+    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86Context::kBadGprBase + i;
   }
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - kWordSize;
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index 1c51026..a350b25 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -31,32 +31,49 @@
   }
   virtual ~X86Context() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(ESP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(ESP, new_sp);
+    CHECK(success) << "Failed to set ESP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
+  void SetPC(uintptr_t new_pc) OVERRIDE {
     eip_ = new_pc;
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    LOG(FATAL) << "Floating-point registers are all caller save in X86";
+    return false;
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE {
+    LOG(FATAL) << "Floating-point registers are all caller save in X86";
+    return false;
+  }
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations, floating point registers are all caller save. Values are
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c30dca1..a85e250 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -69,8 +69,6 @@
 extern "C" void art_quick_unlock_object(void*);
 
 // Math entrypoints.
-extern "C" double art_quick_fmod(double, double);
-extern "C" float art_quick_fmodf(float, float);
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
@@ -175,9 +173,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  qpoints->pFmod = art_quick_fmod;
+  // qpoints->pFmod = NULL;  // Not needed on x86.
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  qpoints->pFmodf = art_quick_fmodf;
+  // qpoints->pFmodf = NULL;  // Not needed on x86.
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 28e4dd6..ecd8ce6 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -734,35 +734,6 @@
 
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
-DEFINE_FUNCTION art_quick_fmod
-    subl LITERAL(12), %esp        // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                      // pass arg4 b.hi
-    PUSH edx                      // pass arg3 b.lo
-    PUSH ecx                      // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    SETUP_GOT_NOSAVE              // clobbers EBX
-    call PLT_SYMBOL(fmod)         // (jdouble a, jdouble b)
-    fstpl (%esp)                  // pop return value off fp stack
-    movsd (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(28), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-28)
-    ret
-END_FUNCTION art_quick_fmod
-
-DEFINE_FUNCTION art_quick_fmodf
-    PUSH eax                      // alignment padding
-    PUSH ecx                      // pass arg2 b
-    PUSH eax                      // pass arg1 a
-    SETUP_GOT_NOSAVE              // clobbers EBX
-    call PLT_SYMBOL(fmodf)        // (jfloat a, jfloat b)
-    fstps (%esp)                  // pop return value off fp stack
-    movss (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(12), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-12)
-    ret
-END_FUNCTION art_quick_fmodf
-
 DEFINE_FUNCTION art_quick_d2l
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass arg2 a.hi
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 810ef94..0ccbd27 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -24,7 +24,7 @@
 namespace art {
 namespace x86_64 {
 
-static const uintptr_t gZero = 0;
+static constexpr uintptr_t gZero = 0;
 
 void X86_64Context::Reset() {
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
@@ -80,11 +80,26 @@
   gprs_[R11] = nullptr;
 }
 
-void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
+bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
   CHECK_NE(gprs_[reg], &gZero);
-  CHECK(gprs_[reg] != NULL);
-  *gprs_[reg] = value;
+  if (gprs_[reg] != nullptr) {
+    *gprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+  CHECK_NE(fprs_[reg], &gZero);
+  if (fprs_[reg] != nullptr) {
+    *fprs_[reg] = value;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void X86_64Context::DoLongJump() {
@@ -93,7 +108,7 @@
   // the top for the stack pointer that doesn't get popped in a pop-all.
   volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
-    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86_64Context::kBadGprBase + i;
+    gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i;
   }
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 055df61..902c3b9 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -31,32 +31,52 @@
   }
   virtual ~X86_64Context() {}
 
-  virtual void Reset();
+  void Reset() OVERRIDE;
 
-  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void SetSP(uintptr_t new_sp) {
-    SetGPR(RSP, new_sp);
+  void SetSP(uintptr_t new_sp) OVERRIDE {
+    bool success = SetGPR(RSP, new_sp);
+    CHECK(success) << "Failed to set RSP register";
   }
 
-  virtual void SetPC(uintptr_t new_pc) {
+  void SetPC(uintptr_t new_pc) OVERRIDE {
     rip_ = new_pc;
   }
 
-  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+  uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  virtual uintptr_t GetGPR(uint32_t reg) {
+  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    return *gprs_[reg];
+    if (gprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *gprs_[reg];
+      return true;
+    }
   }
 
-  virtual void SetGPR(uint32_t reg, uintptr_t value);
+  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  virtual void SmashCallerSaves();
-  virtual void DoLongJump();
+  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+    if (fprs_[reg] == nullptr) {
+      return false;
+    } else {
+      DCHECK(val != nullptr);
+      *val = *fprs_[reg];
+      return true;
+    }
+  }
+
+  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+
+  void SmashCallerSaves() OVERRIDE;
+  void DoLongJump() OVERRIDE;
 
  private:
   // Pointers to register locations. Values are initialized to NULL or the special registers below.
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2612417..92aabee 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -174,9 +174,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  qpoints->pFmod = fmod;
+  // qpoints->pFmod = NULL;  // Not needed on x86.
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  qpoints->pFmodf = fmodf;
+  // qpoints->pFmodf = NULL;  // Not needed on x86.
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index 63f2cf8..e766a8d 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -31,7 +31,7 @@
   if (kNeedSwapMutexes) {
     gSwapMutexes = new std::vector<Mutex*>;
     for (size_t i = 0; i < kSwapMutexCount; ++i) {
-      gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe"));
+      gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe", kSwapMutexesLock));
     }
   }
 }
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index a7d51e2..1e12be8 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -71,6 +71,10 @@
     return sum_;
   }
 
+  Value AdjustedSum() const {
+    return sum_ * kAdjust;
+  }
+
   Value Min() const {
     return min_value_added_;
   }
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 1ba6180..81e62ab 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -55,6 +55,7 @@
 enum LockLevel {
   kLoggingLock = 0,
   kMemMapsLock,
+  kSwapMutexesLock,
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
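
The new kSwapMutexesLock level sits near the bottom of the hierarchy, so the QuasiAtomic stripe mutexes can be acquired while holding almost any other runtime lock, and debug builds catch violations of that ordering. A sketch of the construction pattern the atomic.cc change uses, assuming ART's base/mutex.h:

// Lock levels impose an order on acquisition: a mutex may only be taken
// when every lock already held has a strictly higher level. Giving the
// swap-mutex stripes their own low level documents and enforces that they
// nest under the rest of the runtime's locks.
art::Mutex* MakeSwapStripe() {
  return new art::Mutex("QuasiAtomic stripe", art::kSwapMutexesLock);
}
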
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index f745088..16e0ec3 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_CLASS_LINKER_INL_H_
 
 #include "class_linker.h"
+#include "gc/heap-inl.h"
 #include "mirror/art_field.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
@@ -186,13 +187,19 @@
 
 inline mirror::IfTable* ClassLinker::AllocIfTable(Thread* self, size_t ifcount) {
   return down_cast<mirror::IfTable*>(
-      mirror::IfTable::Alloc(self, GetClassRoot(kObjectArrayClass), ifcount * mirror::IfTable::kMax));
+      mirror::IfTable::Alloc(self, GetClassRoot(kObjectArrayClass),
+                             ifcount * mirror::IfTable::kMax));
 }
 
 inline mirror::ObjectArray<mirror::ArtField>* ClassLinker::AllocArtFieldArray(Thread* self,
                                                                               size_t length) {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  // Can't have movable field arrays for mark compact since we need these arrays to always be valid
+  // so that we can do Object::VisitReferences in the case where the fields don't fit in the
+  // reference offsets word.
   return mirror::ObjectArray<mirror::ArtField>::Alloc(
-      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length);
+      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length,
+      kMoveFieldArrays ? heap->GetCurrentAllocator() : heap->GetCurrentNonMovingAllocator());
 }
 
 inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root)
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d684a50..d68aca9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1061,6 +1061,42 @@
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
 }
 
+void ClassLinker::VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  if ((flags & kVisitRootFlagAllRoots) != 0) {
+    for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+      callback(reinterpret_cast<mirror::Object**>(&it.second), arg, 0, kRootStickyClass);
+    }
+  } else if ((flags & kVisitRootFlagNewRoots) != 0) {
+    for (auto& pair : new_class_roots_) {
+      mirror::Object* old_ref = pair.second;
+      callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootStickyClass);
+      if (UNLIKELY(pair.second != old_ref)) {
+        // Uh oh, the GC moved a root in the log. We need to search the class_table and update the
+        // corresponding object. This is slow, but luckily for us, this may only happen with a
+        // concurrent moving GC.
+        for (auto it = class_table_.lower_bound(pair.first), end = class_table_.end();
+            it != end && it->first == pair.first; ++it) {
+          // If the class stored matches the old class, update it to the new value.
+          if (old_ref == it->second) {
+            it->second = pair.second;
+          }
+        }
+      }
+    }
+  }
+  if ((flags & kVisitRootFlagClearRootLog) != 0) {
+    new_class_roots_.clear();
+  }
+  if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
+    log_new_class_table_roots_ = true;
+  } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
+    log_new_class_table_roots_ = false;
+  }
+  // We deliberately ignore the class roots in the image since we
+  // handle image roots by using the MS/CMS rescanning of dirty cards.
+}
+
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
@@ -1087,41 +1123,7 @@
       log_new_dex_caches_roots_ = false;
     }
   }
-  {
-    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    if ((flags & kVisitRootFlagAllRoots) != 0) {
-      for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
-        callback(reinterpret_cast<mirror::Object**>(&it.second), arg, 0, kRootStickyClass);
-      }
-    } else if ((flags & kVisitRootFlagNewRoots) != 0) {
-      for (auto& pair : new_class_roots_) {
-        mirror::Object* old_ref = pair.second;
-        callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootStickyClass);
-        if (UNLIKELY(pair.second != old_ref)) {
-          // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
-          // corresponding object. This is slow, but luckily for us, this may only happen with a
-          // concurrent moving GC.
-          for (auto it = class_table_.lower_bound(pair.first), end = class_table_.end();
-              it != end && it->first == pair.first; ++it) {
-            // If the class stored matches the old class, update it to the new value.
-            if (old_ref == it->second) {
-              it->second = pair.second;
-            }
-          }
-        }
-      }
-    }
-    if ((flags & kVisitRootFlagClearRootLog) != 0) {
-      new_class_roots_.clear();
-    }
-    if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
-      log_new_class_table_roots_ = true;
-    } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
-      log_new_class_table_roots_ = false;
-    }
-    // We deliberately ignore the class roots in the image since we
-    // handle image roots by using the MS/CMS rescanning of dirty cards.
-  }
+  VisitClassRoots(callback, arg, flags);
   callback(reinterpret_cast<mirror::Object**>(&array_iftable_), arg, 0, kRootVMInternal);
   DCHECK(array_iftable_ != nullptr);
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
@@ -1252,7 +1254,7 @@
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   InitializeClassVisitor visitor(class_size);
-  mirror::Object* k = (kMovingClasses) ?
+  mirror::Object* k = kMovingClasses ?
       heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
       heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
   if (UNLIKELY(k == nullptr)) {
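
The interesting part of the extracted VisitClassRoots is the new-roots replay: when a moving collector relocates a class that was logged, the callback updates the log entry, and the class table must then be re-synchronized. A standalone sketch of that fix-up, assuming the multimap shape of class_table_:

#include <cstddef>
#include <map>

namespace art { namespace mirror { class Class; } }

// After the callback moves old_ref to new_ref, every stale entry under the
// same hash bucket is rewritten to follow the relocated object. This is
// linear in the bucket size, which is acceptable because it can only
// happen with a concurrent moving GC.
void FixUpRelocatedClass(std::multimap<size_t, art::mirror::Class*>* table,
                         size_t hash,
                         art::mirror::Class* old_ref,
                         art::mirror::Class* new_ref) {
  auto range = table->equal_range(hash);
  for (auto it = range.first; it != range.second; ++it) {
    if (it->second == old_ref) {
      it->second = new_ref;
    }
  }
}
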
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 6d96aa2..62b5ea8 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -247,8 +247,10 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+      LOCKS_EXCLUDED(Locks::classlinker_classes_lock_);
   void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
-      LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
+      LOCKS_EXCLUDED(dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
       LOCKS_EXCLUDED(dex_lock_)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 50e9624..f19c353 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2346,100 +2346,125 @@
         return false;
       }
       uint16_t reg = DemangleSlot(slot_, m);
-
+      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
       switch (tag_) {
-      case JDWP::JT_BOOLEAN:
-        {
+        case JDWP::JT_BOOLEAN: {
           CHECK_EQ(width_, 1U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
-          JDWP::Set1(buf_+1, intVal != 0);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get boolean local " << reg << " = " << intVal;
+            JDWP::Set1(buf_+1, intVal != 0);
+          } else {
+            VLOG(jdwp) << "failed to get boolean local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_BYTE:
-        {
+        case JDWP::JT_BYTE: {
           CHECK_EQ(width_, 1U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
-          JDWP::Set1(buf_+1, intVal);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get byte local " << reg << " = " << intVal;
+            JDWP::Set1(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get byte local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_SHORT:
-      case JDWP::JT_CHAR:
-        {
+        case JDWP::JT_SHORT:
+        case JDWP::JT_CHAR: {
           CHECK_EQ(width_, 2U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
-          JDWP::Set2BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_INT:
-        {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal = GetVReg(m, reg, kIntVReg);
-          VLOG(jdwp) << "get int local " << reg << " = " << intVal;
-          JDWP::Set4BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_FLOAT:
-        {
-          CHECK_EQ(width_, 4U);
-          uint32_t intVal = GetVReg(m, reg, kFloatVReg);
-          VLOG(jdwp) << "get int/float local " << reg << " = " << intVal;
-          JDWP::Set4BE(buf_+1, intVal);
-        }
-        break;
-      case JDWP::JT_ARRAY:
-        {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
-          VLOG(jdwp) << "get array local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-            LOG(FATAL) << "Register " << reg << " expected to hold array: " << o;
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get short/char local " << reg << " = " << intVal;
+            JDWP::Set2BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get short/char local " << reg;
+            error_ = kFailureErrorCode;
           }
-          JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          break;
         }
-        break;
-      case JDWP::JT_CLASS_LOADER:
-      case JDWP::JT_CLASS_OBJECT:
-      case JDWP::JT_OBJECT:
-      case JDWP::JT_STRING:
-      case JDWP::JT_THREAD:
-      case JDWP::JT_THREAD_GROUP:
-        {
-          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
-          mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
-          VLOG(jdwp) << "get object local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
-            LOG(FATAL) << "Register " << reg << " expected to hold object: " << o;
+        case JDWP::JT_INT: {
+          CHECK_EQ(width_, 4U);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kIntVReg, &intVal)) {
+            VLOG(jdwp) << "get int local " << reg << " = " << intVal;
+            JDWP::Set4BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get int local " << reg;
+            error_ = kFailureErrorCode;
           }
-          tag_ = TagFromObject(soa_, o);
-          JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          break;
         }
-        break;
-      case JDWP::JT_DOUBLE:
-        {
+        case JDWP::JT_FLOAT: {
+          CHECK_EQ(width_, 4U);
+          uint32_t intVal;
+          if (GetVReg(m, reg, kFloatVReg, &intVal)) {
+            VLOG(jdwp) << "get float local " << reg << " = " << intVal;
+            JDWP::Set4BE(buf_+1, intVal);
+          } else {
+            VLOG(jdwp) << "failed to get float local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
+        }
+        case JDWP::JT_ARRAY:
+        case JDWP::JT_CLASS_LOADER:
+        case JDWP::JT_CLASS_OBJECT:
+        case JDWP::JT_OBJECT:
+        case JDWP::JT_STRING:
+        case JDWP::JT_THREAD:
+        case JDWP::JT_THREAD_GROUP: {
+          CHECK_EQ(width_, sizeof(JDWP::ObjectId));
+          uint32_t intVal;
+          if (GetVReg(m, reg, kReferenceVReg, &intVal)) {
+            mirror::Object* o = reinterpret_cast<mirror::Object*>(intVal);
+            VLOG(jdwp) << "get " << tag_ << " object local " << reg << " = " << o;
+            if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+              LOG(FATAL) << "Register " << reg << " expected to hold " << tag_ << " object: " << o;
+            }
+            tag_ = TagFromObject(soa_, o);
+            JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
+          } else {
+            VLOG(jdwp) << "failed to get " << tag_ << " object local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
+        }
+        case JDWP::JT_DOUBLE: {
           CHECK_EQ(width_, 8U);
-          uint32_t lo = GetVReg(m, reg, kDoubleLoVReg);
-          uint64_t hi = GetVReg(m, reg + 1, kDoubleHiVReg);
-          uint64_t longVal = (hi << 32) | lo;
-          VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal;
-          JDWP::Set8BE(buf_+1, longVal);
+          uint32_t lo;
+          uint32_t hi;
+          if (GetVReg(m, reg, kDoubleLoVReg, &lo) && GetVReg(m, reg + 1, kDoubleHiVReg, &hi)) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "get double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            JDWP::Set8BE(buf_+1, longVal);
+          } else {
+            VLOG(jdwp) << "failed to get double local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      case JDWP::JT_LONG:
-        {
+        case JDWP::JT_LONG: {
           CHECK_EQ(width_, 8U);
-          uint32_t lo = GetVReg(m, reg, kLongLoVReg);
-          uint64_t hi = GetVReg(m, reg + 1, kLongHiVReg);
-          uint64_t longVal = (hi << 32) | lo;
-          VLOG(jdwp) << "get double/long local " << hi << ":" << lo << " = " << longVal;
-          JDWP::Set8BE(buf_+1, longVal);
+          uint32_t lo;
+          uint32_t hi;
+          if (GetVReg(m, reg, kLongLoVReg, &lo) && GetVReg(m, reg + 1, kLongHiVReg, &hi)) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "get long local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            JDWP::Set8BE(buf_+1, longVal);
+          } else {
+            VLOG(jdwp) << "failed to get long local " << reg;
+            error_ = kFailureErrorCode;
+          }
+          break;
         }
-        break;
-      default:
-        LOG(FATAL) << "Unknown tag " << tag_;
-        break;
+        default:
+          LOG(FATAL) << "Unknown tag " << tag_;
+          break;
       }
 
       // Prepend tag, which may have been updated.
@@ -2495,48 +2520,89 @@
         return false;
       }
       uint16_t reg = DemangleSlot(slot_, m);
-
+      constexpr JDWP::JdwpError kFailureErrorCode = JDWP::ERR_ABSENT_INFORMATION;
       switch (tag_) {
         case JDWP::JT_BOOLEAN:
         case JDWP::JT_BYTE:
           CHECK_EQ(width_, 1U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set boolean/byte local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_SHORT:
         case JDWP::JT_CHAR:
           CHECK_EQ(width_, 2U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set short/char local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_INT:
           CHECK_EQ(width_, 4U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kIntVReg)) {
+            VLOG(jdwp) << "failed to set int local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_FLOAT:
           CHECK_EQ(width_, 4U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg);
+          if (!SetVReg(m, reg, static_cast<uint32_t>(value_), kFloatVReg)) {
+            VLOG(jdwp) << "failed to set float local " << reg << " = "
+                       << static_cast<uint32_t>(value_);
+            error_ = kFailureErrorCode;
+          }
           break;
         case JDWP::JT_ARRAY:
+        case JDWP::JT_CLASS_LOADER:
+        case JDWP::JT_CLASS_OBJECT:
         case JDWP::JT_OBJECT:
         case JDWP::JT_STRING:
-        {
+        case JDWP::JT_THREAD:
+        case JDWP::JT_THREAD_GROUP: {
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = gRegistry->Get<mirror::Object*>(static_cast<JDWP::ObjectId>(value_));
           if (o == ObjectRegistry::kInvalidObject) {
-            UNIMPLEMENTED(FATAL) << "return an error code when given an invalid object to store";
+            VLOG(jdwp) << tag_ << " object " << o << " is an invalid object";
+            error_ = JDWP::ERR_INVALID_OBJECT;
+          } else if (!SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+                              kReferenceVReg)) {
+            VLOG(jdwp) << "failed to set " << tag_ << " object local " << reg << " = " << o;
+            error_ = kFailureErrorCode;
           }
-          SetVReg(m, reg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)), kReferenceVReg);
+          break;
         }
-        break;
-        case JDWP::JT_DOUBLE:
+        case JDWP::JT_DOUBLE: {
           CHECK_EQ(width_, 8U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kDoubleLoVReg);
-          SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kDoubleHiVReg);
+          const uint32_t lo = static_cast<uint32_t>(value_);
+          const uint32_t hi = static_cast<uint32_t>(value_ >> 32);
+          bool success = SetVReg(m, reg, lo, kDoubleLoVReg);
+          success &= SetVReg(m, reg + 1, hi, kDoubleHiVReg);
+          if (!success) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "failed to set double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            error_ = kFailureErrorCode;
+          }
           break;
-        case JDWP::JT_LONG:
+        }
+        case JDWP::JT_LONG: {
           CHECK_EQ(width_, 8U);
-          SetVReg(m, reg, static_cast<uint32_t>(value_), kLongLoVReg);
-          SetVReg(m, reg + 1, static_cast<uint32_t>(value_ >> 32), kLongHiVReg);
+          const uint32_t lo = static_cast<uint32_t>(value_);
+          const uint32_t hi = static_cast<uint32_t>(value_ >> 32);
+          bool success = SetVReg(m, reg, lo, kLongLoVReg);
+          success &= SetVReg(m, reg + 1, hi, kLongHiVReg);
+          if (!success) {
+            uint64_t longVal = (static_cast<uint64_t>(hi) << 32) | lo;
+            VLOG(jdwp) << "failed to set double local " << reg << " = "
+                       << hi << ":" << lo << " = " << longVal;
+            error_ = kFailureErrorCode;
+          }
           break;
+        }
         default:
           LOG(FATAL) << "Unknown tag " << tag_;
           break;
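
One detail worth calling out in the rewritten cases: the explicit widening before the shift. The old code widened by storing the high word in a uint64_t local; the new code keeps both halves as uint32_t, so the cast is not cosmetic. A standalone illustration:

#include <cstdint>

// Shifting a uint32_t left by 32 is undefined behavior (the shift count
// equals the operand width), so hi must be widened to 64 bits before the
// shift; only then can the two halves be recombined safely.
uint64_t PackVRegPair(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}
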
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 3d8b29f..ff836a4 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -511,13 +511,8 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* resolved_field =
       referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
-  if (UNLIKELY(resolved_field == NULL)) {
-    return NULL;
-  }
-  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-  // Check class is initiliazed or initializing.
-  if (UNLIKELY(!fields_class->IsInitializing())) {
-    return NULL;
+  if (UNLIKELY(resolved_field == nullptr)) {
+    return nullptr;
   }
   // Check for incompatible class change.
   bool is_primitive;
@@ -541,7 +536,15 @@
   }
   if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
     // Incompatible class change.
-    return NULL;
+    return nullptr;
+  }
+  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
+  if (is_static) {
+    // Check that the class is initialized; fail otherwise so that we contend to initialize the
+    // class with other threads that may be racing to do so.
+    if (UNLIKELY(!fields_class->IsInitialized())) {
+      return nullptr;
+    }
   }
   mirror::Class* referring_class = referrer->GetDeclaringClass();
   if (UNLIKELY(!referring_class->CanAccess(fields_class) ||
@@ -549,11 +552,11 @@
                                                  resolved_field->GetAccessFlags()) ||
                (is_set && resolved_field->IsFinal() && (fields_class != referring_class)))) {
     // Illegal access.
-    return NULL;
+    return nullptr;
   }
   if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
                resolved_field->FieldSize() != expected_size)) {
-    return NULL;
+    return nullptr;
   }
   return resolved_field;
 }
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index f2e2bf7..329c175 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -33,7 +33,7 @@
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
     mirror::Class* declaringClass = method->GetDeclaringClass();
-    if (UNLIKELY(!declaringClass->IsInitializing())) {
+    if (UNLIKELY(!declaringClass->IsInitialized())) {
       self->PushShadowFrame(shadow_frame);
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 6825e78..2da016f 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -213,7 +213,7 @@
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
+    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitialized()) {
       // Ensure static method's class is initialized.
       Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 514d1aa..7a144b6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -484,7 +484,7 @@
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
+    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitialized()) {
       // Ensure static method's class is initialized.
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 224b33e..c0aa43e 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -52,7 +52,7 @@
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
   std::string error_msg;
   std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
-                                                 PROT_READ | PROT_WRITE, false, &error_msg));
+                                                       PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
     return nullptr;
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 0849171..27fb087 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -42,7 +42,6 @@
 class SpaceBitmap {
  public:
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
-
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
 
   // Initialize a space bitmap so that it points to a bitmap large enough to cover a heap at
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 16add0b..a17c36b 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -56,6 +56,7 @@
 }
 
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
+  ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()).c_str());
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   timings_.Reset();
@@ -86,6 +87,7 @@
   for (uint64_t pause_time : pause_times_) {
     pause_histogram_.AddValue(pause_time / 1000);
   }
+  ATRACE_END();
 }
 
 void GarbageCollector::SwapBitmaps() {
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 02dd4d9..f4f9dbb 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -105,7 +105,7 @@
   }
 
   uint64_t GetTotalPausedTimeNs() const {
-    return pause_histogram_.Sum();
+    return pause_histogram_.AdjustedSum();
   }
 
   int64_t GetTotalFreedBytes() const {
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
new file mode 100644
index 0000000..595dc8f
--- /dev/null
+++ b/runtime/gc/collector/mark_compact.cc
@@ -0,0 +1,634 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mark_compact.h"
+
+#include "base/logging.h"
+#include "base/mutex-inl.h"
+#include "base/timing_logger.h"
+#include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table.h"
+#include "gc/accounting/remembered_set.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/reference_processor.h"
+#include "gc/space/bump_pointer_space.h"
+#include "gc/space/bump_pointer_space-inl.h"
+#include "gc/space/image_space.h"
+#include "gc/space/large_object_space.h"
+#include "gc/space/space-inl.h"
+#include "indirect_reference_table.h"
+#include "intern_table.h"
+#include "jni_internal.h"
+#include "mark_sweep-inl.h"
+#include "monitor.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
+#include "mirror/reference-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
+#include "runtime.h"
+#include "stack.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+using ::art::mirror::Class;
+using ::art::mirror::Object;
+
+namespace art {
+namespace gc {
+namespace collector {
+
+void MarkCompact::BindBitmaps() {
+  timings_.StartSplit("BindBitmaps");
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Mark all of the spaces we never collect as immune.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+    }
+  }
+  timings_.EndSplit();
+}
+
+MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
+    : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "mark compact"),
+      space_(nullptr), collector_name_(name_) {
+}
+
+void MarkCompact::RunPhases() {
+  Thread* self = Thread::Current();
+  InitializePhase();
+  CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
+  {
+    ScopedPause pause(this);
+    GetHeap()->PreGcVerificationPaused(this);
+    GetHeap()->PrePauseRosAllocVerification(this);
+    MarkingPhase();
+    ReclaimPhase();
+  }
+  GetHeap()->PostGcVerification(this);
+  FinishPhase();
+}
+
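+// Install the forwarding address of |obj| in its lock word, saving any non-empty lock word so
+// that it can be restored once the objects have been moved.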
+void MarkCompact::ForwardObject(mirror::Object* obj) {
+  const size_t alloc_size = RoundUp(obj->SizeOf(), space::BumpPointerSpace::kAlignment);
+  LockWord lock_word = obj->GetLockWord(false);
+  // If we have a non-empty lock word, store it and restore it later.
+  if (lock_word.GetValue() != LockWord().GetValue()) {
+    // Set the bit in the bitmap so that we know to restore it later.
+    objects_with_lockword_->Set(obj);
+    lock_words_to_restore_.push_back(lock_word);
+  }
+  obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(bump_pointer_)),
+                   false);
+  bump_pointer_ += alloc_size;
+  ++live_objects_in_space_;
+}
+
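+// Visitor which computes and installs a forwarding address for each marked object it visits.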
+class CalculateObjectForwardingAddressVisitor {
+ public:
+  explicit CalculateObjectForwardingAddressVisitor(MarkCompact* collector)
+      : collector_(collector) {}
+  void operator()(mirror::Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_,
+                                                                      Locks::heap_bitmap_lock_) {
+    DCHECK_ALIGNED(obj, space::BumpPointerSpace::kAlignment);
+    DCHECK(collector_->IsMarked(obj));
+    collector_->ForwardObject(obj);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::CalculateObjectForwardingAddresses() {
+  timings_.NewSplit(__FUNCTION__);
+  // The bump pointer in the space where the next forwarding address will be.
+  bump_pointer_ = reinterpret_cast<byte*>(space_->Begin());
+  // Visit all the marked objects in the bitmap.
+  CalculateObjectForwardingAddressVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+}
+
+void MarkCompact::InitializePhase() {
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  mark_stack_ = heap_->GetMarkStack();
+  DCHECK(mark_stack_ != nullptr);
+  immune_region_.Reset();
+  CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_;
+  // TODO: I don't think we should need the heap bitmap lock to get the mark bitmap.
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  mark_bitmap_ = heap_->GetMarkBitmap();
+  live_objects_in_space_ = 0;
+}
+
+void MarkCompact::ProcessReferences(Thread* self) {
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  heap_->GetReferenceProcessor()->ProcessReferences(
+      false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback,
+      &ProcessMarkStackCallback, this);
+}
+
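+// Slow path taken when a mark falls through to the large object bitmap; as a sanity check,
+// anything marked there must be page aligned.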
+class BitmapSetSlowPathVisitor {
+ public:
+  void operator()(const mirror::Object* obj) const {
+    // Marking a large object, make sure it's aligned as a sanity check.
+    if (!IsAligned<kPageSize>(obj)) {
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
+      LOG(FATAL) << obj;
+    }
+  }
+};
+
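+// Mark a single object. Objects in the immune region are implicitly marked, objects in the
+// moving space are tracked in objects_before_forwarding_, and everything else goes through the
+// regular mark bitmap. Newly marked objects are pushed on the mark stack.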
+inline void MarkCompact::MarkObject(mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+  if (kUseBakerOrBrooksReadBarrier) {
+    // Verify all the objects have the correct forward pointer installed.
+    obj->AssertReadBarrierPointer();
+  }
+  if (immune_region_.ContainsObject(obj)) {
+    return;
+  }
+  if (objects_before_forwarding_->HasAddress(obj)) {
+    if (!objects_before_forwarding_->Set(obj)) {
+      MarkStackPush(obj);  // This object was not previously marked.
+    }
+  } else {
+    DCHECK(!space_->HasAddress(obj));
+    BitmapSetSlowPathVisitor visitor;
+    if (!mark_bitmap_->Set(obj, visitor)) {
+      // This object was not previously marked.
+      MarkStackPush(obj);
+    }
+  }
+}
+
+void MarkCompact::MarkingPhase() {
+  Thread* self = Thread::Current();
+  // Bitmap which describes which objects we have to move.
+  objects_before_forwarding_.reset(accounting::ContinuousSpaceBitmap::Create(
+      "objects before forwarding", space_->Begin(), space_->Size()));
+  // Bitmap which describes which lock words we need to restore.
+  objects_with_lockword_.reset(accounting::ContinuousSpaceBitmap::Create(
+      "objects with lock words", space_->Begin(), space_->Size()));
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self));
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  // Assume the cleared space is already empty.
+  BindBitmaps();
+  // Process dirty cards and add dirty cards to mod-union tables.
+  heap_->ProcessCards(timings_, false);
+  // Clear the whole card table since we cannot get any additional dirty cards during the
+  // paused GC. This saves memory but only works for pause-the-world collectors.
+  timings_.NewSplit("ClearCardTable");
+  heap_->GetCardTable()->ClearCardTable();
+  // Need to do this before the checkpoint since we don't want any threads to add references to
+  // the live stack during the recursive mark.
+  timings_.NewSplit("SwapStacks");
+  if (kUseThreadLocalAllocationStack) {
+    heap_->RevokeAllThreadLocalAllocationStacks(self);
+  }
+  heap_->SwapStacks(self);
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    MarkRoots();
+    // Mark roots of immune spaces.
+    UpdateAndMarkModUnion();
+    // Recursively mark remaining objects.
+    MarkReachableObjects();
+  }
+  ProcessReferences(self);
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  // Revoke buffers before measuring how many objects were moved, since the TLABs need to be
+  // revoked before the objects they contain can be properly counted.
+  RevokeAllThreadLocalBuffers();
+  timings_.StartSplit("PreSweepingGcVerification");
+  // Disabled due to an issue where we have objects in the bump pointer space which reference dead
+  // objects.
+  // heap_->PreSweepingGcVerification(this);
+  timings_.EndSplit();
+}
+
+void MarkCompact::UpdateAndMarkModUnion() {
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    // If the space is immune then we need to mark the references to other spaces.
+    if (immune_region_.ContainsSpace(space)) {
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      if (table != nullptr) {
+        // TODO: Improve naming.
+        TimingLogger::ScopedSplit split(
+            space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+                                     "UpdateAndMarkImageModUnionTable",
+                                     &timings_);
+        table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+      }
+    }
+  }
+}
+
+void MarkCompact::MarkReachableObjects() {
+  timings_.StartSplit("MarkStackAsLive");
+  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+  heap_->MarkAllocStackAsLive(live_stack);
+  live_stack->Reset();
+  // Recursively process the mark stack.
+  ProcessMarkStack();
+}
+
+void MarkCompact::ReclaimPhase() {
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Reclaim unmarked objects.
+  Sweep(false);
+  // Swap the live and mark bitmaps for each space that we modified. This is an optimization
+  // that enables us to not clear live bits inside of the sweep. Only unbound bitmaps are
+  // swapped.
+  timings_.StartSplit("SwapBitmapsAndUnBindBitmaps");
+  SwapBitmaps();
+  GetHeap()->UnBindBitmaps();  // Unbind the live and mark bitmaps.
+  Compact();
+  timings_.EndSplit();
+}
+
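+// Resize the mark stack, preserving its existing contents.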
+void MarkCompact::ResizeMarkStack(size_t new_size) {
+  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  CHECK_LE(mark_stack_->Size(), new_size);
+  mark_stack_->Resize(new_size);
+  for (const auto& obj : temp) {
+    mark_stack_->PushBack(obj);
+  }
+}
+
+inline void MarkCompact::MarkStackPush(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    ResizeMarkStack(mark_stack_->Capacity() * 2);
+  }
+  // The object must be pushed on to the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
+void MarkCompact::ProcessMarkStackCallback(void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->ProcessMarkStack();
+}
+
+mirror::Object* MarkCompact::MarkObjectCallback(mirror::Object* root, void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(root);
+  return root;
+}
+
+void MarkCompact::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                            void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(obj_ptr->AsMirrorPtr());
+}
+
+void MarkCompact::DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
+                                                 void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
+}
+
+void MarkCompact::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
+                                   RootType /*root_type*/) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(*root);
+}
+
+void MarkCompact::UpdateRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
+                                     RootType /*root_type*/) {
+  mirror::Object* obj = *root;
+  mirror::Object* new_obj = reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
+  if (obj != new_obj) {
+    *root = new_obj;
+    DCHECK(new_obj != nullptr);
+  }
+}
+
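+// Visitor which rewrites all of the reference fields of a single object to forwarding addresses.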
+class UpdateObjectReferencesVisitor {
+ public:
+  explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+          EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    collector_->UpdateObjectReferences(obj);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::UpdateReferences() {
+  timings_.NewSplit(__FUNCTION__);
+  Runtime* runtime = Runtime::Current();
+  // Update roots.
+  runtime->VisitRoots(UpdateRootCallback, this);
+  // Update object references in mod union tables and spaces.
+  for (const auto& space : heap_->GetContinuousSpaces()) {
+    // If the space is immune then we need to mark the references to other spaces.
+    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+    if (table != nullptr) {
+      // TODO: Improve naming.
+      TimingLogger::ScopedSplit split(
+          space->IsZygoteSpace() ? "UpdateZygoteModUnionTableReferences" :
+                                   "UpdateImageModUnionTableReferences",
+                                   &timings_);
+      table->UpdateAndMarkReferences(&UpdateHeapReferenceCallback, this);
+    } else {
+      // No mod union table, so we need to scan the space using a bitmap visit.
+      accounting::ContinuousSpaceBitmap* bitmap = space->GetLiveBitmap();
+      if (bitmap != nullptr) {
+        UpdateObjectReferencesVisitor visitor(this);
+        bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                 reinterpret_cast<uintptr_t>(space->End()),
+                                 visitor);
+      }
+    }
+  }
+  CHECK(!kMovingClasses)
+      << "Didn't update large object classes since they are assumed to not move.";
+  // Update the system weaks, these should already have been swept.
+  runtime->SweepSystemWeaks(&MarkedForwardingAddressCallback, this);
+  // Update the objects in the bump pointer space last, these objects don't have a bitmap.
+  UpdateObjectReferencesVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+  // Update the reference processor cleared list.
+  heap_->GetReferenceProcessor()->UpdateRoots(&MarkedForwardingAddressCallback, this);
+}
+
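+// Perform the sliding compaction: first compute forwarding addresses for the surviving objects,
+// then rewrite every reference, and finally move the objects into place.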
+void MarkCompact::Compact() {
+  timings_.NewSplit(__FUNCTION__);
+  CalculateObjectForwardingAddresses();
+  UpdateReferences();
+  MoveObjects();
+  // Record the memory freed by the compaction and shrink the space to its new end.
+  int64_t objects_freed = space_->GetObjectsAllocated() - live_objects_in_space_;
+  int64_t bytes_freed = reinterpret_cast<int64_t>(space_->End()) -
+      reinterpret_cast<int64_t>(bump_pointer_);
+  timings_.NewSplit("RecordFree");
+  space_->RecordFree(objects_freed, bytes_freed);
+  RecordFree(objects_freed, bytes_freed);
+  space_->SetEnd(bump_pointer_);
+  // Need to zero out the memory we freed. TODO: Use madvise for pages.
+  memset(bump_pointer_, 0, bytes_freed);
+}
+
+// Marks all objects in the root set.
+void MarkCompact::MarkRoots() {
+  timings_.NewSplit("MarkRoots");
+  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+}
+
+mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
+  return reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
+}
+
+inline void MarkCompact::UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference) {
+  mirror::Object* obj = reference->AsMirrorPtr();
+  if (obj != nullptr) {
+    mirror::Object* new_obj = GetMarkedForwardAddress(obj);
+    if (obj != new_obj) {
+      DCHECK(new_obj != nullptr);
+      reference->Assign(new_obj);
+    }
+  }
+}
+
+void MarkCompact::UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
+                                              void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->UpdateHeapReference(reference);
+}
+
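+// Visitor which updates an object's reference fields, including Reference referents.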
+class UpdateReferenceVisitor {
+ public:
+  explicit UpdateReferenceVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+      ALWAYS_INLINE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->UpdateHeapReference(obj->GetFieldObjectReferenceAddr<kVerifyNone>(offset));
+  }
+
+  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->UpdateHeapReference(
+        ref->GetFieldObjectReferenceAddr<kVerifyNone>(mirror::Reference::ReferentOffset()));
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::UpdateObjectReferences(mirror::Object* obj) {
+  UpdateReferenceVisitor visitor(this);
+  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+}
+
+inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) const {
+  DCHECK(obj != nullptr);
+  if (objects_before_forwarding_->HasAddress(obj)) {
+    DCHECK(objects_before_forwarding_->Test(obj));
+    mirror::Object* ret =
+        reinterpret_cast<mirror::Object*>(obj->GetLockWord(false).ForwardingAddress());
+    DCHECK(ret != nullptr);
+    return ret;
+  }
+  DCHECK(!space_->HasAddress(obj));
+  DCHECK(IsMarked(obj));
+  return obj;
+}
+
+inline bool MarkCompact::IsMarked(const Object* object) const {
+  if (immune_region_.ContainsObject(object)) {
+    return true;
+  }
+  if (objects_before_forwarding_->HasAddress(object)) {
+    return objects_before_forwarding_->Test(object);
+  }
+  return mark_bitmap_->Test(object);
+}
+
+mirror::Object* MarkCompact::IsMarkedCallback(mirror::Object* object, void* arg) {
+  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(object) ? object : nullptr;
+}
+
+bool MarkCompact::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
+                                              void* arg) {
+  // Side effect free since we call this before ever moving objects.
+  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(ref_ptr->AsMirrorPtr());
+}
+
+void MarkCompact::SweepSystemWeaks() {
+  timings_.StartSplit("SweepSystemWeaks");
+  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
+  timings_.EndSplit();
+}
+
+bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
+  return space != space_ && !immune_region_.ContainsSpace(space);
+}
+
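+// Visitor which copies each visited object to its forwarding address.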
+class MoveObjectVisitor {
+ public:
+  explicit MoveObjectVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+          EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+      collector_->MoveObject(obj, obj->SizeOf());
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
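+// Copy a single object to its forwarding address, restoring its saved lock word if necessary.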
+void MarkCompact::MoveObject(mirror::Object* obj, size_t len) {
+  // Look at the forwarding address stored in the lock word to know where to copy.
+  DCHECK(space_->HasAddress(obj)) << obj;
+  uintptr_t dest_addr = obj->GetLockWord(false).ForwardingAddress();
+  mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest_addr);
+  DCHECK(space_->HasAddress(dest_obj)) << dest_obj;
+  // Use memmove since there may be overlap.
+  memmove(reinterpret_cast<void*>(dest_addr), reinterpret_cast<const void*>(obj), len);
+  // Restore the saved lock word if needed.
+  LockWord lock_word;
+  if (UNLIKELY(objects_with_lockword_->Test(obj))) {
+    lock_word = lock_words_to_restore_.front();
+    lock_words_to_restore_.pop_front();
+  }
+  dest_obj->SetLockWord(lock_word, false);
+}
+
+void MarkCompact::MoveObjects() {
+  timings_.NewSplit(__FUNCTION__);
+  // Move the objects in the before forwarding bitmap.
+  MoveObjectVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+  CHECK(lock_words_to_restore_.empty());
+}
+
+void MarkCompact::Sweep(bool swap_bitmaps) {
+  DCHECK(mark_stack_->IsEmpty());
+  TimingLogger::ScopedSplit split("Sweep", &timings_);
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (!ShouldSweepSpace(alloc_space)) {
+        continue;
+      }
+      TimingLogger::ScopedSplit split(
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+      size_t freed_objects = 0;
+      size_t freed_bytes = 0;
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      RecordFree(freed_objects, freed_bytes);
+    }
+  }
+  SweepLargeObjects(swap_bitmaps);
+}
+
+void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
+  TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_);
+  size_t freed_objects = 0;
+  size_t freed_bytes = 0;
+  heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+  RecordFreeLargeObjects(freed_objects, freed_bytes);
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void MarkCompact::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
+                                                         &HeapReferenceMarkedCallback, this);
+}
+
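+// Visitor which marks the objects referenced by a scanned object's fields and delays Reference
+// referents for later processing.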
+class MarkCompactMarkObjectVisitor {
+ public:
+  explicit MarkCompactMarkObjectVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    // Object was already verified when we scanned it.
+    collector_->MarkObject(obj->GetFieldObject<mirror::Object, kVerifyNone>(offset));
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    collector_->DelayReferenceReferent(klass, ref);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+// Visit all of the references of an object and update.
+void MarkCompact::ScanObject(Object* obj) {
+  MarkCompactMarkObjectVisitor visitor(this);
+  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+}
+
+// Scan anything that's on the mark stack.
+void MarkCompact::ProcessMarkStack() {
+  timings_.StartSplit("ProcessMarkStack");
+  while (!mark_stack_->IsEmpty()) {
+    Object* obj = mark_stack_->PopBack();
+    DCHECK(obj != nullptr);
+    ScanObject(obj);
+  }
+  timings_.EndSplit();
+}
+
+void MarkCompact::SetSpace(space::BumpPointerSpace* space) {
+  DCHECK(space != nullptr);
+  space_ = space;
+}
+
+void MarkCompact::FinishPhase() {
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  space_ = nullptr;
+  CHECK(mark_stack_->IsEmpty());
+  mark_stack_->Reset();
+  // Clear all of the spaces' mark bitmaps.
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  heap_->ClearMarkedObjects();
+  // Release our bitmaps.
+  objects_before_forwarding_.reset(nullptr);
+  objects_with_lockword_.reset(nullptr);
+}
+
+void MarkCompact::RevokeAllThreadLocalBuffers() {
+  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  GetHeap()->RevokeAllThreadLocalBuffers();
+  timings_.EndSplit();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
new file mode 100644
index 0000000..25cfe0f
--- /dev/null
+++ b/runtime/gc/collector/mark_compact.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
+#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
+
+#include <deque>
+#include <memory>  // For unique_ptr.
+
+#include "atomic.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "garbage_collector.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "immune_region.h"
+#include "lock_word.h"
+#include "object_callbacks.h"
+#include "offsets.h"
+
+namespace art {
+
+class Thread;
+
+namespace mirror {
+  class Class;
+  class Object;
+}  // namespace mirror
+
+namespace gc {
+
+class Heap;
+
+namespace accounting {
+  template <typename T> class AtomicStack;
+  typedef AtomicStack<mirror::Object*> ObjectStack;
+}  // namespace accounting
+
+namespace space {
+  class ContinuousMemMapAllocSpace;
+  class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+class MarkCompact : public GarbageCollector {
+ public:
+  explicit MarkCompact(Heap* heap, const std::string& name_prefix = "");
+  ~MarkCompact() {}
+
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  void InitializePhase();
+  void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkReachableObjects()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  virtual GcType GetGcType() const OVERRIDE {
+    return kGcTypePartial;
+  }
+  virtual CollectorType GetCollectorType() const OVERRIDE {
+    return kCollectorTypeMC;
+  }
+
+  // Sets which space we will be copying objects in.
+  void SetSpace(space::BumpPointerSpace* space);
+
+  // Initializes internal structures.
+  void Init();
+
+  // Find the default mark bitmap.
+  void FindDefaultMarkBitmap();
+
+  void ScanObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Marks the root set at the start of a garbage collection.
+  void MarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, i.e.
+  // the image. Mark that portion of the heap as immune.
+  void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+
+  void UnBindBitmaps()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Sweeps unmarked objects to complete the garbage collection.
+  void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweeps unmarked objects to complete the garbage collection.
+  void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void SweepSystemWeaks()
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t /*tid*/,
+                               RootType /*root_type*/)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
+                                          void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void ProcessMarkStackCallback(void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  static void DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
+                                             void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+ protected:
+  // Returns null if the object is not marked, otherwise returns the forwarding address (same as
+  // object for non-movable things).
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Marks a large object.
+  bool MarkLargeObject(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Resize the mark stack to new_size, preserving its contents.
+  void ResizeMarkStack(size_t new_size);
+
+  // Returns true if we should sweep the space.
+  bool ShouldSweepSpace(space::ContinuousSpace* space) const;
+
+  // Push an object onto the mark stack.
+  void MarkStackPush(mirror::Object* obj);
+
+  void UpdateAndMarkModUnion()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Recursively blackens objects on the mark stack.
+  void ProcessMarkStack()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  // Three-pass mark compact approach: calculate forwarding addresses, update references, then
+  // move the objects.
+  void Compact() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  // Calculate the forwarding address of objects marked as "live" in the objects_before_forwarding
+  // bitmap.
+  void CalculateObjectForwardingAddresses()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  // Update the references of objects by using the forwarding addresses.
+  void UpdateReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  static void UpdateRootCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
+                                 RootType /*root_type*/)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  // Move objects and restore lock words.
+  void MoveObjects() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Move a single object to its forward address.
+  void MoveObject(mirror::Object* obj, size_t len) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Mark a single object.
+  void MarkObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
+                                                                Locks::mutator_lock_);
+  bool IsMarked(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void ForwardObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
+                                                                   Locks::mutator_lock_);
+  // Update a single heap reference.
+  void UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
+                                          void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Update all of the references of a single object.
+  void UpdateObjectReferences(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
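+  // Mark stack borrowed from the heap; filled during marking and drained by ProcessMarkStack.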
+  accounting::ObjectStack* mark_stack_;
+
+  // Immune region, every object inside the immune region is assumed to be marked.
+  ImmuneRegion immune_region_;
+
+  // Bump pointer space which we are collecting.
+  space::BumpPointerSpace* space_;
+  // Cached mark bitmap as an optimization.
+  accounting::HeapBitmap* mark_bitmap_;
+
+  // The name of the collector.
+  std::string collector_name_;
+
+  // The bump pointer in the space where the next forwarding address will be.
+  byte* bump_pointer_;
+  // How many live objects we have in the space.
+  size_t live_objects_in_space_;
+
+  // Bitmap which describes which objects we have to move. The granularity needs to be halved so
+  // that we can handle objects which are only 8 bytes.
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> objects_before_forwarding_;
+  // Bitmap which describes which lock words we need to restore.
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> objects_with_lockword_;
+  // Which lock words we need to restore as we are moving objects.
+  std::deque<LockWord> lock_words_to_restore_;
+
+ private:
+  friend class BitmapSetSlowPathVisitor;
+  friend class CalculateObjectForwardingAddressVisitor;
+  friend class MarkCompactMarkObjectVisitor;
+  friend class MoveObjectVisitor;
+  friend class UpdateObjectReferencesVisitor;
+  friend class UpdateReferenceVisitor;
+  DISALLOW_COPY_AND_ASSIGN(MarkCompact);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 890036b..fbb349e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -43,10 +43,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
-using ::art::mirror::ArtField;
-using ::art::mirror::Class;
 using ::art::mirror::Object;
-using ::art::mirror::ObjectArray;
 
 namespace art {
 namespace gc {
@@ -176,7 +173,7 @@
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true, &timings_, clear_soft_references_, &IsMarkedCallback, &MarkObjectCallback,
+      true, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback,
       &ProcessMarkStackCallback, this);
 }
 
@@ -374,6 +371,10 @@
   reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
 }
 
+bool MarkSweep::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) {
+  return reinterpret_cast<MarkSweep*>(arg)->IsMarked(ref->AsMirrorPtr());
+}
+
 class MarkSweepMarkObjectSlowPath {
  public:
   explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
@@ -1170,11 +1171,11 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref) {
-  DCHECK(klass != nullptr);
   if (kCountJavaLangRefs) {
     ++reference_count_;
   }
-  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, &HeapReferenceMarkedCallback,
+                                                         this);
 }
 
 class MarkObjectVisitor {
@@ -1268,8 +1269,7 @@
   timings_.EndSplit();
 }
 
-inline bool MarkSweep::IsMarked(const Object* object) const
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+inline bool MarkSweep::IsMarked(const Object* object) const {
   if (immune_region_.ContainsObject(object)) {
     return true;
   }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index a0a0dd8..2780099 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -178,6 +178,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -211,7 +215,8 @@
 
  protected:
   // Returns true if the object has its bit set in the mark bitmap.
-  bool IsMarked(const mirror::Object* object) const;
+  bool IsMarked(const mirror::Object* object) const
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index e5bb1cc..54e77a7 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -41,22 +41,12 @@
 #include "jni_internal.h"
 #include "mark_sweep-inl.h"
 #include "monitor.h"
-#include "mirror/art_field.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/class_loader.h"
-#include "mirror/dex_cache.h"
 #include "mirror/reference-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array.h"
-#include "mirror/object_array-inl.h"
 #include "runtime.h"
-#include "stack.h"
 #include "thread-inl.h"
 #include "thread_list.h"
-#include "verifier/method_verifier.h"
 
-using ::art::mirror::Class;
 using ::art::mirror::Object;
 
 namespace art {
@@ -164,7 +154,7 @@
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      false, &timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+      false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback,
       &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
@@ -649,6 +639,22 @@
   Runtime::Current()->VisitRoots(MarkRootCallback, this);
 }
 
+bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
+                                            void* arg) {
+  mirror::Object* obj = object->AsMirrorPtr();
+  mirror::Object* new_obj =
+      reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(obj);
+  if (new_obj == nullptr) {
+    return false;
+  }
+  if (new_obj != obj) {
+    // Write barrier is not necessary since it still points to the same object, just at a different
+    // address.
+    object->Assign(new_obj);
+  }
+  return true;
+}
+
 mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
@@ -698,7 +704,7 @@
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
-                                                         MarkedForwardingAddressCallback, this);
+                                                         &HeapReferenceMarkedCallback, this);
 }
 
 class SemiSpaceMarkObjectVisitor {
@@ -772,7 +778,7 @@
     // Already forwarded, must be marked.
     return obj;
   }
-  return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
+  return mark_bitmap_->Test(obj) ? obj : nullptr;
 }
 
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index a95abe4..bff0847 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -162,6 +162,10 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index c0a6b6a..530a3c9 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,6 +34,8 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
+  // Mark compact collector.
+  kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
   // A (mostly) concurrent copying collector.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f5f7a86..1c94d6f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -36,6 +36,7 @@
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/concurrent_copying.h"
+#include "gc/collector/mark_compact.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
 #include "gc/collector/semi_space.h"
@@ -285,17 +286,11 @@
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
 
-  if (collector::SemiSpace::kUseRememberedSet) {
+  if (collector::SemiSpace::kUseRememberedSet && non_moving_space_ != main_space_) {
     accounting::RememberedSet* non_moving_space_rem_set =
         new accounting::RememberedSet("Non-moving space remembered set", this, non_moving_space_);
     CHECK(non_moving_space_rem_set != nullptr) << "Failed to create non-moving space remembered set";
     AddRememberedSet(non_moving_space_rem_set);
-    if (main_space_ != nullptr && main_space_ != non_moving_space_) {
-      accounting::RememberedSet* main_space_rem_set =
-          new accounting::RememberedSet("Main space remembered set", this, main_space_);
-      CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
-      AddRememberedSet(main_space_rem_set);
-    }
   }
 
   // TODO: Count objects in the image space here.
@@ -337,9 +332,10 @@
     semi_space_collector_ = new collector::SemiSpace(this, generational,
                                                      generational ? "generational" : "");
     garbage_collectors_.push_back(semi_space_collector_);
-
     concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
     garbage_collectors_.push_back(concurrent_copying_collector_);
+    mark_compact_collector_ = new collector::MarkCompact(this);
+    garbage_collectors_.push_back(mark_compact_collector_);
   }
 
   if (GetImageSpace() != nullptr && main_space_ != nullptr) {
@@ -376,6 +372,9 @@
     // that getting primitive array elements is faster.
     can_move_objects = !have_zygote_space_;
   }
+  if (collector::SemiSpace::kUseRememberedSet && main_space_ != nullptr) {
+    RemoveRememberedSet(main_space_);
+  }
   if (kUseRosAlloc) {
     rosalloc_space_ = space::RosAllocSpace::CreateFromMemMap(
         mem_map, "main rosalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
@@ -390,6 +389,12 @@
     CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
+  if (collector::SemiSpace::kUseRememberedSet) {
+    accounting::RememberedSet* main_space_rem_set =
+        new accounting::RememberedSet("Main space remembered set", this, main_space_);
+    CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
+    AddRememberedSet(main_space_rem_set);
+  }
   VLOG(heap) << "Created main space " << main_space_;
 }
 
@@ -1338,8 +1343,9 @@
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
   uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
-  ThreadList* tl = Runtime::Current()->GetThreadList();
-  Thread* self = Thread::Current();
+  Runtime* const runtime = Runtime::Current();
+  ThreadList* const tl = runtime->GetThreadList();
+  Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
   const bool copying_transition =
@@ -1368,7 +1374,7 @@
     }
     usleep(1000);
   }
-  if (Runtime::Current()->IsShuttingDown(self)) {
+  if (runtime->IsShuttingDown(self)) {
     // Don't allow heap transitions to happen if the runtime is shutting down since these can
     // cause objects to get finalized.
     FinishGC(self, collector::kGcTypeNone);
@@ -1386,7 +1392,6 @@
         Compact(bump_pointer_space_, main_space_);
        // Remove the main space so that we don't try to trim it; this doesn't work for debug
         // builds since RosAlloc attempts to read the magic number from a protected page.
-        // TODO: Clean this up by getting rid of the remove_as_default parameter.
         RemoveSpace(main_space_);
       }
       break;
@@ -1411,23 +1416,34 @@
   ChangeCollector(collector_type);
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
-  reference_processor_.EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences(self);
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
   int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   int32_t delta_allocated = before_allocated - after_allocated;
+  std::string saved_str;
+  if (delta_allocated >= 0) {
+    saved_str = " saved at least " + PrettySize(delta_allocated);
+  } else {
+    saved_str = " expanded " + PrettySize(-delta_allocated);
+  }
   LOG(INFO) << "Heap transition to " << process_state_ << " took "
-      << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated);
+      << PrettyDuration(duration) << saved_str;
 }
 
 void Heap::ChangeCollector(CollectorType collector_type) {
   // TODO: Only do this with all mutators suspended to avoid races.
   if (collector_type != collector_type_) {
+    if (collector_type == kCollectorTypeMC) {
+      // Don't allow mark compact unless support is compiled in.
+      CHECK(kMarkCompactSupport);
+    }
     collector_type_ = collector_type;
     gc_plan_.clear();
     switch (collector_type_) {
       case kCollectorTypeCC:  // Fall-through.
+      case kCollectorTypeMC:  // Fall-through.
       case kCollectorTypeSS:  // Fall-through.
       case kCollectorTypeGSS: {
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1617,9 +1633,9 @@
       madvise(main_space_->Begin(), main_space_->Capacity(), MADV_DONTNEED);
       MemMap* mem_map = main_space_->ReleaseMemMap();
       RemoveSpace(main_space_);
-      delete main_space_;
-      main_space_ = nullptr;
+      space::Space* old_main_space = main_space_;
       CreateMainMallocSpace(mem_map, kDefaultInitialSize, mem_map->Size(), mem_map->Size());
+      delete old_main_space;
       AddSpace(main_space_);
     } else {
       bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
@@ -1714,13 +1730,17 @@
 void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
                    space::ContinuousMemMapAllocSpace* source_space) {
   CHECK(kMovingCollector);
-  CHECK_NE(target_space, source_space) << "In-place compaction currently unsupported";
   if (target_space != source_space) {
     // Don't swap spaces since this isn't a typical semi space collection.
     semi_space_collector_->SetSwapSemiSpaces(false);
     semi_space_collector_->SetFromSpace(source_space);
     semi_space_collector_->SetToSpace(target_space);
     semi_space_collector_->Run(kGcCauseCollectorTransition, false);
+  } else {
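+    // Same source and target space: compact the bump pointer space in place with mark compact.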
+    CHECK(target_space->IsBumpPointerSpace())
+        << "In-place compaction is only supported for bump pointer spaces";
+    mark_compact_collector_->SetSpace(target_space->AsBumpPointerSpace());
+    mark_compact_collector_->Run(kGcCauseCollectorTransition, false);
   }
 }
 
@@ -1784,21 +1804,30 @@
   if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
            current_allocator_ == kAllocatorTypeTLAB);
-    if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
-      gc_type = semi_space_collector_->GetGcType();
-      semi_space_collector_->SetFromSpace(bump_pointer_space_);
-      semi_space_collector_->SetToSpace(temp_space_);
-      collector = semi_space_collector_;
-      semi_space_collector_->SetSwapSemiSpaces(true);
-    } else if (collector_type_ == kCollectorTypeCC) {
-      gc_type = concurrent_copying_collector_->GetGcType();
-      collector = concurrent_copying_collector_;
-    } else {
-      LOG(FATAL) << "Unreachable - invalid collector type " << static_cast<size_t>(collector_type_);
+    switch (collector_type_) {
+      case kCollectorTypeSS:
+        // Fall-through.
+      case kCollectorTypeGSS:
+        semi_space_collector_->SetFromSpace(bump_pointer_space_);
+        semi_space_collector_->SetToSpace(temp_space_);
+        semi_space_collector_->SetSwapSemiSpaces(true);
+        collector = semi_space_collector_;
+        break;
+      case kCollectorTypeCC:
+        collector = concurrent_copying_collector_;
+        break;
+      case kCollectorTypeMC:
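+        // Mark compact compacts the bump pointer space in place, so no to-space is needed.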
+        mark_compact_collector_->SetSpace(bump_pointer_space_);
+        collector = mark_compact_collector_;
+        break;
+      default:
+        LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_);
     }
-    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    CHECK(temp_space_->IsEmpty());
-    gc_type = collector::kGcTypeFull;
+    if (collector != mark_compact_collector_) {
+      temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      CHECK(temp_space_->IsEmpty());
+    }
+    gc_type = collector::kGcTypeFull;  // TODO: Don't hardcode this.
   } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
       current_allocator_ == kAllocatorTypeDlMalloc) {
     collector = FindCollectorByGcType(gc_type);
@@ -1808,13 +1837,12 @@
   CHECK(collector != nullptr)
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
-  ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
   collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
   // Enqueue cleared references.
-  reference_processor_.EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences(self);
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(collector);
   const size_t duration = collector->GetDurationNs();
@@ -1840,7 +1868,7 @@
                      << ((i != pause_times.size() - 1) ? "," : "");
     }
     LOG(INFO) << gc_cause << " " << collector->GetName()
-              << " GC freed "  <<  collector->GetFreedObjects() << "("
+              << " GC freed "  << collector->GetFreedObjects() << "("
               << PrettySize(collector->GetFreedBytes()) << ") AllocSpace objects, "
               << collector->GetFreedLargeObjects() << "("
               << PrettySize(collector->GetFreedLargeObjectBytes()) << ") LOS objects, "
@@ -1850,8 +1878,6 @@
     VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings());
   }
   FinishGC(self, gc_type);
-  ATRACE_END();
-
   // Inform DDMS that a GC completed.
   Dbg::GcDidFinish();
   return gc_type;
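With mark compact wired in, Heap::Compact no longer rejects same-space requests. Schematically, assuming the collectors were created as in this patch (illustrative calls, not lines from the change), the transition code can now take either path:

    // Distinct spaces: semi-space copy from source into target, as before.
    heap->Compact(temp_space, bump_pointer_space);
    // Same space: the new in-place path; the space must be a bump pointer
    // space or the CHECK in Compact fires.
    heap->Compact(bump_pointer_space, bump_pointer_space);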
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 9b49373..368a20c 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -66,6 +66,7 @@
 namespace collector {
   class ConcurrentCopying;
   class GarbageCollector;
+  class MarkCompact;
   class MarkSweep;
   class SemiSpace;
 }  // namespace collector
@@ -573,7 +574,7 @@
   }
   static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
-        collector_type == kCollectorTypeCC;
+        collector_type == kCollectorTypeCC || collector_type == kCollectorTypeMC;
   }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -952,12 +953,14 @@
 
   std::vector<collector::GarbageCollector*> garbage_collectors_;
   collector::SemiSpace* semi_space_collector_;
+  collector::MarkCompact* mark_compact_collector_;
   collector::ConcurrentCopying* concurrent_copying_collector_;
 
   const bool running_on_valgrind_;
   const bool use_tlab_;
 
   friend class collector::GarbageCollector;
+  friend class collector::MarkCompact;
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
   friend class ReferenceQueue;
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 7988af7..292781e 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -44,36 +44,35 @@
 
 mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) {
   mirror::Object* const referent = reference->GetReferent();
-  if (LIKELY(!slow_path_enabled_)) {
+  // If the referent is null then it is already cleared; we can just return null since there is no
+  // scenario where it becomes non-null during the reference processing phase.
+  if (LIKELY(!slow_path_enabled_) || referent == nullptr) {
     return referent;
   }
-  // Another fast path, the referent is cleared, we can just return null since there is no scenario
-  // where it becomes non-null.
-  if (referent == nullptr) {
-    return nullptr;
-  }
   MutexLock mu(self, lock_);
   while (slow_path_enabled_) {
-    mirror::Object* const referent = reference->GetReferent();
-    // If the referent became cleared, return it.
-    if (referent == nullptr) {
+    mirror::HeapReference<mirror::Object>* const referent_addr =
+        reference->GetReferentReferenceAddr();
+    // If the referent became cleared, return null. We don't need a barrier since thread roots
+    // can't get updated until after we leave this function, because we hold the mutator lock.
+    if (referent_addr->AsMirrorPtr() == nullptr) {
       return nullptr;
     }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
-    // it to the mutator as long as the GC is not preserving references. If the GC is
-    IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
+    // it to the mutator as long as the GC is not preserving references.
+    IsHeapReferenceMarkedCallback* const is_marked_callback =
+        process_references_args_.is_marked_callback_;
     if (LIKELY(is_marked_callback != nullptr)) {
-      mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
       // If it's null it means not marked, but it could become marked if the referent is reachable
       // by finalizer referents. So we can not return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
       // in the heap causing corruption since this field would get swept.
-      if (obj != nullptr) {
+      if (is_marked_callback(referent_addr, process_references_args_.arg_)) {
         if (!preserving_references_ ||
            (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
-          return obj;
+          return referent_addr->AsMirrorPtr();
         }
       }
     }
@@ -82,10 +81,14 @@
   return reference->GetReferent();
 }
 
-mirror::Object* ReferenceProcessor::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+bool ReferenceProcessor::PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj,
+                                                       void* arg) {
   auto* const args = reinterpret_cast<ProcessReferencesArgs*>(arg);
-  // TODO: Not preserve all soft references.
-  return args->mark_callback_(obj, args->arg_);
+  // TODO: Add smarter logic for preserving soft references.
+  mirror::Object* new_obj = args->mark_callback_(obj->AsMirrorPtr(), args->arg_);
+  DCHECK(new_obj != nullptr);
+  obj->Assign(new_obj);
+  return true;
 }
 
 void ReferenceProcessor::StartPreservingReferences(Thread* self) {
@@ -103,7 +106,7 @@
 // Process reference class instances and schedule finalizations.
 void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings,
                                            bool clear_soft_references,
-                                           IsMarkedCallback* is_marked_callback,
+                                           IsHeapReferenceMarkedCallback* is_marked_callback,
                                            MarkObjectCallback* mark_object_callback,
                                            ProcessMarkStackCallback* process_mark_stack_callback,
                                            void* arg) {
@@ -132,8 +135,8 @@
     }
   }
   // Clear all remaining soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   {
     TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" :
         "(Paused)EnqueueFinalizerReferences", timings);
@@ -141,7 +144,7 @@
       StartPreservingReferences(self);
     }
     // Preserve all white objects with finalize methods and schedule them for finalization.
-    finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+    finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, is_marked_callback,
                                                           mark_object_callback, arg);
     process_mark_stack_callback(arg);
     if (concurrent) {
@@ -149,10 +152,10 @@
     }
   }
   // Clear all finalizer referent reachable soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   // Clear all phantom references with white referents.
-  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   // At this point all reference queues other than the cleared references should be empty.
   DCHECK(soft_reference_queue_.IsEmpty());
   DCHECK(weak_reference_queue_.IsEmpty());
@@ -176,39 +179,37 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                                                IsMarkedCallback is_marked_callback, void* arg) {
+                                                IsHeapReferenceMarkedCallback* is_marked_callback,
+                                                void* arg) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
+  DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
-  mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-  if (referent != nullptr) {
-    mirror::Object* forward_address = is_marked_callback(referent, arg);
-    // Null means that the object is not currently marked.
-    if (forward_address == nullptr) {
-      Thread* self = Thread::Current();
-      // TODO: Remove these locks, and use atomic stacks for storing references?
-      // We need to check that the references haven't already been enqueued since we can end up
-      // scanning the same reference multiple times due to dirty cards.
-      if (klass->IsSoftReferenceClass()) {
-        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsWeakReferenceClass()) {
-        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsFinalizerReferenceClass()) {
-        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsPhantomReferenceClass()) {
-        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else {
-        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
-                   << klass->GetAccessFlags();
-      }
-    } else if (referent != forward_address) {
-      // Referent is already marked and we need to update it.
-      ref->SetReferent<false>(forward_address);
+  mirror::HeapReference<mirror::Object>* referent = ref->GetReferentReferenceAddr();
+  if (referent->AsMirrorPtr() != nullptr && !is_marked_callback(referent, arg)) {
+    Thread* self = Thread::Current();
+    // TODO: Remove these locks, and use atomic stacks for storing references?
+    // We need to check that the references haven't already been enqueued since we can end up
+    // scanning the same reference multiple times due to dirty cards.
+    if (klass->IsSoftReferenceClass()) {
+      soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsWeakReferenceClass()) {
+      weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsFinalizerReferenceClass()) {
+      finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else if (klass->IsPhantomReferenceClass()) {
+      phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+    } else {
+      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                 << klass->GetAccessFlags();
     }
   }
 }
 
-void ReferenceProcessor::EnqueueClearedReferences() {
-  Thread* self = Thread::Current();
+void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) {
+  cleared_references_.UpdateRoots(callback, arg);
+}
+
+void ReferenceProcessor::EnqueueClearedReferences(Thread* self) {
   Locks::mutator_lock_->AssertNotHeld(self);
   if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index f082a9e..2771ea8 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -40,9 +40,10 @@
 class ReferenceProcessor {
  public:
   explicit ReferenceProcessor();
-  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  static bool PreserveSoftReferenceCallback(mirror::HeapReference<mirror::Object>* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references,
-                         IsMarkedCallback* is_marked_callback,
+                         IsHeapReferenceMarkedCallback* is_marked_callback,
                          MarkObjectCallback* mark_object_callback,
                          ProcessMarkStackCallback* process_mark_stack_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -54,21 +55,23 @@
   // Decode the referent, may block if references are being processed.
   mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
-  void EnqueueClearedReferences() LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void EnqueueClearedReferences(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_);
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                              IsMarkedCallback is_marked_callback, void* arg)
+                              IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
  private:
   class ProcessReferencesArgs {
    public:
-    ProcessReferencesArgs(IsMarkedCallback* is_marked_callback,
+    ProcessReferencesArgs(IsHeapReferenceMarkedCallback* is_marked_callback,
                           MarkObjectCallback* mark_callback, void* arg)
         : is_marked_callback_(is_marked_callback), mark_callback_(mark_callback), arg_(arg) {
     }
 
     // The is marked callback is null when the args aren't set up.
-    IsMarkedCallback* is_marked_callback_;
+    IsHeapReferenceMarkedCallback* is_marked_callback_;
     MarkObjectCallback* mark_callback_;
     void* arg_;
   };
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 3910c29..c3931e8 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -26,8 +26,7 @@
 namespace gc {
 
 ReferenceQueue::ReferenceQueue()
-    : lock_("reference queue lock"),
-      list_(nullptr) {
+    : lock_("reference queue lock"), list_(nullptr) {
 }
 
 void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
@@ -104,80 +103,71 @@
   }
 }
 
-void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references,
-                                          IsMarkedCallback* preserve_callback,
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references,
+                                          IsHeapReferenceMarkedCallback* preserve_callback,
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address == nullptr) {
-        // Referent is white, clear it.
-        if (Runtime::Current()->IsActiveTransaction()) {
-          ref->ClearReferent<true>();
-        } else {
-          ref->ClearReferent<false>();
-        }
-        if (ref->IsEnqueuable()) {
-          cleared_references.EnqueuePendingReference(ref);
-        }
-      } else if (referent != forward_address) {
-        // Object moved, need to updated the referent.
-        ref->SetReferent<false>(forward_address);
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr && !preserve_callback(referent_addr, arg)) {
+      // Referent is white, clear it.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        ref->ClearReferent<true>();
+      } else {
+        ref->ClearReferent<false>();
+      }
+      if (ref->IsEnqueuable()) {
+        cleared_references->EnqueuePendingReference(ref);
       }
     }
   }
 }
 
-void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                                IsMarkedCallback* is_marked_callback,
+void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+                                                IsHeapReferenceMarkedCallback* is_marked_callback,
                                                 MarkObjectCallback* mark_object_callback,
                                                 void* arg) {
   while (!IsEmpty()) {
     mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = is_marked_callback(referent, arg);
-      // If the referent isn't marked, mark it and update the
-      if (forward_address == nullptr) {
-        forward_address = mark_object_callback(referent, arg);
-        // If the referent is non-null the reference must queuable.
-        DCHECK(ref->IsEnqueuable());
-        // Move the updated referent to the zombie field.
-        if (Runtime::Current()->IsActiveTransaction()) {
-          ref->SetZombie<true>(forward_address);
-          ref->ClearReferent<true>();
-        } else {
-          ref->SetZombie<false>(forward_address);
-          ref->ClearReferent<false>();
-        }
-        cleared_references.EnqueueReference(ref);
-      } else if (referent != forward_address) {
-        ref->SetReferent<false>(forward_address);
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr && !is_marked_callback(referent_addr, arg)) {
+      mirror::Object* forward_address = mark_object_callback(referent_addr->AsMirrorPtr(), arg);
+      // If the referent is non-null, the reference must be enqueuable.
+      DCHECK(ref->IsEnqueuable());
+      // Move the updated referent to the zombie field.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        ref->SetZombie<true>(forward_address);
+        ref->ClearReferent<true>();
+      } else {
+        ref->SetZombie<false>(forward_address);
+        ref->ClearReferent<false>();
       }
+      cleared_references->EnqueueReference(ref);
     }
   }
 }
 
-void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback,
-                                                void* arg) {
+void ReferenceQueue::ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback,
+                                           void* arg) {
   if (UNLIKELY(IsEmpty())) {
     return;
   }
   mirror::Reference* const head = list_;
   mirror::Reference* ref = head;
   do {
-    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
-    if (referent != nullptr) {
-      mirror::Object* forward_address = preserve_callback(referent, arg);
-      if (forward_address != nullptr && forward_address != referent) {
-        ref->SetReferent<false>(forward_address);
-      }
+    mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
+    if (referent_addr->AsMirrorPtr() != nullptr) {
+      UNUSED(preserve_callback(referent_addr, arg));
     }
     ref = ref->GetPendingNext();
   } while (LIKELY(ref != head));
 }
 
+void ReferenceQueue::UpdateRoots(IsMarkedCallback* callback, void* arg) {
+  if (list_ != nullptr) {
+    list_ = down_cast<mirror::Reference*>(callback(list_, arg));
+  }
+}
+
 }  // namespace gc
 }  // namespace art
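Since list_ can now be a root (for the mark compact GC), UpdateRoots takes a plain IsMarkedCallback to forward the list head. A hypothetical callback, assuming a collector type MyMarkCompact with a GetForwardingAddress helper (both names are illustrative, not from this patch):

    static mirror::Object* ForwardObjectCallback(mirror::Object* obj, void* arg) {
      MyMarkCompact* collector = reinterpret_cast<MyMarkCompact*>(arg);
      // The list head is a root and therefore marked; return its new address.
      return collector->GetForwardingAddress(obj);
    }

    // During the mark compact reference-updating phase:
    queue->UpdateRoots(&ForwardObjectCallback, collector);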
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 1d8cc1a..cd814bb 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -58,23 +58,22 @@
   mirror::Reference* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
   // zombie field, and the referent field is cleared.
-  void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                  IsMarkedCallback* is_marked_callback,
+  void EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
+                                  IsHeapReferenceMarkedCallback* is_marked_callback,
                                   MarkObjectCallback* mark_object_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
+  void ForwardSoftReferences(IsHeapReferenceMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
-  void ClearWhiteReferences(ReferenceQueue& cleared_references,
-                            IsMarkedCallback* is_marked_callback,
-                            void* arg)
+  void ClearWhiteReferences(ReferenceQueue* cleared_references,
+                            IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) const
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsEmpty() const {
     return list_ == nullptr;
   }
@@ -84,12 +83,16 @@
   mirror::Reference* GetList() {
     return list_;
   }
+  // Visits list_; currently only used by the mark compact GC.
+  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
   // calling AtomicEnqueueIfNotEnqueued.
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  // The actual reference list. Not a root since it will be nullptr when the GC is not running.
+  // The actual reference list. Only a root for the mark compact GC since it will be null for other
+  // GC types.
   mirror::Reference* list_;
 };
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 9e61f30..feee34f 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -145,6 +145,12 @@
 
   accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
 
+  // Record objects / bytes freed.
+  void RecordFree(int32_t objects, int32_t bytes) {
+    objects_allocated_.FetchAndSubSequentiallyConsistent(objects);
+    bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes);
+  }
+
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
 
diff --git a/runtime/globals.h b/runtime/globals.h
index 58c2118..3a906f1 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -74,8 +74,11 @@
 
 // Garbage collector constants.
 static constexpr bool kMovingCollector = true && !kUsePortableCompiler;
+static constexpr bool kMarkCompactSupport = false && kMovingCollector;
+// True if we allow moving field arrays; this can cause complications with mark compact.
+static constexpr bool kMoveFieldArrays = !kMarkCompactSupport;
 // True if we allow moving classes.
-static constexpr bool kMovingClasses = true;
+static constexpr bool kMovingClasses = !kMarkCompactSupport;
 // True if we allow moving fields.
 static constexpr bool kMovingFields = false;
 // True if we allow moving methods.
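Note that kMarkCompactSupport is declared false here, so the new collector code is compiled out by default; the CHECK in Heap::ChangeCollector enforces this at runtime. Experimenting with mark compact would presumably be a one-line local edit (hypothetical, not part of this patch):

    // Hypothetical local change to enable the mark compact paths:
    static constexpr bool kMarkCompactSupport = true && kMovingCollector;

Flipping it also turns off kMoveFieldArrays and kMovingClasses via the derived constants above.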
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100..96eeb8d 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
+#include "globals.h"       // For KB.
 
 namespace art {
 
@@ -36,6 +37,20 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
 // Architecture-specific pointer sizes
 static constexpr size_t kArmPointerSize = 4;
 static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@
   }
 }
 
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64-bit.
+
+// Worst case, we would need about 2.6x the x86_64 amount to cover the many more registers.
+// But this size works rather well in practice.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to work around regression (http://b/14982147). Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+           isa == kArm64 ? kArm64StackOverflowReservedBytes :
+           isa == kMips ? kMipsStackOverflowReservedBytes :
+           isa == kX86 ? kX86StackOverflowReservedBytes :
+           isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+           isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
+           (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 enum InstructionFeatures {
   kHwDiv  = 0x1,              // Supports hardware divide.
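Since GetStackOverflowReservedBytes is constexpr, the per-ISA reservations resolve at compile time and can be checked statically; for example (illustrative asserts, not in the patch):

    static_assert(GetStackOverflowReservedBytes(kArm64) == 32 * KB,
                  "unexpected arm64 stack overflow reservation");
    static_assert(GetStackOverflowReservedBytes(kX86) == 24 * KB,
                  "unexpected x86 stack overflow reservation");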
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6dbc6a0..cb4d444 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -454,7 +454,7 @@
   }
   self->EndAssertNoThreadSuspension(old_cause);
   // Do this after populating the shadow frame in case EnsureInitialized causes a GC.
-  if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
+  if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitialized())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
@@ -527,7 +527,7 @@
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
     mirror::Class* declaring_class = method->GetDeclaringClass();
-    if (UNLIKELY(!declaring_class->IsInitializing())) {
+    if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
       HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
       if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 66406bf..5606d47 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -135,6 +135,8 @@
   mirror::ArtMethod* method = nullptr;
   if (is_static) {
     method = c->FindDirectMethod(name, sig);
+  } else if (c->IsInterface()) {
+    method = c->FindInterfaceMethod(name, sig);
   } else {
     method = c->FindVirtualMethod(name, sig);
     if (method == nullptr) {
@@ -3312,8 +3314,7 @@
 void* JavaVMExt::FindCodeForNativeMethod(mirror::ArtMethod* m) {
   CHECK(m->IsNative());
   mirror::Class* c = m->GetDeclaringClass();
-  // If this is a static method, it could be called before the class
-  // has been initialized.
+  // If this is a static method, it could be called before the class has been initialized.
   if (m->IsStatic()) {
     c = EnsureInitialized(Thread::Current(), c);
     if (c == nullptr) {
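The FindMethod change above lets GetMethodID resolve methods that an interface inherits from a superinterface; previously the lookup walked only the class and its superclasses, missing superinterface declarations. Illustrative JNI usage, mirroring the new test below:

    // ReadableByteChannel inherits close() from java.nio.channels.Channel.
    jclass channel = env->FindClass("java/nio/channels/ReadableByteChannel");
    jmethodID close = env->GetMethodID(channel, "close", "()V");  // now found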
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index f182e95..5e46c57 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -268,32 +268,38 @@
   jclass jlobject = env_->FindClass("java/lang/Object");
   jclass jlstring = env_->FindClass("java/lang/String");
   jclass jlnsme = env_->FindClass("java/lang/NoSuchMethodError");
+  jclass jncrbc = env_->FindClass("java/nio/channels/ReadableByteChannel");
 
-  // Sanity check that no exceptions are pending
+  // Sanity check that no exceptions are pending.
   ASSERT_FALSE(env_->ExceptionCheck());
 
   // Check that java.lang.Object.foo() doesn't exist and NoSuchMethodError is
-  // a pending exception
+  // a pending exception.
   jmethodID method = env_->GetMethodID(jlobject, "foo", "()V");
   EXPECT_EQ(nullptr, method);
   ExpectException(jlnsme);
 
-  // Check that java.lang.Object.equals() does exist
+  // Check that java.lang.Object.equals() does exist.
   method = env_->GetMethodID(jlobject, "equals", "(Ljava/lang/Object;)Z");
   EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Check that GetMethodID for java.lang.String.valueOf(int) fails as the
-  // method is static
+  // method is static.
   method = env_->GetMethodID(jlstring, "valueOf", "(I)Ljava/lang/String;");
   EXPECT_EQ(nullptr, method);
   ExpectException(jlnsme);
 
-  // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor
+  // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor.
   method = env_->GetMethodID(jlnsme, "<init>", "(Ljava/lang/String;)V");
   EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
 
+  // Check that GetMethodID can find an interface method inherited from another interface.
+  method = env_->GetMethodID(jncrbc, "close", "()V");
+  EXPECT_NE(nullptr, method);
+  EXPECT_FALSE(env_->ExceptionCheck());
+
   // Bad arguments.
   CheckJniAbortCatcher check_jni_abort_catcher;
   method = env_->GetMethodID(nullptr, "<init>", "(Ljava/lang/String;)V");
@@ -630,11 +636,13 @@
   jni_abort_catcher.Check(
       "attempt to get double primitive array elements with an object of type boolean[]");
   jbyteArray array2 = env_->NewByteArray(10);
-  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy), nullptr);
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy),
+            nullptr);
   jni_abort_catcher.Check(
       "attempt to get boolean primitive array elements with an object of type byte[]");
   jobject object = env_->NewStringUTF("Test String");
-  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy), nullptr);
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy),
+            nullptr);
   jni_abort_catcher.Check(
       "attempt to get boolean primitive array elements with an object of type java.lang.String");
 }
@@ -681,7 +689,8 @@
   jobject object = env_->NewStringUTF("Test String");
   env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), elements, 0);
   jni_abort_catcher.Check(
-      "attempt to release boolean primitive array elements with an object of type java.lang.String");
+      "attempt to release boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 TEST_F(JniInternalTest, GetReleasePrimitiveArrayCriticalOfWrongType) {
   CheckJniAbortCatcher jni_abort_catcher;
@@ -736,7 +745,8 @@
   env_->GetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
                               reinterpret_cast<jboolean*>(elements));
   jni_abort_catcher.Check(
-      "attempt to get region of boolean primitive array elements with an object of type java.lang.String");
+      "attempt to get region of boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 
 TEST_F(JniInternalTest, SetPrimitiveArrayRegionElementsOfWrongType) {
@@ -782,7 +792,8 @@
   env_->SetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
                               reinterpret_cast<jboolean*>(elements));
   jni_abort_catcher.Check(
-      "attempt to set region of boolean primitive array elements with an object of type java.lang.String");
+      "attempt to set region of boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 
 TEST_F(JniInternalTest, NewObjectArray) {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 512a66f..6205f70 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -505,8 +505,10 @@
 
 template <bool kVisitClass, typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  // Visit the static fields first so that we don't overwrite the SFields / IFields instance
+  // fields.
   VisitStaticFieldsReferences<kVisitClass>(this, visitor);
+  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
 }
 
 inline bool Class::IsArtFieldClass() const {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index a20f7b9..c798180 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -37,17 +37,17 @@
 namespace art {
 namespace mirror {
 
-Class* Class::java_lang_Class_ = NULL;
+Class* Class::java_lang_Class_ = nullptr;
 
 void Class::SetClassClass(Class* java_lang_Class) {
-  CHECK(java_lang_Class_ == NULL) << java_lang_Class_ << " " << java_lang_Class;
-  CHECK(java_lang_Class != NULL);
+  CHECK(java_lang_Class_ == nullptr) << java_lang_Class_ << " " << java_lang_Class;
+  CHECK(java_lang_Class != nullptr);
   java_lang_Class_ = java_lang_Class;
 }
 
 void Class::ResetClass() {
-  CHECK(java_lang_Class_ != NULL);
-  java_lang_Class_ = NULL;
+  CHECK(java_lang_Class_ != nullptr);
+  java_lang_Class_ = nullptr;
 }
 
 void Class::VisitRoots(RootCallback* callback, void* arg) {
@@ -146,7 +146,7 @@
   if ((descriptor[0] != 'L') && (descriptor[0] != '[')) {
     // The descriptor indicates that this is the class for
     // a primitive type; special-case the return value.
-    const char* c_name = NULL;
+    const char* c_name = nullptr;
     switch (descriptor[0]) {
     case 'Z': c_name = "boolean"; break;
     case 'B': c_name = "byte";    break;
@@ -196,10 +196,10 @@
   os << "----- " << (IsInterface() ? "interface" : "class") << " "
      << "'" << GetDescriptor() << "' cl=" << GetClassLoader() << " -----\n",
   os << "  objectSize=" << SizeOf() << " "
-     << "(" << (h_super.Get() != NULL ? h_super->SizeOf() : -1) << " from super)\n",
+     << "(" << (h_super.Get() != nullptr ? h_super->SizeOf() : -1) << " from super)\n",
   os << StringPrintf("  access=0x%04x.%04x\n",
       GetAccessFlags() >> 16, GetAccessFlags() & kAccJavaFlagsMask);
-  if (h_super.Get() != NULL) {
+  if (h_super.Get() != nullptr) {
     os << "  super='" << PrettyClass(h_super.Get()) << "' (cl=" << h_super->GetClassLoader()
        << ")\n";
   }
@@ -217,7 +217,7 @@
   }
   // After this point, this may have moved due to GetDirectInterface.
   os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
-     << (h_super.Get() != NULL ? h_super->NumVirtualMethods() : 0) << " in super):\n";
+     << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n";
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     os << StringPrintf("    %2zd: %s\n", i,
                        PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str());
@@ -253,7 +253,7 @@
     // Sanity check that the number of bits set in the reference offset bitmap
     // agrees with the number of references
     size_t count = 0;
-    for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+    for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
       count += c->NumReferenceInstanceFieldsDuringLinking();
     }
     CHECK_EQ((size_t)POPCOUNT(new_reference_offsets), count);
@@ -329,40 +329,58 @@
   }
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
-  if (method != NULL) {
+  if (method != nullptr) {
     return method;
   }
 
   int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
+  for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
+}
+
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
+  // Check the current class before checking the interfaces.
+  ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
+  if (method != nullptr) {
+    return method;
+  }
+
+  int32_t iftable_count = GetIfTableCount();
+  IfTable* iftable = GetIfTable();
+  for (int32_t i = 0; i < iftable_count; ++i) {
+    method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature);
+    if (method != nullptr) {
+      return method;
+    }
+  }
+  return nullptr;
 }
 
 ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-  if (method != NULL) {
+  if (method != nullptr) {
     return method;
   }
 
   int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
+  for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) {
@@ -372,7 +390,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) {
@@ -382,7 +400,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
@@ -394,37 +412,37 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) {
@@ -434,7 +452,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) {
@@ -444,7 +462,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
@@ -456,37 +474,37 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindClassInitializer() {
@@ -498,7 +516,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
@@ -510,7 +528,7 @@
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
@@ -522,42 +540,42 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(name, type);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(dex_cache, dex_field_idx);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) {
-  DCHECK(type != NULL);
+  DCHECK(type != nullptr);
   for (size_t i = 0; i < NumStaticFields(); ++i) {
     ArtField* f = GetStaticField(i);
     if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
@@ -569,7 +587,7 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
@@ -603,7 +621,7 @@
   for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
     // Wrap k incase it moves during GetDirectInterface.
@@ -625,7 +643,7 @@
 ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
                            const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  for (Class* k = klass.Get(); k != NULL; k = k->GetSuperClass()) {
+  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
     if (f != nullptr) {
@@ -652,10 +670,10 @@
 
 static void SetPreverifiedFlagOnMethods(mirror::ObjectArray<mirror::ArtMethod>* methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (methods != NULL) {
+  if (methods != nullptr) {
     for (int32_t index = 0, end = methods->GetLength(); index < end; ++index) {
       mirror::ArtMethod* method = methods->GetWithoutChecks(index);
-      DCHECK(method != NULL);
+      DCHECK(method != nullptr);
       if (!method->IsNative() && !method->IsAbstract()) {
         method->SetPreverified();
       }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 90381a7..c83f411 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -648,6 +648,9 @@
   ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/iftable-inl.h b/runtime/mirror/iftable-inl.h
index ec3e514..3f20bf4 100644
--- a/runtime/mirror/iftable-inl.h
+++ b/runtime/mirror/iftable-inl.h
@@ -25,8 +25,9 @@
 inline void IfTable::SetInterface(int32_t i, Class* interface) {
   DCHECK(interface != NULL);
   DCHECK(interface->IsInterface());
-  DCHECK(Get((i * kMax) + kInterface) == NULL);
-  Set<false>((i * kMax) + kInterface, interface);
+  const size_t idx = i * kMax + kInterface;
+  DCHECK_EQ(Get(idx), static_cast<Object*>(nullptr));
+  Set<false>(idx, interface);
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 567ce3e..15ecd3c 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -26,6 +26,7 @@
 #include "class.h"
 #include "lock_word-inl.h"
 #include "monitor.h"
+#include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "runtime.h"
 #include "reference.h"
@@ -667,10 +668,9 @@
         mirror::ArtField* field = kIsStatic ? klass->GetStaticField(i) : klass->GetInstanceField(i);
         MemberOffset field_offset = field->GetOffset();
         // TODO: Do a simpler check?
-        if (!kVisitClass && UNLIKELY(field_offset.Uint32Value() == ClassOffset().Uint32Value())) {
-          continue;
+        if (kVisitClass || field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
+          visitor(this, field_offset, kIsStatic);
         }
-        visitor(this, field_offset, kIsStatic);
       }
     }
   }
@@ -693,18 +693,16 @@
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
   mirror::Class* klass = GetClass<kVerifyFlags>();
-  if (klass->IsVariableSize()) {
-    if (klass->IsClassClass()) {
-      AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
-    } else {
-      DCHECK(klass->IsArrayClass<kVerifyFlags>());
-      if (klass->IsObjectArrayClass<kVerifyNone>()) {
-        AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
-      } else if (kVisitClass) {
-        visitor(this, ClassOffset(), false);
-      }
+  if (klass == Class::GetJavaLangClass()) {
+    AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
+  } else if (klass->IsArrayClass()) {
+    if (klass->IsObjectArrayClass<kVerifyNone>()) {
+      AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
+    } else if (kVisitClass) {
+      visitor(this, ClassOffset(), false);
     }
   } else {
+    DCHECK(!klass->IsVariableSize());
     VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
     if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
       ref_visitor(klass, AsReference());
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 0b6e759..9c9d87b 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -21,6 +21,13 @@
 
 namespace art {
 
+namespace gc {
+
+class ReferenceProcessor;
+class ReferenceQueue;
+
+}  // namespace gc
+
 struct ReferenceOffsets;
 struct FinalizerReferenceOffsets;
 
@@ -41,7 +48,6 @@
   static MemberOffset ReferentOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Reference, referent_);
   }
-
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Object* GetReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldObjectVolatile<Object, kDefaultVerifyFlags, kReadBarrierOption>(
@@ -55,7 +61,6 @@
   void ClearReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
-
   // Volatile read/write is not necessary since the java pending next is only accessed from
   // the java threads for cleared references. Once these cleared references have a null referent,
   // we never end up reading their pending next from the GC again.
@@ -76,6 +81,11 @@
   bool IsEnqueuable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  // Note: This avoids a read barrier; it should only be used by the GC.
+  HeapReference<Object>* GetReferentReferenceAddr() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObjectReferenceAddr<kDefaultVerifyFlags>(ReferentOffset());
+  }
+
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Reference> pending_next_;  // Note this is Java volatile:
   HeapReference<Object> queue_;  // Note this is Java volatile:
@@ -83,6 +93,8 @@
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
   friend struct art::ReferenceOffsets;  // for verifying offset information
+  friend class gc::ReferenceProcessor;
+  friend class gc::ReferenceQueue;
   DISALLOW_IMPLICIT_CONSTRUCTORS(Reference);
 };
 
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index dd8ce16..d8c1c40 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -70,6 +70,11 @@
 // address the object (if the object didn't move, returns the object input parameter).
 typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
     __attribute__((warn_unused_result));
+
+// Returns true if the object in the heap reference is marked. If it is marked and has moved,
+// the callback updates the heap reference to contain the new address.
+typedef bool (IsHeapReferenceMarkedCallback)(mirror::HeapReference<mirror::Object>* object,
+    void* arg) __attribute__((warn_unused_result));
 typedef void (ProcessMarkStackCallback)(void* arg);
 
 }  // namespace art
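A sketch of how a moving collector might implement the new callback, assuming a collector type MyCollector whose IsMarked returns nullptr for unmarked objects and the (possibly forwarded) address otherwise (names are assumptions, not from this patch):

    static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref,
                                            void* arg) {
      MyCollector* collector = reinterpret_cast<MyCollector*>(arg);
      mirror::Object* const obj = ref->AsMirrorPtr();
      mirror::Object* const forward = collector->IsMarked(obj);  // nullptr if white.
      if (forward == nullptr) {
        return false;  // Unmarked: the caller may clear or enqueue the reference.
      }
      if (forward != obj) {
        ref->Assign(forward);  // Marked and moved: update the reference in place.
      }
      return true;
    }

Folding the update into the callback is what lets ClearWhiteReferences and DelayReferenceReferent above drop their explicit forward_address bookkeeping.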
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 0820330..7cdd8f5 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -113,6 +113,8 @@
     return gc::kCollectorTypeGSS;
   } else if (option == "CC") {
     return gc::kCollectorTypeCC;
+  } else if (option == "MC") {
+    return gc::kCollectorTypeMC;
   } else {
     return gc::kCollectorTypeNone;
   }
@@ -197,7 +199,7 @@
 #endif
   // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
   // parsing options.
-  background_collector_type_ = gc::kCollectorTypeNone;
+  background_collector_type_ = gc::kCollectorTypeSS;
   stack_size_ = 0;  // 0 means default.
   max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   low_memory_mode_ = false;
@@ -563,6 +565,10 @@
       if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_change_threshold_)) {
         return false;
       }
+    } else if (option == "-Xprofile-type:method") {
+      profiler_options_.profile_type_ = kProfilerMethod;
+    } else if (option == "-Xprofile-type:dexpc") {
+      profiler_options_.profile_type_ = kProfilerMethodAndDexPC;
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -806,6 +812,7 @@
   UsageMessage(stream, "  -Xprofile-start-immediately\n");
   UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
   UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
+  UsageMessage(stream, "  -Xprofile-type:{method,dexpc}\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 00bb501..2cd876a 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -63,7 +63,8 @@
 static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   BackgroundMethodSamplingProfiler* profiler =
       reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
+  uint32_t dex_pc;
+  mirror::ArtMethod* method = thread->GetCurrentMethod(&dex_pc);
   if (false && method == nullptr) {
     LOG(INFO) << "No current method available";
     std::ostringstream os;
@@ -71,7 +72,7 @@
     std::string data(os.str());
     LOG(INFO) << data;
   }
-  profiler->RecordMethod(method);
+  profiler->RecordMethod(method, dex_pc);
 }
 
 // A closure that is called by the thread checkpoint code.
@@ -244,7 +245,7 @@
   }
 
   // Read the previous profile.
-  profile_table_.ReadPrevious(fd);
+  profile_table_.ReadPrevious(fd, options_.GetProfileType());
 
   // Move back to the start of the file.
   lseek(fd, 0, SEEK_SET);
@@ -360,7 +361,7 @@
 
 // A method has been hit, record its invocation in the method map.
 // The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method) {
+void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method, uint32_t dex_pc) {
   if (method == nullptr) {
     profile_table_.NullMethod();
     // Don't record a nullptr method.
@@ -393,7 +394,11 @@
 
   // Add to the profile table unless it is filtered out.
   if (!is_filtered) {
-    profile_table_.Put(method);
+    if (options_.GetProfileType() == kProfilerMethod) {
+      profile_table_.Put(method);
+    } else if (options_.GetProfileType() == kProfilerMethodAndDexPC) {
+      profile_table_.PutDexPC(method, dex_pc);
+    }
   }
 }
 
@@ -403,7 +408,7 @@
 }
 
 uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os);
+  return profile_table_.Write(os, options_.GetProfileType());
 }
 
 // Profile Table.
@@ -414,19 +419,18 @@
     num_boot_methods_(0) {
   for (int i = 0; i < kHashSize; i++) {
     table[i] = nullptr;
+    dex_table[i] = nullptr;
   }
 }
 
 ProfileSampleResults::~ProfileSampleResults() {
-  for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-  }
+  Clear();
 }
 
 // Add a method to the profile table.  If it's the first time the method
 // has been seen, add it with count=1, otherwise increment the count.
 void ProfileSampleResults::Put(mirror::ArtMethod* method) {
-  lock_.Lock(Thread::Current());
+  MutexLock mu(Thread::Current(), lock_);
   uint32_t index = Hash(method);
   if (table[index] == nullptr) {
     table[index] = new Map();
@@ -438,11 +442,34 @@
     i->second++;
   }
   num_samples_++;
-  lock_.Unlock(Thread::Current());
+}
+
+// Add a method with its dex pc to the profile table.
+void ProfileSampleResults::PutDexPC(mirror::ArtMethod* method, uint32_t dex_pc) {
+  MutexLock mu(Thread::Current(), lock_);
+  uint32_t index = Hash(method);
+  if (dex_table[index] == nullptr) {
+    dex_table[index] = new MethodDexPCMap();
+  }
+  MethodDexPCMap::iterator i = dex_table[index]->find(method);
+  if (i == dex_table[index]->end()) {
+    DexPCCountMap* dex_pc_map = new DexPCCountMap();
+    (*dex_pc_map)[dex_pc] = 1;
+    (*dex_table[index])[method] = dex_pc_map;
+  } else {
+    DexPCCountMap* dex_pc_count = i->second;
+    DexPCCountMap::iterator dex_pc_i = dex_pc_count->find(dex_pc);
+    if (dex_pc_i == dex_pc_count->end()) {
+      (*dex_pc_count)[dex_pc] = 1;
+    } else {
+      dex_pc_i->second++;
+    }
+  }
+  num_samples_++;
 }
 
 // Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream &os) {
+uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
   ScopedObjectAccess soa(Thread::Current());
   num_samples_ += previous_num_samples_;
   num_null_methods_ += previous_num_null_methods_;
@@ -452,36 +479,101 @@
                  << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
-  for (int i = 0 ; i < kHashSize; i++) {
-    Map *map = table[i];
-    if (map != nullptr) {
-      for (const auto &meth_iter : *map) {
-        mirror::ArtMethod *method = meth_iter.first;
-        std::string method_name = PrettyMethod(method);
+  if (type == kProfilerMethod) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      Map *map = table[i];
+      if (map != nullptr) {
+        for (const auto &meth_iter : *map) {
+          mirror::ArtMethod *method = meth_iter.first;
+          std::string method_name = PrettyMethod(method);
 
-        const DexFile::CodeItem* codeitem = method->GetCodeItem();
-        uint32_t method_size = 0;
-        if (codeitem != nullptr) {
-          method_size = codeitem->insns_size_in_code_units_;
-        }
-        uint32_t count = meth_iter.second;
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          uint32_t count = meth_iter.second;
 
-        // Merge this profile entry with one from a previous run (if present).  Also
-        // remove the previous entry.
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          count += pi->second.count_;
-          previous_.erase(pi);
+          // Merge this profile entry with one from a previous run (if present).  Also
+          // remove the previous entry.
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            count += pi->second.count_;
+            previous_.erase(pi);
+          }
+          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
+          ++num_methods;
         }
-        os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-        ++num_methods;
+      }
+    }
+  } else if (type == kProfilerMethodAndDexPC) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      MethodDexPCMap *dex_map = dex_table[i];
+      if (dex_map != nullptr) {
+        for (const auto &dex_pc_iter : *dex_map) {
+          mirror::ArtMethod *method = dex_pc_iter.first;
+          std::string method_name = PrettyMethod(method);
+
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          DexPCCountMap* dex_pc_map = dex_pc_iter.second;
+          uint32_t total_count = 0;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            total_count += dex_pc_i.second;
+          }
+
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            total_count += pi->second.count_;
+            DexPCCountMap* previous_dex_pc_map = pi->second.dex_pc_map_;
+            if (previous_dex_pc_map != nullptr) {
+              for (const auto &dex_pc_i : *previous_dex_pc_map) {
+                uint32_t dex_pc = dex_pc_i.first;
+                uint32_t count = dex_pc_i.second;
+                DexPCCountMap::iterator di = dex_pc_map->find(dex_pc);
+                if (di == dex_pc_map->end()) {
+                  (*dex_pc_map)[dex_pc] = count;
+                } else {
+                  di->second += count;
+                }
+              }
+            }
+            delete previous_dex_pc_map;
+            previous_.erase(pi);
+          }
+          std::vector<std::string> dex_pc_count_vector;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+          }
+          // We write out profile data with dex pc information in the following format:
+          // "method/total_count/size/[pc_1:count_1,pc_2:count_2,...]".
+          os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
+              method_size, Join(dex_pc_count_vector, ',').c_str());
+          ++num_methods;
+        }
       }
     }
   }
 
   // Now we write out the remaining previous methods.
-  for (PreviousProfile::iterator pi = previous_.begin(); pi != previous_.end(); ++pi) {
-    os << StringPrintf("%s/%u/%u\n",  pi->first.c_str(), pi->second.count_, pi->second.method_size_);
+  for (const auto &pi : previous_) {
+    if (type == kProfilerMethod) {
+      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+    } else if (type == kProfilerMethodAndDexPC) {
+      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+      DexPCCountMap* previous_dex_pc_map = pi.second.dex_pc_map_;
+      if (previous_dex_pc_map != nullptr) {
+        std::vector<std::string> dex_pc_count_vector;
+        for (const auto &dex_pc_i : *previous_dex_pc_map) {
+          dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+        }
+        os << Join(dex_pc_count_vector, ',');
+      }
+      os << "]\n";
+    }
     ++num_methods;
   }
   return num_methods;
@@ -492,8 +584,20 @@
   num_null_methods_ = 0;
   num_boot_methods_ = 0;
   for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-     table[i] = nullptr;
+    delete table[i];
+    table[i] = nullptr;
+    if (dex_table[i] != nullptr) {
+      for (auto &di : *dex_table[i]) {
+        delete di.second;
+        di.second = nullptr;
+      }
+    }
+    delete dex_table[i];
+    dex_table[i] = nullptr;
+  }
+  for (auto &pi : previous_) {
+    delete pi.second.dex_pc_map_;
+    pi.second.dex_pc_map_ = nullptr;
   }
   previous_.clear();
 }
@@ -520,7 +624,7 @@
   return true;
 }
 
-void ProfileSampleResults::ReadPrevious(int fd) {
+void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
   // Reset counters.
   previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
 
@@ -540,21 +644,35 @@
   previous_num_null_methods_ = atoi(summary_info[1].c_str());
   previous_num_boot_methods_ = atoi(summary_info[2].c_str());
 
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by /
+  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
   while (true) {
     if (!ReadProfileLine(fd, line)) {
       break;
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       break;
     }
     std::string methodname = info[0];
-    uint32_t count = atoi(info[1].c_str());
+    uint32_t total_count = atoi(info[1].c_str());
     uint32_t size = atoi(info[2].c_str());
-    previous_[methodname] = PreviousValue(count, size);
+    DexPCCountMap* dex_pc_map = nullptr;
+    if (type == kProfilerMethodAndDexPC && info.size() == 4) {
+      dex_pc_map = new DexPCCountMap();
+      std::string dex_pc_counts_str = info[3].substr(1, info[3].size() - 2);
+      std::vector<std::string> dex_pc_count_pairs;
+      Split(dex_pc_counts_str, ',', dex_pc_count_pairs);
+      for (uint32_t i = 0; i < dex_pc_count_pairs.size(); ++i) {
+        std::vector<std::string> dex_pc_count;
+        Split(dex_pc_count_pairs[i], ':', dex_pc_count);
+        uint32_t dex_pc = atoi(dex_pc_count[0].c_str());
+        uint32_t count = atoi(dex_pc_count[1].c_str());
+        (*dex_pc_map)[dex_pc] = count;
+      }
+    }
+    previous_[methodname] = PreviousValue(total_count, size, dex_pc_map);
   }
 }
 
@@ -604,7 +722,7 @@
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       return false;
     }
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 0b18dbb..396dd23 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -53,8 +53,9 @@
   ~ProfileSampleResults();
 
   void Put(mirror::ArtMethod* method);
-  uint32_t Write(std::ostream &os);
-  void ReadPrevious(int fd);
+  void PutDexPC(mirror::ArtMethod* method, uint32_t pc);
+  uint32_t Write(std::ostream &os, ProfileDataType type);
+  void ReadPrevious(int fd, ProfileDataType type);
   void Clear();
   uint32_t GetNumSamples() { return num_samples_; }
   void NullMethod() { ++num_null_methods_; }
@@ -68,15 +69,21 @@
   uint32_t num_null_methods_;    // Number of samples where we don't know the method.
   uint32_t num_boot_methods_;    // Number of samples in the boot path.
 
-  typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
+  typedef std::map<mirror::ArtMethod*, uint32_t> Map;  // Map of method vs its count.
   Map *table[kHashSize];
 
+  typedef std::map<uint32_t, uint32_t> DexPCCountMap;  // Map of dex pc vs its count.
+  // Map of method vs dex pc counts in the method.
+  typedef std::map<mirror::ArtMethod*, DexPCCountMap*> MethodDexPCMap;
+  MethodDexPCMap *dex_table[kHashSize];
+
   struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0) {}
-    PreviousValue(uint32_t count, uint32_t method_size)
-      : count_(count), method_size_(method_size) {}
+    PreviousValue() : count_(0), method_size_(0), dex_pc_map_(nullptr) {}
+    PreviousValue(uint32_t count, uint32_t method_size, DexPCCountMap* dex_pc_map)
+      : count_(count), method_size_(method_size), dex_pc_map_(dex_pc_map) {}
     uint32_t count_;
     uint32_t method_size_;
+    DexPCCountMap* dex_pc_map_;
   };
 
   typedef std::map<std::string, PreviousValue> PreviousProfile;
@@ -114,7 +121,7 @@
   static void Stop() LOCKS_EXCLUDED(Locks::profiler_lock_, wait_lock_);
   static void Shutdown() LOCKS_EXCLUDED(Locks::profiler_lock_);
 
-  void RecordMethod(mirror::ArtMethod *method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RecordMethod(mirror::ArtMethod *method, uint32_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Barrier& GetBarrier() {
     return *profiler_barrier_;
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
index 08e32cc..0b63003 100644
--- a/runtime/profiler_options.h
+++ b/runtime/profiler_options.h
@@ -22,6 +22,11 @@
 
 namespace art {
 
+enum ProfileDataType {
+  kProfilerMethod,          // Method only
+  kProfilerMethodAndDexPC,  // Method with Dex PC
+};
+
 class ProfilerOptions {
  public:
   static constexpr bool kDefaultEnabled = false;
@@ -32,6 +37,7 @@
   static constexpr bool kDefaultStartImmediately = false;
   static constexpr double kDefaultTopKThreshold = 90.0;
   static constexpr double kDefaultChangeInTopKThreshold = 10.0;
+  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
 
   ProfilerOptions() :
     enabled_(kDefaultEnabled),
@@ -41,7 +47,8 @@
     backoff_coefficient_(kDefaultBackoffCoefficient),
     start_immediately_(kDefaultStartImmediately),
     top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold) {}
+    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
+    profile_type_(kDefaultProfileData) {}
 
   ProfilerOptions(bool enabled,
                  uint32_t period_s,
@@ -50,7 +57,8 @@
                  double backoff_coefficient,
                  bool start_immediately,
                  double top_k_threshold,
-                 double top_k_change_threshold):
+                 double top_k_change_threshold,
+                 ProfileDataType profile_type):
     enabled_(enabled),
     period_s_(period_s),
     duration_s_(duration_s),
@@ -58,7 +66,8 @@
     backoff_coefficient_(backoff_coefficient),
     start_immediately_(start_immediately),
     top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold) {}
+    top_k_change_threshold_(top_k_change_threshold),
+    profile_type_(profile_type) {}
 
   bool IsEnabled() const {
     return enabled_;
@@ -92,6 +101,10 @@
     return top_k_change_threshold_;
   }
 
+  ProfileDataType GetProfileType() const {
+    return profile_type_;
+  }
+
  private:
   friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
     os << "enabled=" << po.enabled_
@@ -101,7 +114,8 @@
        << ", backoff_coefficient=" << po.backoff_coefficient_
        << ", start_immediately=" << po.start_immediately_
        << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_;
+       << ", top_k_change_threshold=" << po.top_k_change_threshold_
+       << ", profile_type=" << po.profile_type_;
     return os;
   }
 
@@ -123,6 +137,8 @@
   double top_k_threshold_;
   // How much the top K% samples needs to change in order for the app to be recompiled.
   double top_k_change_threshold_;
+  // The type of profile data dumped to the disk.
+  ProfileDataType profile_type_;
 };
 
 }  // namespace art
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 717381c..8aa7ea1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -421,6 +421,9 @@
     int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
     if (fd >= 0) {
       close(fd);
+    } else if (errno != EEXIST) {
+      LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
+      return true;
     }
     StartProfiler(profile_output_filename_.c_str());
   }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 7e922c5..132ac3e 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -142,7 +142,8 @@
   return GetMethod()->NativePcOffset(cur_quick_frame_pc_);
 }
 
-uint32_t StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const {
+bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind,
+                           uint32_t* val) const {
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
@@ -155,19 +156,30 @@
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-      return GetGPR(vmap_table.ComputeRegister(spill_mask, vmap_offset, kind));
+      uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      uintptr_t ptr_val;
+      bool success = false;
+      if (is_float) {
+        success = GetFPR(reg, &ptr_val);
+      } else {
+        success = GetGPR(reg, &ptr_val);
+      }
+      *val = ptr_val;
+      return success;
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
-      return *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
+      *val = *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                           frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      return true;
     }
   } else {
-    return cur_shadow_frame_->GetVReg(vreg);
+    *val = cur_shadow_frame_->GetVReg(vreg);
+    return true;
   }
 }
 
-void StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
+bool StackVisitor::SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
                            VRegKind kind) {
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably write registers without a context.
@@ -181,8 +193,12 @@
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-      const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kReferenceVReg);
-      SetGPR(reg, new_value);
+      const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      if (is_float) {
+        return SetFPR(reg, new_value);
+      } else {
+        return SetGPR(reg, new_value);
+      }
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
@@ -190,9 +206,11 @@
                                  frame_info.FrameSizeInBytes(), vreg, kRuntimeISA);
       byte* vreg_addr = reinterpret_cast<byte*>(GetCurrentQuickFrame()) + offset;
       *reinterpret_cast<uint32_t*>(vreg_addr) = new_value;
+      return true;
     }
   } else {
-    return cur_shadow_frame_->SetVReg(vreg, new_value);
+    cur_shadow_frame_->SetVReg(vreg, new_value);
+    return true;
   }
 }
 
@@ -201,14 +219,24 @@
   return context_->GetGPRAddress(reg);
 }
 
-uintptr_t StackVisitor::GetGPR(uint32_t reg) const {
+bool StackVisitor::GetGPR(uint32_t reg, uintptr_t* val) const {
   DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->GetGPR(reg);
+  return context_->GetGPR(reg, val);
 }
 
-void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
+bool StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  context_->SetGPR(reg, value);
+  return context_->SetGPR(reg, value);
+}
+
+bool StackVisitor::GetFPR(uint32_t reg, uintptr_t* val) const {
+  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
+  return context_->GetFPR(reg, val);
+}
+
+bool StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
+  return context_->SetFPR(reg, value);
 }
 
 uintptr_t StackVisitor::GetReturnPc() const {
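
Callers can now distinguish an unreadable register from a valid value; a minimal migration sketch (visitor, m, and vreg are assumed to be in scope):

    // Old style: value-returning; failures abort via the CHECK-ing wrapper in stack.h.
    uint32_t old_value = visitor->GetVReg(m, vreg, kIntVReg);
    // New style: failures are reported to the caller.
    uint32_t new_value;
    if (!visitor->GetVReg(m, vreg, kIntVReg, &new_value)) {
      // Could not read the register (e.g. the context has no value for it); recover here.
    }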
diff --git a/runtime/stack.h b/runtime/stack.h
index 1991115..9402cdd 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -561,15 +561,21 @@
   bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const
+  bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
+  uint32_t GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t val;
+    bool success = GetVReg(m, vreg, kind, &val);
+    CHECK(success) << "Failed to read vreg " << vreg << " of kind " << kind;
+    return val;
+  }
+
+  bool SetVReg(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uintptr_t* GetGPRAddress(uint32_t reg) const;
-  uintptr_t GetGPR(uint32_t reg) const;
-  void SetGPR(uint32_t reg, uintptr_t value);
 
   // This is a fast-path for getting/setting values in a quick frame.
   uint32_t* GetVRegAddr(StackReference<mirror::ArtMethod>* cur_quick_frame,
@@ -700,6 +706,11 @@
   StackVisitor(Thread* thread, Context* context, size_t num_frames)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool GetGPR(uint32_t reg, uintptr_t* val) const;
+  bool SetGPR(uint32_t reg, uintptr_t value);
+  bool GetFPR(uint32_t reg, uintptr_t* val) const;
+  bool SetFPR(uint32_t reg, uintptr_t value);
+
   instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(uint32_t depth) const;
 
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530..3f8f4a3 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += Thread::kStackOverflowReservedBytes;
+    stack_size += kRuntimeStackOverflowReservedBytes;
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -489,7 +489,7 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kStackOverflowReservedBytes) {
+  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
@@ -2200,7 +2200,7 @@
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kStackOverflowReservedBytes << ")?";
+               << kRuntimeStackOverflowReservedBytes << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b52..7cd86de 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
+#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -94,28 +95,8 @@
 
 class Thread {
  public:
-  // Space to throw a StackOverflowError in.
-  // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
-  // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-  // But this one works rather well.
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
-  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-  // test-art-host-run-test-interpreter-018-stack-overflow
-  // test-art-host-run-test-interpreter-107-int-math2
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
-  static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
   // How much of the reserved space is set aside for incoming signals.
   static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-  // How much of the reserved bytes we may temporarily use during stack overflow checks as an
-  // optimization.
-  static constexpr size_t kStackOverflowReservedUsableBytes =
-      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory).  The higher portion of the memory
@@ -123,7 +104,7 @@
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-    kStackOverflowReservedBytes;
+      kRuntimeStackOverflowReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -585,7 +566,7 @@
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
     }
   }
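
The per-ISA sizes deleted above presumably move behind kRuntimeStackOverflowReservedBytes in instruction_set.h (newly included in this file); a hypothetical reconstruction reusing the deleted values:

    // Hypothetical sketch; mirrors the #if block removed from thread.h.
    #if defined(__x86_64__) || defined(__aarch64__)
    static constexpr size_t kRuntimeStackOverflowReservedBytes = 32 * KB;
    #elif defined(__i386__)
    // Bumped to work around http://b/14982147.
    static constexpr size_t kRuntimeStackOverflowReservedBytes = 24 * KB;
    #else
    static constexpr size_t kRuntimeStackOverflowReservedBytes = 16 * KB;
    #endif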
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2f4e805..c9c3bba 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3006,6 +3006,164 @@
   return res_method;
 }
 
+template <class T>
+mirror::ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator(T* it, const Instruction* inst,
+                                                                    MethodType method_type,
+                                                                    bool is_range,
+                                                                    mirror::ArtMethod* res_method) {
+  // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
+  // match the call to the signature. Also, we might be calling through an abstract method
+  // definition (which doesn't have register count values).
+  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
+  /* caught by static verifier */
+  DCHECK(is_range || expected_args <= 5);
+  if (expected_args > code_item_->outs_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
+        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
+    return nullptr;
+  }
+
+  uint32_t arg[5];
+  if (!is_range) {
+    inst->GetVarArgs(arg);
+  }
+  uint32_t sig_registers = 0;
+
+  /*
+   * Check the "this" argument, which must be an instance of the class that declared the method.
+   * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a
+   * rigorous check here (which is okay since we have to do it at runtime).
+   */
+  if (method_type != METHOD_STATIC) {
+    const RegType& actual_arg_type = work_line_->GetInvocationThis(inst, is_range);
+    if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
+      CHECK(have_pending_hard_failure_);
+      return nullptr;
+    }
+    if (actual_arg_type.IsUninitializedReference()) {
+      if (res_method) {
+        if (!res_method->IsConstructor()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
+          return nullptr;
+        }
+      } else {
+        // Check whether the name of the called method is "<init>"
+        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+        if (strcmp(dex_file_->GetMethodName(dex_file_->GetMethodId(method_idx)), "<init>") != 0) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
+          return nullptr;
+        }
+      }
+    }
+    if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
+      const RegType* res_method_class;
+      if (res_method != nullptr) {
+        mirror::Class* klass = res_method->GetDeclaringClass();
+        res_method_class = &reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
+                                                 klass->CannotBeAssignedFromOtherTypes());
+      } else {
+        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+        const uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
+        res_method_class = &reg_types_.FromDescriptor(class_loader_->Get(),
+                                                      dex_file_->StringByTypeIdx(class_idx),
+                                                      false);
+      }
+      if (!res_method_class->IsAssignableFrom(actual_arg_type)) {
+        Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
+            VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
+                << "' not instance of '" << *res_method_class << "'";
+        // Continue on soft failures. We need to find possible hard failures to avoid problems in
+        // the compiler.
+        if (have_pending_hard_failure_) {
+          return nullptr;
+        }
+      }
+    }
+    sig_registers = 1;
+  }
+
+  for ( ; it->HasNext(); it->Next()) {
+    if (sig_registers >= expected_args) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
+          " arguments, found " << sig_registers << " or more.";
+      return nullptr;
+    }
+
+    const char* param_descriptor = it->GetDescriptor();
+
+    if (param_descriptor == nullptr) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation because of missing signature "
+          "component";
+      return nullptr;
+    }
+
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), param_descriptor,
+                                                        false);
+    uint32_t get_reg = is_range ? inst->VRegC_3rc() + static_cast<uint32_t>(sig_registers) :
+        arg[sig_registers];
+    if (reg_type.IsIntegralTypes()) {
+      const RegType& src_type = work_line_->GetRegisterType(get_reg);
+      if (!src_type.IsIntegralTypes()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register v" << get_reg << " has type " << src_type
+            << " but expected " << reg_type;
+        return res_method;
+      }
+    } else if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
+      // Continue on soft failures. We need to find possible hard failures to avoid problems in the
+      // compiler.
+      if (have_pending_hard_failure_) {
+        return res_method;
+      }
+    }
+    sig_registers += reg_type.IsLongOrDoubleTypes() ?  2 : 1;
+  }
+  if (expected_args != sig_registers) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << expected_args <<
+        " arguments, found " << sig_registers;
+    return nullptr;
+  }
+  return res_method;
+}
+
+void MethodVerifier::VerifyInvocationArgsUnresolvedMethod(const Instruction* inst,
+                                                          MethodType method_type,
+                                                          bool is_range) {
+  // As the method may not have been resolved, make this static check against what we expect.
+  // The main reason for this code block is to fail hard when we find an illegal use, e.g.,
+  // wrong number of arguments or wrong primitive types, even if the method could not be resolved.
+  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  DexFileParameterIterator it(*dex_file_,
+                              dex_file_->GetProtoId(dex_file_->GetMethodId(method_idx).proto_idx_));
+  VerifyInvocationArgsFromIterator<DexFileParameterIterator>(&it, inst, method_type, is_range,
+                                                             nullptr);
+}
+
+class MethodParamListDescriptorIterator {
+ public:
+  explicit MethodParamListDescriptorIterator(mirror::ArtMethod* res_method) :
+      res_method_(res_method), pos_(0), params_(res_method->GetParameterTypeList()),
+      params_size_(params_ == nullptr ? 0 : params_->Size()) {
+  }
+
+  bool HasNext() {
+    return pos_ < params_size_;
+  }
+
+  void Next() {
+    ++pos_;
+  }
+
+  const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return res_method_->GetTypeDescriptorFromTypeIdx(params_->GetTypeItem(pos_).type_idx_);
+  }
+
+ private:
+  mirror::ArtMethod* res_method_;
+  size_t pos_;
+  const DexFile::TypeList* params_;
+  const size_t params_size_;
+};
+
 mirror::ArtMethod* MethodVerifier::VerifyInvocationArgs(const Instruction* inst,
                                                              MethodType method_type,
                                                              bool is_range,
@@ -3014,28 +3172,13 @@
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  // As the method may not have been resolved, make this static check against what we expect.
-  const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-  uint32_t shorty_idx = dex_file_->GetProtoId(method_id.proto_idx_).shorty_idx_;
-  uint32_t shorty_len;
-  const char* descriptor = dex_file_->StringDataAndUtf16LengthByIdx(shorty_idx, &shorty_len);
-  int32_t sig_registers = method_type == METHOD_STATIC ? 0 : 1;
-  for (size_t i = 1; i < shorty_len; i++) {
-    if (descriptor[i] == 'J' || descriptor[i] == 'D') {
-      sig_registers += 2;
-    } else {
-      sig_registers++;
-    }
-  }
-  if (inst->VRegA() != sig_registers) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
-        " arguments, found " << sig_registers;
-    return nullptr;
-  }
-
   mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == NULL) {  // error or class is unresolved
-    return NULL;
+    // Check what we can statically.
+    if (!have_pending_hard_failure_) {
+      VerifyInvocationArgsUnresolvedMethod(inst, method_type, is_range);
+    }
+    return nullptr;
   }
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
@@ -3047,7 +3190,7 @@
       Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
                                    << PrettyMethod(dex_method_idx_, *dex_file_)
                                    << " to super " << PrettyMethod(res_method);
-      return NULL;
+      return nullptr;
     }
     mirror::Class* super_klass = super.GetClass();
     if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) {
@@ -3056,95 +3199,14 @@
                                    << " to super " << super
                                    << "." << res_method->GetName()
                                    << res_method->GetSignature();
-      return NULL;
+      return nullptr;
     }
   }
-  // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
-  // match the call to the signature. Also, we might be calling through an abstract method
-  // definition (which doesn't have register count values).
-  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
-  /* caught by static verifier */
-  DCHECK(is_range || expected_args <= 5);
-  if (expected_args > code_item_->outs_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
-        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
-    return NULL;
-  }
 
-  /*
-   * Check the "this" argument, which must be an instance of the class that declared the method.
-   * For an interface class, we don't do the full interface merge (see JoinClass), so we can't do a
-   * rigorous check here (which is okay since we have to do it at runtime).
-   */
-  size_t actual_args = 0;
-  if (!res_method->IsStatic()) {
-    const RegType& actual_arg_type = work_line_->GetInvocationThis(inst, is_range);
-    if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
-      return NULL;
-    }
-    if (actual_arg_type.IsUninitializedReference() && !res_method->IsConstructor()) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
-      return NULL;
-    }
-    if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
-      mirror::Class* klass = res_method->GetDeclaringClass();
-      const RegType& res_method_class =
-          reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
-                               klass->CannotBeAssignedFromOtherTypes());
-      if (!res_method_class.IsAssignableFrom(actual_arg_type)) {
-        Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
-            VERIFY_ERROR_BAD_CLASS_SOFT) << "'this' argument '" << actual_arg_type
-            << "' not instance of '" << res_method_class << "'";
-        return NULL;
-      }
-    }
-    actual_args++;
-  }
-  /*
-   * Process the target method's signature. This signature may or may not
-   * have been verified, so we can't assume it's properly formed.
-   */
-  const DexFile::TypeList* params = res_method->GetParameterTypeList();
-  size_t params_size = params == NULL ? 0 : params->Size();
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetVarArgs(arg);
-  }
-  for (size_t param_index = 0; param_index < params_size; param_index++) {
-    if (actual_args >= expected_args) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invalid call to '" << PrettyMethod(res_method)
-          << "'. Expected " << expected_args << " arguments, processing argument " << actual_args
-          << " (where longs/doubles count twice).";
-      return NULL;
-    }
-    const char* descriptor =
-        res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
-    if (descriptor == NULL) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
-          << " missing signature component";
-      return NULL;
-    }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
-    uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
-    if (reg_type.IsIntegralTypes()) {
-      const RegType& src_type = work_line_->GetRegisterType(get_reg);
-      if (!src_type.IsIntegralTypes()) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register v" << get_reg << " has type " << src_type
-                                          << " but expected " << reg_type;
-        return res_method;
-      }
-    } else if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
-      return res_method;
-    }
-    actual_args = reg_type.IsLongOrDoubleTypes() ? actual_args + 2 : actual_args + 1;
-  }
-  if (actual_args != expected_args) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
-        << " expected " << expected_args << " arguments, found " << actual_args;
-    return NULL;
-  } else {
-    return res_method;
-  }
+  // Process the target method's signature. This signature may or may not have been verified,
+  // so we can't assume it's properly formed.
+  MethodParamListDescriptorIterator it(res_method);
+  return VerifyInvocationArgsFromIterator<MethodParamListDescriptorIterator>(&it, inst, method_type,
+                                                                             is_range, res_method);
 }
 
 mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst,
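
VerifyInvocationArgsFromIterator only requires HasNext()/Next()/GetDescriptor() from its template parameter, which is what lets the resolved path (MethodParamListDescriptorIterator) and the unresolved path (DexFileParameterIterator) share one implementation. Any type with that shape would do; an illustrative, hypothetical example:

    // Minimal iterator satisfying the implicit concept; yields two fixed descriptors.
    class FixedDescriptorIterator {
     public:
      bool HasNext() { return pos_ < 2; }
      void Next() { ++pos_; }
      const char* GetDescriptor() { return pos_ == 0 ? "I" : "Ljava/lang/String;"; }
     private:
      size_t pos_ = 0;
    };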
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 451c9e2..b6d5b35 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -565,6 +565,18 @@
                                           bool is_range, bool is_super)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Similar checks to the above, but on the proto. Will be used when the method cannot be
+  // resolved.
+  void VerifyInvocationArgsUnresolvedMethod(const Instruction* inst, MethodType method_type,
+                                            bool is_range)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <class T>
+  mirror::ArtMethod* VerifyInvocationArgsFromIterator(T* it, const Instruction* inst,
+                                                      MethodType method_type, bool is_range,
+                                                      mirror::ArtMethod* res_method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::ArtMethod* GetQuickInvokedMethod(const Instruction* inst,
                                            RegisterLine* reg_line,
                                            bool is_range)