Merge "ART: Add Clang's -Wused-but-marked-unused"
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 03370db..eeed877 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -132,7 +132,7 @@
                                const ArrayRef<const uint8_t>& vmap_table,
                                const ArrayRef<const uint8_t>& native_gc_map,
                                const ArrayRef<const uint8_t>& cfi_info,
-                               const ArrayRef<LinkerPatch>& patches)
+                               const ArrayRef<const LinkerPatch>& patches)
     : CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()),
       owns_arrays_(!driver->DedupeEnabled()),
       frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
@@ -179,7 +179,7 @@
     const ArrayRef<const uint8_t>& vmap_table,
     const ArrayRef<const uint8_t>& native_gc_map,
     const ArrayRef<const uint8_t>& cfi_info,
-    const ArrayRef<LinkerPatch>& patches) {
+    const ArrayRef<const LinkerPatch>& patches) {
   SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
   CompiledMethod* ret = alloc.allocate(1);
   alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
@@ -200,7 +200,8 @@
   CompiledMethod* ret = alloc.allocate(1);
   alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
                   fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map,
-                  ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<LinkerPatch>());
+                  ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+                  ArrayRef<const LinkerPatch>());
   return ret;
 }
 
@@ -217,7 +218,7 @@
   alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
                   fp_spill_mask, nullptr, ArrayRef<const uint8_t>(),
                   ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
-                  cfi_info, ArrayRef<LinkerPatch>());
+                  cfi_info, ArrayRef<const LinkerPatch>());
   return ret;
 }
 
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 7497b17..506b47b 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -320,7 +320,7 @@
                  const ArrayRef<const uint8_t>& vmap_table,
                  const ArrayRef<const uint8_t>& native_gc_map,
                  const ArrayRef<const uint8_t>& cfi_info,
-                 const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>());
+                 const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>());
 
   virtual ~CompiledMethod();
 
@@ -336,7 +336,7 @@
       const ArrayRef<const uint8_t>& vmap_table,
       const ArrayRef<const uint8_t>& native_gc_map,
       const ArrayRef<const uint8_t>& cfi_info,
-      const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>());
+      const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>());
 
   static CompiledMethod* SwapAllocCompiledMethodStackMap(
       CompilerDriver* driver,
@@ -391,8 +391,8 @@
     return cfi_info_;
   }
 
-  const SwapVector<LinkerPatch>& GetPatches() const {
-    return patches_;
+  ArrayRef<const LinkerPatch> GetPatches() const {
+    return ArrayRef<const LinkerPatch>(patches_);
   }
 
  private:
@@ -417,7 +417,7 @@
   // For quick code, a FDE entry for the debug_frame section.
   SwapVector<uint8_t>* cfi_info_;
   // For quick code, linker patches needed by the method.
-  SwapVector<LinkerPatch> patches_;
+  const SwapVector<LinkerPatch> patches_;
 };
 
 }  // namespace art
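
(Illustrative sketch, not part of the patch: the compiled_method.{cc,h} hunks above change CompiledMethod to take and return ArrayRef<const LinkerPatch>, i.e. a read-only view of the patches. The stripped-down ArrayRef below is a hypothetical stand-in for art::ArrayRef, shown only to illustrate why an ArrayRef<const T> parameter still accepts the non-const LinkerPatch arrays and vectors that the call sites pass.)

#include <cstddef>
#include <vector>

template <typename T>
class ArrayRef {
 public:
  ArrayRef() : data_(nullptr), size_(0u) {}
  template <size_t n>
  explicit ArrayRef(T (&array)[n]) : data_(array), size_(n) {}
  // Allows ArrayRef<const T> to be built from a std::vector<T> (U deduced as the
  // non-const element type), mirroring how codegen_util.cc wraps patches_.
  template <typename U>
  explicit ArrayRef(const std::vector<U>& v) : data_(v.data()), size_(v.size()) {}
  T* data() const { return data_; }
  size_t size() const { return size_; }
 private:
  T* data_;
  size_t size_;
};

struct LinkerPatchStub {};  // Hypothetical stand-in for art::LinkerPatch.

// Read-only view: callers keep mutable storage, the callee cannot modify it.
void AddPatches(const ArrayRef<const LinkerPatchStub>& patches) {
  (void)patches.size();
}

int main() {
  LinkerPatchStub local_patches[2];
  AddPatches(ArrayRef<const LinkerPatchStub>(local_patches));  // from a non-const array
  std::vector<LinkerPatchStub> vec(3);
  AddPatches(ArrayRef<const LinkerPatchStub>(vec));            // from a non-const vector
  return 0;
}
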
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 483a5d0..f944c11 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1171,7 +1171,7 @@
       ArrayRef<const uint8_t>(vmap_encoder.GetData()),
       ArrayRef<const uint8_t>(native_gc_map_),
       cfi_ref,
-      ArrayRef<LinkerPatch>(patches_));
+      ArrayRef<const LinkerPatch>(patches_));
 }
 
 size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index abdfd6d..3b397cc 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -39,14 +39,14 @@
   static constexpr uint32_t kBlMinusMax = 0xf400d000;
 
   bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
-                             const ArrayRef<LinkerPatch>& method1_patches,
+                             const ArrayRef<const LinkerPatch>& method1_patches,
                              const ArrayRef<const uint8_t>& method3_code,
-                             const ArrayRef<LinkerPatch>& method3_patches,
+                             const ArrayRef<const LinkerPatch>& method3_patches,
                              uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
     const uint32_t method1_offset =
         CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
-    AddCompiledMethod(MethodRef(1u), method1_code, ArrayRef<LinkerPatch>(method1_patches));
+    AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
 
     // We want to put the method3 at a very precise offset.
     const uint32_t method3_offset = method1_offset + distance_without_thunks;
@@ -59,7 +59,7 @@
     const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
     std::vector<uint8_t> method2_raw_code(method2_size);
     ArrayRef<const uint8_t> method2_code(method2_raw_code);
-    AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<LinkerPatch>());
+    AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>());
 
     AddCompiledMethod(MethodRef(3u), method3_code, method3_patches);
 
@@ -139,7 +139,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   static const uint8_t expected_code[] = {
@@ -152,11 +152,11 @@
   LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
   LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches));
+  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -179,7 +179,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -201,7 +201,7 @@
 
   constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */;
   bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
-                                            kNopCode, ArrayRef<LinkerPatch>(),
+                                            kNopCode, ArrayRef<const LinkerPatch>(),
                                             bl_offset_in_method1 + max_positive_disp);
   ASSERT_FALSE(thunk_in_gap);  // There should be no thunk.
 
@@ -220,7 +220,7 @@
   };
 
   constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(),
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
                                             method3_code, method3_patches,
                                             just_over_max_negative_disp - bl_offset_in_method3);
   ASSERT_FALSE(thunk_in_gap);  // There should be no thunk.
@@ -241,7 +241,7 @@
 
   constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */;
   bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
-                                            kNopCode, ArrayRef<LinkerPatch>(),
+                                            kNopCode, ArrayRef<const LinkerPatch>(),
                                             bl_offset_in_method1 + just_over_max_positive_disp);
   ASSERT_TRUE(thunk_in_gap);
 
@@ -269,7 +269,7 @@
   };
 
   constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
-  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(),
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
                                             method3_code, method3_patches,
                                             just_over_max_negative_disp - bl_offset_in_method3);
   ASSERT_FALSE(thunk_in_gap);  // There should be a thunk but it should be after the method2.
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b039936..b36e6d0 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -43,14 +43,14 @@
   static constexpr uint32_t kLdurInsn = 0xf840405fu;
 
   uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
-                                 const ArrayRef<LinkerPatch>& method1_patches,
+                                 const ArrayRef<const LinkerPatch>& method1_patches,
                                  const ArrayRef<const uint8_t>& last_method_code,
-                                 const ArrayRef<LinkerPatch>& last_method_patches,
+                                 const ArrayRef<const LinkerPatch>& last_method_patches,
                                  uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
     const uint32_t method1_offset =
         CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
-    AddCompiledMethod(MethodRef(1u), method1_code, ArrayRef<LinkerPatch>(method1_patches));
+    AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
     const uint32_t gap_start =
         CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
 
@@ -70,13 +70,13 @@
       uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
       gap_code.resize(chunk_code_size, 0u);
       AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
-                        ArrayRef<LinkerPatch>());
+                        ArrayRef<const LinkerPatch>());
       method_idx += 1u;
     }
     uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
     gap_code.resize(chunk_code_size, 0u);
     AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
-                      ArrayRef<LinkerPatch>());
+                      ArrayRef<const LinkerPatch>());
     method_idx += 1u;
 
     // Add the last method and link
@@ -174,7 +174,8 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), ArrayRef<LinkerPatch>(patches));
+    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
     Link();
 
     uint32_t method1_offset = GetMethodOffset(1u);
@@ -202,7 +203,8 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), ArrayRef<LinkerPatch>(patches));
+    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
     Link();
   }
 
@@ -300,7 +302,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   static const uint8_t expected_code[] = {
@@ -313,11 +315,11 @@
   LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
   LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches));
+  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -340,7 +342,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -363,7 +365,7 @@
 
   constexpr uint32_t max_positive_disp = 128 * MB - 4u;
   uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches,
-                                                   kNopCode, ArrayRef<LinkerPatch>(),
+                                                   kNopCode, ArrayRef<const LinkerPatch>(),
                                                    bl_offset_in_method1 + max_positive_disp);
   ASSERT_EQ(expected_last_method_idx, last_method_idx);
 
@@ -386,7 +388,7 @@
   };
 
   constexpr uint32_t max_negative_disp = 128 * MB;
-  uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(),
+  uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
                                                    last_method_code, last_method_patches,
                                                    max_negative_disp - bl_offset_in_last_method);
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -411,7 +413,7 @@
 
   constexpr uint32_t just_over_max_positive_disp = 128 * MB;
   uint32_t last_method_idx = Create2MethodsWithGap(
-      method1_code, method1_patches, kNopCode, ArrayRef<LinkerPatch>(),
+      method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(),
       bl_offset_in_method1 + just_over_max_positive_disp);
   ASSERT_EQ(expected_last_method_idx, last_method_idx);
 
@@ -440,7 +442,7 @@
 
   constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
   uint32_t last_method_idx = Create2MethodsWithGap(
-      kNopCode, ArrayRef<LinkerPatch>(), last_method_code, last_method_patches,
+      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches,
       just_over_max_negative_disp - bl_offset_in_last_method);
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 08167b3..70630f3 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -69,7 +69,7 @@
 
   void AddCompiledMethod(MethodReference method_ref,
                          const ArrayRef<const uint8_t>& code,
-                         const ArrayRef<LinkerPatch>& patches) {
+                         const ArrayRef<const LinkerPatch>& patches) {
     compiled_method_refs_.push_back(method_ref);
     compiled_methods_.emplace_back(new CompiledMethod(
         &driver_, instruction_set_, code,
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index c18a743..15ac47e 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -45,7 +45,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   static const uint8_t expected_code[] = {
@@ -58,11 +58,11 @@
   LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
   LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches));
+  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -87,7 +87,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1));
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
index 9d9529c..36e0f01 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -55,7 +55,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   static const uint8_t expected_code[] = {
@@ -68,11 +68,11 @@
   LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
   LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
   };
-  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches));
+  AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
   Link();
 
   uint32_t method1_offset = GetMethodOffset(1u);
@@ -97,7 +97,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
   };
-  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
@@ -117,7 +117,7 @@
   LinkerPatch patches[] = {
       LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset),
   };
-  AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<LinkerPatch>(patches));
+  AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ff57603..b4de879 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -32,7 +32,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
@@ -97,7 +97,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #96
     .cfi_adjust_cfa_offset 96
@@ -266,7 +266,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 697bf00..3d502e6 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -77,7 +77,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -120,7 +120,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -237,7 +237,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -248,7 +248,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 3a448a5..ce21f01 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -67,7 +67,7 @@
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -110,7 +110,7 @@
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the stop quick frame.
@@ -170,7 +170,7 @@
     CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 0d0017d..dba4af8 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -57,6 +57,11 @@
 #define STACK_REFERENCE_SIZE 4
 ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
 
+// Size of heap references
+#define COMPRESSED_REFERENCE_SIZE 4
+ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
+            sizeof(art::mirror::CompressedReference<art::mirror::Object>))
+
 // Note: these callee save methods loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
@@ -64,12 +69,12 @@
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
 
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET __SIZEOF_POINTER__
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET COMPRESSED_REFERENCE_SIZE
 ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
 
 // Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * __SIZEOF_POINTER__)
+#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * COMPRESSED_REFERENCE_SIZE)
 ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
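
(Illustrative sketch: the asm_support.h hunk above re-expresses the callee-save-method offsets in COMPRESSED_REFERENCE_SIZE strides instead of pointer-size strides, which is also why the arm64, mips64 and x86-64 entrypoints above switch to 32-bit loads (ldr wIP0, lwu, movl). The snippet below restates that layout invariant with hypothetical stand-in names; it is not ART's actual asm_support test machinery.)

#include <cstddef>
#include <cstdint>

namespace sketch {

// Matches COMPRESSED_REFERENCE_SIZE: a heap reference is stored as 4 bytes.
constexpr std::size_t kCompressedReferenceSize = 4;

enum CalleeSaveType { kSaveAll = 0, kRefsOnly = 1, kRefsAndArgs = 2 };

// Each slot holds a 32-bit compressed reference, so the array is packed at a
// 4-byte stride even on 64-bit targets.
struct CalleeSaveMethods {
  std::uint32_t refs[3];
};

constexpr std::size_t CalleeSaveMethodOffset(CalleeSaveType type) {
  return static_cast<std::size_t>(type) * kCompressedReferenceSize;
}

static_assert(CalleeSaveMethodOffset(kSaveAll) == 0u, "kSaveAll offset");
static_assert(CalleeSaveMethodOffset(kRefsOnly) == kCompressedReferenceSize, "kRefsOnly offset");
static_assert(CalleeSaveMethodOffset(kRefsAndArgs) == 2u * kCompressedReferenceSize,
              "kRefsAndArgs offset");
static_assert(sizeof(CalleeSaveMethods) == 3u * kCompressedReferenceSize, "packed slots");

}  // namespace sketch
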
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cdd8e73..33d75d2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -242,7 +242,10 @@
       quick_generic_jni_trampoline_(nullptr),
       quick_to_interpreter_bridge_trampoline_(nullptr),
       image_pointer_size_(sizeof(void*)) {
-  memset(find_array_class_cache_, 0, kFindArrayCacheSize * sizeof(mirror::Class*));
+  CHECK(intern_table_ != nullptr);
+  for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
+    find_array_class_cache_[i] = GcRoot<mirror::Class>(nullptr);
+  }
 }
 
 void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
@@ -908,19 +911,20 @@
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
 }
 
-void ClassLinker::VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   if ((flags & kVisitRootFlagAllRoots) != 0) {
+    BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootStickyClass));
     for (GcRoot<mirror::Class>& root : class_table_) {
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      buffered_visitor.VisitRoot(root);
     }
     for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      buffered_visitor.VisitRoot(root);
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      root.VisitRoot(visitor, RootInfo(kRootStickyClass));
       mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
       if (UNLIKELY(new_ref != old_ref)) {
         // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
@@ -947,18 +951,18 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  class_roots_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  Thread* self = Thread::Current();
+void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  class_roots_.VisitRoot(visitor, RootInfo(kRootVMInternal));
+  Thread* const self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
     if ((flags & kVisitRootFlagAllRoots) != 0) {
       for (GcRoot<mirror::DexCache>& dex_cache : dex_caches_) {
-        dex_cache.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+        dex_cache.VisitRoot(visitor, RootInfo(kRootVMInternal));
       }
     } else if ((flags & kVisitRootFlagNewRoots) != 0) {
       for (size_t index : new_dex_cache_roots_) {
-        dex_caches_[index].VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+        dex_caches_[index].VisitRoot(visitor, RootInfo(kRootVMInternal));
       }
     }
     if ((flags & kVisitRootFlagClearRootLog) != 0) {
@@ -970,11 +974,10 @@
       log_new_dex_caches_roots_ = false;
     }
   }
-  VisitClassRoots(callback, arg, flags);
-  array_iftable_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  DCHECK(!array_iftable_.IsNull());
+  VisitClassRoots(visitor, flags);
+  array_iftable_.VisitRoot(visitor, RootInfo(kRootVMInternal));
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
-    find_array_class_cache_[i].VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+    find_array_class_cache_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   }
 }
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 69a5337..577fec2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -299,10 +299,10 @@
   void VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3e727e7..3f6c5a0 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -358,7 +358,8 @@
       const char* descriptor = dex.GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
-    class_linker_->VisitRoots(TestRootVisitor, nullptr, kVisitRootFlagAllRoots);
+    TestRootVisitor visitor;
+    class_linker_->VisitRoots(&visitor, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
     mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
     mirror::ObjectArray<mirror::ArtMethod>* resolved_methods = dex_cache->GetResolvedMethods();
@@ -367,9 +368,12 @@
     }
   }
 
-  static void TestRootVisitor(mirror::Object** root, void*, const RootInfo&) {
-    EXPECT_TRUE(*root != nullptr);
-  }
+  class TestRootVisitor : public SingleRootVisitor {
+   public:
+    void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE {
+      EXPECT_TRUE(root != nullptr);
+    }
+  };
 };
 
 struct CheckOffset {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a767cf0..3f67f9e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -344,16 +344,14 @@
 // Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
 
-void DebugInvokeReq::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  receiver.VisitRootIfNonNull(callback, arg, root_info);  // null for static method call.
-  klass.VisitRoot(callback, arg, root_info);
-  method.VisitRoot(callback, arg, root_info);
+void DebugInvokeReq::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  receiver.VisitRootIfNonNull(visitor, root_info);  // null for static method call.
+  klass.VisitRoot(visitor, root_info);
+  method.VisitRoot(visitor, root_info);
 }
 
-void SingleStepControl::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  if (method_ != nullptr) {
-    callback(reinterpret_cast<mirror::Object**>(&method_), arg, root_info);
-  }
+void SingleStepControl::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  visitor->VisitRootIfNonNull(reinterpret_cast<mirror::Object**>(&method_), root_info);
 }
 
 void SingleStepControl::AddDexPc(uint32_t dex_pc) {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 4f4a781..62eda62 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -81,7 +81,7 @@
   Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable cond GUARDED_BY(lock);
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
@@ -117,7 +117,7 @@
     return dex_pcs_;
   }
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AddDexPc(uint32_t dex_pc);
@@ -648,7 +648,7 @@
   static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index db7a4ef..56919bd 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -174,7 +174,7 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(ConcurrentCopying::ProcessRootCallback, concurrent_copying_);
+    thread->VisitRoots(concurrent_copying_);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
@@ -208,7 +208,7 @@
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
-      Runtime::Current()->VisitTransactionRoots(ConcurrentCopying::ProcessRootCallback, cc);
+      Runtime::Current()->VisitTransactionRoots(cc);
     }
   }
 
@@ -332,22 +332,20 @@
   }
   {
     TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitConstantRoots(this);
   }
   {
     TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
-    Runtime::Current()->VisitNonThreadRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitNonThreadRoots(this);
   }
 
   // Immune spaces.
@@ -486,7 +484,7 @@
 
 // The following visitors are that used to verify that there's no
 // references to the from-space left after marking.
-class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor {
+class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
@@ -516,16 +514,14 @@
     }
   }
 
-  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector);
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != nullptr);
-    visitor(*root);
+    operator()(root);
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor {
@@ -594,8 +590,8 @@
   // Roots.
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    Runtime::Current()->VisitRoots(
-        ConcurrentCopyingVerifyNoFromSpaceRefsVisitor::RootCallback, this);
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    Runtime::Current()->VisitRoots(&ref_visitor);
   }
   // The to-space.
   region_space_->WalkToSpace(ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor::ObjectCallback,
@@ -1087,11 +1083,6 @@
   }
 }
 
-void ConcurrentCopying::ProcessRootCallback(mirror::Object** root, void* arg,
-                                            const RootInfo& /*root_info*/) {
-  reinterpret_cast<ConcurrentCopying*>(arg)->Process(root);
-}
-
 // Used to scan ref fields of an object.
 class ConcurrentCopyingRefFieldsVisitor {
  public:
@@ -1144,25 +1135,54 @@
       offset, expected_ref, new_ref));
 }
 
-// Process a root.
-void ConcurrentCopying::Process(mirror::Object** root) {
-  mirror::Object* ref = *root;
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
-  mirror::Object* to_ref = Mark(ref);
-  if (to_ref == ref) {
-    return;
-  }
-  Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-  mirror::Object* expected_ref = ref;
-  mirror::Object* new_ref = to_ref;
-  do {
-    if (expected_ref != addr->LoadRelaxed()) {
-      // It was updated by the mutator.
-      break;
+// Process some roots.
+void ConcurrentCopying::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::Object** root = roots[i];
+    mirror::Object* ref = *root;
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
     }
-  } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+    mirror::Object* expected_ref = ref;
+    mirror::Object* new_ref = to_ref;
+    do {
+      if (expected_ref != addr->LoadRelaxed()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
+}
+
+void ConcurrentCopying::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::CompressedReference<mirror::Object>* root = roots[i];
+    mirror::Object* ref = root->AsMirrorPtr();
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
+    }
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
+    auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
+    auto new_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(to_ref);
+    do {
+      if (ref != addr->LoadRelaxed().AsMirrorPtr()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
 }
 
 // Fill the given memory block with a dummy object. Used to fill in a
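
(Illustrative sketch: the two VisitRoots overloads added above install the to-space address into each root slot with a weak compare-and-swap loop, so a store done concurrently by a mutator is never overwritten with a stale from-space reference. The standalone function below shows only that update pattern; Obj and Mark() are hypothetical stand-ins, and the atomic cast mirrors the Atomic<mirror::Object*> cast used in the patch.)

#include <atomic>

struct Obj {};

// Stand-in for ConcurrentCopying::Mark(): would return the to-space copy of from_ref.
Obj* Mark(Obj* from_ref) { return from_ref; }

void UpdateRootSlot(Obj** root) {
  Obj* ref = *root;
  if (ref == nullptr) {
    return;  // Nothing to do (the real code also skips refs already in to-space).
  }
  Obj* to_ref = Mark(ref);
  if (to_ref == ref) {
    return;  // Not moved; the slot is already correct.
  }
  // Reinterpreting the slot as an atomic mirrors the patch's cast to Atomic<mirror::Object*>.
  auto* addr = reinterpret_cast<std::atomic<Obj*>*>(root);
  Obj* expected = ref;
  // Weak CAS loop: retry on spurious failure, but stop as soon as the slot no longer
  // holds the value we marked, because the mutator has already updated it.
  while (!addr->compare_exchange_weak(expected, to_ref)) {
    if (expected != ref) {
      break;  // Updated by the mutator; its value wins.
    }
    expected = ref;  // Spurious failure: restore the expected value and retry.
  }
}
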
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index bbb551a..a87053d 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -192,9 +192,11 @@
   void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Process(mirror::Object** root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void ProcessRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index ed5207a..c5a8d5d 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -22,6 +22,7 @@
 #include "base/timing_logger.h"
 #include "gc/collector_type.h"
 #include "gc/gc_cause.h"
+#include "gc_root.h"
 #include "gc_type.h"
 #include <stdint.h>
 #include <vector>
@@ -112,7 +113,7 @@
   DISALLOW_COPY_AND_ASSIGN(Iteration);
 };
 
-class GarbageCollector {
+class GarbageCollector : public RootVisitor {
  public:
   class SCOPED_LOCKABLE ScopedPause {
    public:
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index d1ce0bc..8902df8 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -309,19 +309,57 @@
   reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void MarkCompact::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkCompact*>(arg)->MarkObject(*root);
-}
-
-void MarkCompact::UpdateRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  mirror::Object* obj = *root;
-  mirror::Object* new_obj = reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
-  if (obj != new_obj) {
-    *root = new_obj;
-    DCHECK(new_obj != nullptr);
+void MarkCompact::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(*roots[i]);
   }
 }
 
+void MarkCompact::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]->AsMirrorPtr());
+  }
+}
+
+class UpdateRootVisitor : public RootVisitor {
+ public:
+  explicit UpdateRootVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = *roots[i];
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        *roots[i] = new_obj;
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = roots[i]->AsMirrorPtr();
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        roots[i]->Assign(new_obj);
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
 class UpdateObjectReferencesVisitor {
  public:
   explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
@@ -339,7 +377,8 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime* runtime = Runtime::Current();
   // Update roots.
-  runtime->VisitRoots(UpdateRootCallback, this);
+  UpdateRootVisitor update_root_visitor(this);
+  runtime->VisitRoots(&update_root_visitor);
   // Update object references in mod union tables and spaces.
   for (const auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
@@ -396,7 +435,7 @@
 // Marks all objects in the root set.
 void MarkCompact::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 06304bf..4337644 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -114,8 +114,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -245,6 +249,8 @@
   friend class MoveObjectVisitor;
   friend class UpdateObjectReferencesVisitor;
   friend class UpdateReferenceVisitor;
+  friend class UpdateRootVisitor;
+
   DISALLOW_COPY_AND_ASSIGN(MarkCompact);
 };
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index ee4e752..79d1034 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -462,42 +462,66 @@
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
+class VerifyRootMarkedVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootMarkedVisitor(MarkSweep* collector) : collector_(collector) { }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    CHECK(collector_->IsMarked(root)) << info.ToString();
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
+
+void MarkSweep::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(*roots[i]);
+  }
 }
 
-void MarkSweep::VerifyRootMarked(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  CHECK(reinterpret_cast<MarkSweep*>(arg)->IsMarked(*root));
+void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(roots[i]->AsMirrorPtr());
+  }
 }
 
-void MarkSweep::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(*root);
-}
+class VerifyRootVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootVisitor(MarkSweep* collector) : collector_(collector) { }
 
-void MarkSweep::VerifyRootCallback(Object** root, void* arg, const RootInfo& root_info) {
-  reinterpret_cast<MarkSweep*>(arg)->VerifyRoot(*root, root_info);
-}
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->VerifyRoot(root, info);
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
 
 void MarkSweep::VerifyRoot(const Object* root, const RootInfo& root_info) {
   // See if the root is on any space bitmap.
   if (heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
     if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-      LOG(ERROR) << "Found invalid root: " << root << " ";
-      root_info.Describe(LOG(ERROR));
+      LOG(ERROR) << "Found invalid root: " << root << " " << root_info;
     }
   }
 }
 
 void MarkSweep::VerifyRoots() {
-  Runtime::Current()->GetThreadList()->VisitRoots(VerifyRootCallback, this);
+  VerifyRootVisitor visitor(this);
+  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
-    Runtime::Current()->VisitRoots(MarkRootCallback, this);
+    Runtime::Current()->VisitRoots(this);
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
     MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
@@ -510,13 +534,13 @@
 
 void MarkSweep::MarkNonThreadRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitNonThreadRoots(this);
 }
 
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
+  Runtime::Current()->VisitConcurrentRoots(this, flags);
 }
 
 class ScanObjectVisitor {
@@ -932,13 +956,12 @@
 void MarkSweep::ReMarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  Runtime::Current()->VisitRoots(
-      MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
-                                                          kVisitRootFlagStopLoggingNewRoots |
-                                                          kVisitRootFlagClearRootLog));
+  Runtime::Current()->VisitRoots(this, static_cast<VisitRootFlags>(
+      kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots | kVisitRootFlagClearRootLog));
   if (kVerifyRootsMarked) {
     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
-    Runtime::Current()->VisitRoots(VerifyRootMarked, this);
+    VerifyRootMarkedVisitor visitor(this);
+    Runtime::Current()->VisitRoots(&visitor);
   }
 }
 
@@ -968,7 +991,7 @@
   Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
-class CheckpointMarkThreadRoots : public Closure {
+class CheckpointMarkThreadRoots : public Closure, public RootVisitor {
  public:
   explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep,
                                      bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
@@ -977,13 +1000,30 @@
             revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
   }
 
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(*roots[i]);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(roots[i]->AsMirrorPtr());
+    }
+  }
+
   virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
     ATRACE_BEGIN("Marking thread roots");
     // Note: self is not necessarily equal to thread since thread may be suspended.
-    Thread* self = Thread::Current();
+    Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
+    thread->VisitRoots(this);
     ATRACE_END();
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
       ATRACE_BEGIN("RevokeRosAllocThreadLocalBuffers");
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3f99e21..31cea17 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -185,11 +185,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void VerifyRootMarked(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -197,9 +198,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void MarkRootParallelCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object.
   void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -250,9 +248,8 @@
   // whether or not we care about pauses.
   size_t GetThreadCount(bool paused) const;
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info);
-
-  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Push a single reference on a mark stack.
   void PushOnMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -326,18 +323,21 @@
   friend class CardScanTask;
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
+  friend class CheckpointMarkThreadRoots;
   friend class art::gc::Heap;
+  friend class FifoMarkStackChunk;
   friend class MarkObjectVisitor;
+  template<bool kUseFinger> friend class MarkStackTask;
+  friend class MarkSweepMarkObjectSlowPath;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
-  friend class ModUnionVisitor;
+  friend class ModUnionScanImageRootVisitor;
   friend class ModUnionTableBitmap;
   friend class ModUnionTableReferenceCache;
-  friend class ModUnionScanImageRootVisitor;
-  template<bool kUseFinger> friend class MarkStackTask;
-  friend class FifoMarkStackChunk;
-  friend class MarkSweepMarkObjectSlowPath;
+  friend class ModUnionVisitor;
+  friend class VerifyRootMarkedVisitor;
+  friend class VerifyRootVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(MarkSweep);
 };
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index b3d59f2..dbf01d8 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -603,18 +603,29 @@
   reinterpret_cast<SemiSpace*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void SemiSpace::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
-  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
-  if (*root != ref.AsMirrorPtr()) {
-    *root = ref.AsMirrorPtr();
+void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    auto* root = roots[i];
+    auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
+    MarkObject(&ref);
+    if (*root != ref.AsMirrorPtr()) {
+      *root = ref.AsMirrorPtr();
+    }
+  }
+}
+
+void SemiSpace::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]);
   }
 }
 
 // Marks all objects in the root set.
 void SemiSpace::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 192fb14..61fbead 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -98,7 +98,7 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Returns the new address of the object.
+  // Updates obj_ptr if the object has moved.
   template<bool kPoisonReferences>
   void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -133,8 +133,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index be7344a..d80bba6 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2395,13 +2395,21 @@
   gc_complete_cond_->Broadcast(self);
 }
 
-static void RootMatchesObjectVisitor(mirror::Object** root, void* arg,
-                                     const RootInfo& /*root_info*/) {
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
-  if (*root == obj) {
-    LOG(INFO) << "Object " << obj << " is a root";
+class RootMatchesObjectVisitor : public SingleRootVisitor {
+ public:
+  explicit RootMatchesObjectVisitor(const mirror::Object* obj) : obj_(obj) { }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (root == obj_) {
+      LOG(INFO) << "Object " << obj_ << " is a root " << info.ToString();
+    }
   }
-}
+
+ private:
+  const mirror::Object* const obj_;
+};
+
 
 class ScanVisitor {
  public:
@@ -2411,7 +2419,7 @@
 };
 
 // Verify a reference from an object.
-class VerifyReferenceVisitor {
+class VerifyReferenceVisitor : public SingleRootVisitor {
  public:
   explicit VerifyReferenceVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
@@ -2438,11 +2446,12 @@
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  void VisitRoot(mirror::Object* root, const RootInfo& root_info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    if (!visitor->VerifyReference(nullptr, *root, MemberOffset(0))) {
-      LOG(ERROR) << "Root " << *root << " is dead with type " << PrettyTypeOf(*root)
+    if (root == nullptr) {
+      LOG(ERROR) << "Root is null with info " << root_info.GetType();
+    } else if (!VerifyReference(nullptr, root, MemberOffset(0))) {
+      LOG(ERROR) << "Root " << root << " is dead with type " << PrettyTypeOf(root)
           << " thread_id= " << root_info.GetThreadId() << " root_type= " << root_info.GetType();
     }
   }
@@ -2534,12 +2543,11 @@
       }
 
       // Search to see if any of the roots reference our object.
-      void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
-
+      RootMatchesObjectVisitor visitor1(obj);
+      Runtime::Current()->VisitRoots(&visitor1);
       // Search to see if any of the roots reference our reference.
-      arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
+      RootMatchesObjectVisitor visitor2(ref);
+      Runtime::Current()->VisitRoots(&visitor2);
     }
     return false;
   }
@@ -2571,6 +2579,13 @@
     visitor->operator()(obj);
   }
 
+  void VerifyRoots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    VerifyReferenceVisitor visitor(heap_, fail_count_, verify_referent_);
+    Runtime::Current()->VisitRoots(&visitor);
+  }
+
   size_t GetFailureCount() const {
     return fail_count_->LoadSequentiallyConsistent();
   }
@@ -2637,7 +2652,7 @@
   // pointing to dead objects if they are not reachable.
   VisitObjectsPaused(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRootCallback, &visitor);
+  visitor.VerifyRoots();
   if (visitor.GetFailureCount() > 0) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index b09de6f..9195b06 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -259,7 +259,6 @@
   }
   GetMemMap()->SetSize(new_capacity);
   limit_ = Begin() + new_capacity;
-  CHECK(temp_bitmap_.get() == nullptr);
 }
 
 }  // namespace space
diff --git a/runtime/gc_root-inl.h b/runtime/gc_root-inl.h
index a42ec08..57d5689 100644
--- a/runtime/gc_root-inl.h
+++ b/runtime/gc_root-inl.h
@@ -19,6 +19,8 @@
 
 #include "gc_root.h"
 
+#include <sstream>
+
 #include "read_barrier-inl.h"
 
 namespace art {
@@ -26,7 +28,17 @@
 template<class MirrorType>
 template<ReadBarrierOption kReadBarrierOption>
 inline MirrorType* GcRoot<MirrorType>::Read() const {
-  return ReadBarrier::BarrierForRoot<MirrorType, kReadBarrierOption>(&root_);
+  return down_cast<MirrorType*>(
+      ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&root_));
+}
+template<class MirrorType>
+inline GcRoot<MirrorType>::GcRoot(MirrorType* ref)
+    : root_(mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref)) { }
+
+inline std::string RootInfo::ToString() const {
+  std::ostringstream oss;
+  Describe(oss);
+  return oss.str();
 }
 
 }  // namespace art
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index c5feda5..4164bbd 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -19,6 +19,7 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"       // For Locks::mutator_lock_.
+#include "mirror/object_reference.h"
 
 namespace art {
 
@@ -26,6 +27,9 @@
 class Object;
 }  // namespace mirror
 
+template <size_t kBufferSize>
+class BufferedRootVisitor;
+
 enum RootType {
   kRootUnknown = 0,
   kRootJNIGlobal,
@@ -43,6 +47,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const RootType& root_type);
 
+// tid and root_type are only used by hprof.
 class RootInfo {
  public:
   // Thread id 0 is for non thread roots.
@@ -60,15 +65,64 @@
   virtual void Describe(std::ostream& os) const {
     os << "Type=" << type_ << " thread_id=" << thread_id_;
   }
+  std::string ToString() const;
 
  private:
   const RootType type_;
   const uint32_t thread_id_;
 };
 
-// Returns the new address of the object, returns root if it has not moved. tid and root_type are
-// only used by hprof.
-typedef void (RootCallback)(mirror::Object** root, void* arg, const RootInfo& root_info);
+inline std::ostream& operator<<(std::ostream& os, const RootInfo& root_info) {
+  root_info.Describe(os);
+  return os;
+}
+
+class RootVisitor {
+ public:
+  virtual ~RootVisitor() { }
+
+  // Single root versions, not overridable.
+  ALWAYS_INLINE void VisitRoot(mirror::Object** roots, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VisitRoots(&roots, 1, info);
+  }
+
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::Object** roots, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (*roots != nullptr) {
+      VisitRoot(roots, info);
+    }
+  }
+
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+};
+
+// Only visits roots one at a time, doesn't handle updating roots. Used when performance isn't
+// critical.
+class SingleRootVisitor : public RootVisitor {
+ private:
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      VisitRoot(*roots[i], info);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      VisitRoot(roots[i]->AsMirrorPtr(), info);
+    }
+  }
+
+  virtual void VisitRoot(mirror::Object* root, const RootInfo& info) = 0;
+};
 
 template<class MirrorType>
 class GcRoot {
@@ -76,37 +130,92 @@
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE MirrorType* Read() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoot(RootCallback* callback, void* arg, const RootInfo& info) const {
+  void VisitRoot(RootVisitor* visitor, const RootInfo& info) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsNull());
-    callback(reinterpret_cast<mirror::Object**>(&root_), arg, info);
+    mirror::CompressedReference<mirror::Object>* roots[1] = { &root_ };
+    visitor->VisitRoots(roots, 1u, info);
     DCHECK(!IsNull());
   }
 
-  void VisitRootIfNonNull(RootCallback* callback, void* arg, const RootInfo& info) const {
+  void VisitRootIfNonNull(RootVisitor* visitor, const RootInfo& info) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (!IsNull()) {
-      VisitRoot(callback, arg, info);
+      VisitRoot(visitor, info);
     }
   }
 
-  // This is only used by IrtIterator.
-  ALWAYS_INLINE MirrorType** AddressWithoutBarrier() {
+  ALWAYS_INLINE mirror::CompressedReference<mirror::Object>* AddressWithoutBarrier() {
     return &root_;
   }
 
-  bool IsNull() const {
+  ALWAYS_INLINE bool IsNull() const {
     // It's safe to null-check it without a read barrier.
-    return root_ == nullptr;
+    return root_.IsNull();
   }
 
-  ALWAYS_INLINE explicit GcRoot<MirrorType>() : root_(nullptr) {
+  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  mutable mirror::CompressedReference<mirror::Object> root_;
+
+  template <size_t kBufferSize> friend class BufferedRootVisitor;
+};
+
+// Simple data structure for buffered root visiting to avoid virtual dispatch overhead. Currently
+// only for CompressedReferences since these are more common than the Object** roots which are only
+// for thread local roots.
+template <size_t kBufferSize>
+class BufferedRootVisitor {
+ public:
+  BufferedRootVisitor(RootVisitor* visitor, const RootInfo& root_info)
+      : visitor_(visitor), root_info_(root_info), buffer_pos_(0) {
   }
 
-  ALWAYS_INLINE explicit GcRoot<MirrorType>(MirrorType* ref)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : root_(ref) {
+  ~BufferedRootVisitor() {
+    Flush();
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(GcRoot<MirrorType>& root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!root.IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  void VisitRoot(GcRoot<MirrorType>& root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VisitRoot(root.AddressWithoutBarrier());
+  }
+
+  template <class MirrorType>
+  void VisitRoot(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(buffer_pos_ >= kBufferSize)) {
+      Flush();
+    }
+    roots_[buffer_pos_++] = root;
+  }
+
+  void Flush() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    visitor_->VisitRoots(roots_, buffer_pos_, root_info_);
+    buffer_pos_ = 0;
   }
 
  private:
-  mutable MirrorType* root_;
+  RootVisitor* const visitor_;
+  RootInfo root_info_;
+  mirror::CompressedReference<mirror::Object>* roots_[kBufferSize];
+  size_t buffer_pos_;
 };
 
 }  // namespace art
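
For orientation, a minimal sketch of how the new visitor API is consumed; the class and function names below are hypothetical and not part of this change:

// Inspect roots one at a time via SingleRootVisitor (no root updating).
class DumpRootVisitor : public SingleRootVisitor {
 public:
  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
    LOG(INFO) << "root " << root << " " << info;
  }
};

// Batch GcRoot<> visits through BufferedRootVisitor so the virtual VisitRoots()
// call is paid once per 128 roots rather than once per root.
void VisitAllEntries(RootVisitor* visitor, GcRoot<mirror::Object>* entries, size_t count)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootVMInternal));
  for (size_t i = 0; i < count; ++i) {
    buffered_visitor.VisitRootIfNonNull(entries[i]);
  }
  // Any remaining buffered roots are flushed when buffered_visitor goes out of scope.
}
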
diff --git a/runtime/handle.h b/runtime/handle.h
index 6af3220..3ebb2d5 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -70,6 +70,16 @@
     return reinterpret_cast<jobject>(reference_);
   }
 
+  StackReference<mirror::Object>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    return reference_;
+  }
+
+  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reference_;
+  }
+
  protected:
   template<typename S>
   explicit Handle(StackReference<S>* reference)
@@ -80,14 +90,6 @@
       : reference_(handle.reference_) {
   }
 
-  StackReference<mirror::Object>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
-    return reference_;
-  }
-  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return reference_;
-  }
-
   StackReference<mirror::Object>* reference_;
 
  private:
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index f13afe2..d6a6595 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -403,9 +403,9 @@
   JDWP::JdwpNetStateBase* net_state_;
 };
 
-#define __ output->
+#define __ output_->
 
-class Hprof {
+class Hprof : public SingleRootVisitor {
  public:
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
@@ -426,9 +426,11 @@
     size_t max_length;
     {
       EndianOutput count_output;
-      ProcessHeap(&count_output, false);
+      output_ = &count_output;
+      ProcessHeap(false);
       overall_size = count_output.SumLength();
       max_length = count_output.MaxLength();
+      output_ = nullptr;
     }
 
     bool okay;
@@ -451,86 +453,70 @@
   }
 
  private:
-  struct Env {
-    Hprof* hprof;
-    EndianOutput* output;
-  };
-
-  static void RootVisitor(mirror::Object** obj, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(arg != nullptr);
-    DCHECK(obj != nullptr);
-    DCHECK(*obj != nullptr);
-    Env* env = reinterpret_cast<Env*>(arg);
-    env->hprof->VisitRoot(*obj, root_info, env->output);
-  }
-
   static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(obj != nullptr);
     DCHECK(arg != nullptr);
-    Env* env = reinterpret_cast<Env*>(arg);
-    env->hprof->DumpHeapObject(obj, env->output);
+    reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
   }
 
-  void DumpHeapObject(mirror::Object* obj, EndianOutput* output)
+  void DumpHeapObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapClass(mirror::Class* klass, EndianOutput* output)
+  void DumpHeapClass(mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output)
+  void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass, EndianOutput* output)
+  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void ProcessHeap(EndianOutput* output, bool header_first)
+  void ProcessHeap(bool header_first)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Reset current heap and object count.
     current_heap_ = HPROF_HEAP_DEFAULT;
     objects_in_segment_ = 0;
 
     if (header_first) {
-      ProcessHeader(output);
-      ProcessBody(output);
+      ProcessHeader();
+      ProcessBody();
     } else {
-      ProcessBody(output);
-      ProcessHeader(output);
+      ProcessBody();
+      ProcessHeader();
     }
   }
 
-  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Runtime* runtime = Runtime::Current();
+  void ProcessBody() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* const runtime = Runtime::Current();
     // Walk the roots and the heap.
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
 
-    Env env = { this, output };
-    runtime->VisitRoots(RootVisitor, &env);
-    runtime->VisitImageRoots(RootVisitor, &env);
-    runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, &env);
+    runtime->VisitRoots(this);
+    runtime->VisitImageRoots(this);
+    runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, this);
 
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);
-    output->EndRecord();
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);
+    output_->EndRecord();
   }
 
-  void ProcessHeader(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Write the header.
-    WriteFixedHeader(output);
+    WriteFixedHeader();
     // Write the string and class tables, and any stack traces, to the header.
     // (jhat requires that these appear before any of the data in the body that refers to them.)
-    WriteStringTable(output);
-    WriteClassTable(output);
-    WriteStackTraces(output);
-    output->EndRecord();
+    WriteStringTable();
+    WriteClassTable();
+    WriteStackTraces();
+    output_->EndRecord();
   }
 
-  void WriteClassTable(EndianOutput* output) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t nextSerialNumber = 1;
 
     for (mirror::Class* c : classes_) {
       CHECK(c != nullptr);
-      output->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime);
+      output_->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime);
       // LOAD CLASS format:
       // U4: class serial number (always > 0)
       // ID: class object ID. We use the address of the class object structure as its ID.
@@ -543,12 +529,12 @@
     }
   }
 
-  void WriteStringTable(EndianOutput* output) {
+  void WriteStringTable() {
     for (const std::pair<std::string, HprofStringId>& p : strings_) {
       const std::string& string = p.first;
       const size_t id = p.second;
 
-      output->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
+      output_->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
       // STRING format:
       // ID:  ID for this string
@@ -559,24 +545,24 @@
     }
   }
 
-  void StartNewHeapDumpSegment(EndianOutput* output) {
+  void StartNewHeapDumpSegment() {
     // This flushes the old segment and starts a new one.
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
     objects_in_segment_ = 0;
     // Starting a new HEAP_DUMP resets the heap to default.
     current_heap_ = HPROF_HEAP_DEFAULT;
   }
 
-  void CheckHeapSegmentConstraints(EndianOutput* output) {
-    if (objects_in_segment_ >= kMaxObjectsPerSegment || output->Length() >= kMaxBytesPerSegment) {
-      StartNewHeapDumpSegment(output);
+  void CheckHeapSegmentConstraints() {
+    if (objects_in_segment_ >= kMaxObjectsPerSegment || output_->Length() >= kMaxBytesPerSegment) {
+      StartNewHeapDumpSegment();
     }
   }
 
-  void VisitRoot(const mirror::Object* obj, const RootInfo& root_info, EndianOutput* output)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoot(mirror::Object* obj, const RootInfo& root_info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
-                      uint32_t thread_serial, EndianOutput* output);
+                      uint32_t thread_serial);
 
   HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (c != nullptr) {
@@ -611,7 +597,7 @@
     return LookupStringId(PrettyDescriptor(c));
   }
 
-  void WriteFixedHeader(EndianOutput* output) {
+  void WriteFixedHeader() {
     // Write the file header.
     // U1: NUL-terminated magic string.
     const char magic[] = "JAVA PROFILE 1.0.3";
@@ -635,9 +621,9 @@
     __ AddU4(static_cast<uint32_t>(nowMs & 0xFFFFFFFF));
   }
 
-  void WriteStackTraces(EndianOutput* output) {
+  void WriteStackTraces() {
     // Write a dummy stack trace record so the analysis tools don't freak out.
-    output->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime);
     __ AddU4(kHprofNullStackTrace);
     __ AddU4(kHprofNullThread);
     __ AddU4(0);    // no frames
@@ -679,13 +665,15 @@
     bool okay;
     {
       FileEndianOutput file_output(file.get(), max_length);
-      ProcessHeap(&file_output, true);
+      output_ = &file_output;
+      ProcessHeap(true);
       okay = !file_output.Errors();
 
       if (okay) {
         // Check for expected size.
         CHECK_EQ(file_output.SumLength(), overall_size);
       }
+      output_ = nullptr;
     }
 
     if (okay) {
@@ -721,13 +709,15 @@
 
     // Prepare the output and send the chunk header.
     NetStateEndianOutput net_output(net_state, max_length);
+    output_ = &net_output;
     net_output.AddU1List(chunk_header, kChunkHeaderSize);
 
     // Write the dump.
-    ProcessHeap(&net_output, true);
+    ProcessHeap(true);
 
     // Check for expected size.
     CHECK_EQ(net_output.SumLength(), overall_size + kChunkHeaderSize);
+    output_ = nullptr;
 
     return true;
   }
@@ -741,6 +731,8 @@
 
   uint64_t start_ns_;
 
+  EndianOutput* output_;
+
   HprofHeapId current_heap_;  // Which heap we're currently dumping.
   size_t objects_in_segment_;
 
@@ -811,12 +803,12 @@
 // only true when marking the root set or unreachable
 // objects.  Used to add rootset references to obj.
 void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
-                           uint32_t thread_serial, EndianOutput* output) {
+                           uint32_t thread_serial) {
   if (heap_tag == 0) {
     return;
   }
 
-  CheckHeapSegmentConstraints(output);
+  CheckHeapSegmentConstraints();
 
   switch (heap_tag) {
     // ID: object ID
@@ -892,7 +884,7 @@
   return kHprofNullStackTrace;
 }
 
-void Hprof::DumpHeapObject(mirror::Object* obj, EndianOutput* output) {
+void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
   if (obj->IsClass() && obj->AsClass()->IsRetired()) {
     return;
@@ -908,7 +900,7 @@
       heap_type = HPROF_HEAP_IMAGE;
     }
   }
-  CheckHeapSegmentConstraints(output);
+  CheckHeapSegmentConstraints();
 
   if (heap_type != current_heap_) {
     HprofStringId nameId;
@@ -945,18 +937,18 @@
     // allocated which hasn't been initialized yet.
   } else {
     if (obj->IsClass()) {
-      DumpHeapClass(obj->AsClass(), output);
+      DumpHeapClass(obj->AsClass());
     } else if (c->IsArrayClass()) {
-      DumpHeapArray(obj->AsArray(), c, output);
+      DumpHeapArray(obj->AsArray(), c);
     } else {
-      DumpHeapInstanceObject(obj, c, output);
+      DumpHeapInstanceObject(obj, c);
     }
   }
 
   ++objects_in_segment_;
 }
 
-void Hprof::DumpHeapClass(mirror::Class* klass, EndianOutput* output) {
+void Hprof::DumpHeapClass(mirror::Class* klass) {
   size_t sFieldCount = klass->NumStaticFields();
   if (sFieldCount != 0) {
     int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
@@ -1049,7 +1041,7 @@
   }
 }
 
-void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output) {
+void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) {
   uint32_t length = obj->GetLength();
 
   if (obj->IsObjectArray()) {
@@ -1089,8 +1081,7 @@
   }
 }
 
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass,
-                                   EndianOutput* output) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
   // obj is an instance object.
   __ AddU1(HPROF_INSTANCE_DUMP);
   __ AddObjectId(obj);
@@ -1099,7 +1090,7 @@
 
   // Reserve some space for the length of the instance data, which we won't
   // know until we're done writing it.
-  size_t size_patch_offset = output->Length();
+  size_t size_patch_offset = output_->Length();
   __ AddU4(0x77777777);
 
   // Write the instance data;  fields for this class, followed by super class fields,
@@ -1139,10 +1130,10 @@
   }
 
   // Patch the instance field length.
-  __ UpdateU4(size_patch_offset, output->Length() - (size_patch_offset + 4));
+  __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
 }
 
-void Hprof::VisitRoot(const mirror::Object* obj, const RootInfo& info, EndianOutput* output) {
+void Hprof::VisitRoot(mirror::Object* obj, const RootInfo& info) {
   static const HprofHeapTag xlate[] = {
     HPROF_ROOT_UNKNOWN,
     HPROF_ROOT_JNI_GLOBAL,
@@ -1164,7 +1155,7 @@
   if (obj == nullptr) {
     return;
   }
-  MarkRootObject(obj, 0, xlate[info.GetType()], info.GetThreadId(), output);
+  MarkRootObject(obj, 0, xlate[info.GetType()], info.GetThreadId());
 }
 
 // If "direct_to_ddms" is true, the other arguments are ignored, and data is
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 1a3f107..a3aa1de 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -242,16 +242,10 @@
   madvise(release_start, release_end - release_start, MADV_DONTNEED);
 }
 
-void IndirectReferenceTable::VisitRoots(RootCallback* callback, void* arg,
-                                        const RootInfo& root_info) {
+void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  BufferedRootVisitor<128> root_visitor(visitor, root_info);
   for (auto ref : *this) {
-    if (*ref == nullptr) {
-      // Need to skip null entries to make it possible to do the
-      // non-null check after the call back.
-      continue;
-    }
-    callback(ref, arg, root_info);
-    DCHECK(*ref != nullptr);
+    root_visitor.VisitRootIfNonNull(*ref);
   }
 }
 
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 576a604..25b0281 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -218,7 +218,7 @@
   uint32_t serial_;
   GcRoot<mirror::Object> references_[kIRTPrevCount];
 };
-static_assert(sizeof(IrtEntry) == (1 + kIRTPrevCount) * sizeof(uintptr_t),
+static_assert(sizeof(IrtEntry) == (1 + kIRTPrevCount) * sizeof(uint32_t),
               "Unexpected sizeof(IrtEntry)");
 
 class IrtIterator {
@@ -233,9 +233,9 @@
     return *this;
   }
 
-  mirror::Object** operator*() {
+  GcRoot<mirror::Object>* operator*() {
     // This does not have a read barrier as this is used to visit roots.
-    return table_[i_].GetReference()->AddressWithoutBarrier();
+    return table_[i_].GetReference();
   }
 
   bool equals(const IrtIterator& rhs) const {
@@ -320,7 +320,7 @@
     return IrtIterator(table_, Capacity(), Capacity());
   }
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uint32_t GetSegmentState() const {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9adb4ac..dea157a 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1077,13 +1077,14 @@
   }
 }
 
-void Instrumentation::VisitRoots(RootCallback* callback, void* arg) {
+void Instrumentation::VisitRoots(RootVisitor* visitor) {
   WriterMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
   if (IsDeoptimizedMethodsEmpty()) {
     return;
   }
+  BufferedRootVisitor<128> roots(visitor, RootInfo(kRootVMInternal));
   for (auto pair : deoptimized_methods_) {
-    pair.second.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+    roots.VisitRoot(pair.second);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8972f3a..77314c60 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -345,7 +345,7 @@
   void InstallStubsForMethod(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
 
  private:
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 19bfc4e..8e85435 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -53,14 +53,14 @@
   os << "Intern table: " << StrongSize() << " strong; " << WeakSize() << " weak\n";
 }
 
-void InternTable::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+void InternTable::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   if ((flags & kVisitRootFlagAllRoots) != 0) {
-    strong_interns_.VisitRoots(callback, arg);
+    strong_interns_.VisitRoots(visitor);
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_strong_intern_roots_) {
       mirror::String* old_ref = root.Read<kWithoutReadBarrier>();
-      root.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+      root.VisitRoot(visitor, RootInfo(kRootInternedString));
       mirror::String* new_ref = root.Read<kWithoutReadBarrier>();
       if (new_ref != old_ref) {
         // The GC moved a root in the log. Need to search the strong interns and update the
@@ -335,12 +335,13 @@
   post_zygote_table_.Insert(GcRoot<mirror::String>(s));
 }
 
-void InternTable::Table::VisitRoots(RootCallback* callback, void* arg) {
+void InternTable::Table::VisitRoots(RootVisitor* visitor) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootInternedString));
   for (auto& intern : pre_zygote_table_) {
-    intern.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+    buffered_visitor.VisitRoot(intern);
   }
   for (auto& intern : post_zygote_table_) {
-    intern.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+    buffered_visitor.VisitRoot(intern);
   }
 }
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 2e31b7e..200a764 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -80,7 +80,7 @@
   // Total number of weakly live interned strings.
   size_t WeakSize() const LOCKS_EXCLUDED(Locks::intern_table_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os) const;
@@ -125,7 +125,7 @@
     void Remove(mirror::String* s)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-    void VisitRoots(RootCallback* callback, void* arg)
+    void VisitRoots(RootVisitor* visitor)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
     void SweepWeaks(IsMarkedCallback* callback, void* arg)
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 09bfbf3..b795d72 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -748,19 +748,18 @@
 
 void JavaVMExt::SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), weak_globals_lock_);
-  for (mirror::Object** entry : weak_globals_) {
-    // Since this is called by the GC, we don't need a read barrier.
-    mirror::Object* obj = *entry;
-    if (obj == nullptr) {
-      // Need to skip null here to distinguish between null entries
-      // and cleared weak ref entries.
-      continue;
+  Runtime* const runtime = Runtime::Current();
+  for (auto* entry : weak_globals_) {
+    // Need to skip null here to distinguish between null entries and cleared weak ref entries.
+    if (!entry->IsNull()) {
+      // Since this is called by the GC, we don't need a read barrier.
+      mirror::Object* obj = entry->Read<kWithoutReadBarrier>();
+      mirror::Object* new_obj = callback(obj, arg);
+      if (new_obj == nullptr) {
+        new_obj = runtime->GetClearedJniWeakGlobal();
+      }
+      *entry = GcRoot<mirror::Object>(new_obj);
     }
-    mirror::Object* new_obj = callback(obj, arg);
-    if (new_obj == nullptr) {
-      new_obj = Runtime::Current()->GetClearedJniWeakGlobal();
-    }
-    *entry = new_obj;
   }
 }
 
@@ -769,10 +768,10 @@
   globals_.Trim();
 }
 
-void JavaVMExt::VisitRoots(RootCallback* callback, void* arg) {
+void JavaVMExt::VisitRoots(RootVisitor* visitor) {
   Thread* self = Thread::Current();
   ReaderMutexLock mu(self, globals_lock_);
-  globals_.VisitRoots(callback, arg, RootInfo(kRootJNIGlobal));
+  globals_.VisitRoots(visitor, RootInfo(kRootJNIGlobal));
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
 
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 037fbe5..deec6a9 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -103,7 +103,7 @@
 
   bool SetCheckJniEnabled(bool enabled);
 
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DisallowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 7f04992..6452f31 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -196,8 +196,8 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) {
-  array_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+inline void PrimitiveArray<T>::VisitRoots(RootVisitor* visitor) {
+  array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 template<typename T>
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 83e3688..115fcf2 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -166,8 +166,7 @@
     array_class_ = GcRoot<Class>(nullptr);
   }
 
-  static void VisitRoots(RootCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   static GcRoot<Class> array_class_;
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 4c36753..83602d4 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -55,8 +55,8 @@
   SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), num_bytes.Uint32Value());
 }
 
-void ArtField::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_reflect_ArtField_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void ArtField::VisitRoots(RootVisitor* visitor) {
+  java_lang_reflect_ArtField_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 // TODO: we could speed up the search if fields are ordered by offsets.
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index d640165..9d95cb9 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -138,7 +138,7 @@
 
   static void SetClass(Class* java_lang_reflect_ArtField);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index c1f7594..edbbb4a 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -61,8 +61,8 @@
 }
 
 
-void ArtMethod::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_reflect_ArtMethod_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void ArtMethod::VisitRoots(RootVisitor* visitor) {
+  java_lang_reflect_ArtMethod_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 82e5d00..22481ce 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -488,7 +488,7 @@
 
   static void ResetClass();
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 29851a9..8fb8147 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -51,8 +51,8 @@
   java_lang_Class_ = GcRoot<Class>(nullptr);
 }
 
-void Class::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_Class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Class::VisitRoots(RootVisitor* visitor) {
+  java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2dff383..b82a58f 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -971,7 +971,7 @@
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
   static void SetClassClass(Class* java_lang_Class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // When class is verified, set the kAccPreverified flag on each method.
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index 1724682..82cc26e 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -48,9 +48,9 @@
   array_class_ = GcRoot<Class>(nullptr);
 }
 
-void Field::VisitRoots(RootCallback* callback, void* arg) {
-  static_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
-  array_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Field::VisitRoots(RootVisitor* visitor) {
+  static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+  array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 ArtField* Field::GetArtField() {
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index f54340a..cea06f5 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -89,7 +89,7 @@
 
   static void ResetArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Slow, try to use only for PrettyField and such.
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index b63d13d..5edda8b 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -43,6 +43,11 @@
 
   void Clear() {
     reference_ = 0;
+    DCHECK(IsNull());
+  }
+
+  bool IsNull() const {
+    return reference_ == 0;
   }
 
   uint32_t AsVRegValue() const {
@@ -86,6 +91,23 @@
       : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
 };
 
+// Standard compressed reference used in the runtime. Used for StackReference and GC roots.
+template<class MirrorType>
+class MANAGED CompressedReference : public mirror::ObjectReference<false, MirrorType> {
+ public:
+  CompressedReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
+
+  static CompressedReference<MirrorType> FromMirrorPtr(MirrorType* p)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return CompressedReference<MirrorType>(p);
+  }
+
+ private:
+  CompressedReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(p) {}
+};
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.cc b/runtime/mirror/reference.cc
index 35130e8..70bcf92 100644
--- a/runtime/mirror/reference.cc
+++ b/runtime/mirror/reference.cc
@@ -16,6 +16,9 @@
 
 #include "reference.h"
 
+#include "mirror/art_method.h"
+#include "gc_root-inl.h"
+
 namespace art {
 namespace mirror {
 
@@ -32,8 +35,8 @@
   java_lang_ref_Reference_ = GcRoot<Class>(nullptr);
 }
 
-void Reference::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_ref_Reference_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Reference::VisitRoots(RootVisitor* visitor) {
+  java_lang_ref_Reference_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 69ef69c..c11d79d 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -100,7 +100,7 @@
   }
   static void SetClass(Class* klass);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg);
+  static void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   // Note: This avoids a read barrier, it should only be used by the GC.
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index c2a67e8..ec2b495 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -67,8 +67,8 @@
                                  line_number);
 }
 
-void StackTraceElement::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_StackTraceElement_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void StackTraceElement::VisitRoots(RootVisitor* visitor) {
+  java_lang_StackTraceElement_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 70acd1c..dc7131e 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -54,7 +54,7 @@
 
   static void SetClass(Class* java_lang_StackTraceElement);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static Class* GetStackTraceElement() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_StackTraceElement_.IsNull());
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index e7c88c5..bd6a63c 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -253,8 +253,8 @@
   return countDiff;
 }
 
-void String::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_String_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void String::VisitRoots(RootVisitor* visitor) {
+  java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6c22b9b..0670d0b 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -127,7 +127,7 @@
 
   static void SetClass(Class* java_lang_String);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: Make this private. It's only used on ObjectTest at the moment.
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index fdfeb47..b564649 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -144,8 +144,8 @@
   java_lang_Throwable_ = GcRoot<Class>(nullptr);
 }
 
-void Throwable::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_Throwable_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Throwable::VisitRoots(RootVisitor* visitor) {
+  java_lang_Throwable_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index c22475b..9cc0b6f 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -55,7 +55,7 @@
 
   static void SetClass(Class* java_lang_Throwable);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 6e3f1bc..760038a 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -248,13 +248,20 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static void PreloadDexCachesStringsCallback(mirror::Object** root, void* arg,
-                                            const RootInfo& /*root_info*/)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  StringTable& table = *reinterpret_cast<StringTable*>(arg);
-  mirror::String* string = const_cast<mirror::Object*>(*root)->AsString();
-  table[string->ToModifiedUtf8()] = string;
-}
+class PreloadDexCachesStringsVisitor : public SingleRootVisitor {
+ public:
+  explicit PreloadDexCachesStringsVisitor(StringTable* table) : table_(table) {
+  }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::String* string = root->AsString();
+    table_->operator[](string->ToModifiedUtf8()) = string;
+  }
+
+ private:
+  StringTable* const table_;
+};
 
 // Based on ClassLinker::ResolveString.
 static void PreloadDexCachesResolveString(Handle<mirror::DexCache> dex_cache, uint32_t string_idx,
@@ -469,8 +476,8 @@
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
   if (kPreloadDexCachesStrings) {
-    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings,
-                                          kVisitRootFlagAllRoots);
+    PreloadDexCachesStringsVisitor visitor(&strings);
+    runtime->GetInternTable()->VisitRoots(&visitor, kVisitRootFlagAllRoots);
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index c74fded..5631ff4 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -111,6 +111,48 @@
   }
 }
 
+// TODO: Reduce copy paste
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root) {
+  MirrorType* ref = root->AsMirrorPtr();
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    // TODO: separate the read barrier code from the collector code more.
+    if (Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking()) {
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+      auto old_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      auto new_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
+      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+      auto* atomic_root =
+          reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
+      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 inline bool ReadBarrier::IsDuringStartup() {
   gc::Heap* heap = Runtime::Current()->GetHeap();
   if (heap == nullptr) {
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 474b46f..471b37c 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -20,6 +20,7 @@
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "jni.h"
+#include "mirror/object_reference.h"
 #include "offsets.h"
 #include "read_barrier_c.h"
 
@@ -58,6 +59,13 @@
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // The returned reference is always up to date; whether the given root itself
+  // gets updated in place is up to the implementation.
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            bool kMaybeDuringStartup = false>
+  ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static bool IsDuringStartup();
 
   // Without the holder object.
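
A minimal sketch of the intended contract, assuming obj is an already-loaded mirror::Object*:

// The returned pointer is the up-to-date reference; the root itself may or may
// not have been updated in place, depending on the barrier configuration.
mirror::CompressedReference<mirror::Object> root =
    mirror::CompressedReference<mirror::Object>::FromMirrorPtr(obj);
mirror::Object* ref = ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(&root);
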
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 357d454..ac36447 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -237,9 +237,10 @@
   DumpSummaryLine(os, prev, GetElementCount(prev), identical, equiv);
 }
 
-void ReferenceTable::VisitRoots(RootCallback* visitor, void* arg, const RootInfo& root_info) {
+void ReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, root_info);
   for (GcRoot<mirror::Object>& root : entries_) {
-    root.VisitRoot(visitor, arg, root_info);
+    buffered_visitor.VisitRoot(root);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 22cf1cd..94f16b6 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -49,7 +49,8 @@
 
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* visitor, void* arg, const RootInfo& root_info);
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   typedef std::vector<GcRoot<mirror::Object>,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 497123b..1cd0a96 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1291,67 +1291,67 @@
   return ncdfe;
 }
 
-void Runtime::VisitConstantRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitConstantRoots(RootVisitor* visitor) {
   // Visit the classes held as static in mirror classes, these can be visited concurrently and only
   // need to be visited once per GC since they never change.
-  mirror::ArtField::VisitRoots(callback, arg);
-  mirror::ArtMethod::VisitRoots(callback, arg);
-  mirror::Class::VisitRoots(callback, arg);
-  mirror::Reference::VisitRoots(callback, arg);
-  mirror::StackTraceElement::VisitRoots(callback, arg);
-  mirror::String::VisitRoots(callback, arg);
-  mirror::Throwable::VisitRoots(callback, arg);
-  mirror::Field::VisitRoots(callback, arg);
+  mirror::ArtField::VisitRoots(visitor);
+  mirror::ArtMethod::VisitRoots(visitor);
+  mirror::Class::VisitRoots(visitor);
+  mirror::Reference::VisitRoots(visitor);
+  mirror::StackTraceElement::VisitRoots(visitor);
+  mirror::String::VisitRoots(visitor);
+  mirror::Throwable::VisitRoots(visitor);
+  mirror::Field::VisitRoots(visitor);
   // Visit all the primitive array types classes.
-  mirror::PrimitiveArray<uint8_t>::VisitRoots(callback, arg);   // BooleanArray
-  mirror::PrimitiveArray<int8_t>::VisitRoots(callback, arg);    // ByteArray
-  mirror::PrimitiveArray<uint16_t>::VisitRoots(callback, arg);  // CharArray
-  mirror::PrimitiveArray<double>::VisitRoots(callback, arg);    // DoubleArray
-  mirror::PrimitiveArray<float>::VisitRoots(callback, arg);     // FloatArray
-  mirror::PrimitiveArray<int32_t>::VisitRoots(callback, arg);   // IntArray
-  mirror::PrimitiveArray<int64_t>::VisitRoots(callback, arg);   // LongArray
-  mirror::PrimitiveArray<int16_t>::VisitRoots(callback, arg);   // ShortArray
+  mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
+  mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
+  mirror::PrimitiveArray<uint16_t>::VisitRoots(visitor);  // CharArray
+  mirror::PrimitiveArray<double>::VisitRoots(visitor);    // DoubleArray
+  mirror::PrimitiveArray<float>::VisitRoots(visitor);     // FloatArray
+  mirror::PrimitiveArray<int32_t>::VisitRoots(visitor);   // IntArray
+  mirror::PrimitiveArray<int64_t>::VisitRoots(visitor);   // LongArray
+  mirror::PrimitiveArray<int16_t>::VisitRoots(visitor);   // ShortArray
 }
 
-void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  intern_table_->VisitRoots(callback, arg, flags);
-  class_linker_->VisitRoots(callback, arg, flags);
+void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  intern_table_->VisitRoots(visitor, flags);
+  class_linker_->VisitRoots(visitor, flags);
   if ((flags & kVisitRootFlagNewRoots) == 0) {
     // Guaranteed to have no new roots in the constant roots.
-    VisitConstantRoots(callback, arg);
+    VisitConstantRoots(visitor);
   }
 }
 
-void Runtime::VisitTransactionRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitTransactionRoots(RootVisitor* visitor) {
   if (preinitialization_transaction_ != nullptr) {
-    preinitialization_transaction_->VisitRoots(callback, arg);
+    preinitialization_transaction_->VisitRoots(visitor);
   }
 }
 
-void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
-  java_vm_->VisitRoots(callback, arg);
-  sentinel_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  pre_allocated_OutOfMemoryError_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  resolution_method_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  pre_allocated_NoClassDefFoundError_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  imt_conflict_method_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  imt_unimplemented_method_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  default_imt_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+void Runtime::VisitNonThreadRoots(RootVisitor* visitor) {
+  java_vm_->VisitRoots(visitor);
+  sentinel_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  pre_allocated_OutOfMemoryError_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  resolution_method_.VisitRoot(visitor, RootInfo(kRootVMInternal));
+  pre_allocated_NoClassDefFoundError_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  imt_conflict_method_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  imt_unimplemented_method_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  default_imt_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i].VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+    callee_save_methods_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   }
-  verifier::MethodVerifier::VisitStaticRoots(callback, arg);
-  VisitTransactionRoots(callback, arg);
-  instrumentation_.VisitRoots(callback, arg);
+  verifier::MethodVerifier::VisitStaticRoots(visitor);
+  VisitTransactionRoots(visitor);
+  instrumentation_.VisitRoots(visitor);
 }
 
-void Runtime::VisitNonConcurrentRoots(RootCallback* callback, void* arg) {
-  thread_list_->VisitRoots(callback, arg);
-  VisitNonThreadRoots(callback, arg);
+void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor) {
+  thread_list_->VisitRoots(visitor);
+  VisitNonThreadRoots(visitor);
 }
 
-void Runtime::VisitThreadRoots(RootCallback* callback, void* arg) {
-  thread_list_->VisitRoots(callback, arg);
+void Runtime::VisitThreadRoots(RootVisitor* visitor) {
+  thread_list_->VisitRoots(visitor);
 }
 
 size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
@@ -1359,12 +1359,12 @@
   return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector);
 }
 
-void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  VisitNonConcurrentRoots(callback, arg);
-  VisitConcurrentRoots(callback, arg, flags);
+void Runtime::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  VisitNonConcurrentRoots(visitor);
+  VisitConcurrentRoots(visitor, flags);
 }
 
-void Runtime::VisitImageRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitImageRoots(RootVisitor* visitor) {
   for (auto* space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsImageSpace()) {
       auto* image_space = space->AsImageSpace();
@@ -1373,7 +1373,7 @@
         auto* obj = image_header.GetImageRoot(static_cast<ImageHeader::ImageRoot>(i));
         if (obj != nullptr) {
           auto* after_obj = obj;
-          callback(&after_obj, arg, RootInfo(kRootStickyClass));
+          visitor->VisitRoot(&after_obj, RootInfo(kRootStickyClass));
           CHECK_EQ(after_obj, obj);
         }
       }
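
Note on the hunk above: Runtime::VisitImageRoots calls visitor->VisitRoot(&after_obj, ...) on a local copy of each image root and then CHECK_EQs that the copy still equals the original, because image-space objects are never moved. The mirror::Object** contract exists precisely so that a moving collector can write a relocated address back through the slot. The toy program below (not ART code; the forwarding map, Object, and RootInfo types are purely illustrative) shows that write-back in isolation.

#include <cassert>
#include <unordered_map>

// Toy stand-ins; not ART types.
struct Object {};
struct RootInfo {};

// Hypothetical updating visitor: if a root points at an object that has been
// moved, write the new address back through the Object** slot.
class UpdatingVisitor {
 public:
  explicit UpdatingVisitor(const std::unordered_map<Object*, Object*>& forwarding)
      : forwarding_(forwarding) {}

  void VisitRoot(Object** root, const RootInfo& /*info*/) const {
    auto it = forwarding_.find(*root);
    if (it != forwarding_.end()) {
      *root = it->second;  // relocate the root in place
    }
  }

 private:
  const std::unordered_map<Object*, Object*>& forwarding_;
};

int main() {
  Object from, to;
  std::unordered_map<Object*, Object*> forwarding{{&from, &to}};
  UpdatingVisitor visitor(forwarding);

  Object* root = &from;
  visitor.VisitRoot(&root, RootInfo{});
  assert(root == &to);  // the visitor rewrote the slot

  // Image roots, by contrast, are expected not to move, which is why
  // Runtime::VisitImageRoots checks that the slot is unchanged afterwards.
  return 0;
}
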
diff --git a/runtime/runtime.h b/runtime/runtime.h
index af6abbd..baa4d18 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -296,27 +296,27 @@
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
-  void VisitRoots(RootCallback* visitor, void* arg, VisitRootFlags flags = kVisitRootFlagAllRoots)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit image roots, only used for hprof since the GC uses the image space mod union table
   // instead.
-  void VisitImageRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitImageRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the roots that can safely be visited concurrently.
-  void VisitConcurrentRoots(RootCallback* visitor, void* arg,
+  void VisitConcurrentRoots(RootVisitor* visitor,
                             VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the non-thread roots; we can do this with mutators unpaused.
-  void VisitNonThreadRoots(RootCallback* visitor, void* arg)
+  void VisitNonThreadRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitTransactionRoots(RootCallback* visitor, void* arg)
+  void VisitTransactionRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the thread roots.
-  void VisitThreadRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitThreadRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Flip thread roots from from-space refs to to-space refs.
   size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
@@ -324,7 +324,7 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Visit all other roots which must be done with mutators suspended.
-  void VisitNonConcurrentRoots(RootCallback* visitor, void* arg)
+  void VisitNonConcurrentRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweep system weaks; a system weak is deleted if the visitor returns nullptr. Otherwise, the
@@ -334,7 +334,7 @@
 
   // Constant roots are the roots which never change after the runtime is initialized, they only
   // need to be visited once per GC cycle.
-  void VisitConstantRoots(RootCallback* callback, void* arg)
+  void VisitConstantRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
diff --git a/runtime/stack.h b/runtime/stack.h
index aab54ba..fbb0aa4 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -59,19 +59,7 @@
 
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
-class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
- public:
-  StackReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
-
-  static StackReference<MirrorType> FromMirrorPtr(MirrorType* p)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return StackReference<MirrorType>(p);
-  }
-
- private:
-  StackReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : mirror::ObjectReference<false, MirrorType>(p) {}
+class MANAGED StackReference : public mirror::CompressedReference<MirrorType> {
 };
 
 // ShadowFrame has 2 possible layouts:
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8a6422d..79d2b13 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1196,26 +1196,37 @@
   }
 }
 
-static void MonitorExitVisitor(mirror::Object** object, void* arg, const RootInfo& /*root_info*/)
-    NO_THREAD_SAFETY_ANALYSIS {
-  Thread* self = reinterpret_cast<Thread*>(arg);
-  mirror::Object* entered_monitor = *object;
-  if (self->HoldsLock(entered_monitor)) {
-    LOG(WARNING) << "Calling MonitorExit on object "
-                 << object << " (" << PrettyTypeOf(entered_monitor) << ")"
-                 << " left locked by native thread "
-                 << *Thread::Current() << " which is detaching";
-    entered_monitor->MonitorExit(self);
+class MonitorExitVisitor : public SingleRootVisitor {
+ public:
+  explicit MonitorExitVisitor(Thread* self) : self_(self) { }
+
+  // NO_THREAD_SAFETY_ANALYSIS due to MonitorExit.
+  void VisitRoot(mirror::Object* entered_monitor, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    if (self_->HoldsLock(entered_monitor)) {
+      LOG(WARNING) << "Calling MonitorExit on object "
+                   << entered_monitor << " (" << PrettyTypeOf(entered_monitor) << ")"
+                   << " left locked by native thread "
+                   << *Thread::Current() << " which is detaching";
+      entered_monitor->MonitorExit(self_);
+    }
   }
-}
+
+ private:
+  Thread* const self_;
+};
 
 void Thread::Destroy() {
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
 
   if (tlsPtr_.jni_env != nullptr) {
-    // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
-    tlsPtr_.jni_env->monitors.VisitRoots(MonitorExitVisitor, self, RootInfo(kRootVMInternal));
+    {
+      ScopedObjectAccess soa(self);
+      MonitorExitVisitor visitor(self);
+      // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
+      tlsPtr_.jni_env->monitors.VisitRoots(&visitor, RootInfo(kRootVMInternal));
+    }
     // Release locally held global references which releasing may require the mutator lock.
     if (tlsPtr_.jpeer != nullptr) {
       // If pthread_create fails we don't have a jni env here.
@@ -1373,18 +1384,11 @@
   return tlsPtr_.managed_stack.ShadowFramesContain(hs_entry);
 }
 
-void Thread::HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id) {
+void Thread::HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootNativeStack, thread_id));
   for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
-    size_t num_refs = cur->NumberOfReferences();
-    for (size_t j = 0; j < num_refs; ++j) {
-      mirror::Object* object = cur->GetReference(j);
-      if (object != nullptr) {
-        mirror::Object* old_obj = object;
-        visitor(&object, arg, RootInfo(kRootNativeStack, thread_id));
-        if (old_obj != object) {
-          cur->SetReference(j, object);
-        }
-      }
+    for (size_t j = 0, count = cur->NumberOfReferences(); j < count; ++j) {
+      buffered_visitor.VisitRootIfNonNull(cur->GetHandle(j).GetReference());
     }
   }
 }
@@ -2084,7 +2088,7 @@
 template <typename RootVisitor>
 class ReferenceMapVisitor : public StackVisitor {
  public:
-  ReferenceMapVisitor(Thread* thread, Context* context, const RootVisitor& visitor)
+  ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(thread, context), visitor_(visitor) {}
 
@@ -2248,55 +2252,50 @@
   }
 
   // Visitor for when we visit a root.
-  const RootVisitor& visitor_;
+  RootVisitor& visitor_;
 };
 
 class RootCallbackVisitor {
  public:
-  RootCallbackVisitor(RootCallback* callback, void* arg, uint32_t tid)
-     : callback_(callback), arg_(arg), tid_(tid) {}
+  RootCallbackVisitor(RootVisitor* visitor, uint32_t tid) : visitor_(visitor), tid_(tid) {}
 
-  void operator()(mirror::Object** obj, size_t vreg, const StackVisitor* stack_visitor) const {
-    callback_(obj, arg_, JavaFrameRootInfo(tid_, stack_visitor, vreg));
+  void operator()(mirror::Object** obj, size_t vreg, const StackVisitor* stack_visitor) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    visitor_->VisitRoot(obj, JavaFrameRootInfo(tid_, stack_visitor, vreg));
   }
 
  private:
-  RootCallback* const callback_;
-  void* const arg_;
+  RootVisitor* const visitor_;
   const uint32_t tid_;
 };
 
-void Thread::VisitRoots(RootCallback* visitor, void* arg) {
-  uint32_t thread_id = GetThreadId();
-  if (tlsPtr_.opeer != nullptr) {
-    visitor(&tlsPtr_.opeer, arg, RootInfo(kRootThreadObject, thread_id));
-  }
+void Thread::VisitRoots(RootVisitor* visitor) {
+  const uint32_t thread_id = GetThreadId();
+  visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
   if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) {
-    visitor(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception), arg,
-            RootInfo(kRootNativeStack, thread_id));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
+                       RootInfo(kRootNativeStack, thread_id));
   }
-  if (tlsPtr_.monitor_enter_object != nullptr) {
-    visitor(&tlsPtr_.monitor_enter_object, arg, RootInfo(kRootNativeStack, thread_id));
-  }
-  tlsPtr_.jni_env->locals.VisitRoots(visitor, arg, RootInfo(kRootJNILocal, thread_id));
-  tlsPtr_.jni_env->monitors.VisitRoots(visitor, arg, RootInfo(kRootJNIMonitor, thread_id));
-  HandleScopeVisitRoots(visitor, arg, thread_id);
+  visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
+  tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
+  tlsPtr_.jni_env->monitors.VisitRoots(visitor, RootInfo(kRootJNIMonitor, thread_id));
+  HandleScopeVisitRoots(visitor, thread_id);
   if (tlsPtr_.debug_invoke_req != nullptr) {
-    tlsPtr_.debug_invoke_req->VisitRoots(visitor, arg, RootInfo(kRootDebugger, thread_id));
+    tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
   if (tlsPtr_.single_step_control != nullptr) {
-    tlsPtr_.single_step_control->VisitRoots(visitor, arg, RootInfo(kRootDebugger, thread_id));
+    tlsPtr_.single_step_control->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
   if (tlsPtr_.deoptimization_shadow_frame != nullptr) {
-    RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
-    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitorToCallback);
+    RootCallbackVisitor visitor_to_callback(visitor, thread_id);
+    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
     for (ShadowFrame* shadow_frame = tlsPtr_.deoptimization_shadow_frame; shadow_frame != nullptr;
         shadow_frame = shadow_frame->GetLink()) {
       mapper.VisitShadowFrame(shadow_frame);
     }
   }
   if (tlsPtr_.shadow_frame_under_construction != nullptr) {
-    RootCallbackVisitor visitor_to_callback(visitor, arg, thread_id);
+    RootCallbackVisitor visitor_to_callback(visitor, thread_id);
     ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
     for (ShadowFrame* shadow_frame = tlsPtr_.shadow_frame_under_construction;
         shadow_frame != nullptr;
@@ -2305,33 +2304,34 @@
     }
   }
   if (tlsPtr_.method_verifier != nullptr) {
-    tlsPtr_.method_verifier->VisitRoots(visitor, arg, RootInfo(kRootNativeStack, thread_id));
+    tlsPtr_.method_verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
-  RootCallbackVisitor visitor_to_callback(visitor, arg, thread_id);
+  RootCallbackVisitor visitor_to_callback(visitor, thread_id);
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context, visitor_to_callback);
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
-    if (frame.this_object_ != nullptr) {
-      visitor(&frame.this_object_, arg, RootInfo(kRootVMInternal, thread_id));
-    }
-    DCHECK(frame.method_ != nullptr);
-    visitor(reinterpret_cast<mirror::Object**>(&frame.method_), arg,
-            RootInfo(kRootVMInternal, thread_id));
+    visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&frame.method_),
+                       RootInfo(kRootVMInternal, thread_id));
   }
 }
 
-static void VerifyRoot(mirror::Object** root, void* /*arg*/, const RootInfo& /*root_info*/)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  VerifyObject(*root);
-}
+class VerifyRootVisitor : public SingleRootVisitor {
+ public:
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VerifyObject(root);
+  }
+};
 
 void Thread::VerifyStackImpl() {
+  VerifyRootVisitor visitor;
   std::unique_ptr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetThreadId());
-  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
+  RootCallbackVisitor visitor_to_callback(&visitor, GetThreadId());
+  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitor_to_callback);
   mapper.WalkStack();
 }
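
Note on the thread.cc hunks above: the old free-function callbacks (MonitorExitVisitor, VerifyRoot) become classes deriving from SingleRootVisitor and override VisitRoot(mirror::Object*, const RootInfo&). They only inspect each object and never update the slot, so a per-object hook is all they need. SingleRootVisitor itself is declared in runtime/gc_root.h, which this patch does not show; the sketch below is one plausible shape for such an adapter, consistent with the overrides in the hunk but with assumed signatures (the batched VisitRoots entry point is illustrative, and plain C++ virtual/override are used in place of ART's OVERRIDE macro).

#include <cstddef>

// Illustrative stand-ins; not ART's real declarations.
namespace mirror { class Object {}; }
class RootInfo {};

// Batched interface as sketched earlier: the GC hands over arrays of slots.
class RootVisitor {
 public:
  virtual ~RootVisitor() {}
  virtual void VisitRoots(mirror::Object*** roots, size_t count,
                          const RootInfo& info) = 0;
};

// Adapter in the spirit of SingleRootVisitor: subclasses such as
// MonitorExitVisitor or VerifyRootVisitor only need to look at each object,
// so they override a single-object hook and the adapter does the fan-out.
class SingleRootVisitor : public RootVisitor {
 public:
  void VisitRoots(mirror::Object*** roots, size_t count,
                  const RootInfo& info) override {
    for (size_t i = 0; i < count; ++i) {
      VisitRoot(*roots[i], info);  // read-only: the slot is never written back
    }
  }

  // The hook the diff's visitors override.
  virtual void VisitRoot(mirror::Object* root, const RootInfo& info) = 0;
};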
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 9d4d89d..f89e46b 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -485,7 +485,7 @@
       jobjectArray output_array = nullptr, int* stack_depth = nullptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -686,7 +686,7 @@
   // Is the given obj in this thread's stack indirect reference table?
   bool HandleScopeContains(jobject obj) const;
 
-  void HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id)
+  void HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   HandleScope* GetTopHandleScope() {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 1ab0093..560bcc1 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1156,10 +1156,10 @@
   }
 }
 
-void ThreadList::VisitRoots(RootCallback* callback, void* arg) const {
+void ThreadList::VisitRoots(RootVisitor* visitor) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VisitRoots(callback, arg);
+    thread->VisitRoots(visitor);
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index c18e285..fa747b8 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -136,7 +136,7 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
   void Unregister(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg) const
+  void VisitRoots(RootVisitor* visitor) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Return a copy of the thread list.
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 9b205c3..cc0f15f 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -221,24 +221,24 @@
   intern_string_logs_.clear();
 }
 
-void Transaction::VisitRoots(RootCallback* callback, void* arg) {
+void Transaction::VisitRoots(RootVisitor* visitor) {
   MutexLock mu(Thread::Current(), log_lock_);
-  VisitObjectLogs(callback, arg);
-  VisitArrayLogs(callback, arg);
-  VisitStringLogs(callback, arg);
+  VisitObjectLogs(visitor);
+  VisitArrayLogs(visitor);
+  VisitStringLogs(visitor);
 }
 
-void Transaction::VisitObjectLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitObjectLogs(RootVisitor* visitor) {
   // List of moving roots.
   typedef std::pair<mirror::Object*, mirror::Object*> ObjectPair;
   std::list<ObjectPair> moving_roots;
 
   // Visit roots.
   for (auto it : object_logs_) {
-    it.second.VisitRoots(callback, arg);
+    it.second.VisitRoots(visitor);
     mirror::Object* old_root = it.first;
     mirror::Object* new_root = old_root;
-    callback(&new_root, arg, RootInfo(kRootUnknown));
+    visitor->VisitRoot(&new_root, RootInfo(kRootUnknown));
     if (new_root != old_root) {
       moving_roots.push_back(std::make_pair(old_root, new_root));
     }
@@ -256,7 +256,7 @@
   }
 }
 
-void Transaction::VisitArrayLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitArrayLogs(RootVisitor* visitor) {
   // List of moving roots.
   typedef std::pair<mirror::Array*, mirror::Array*> ArrayPair;
   std::list<ArrayPair> moving_roots;
@@ -265,7 +265,7 @@
     mirror::Array* old_root = it.first;
     CHECK(!old_root->IsObjectArray());
     mirror::Array* new_root = old_root;
-    callback(reinterpret_cast<mirror::Object**>(&new_root), arg, RootInfo(kRootUnknown));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&new_root), RootInfo(kRootUnknown));
     if (new_root != old_root) {
       moving_roots.push_back(std::make_pair(old_root, new_root));
     }
@@ -283,9 +283,9 @@
   }
 }
 
-void Transaction::VisitStringLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitStringLogs(RootVisitor* visitor) {
   for (InternStringLog& log : intern_string_logs_) {
-    log.VisitRoots(callback, arg);
+    log.VisitRoots(visitor);
   }
 }
 
@@ -421,16 +421,12 @@
   }
 }
 
-void Transaction::ObjectLog::VisitRoots(RootCallback* callback, void* arg) {
+void Transaction::ObjectLog::VisitRoots(RootVisitor* visitor) {
   for (auto it : field_values_) {
     FieldValue& field_value = it.second;
     if (field_value.kind == ObjectLog::kReference) {
-      mirror::Object* obj =
-          reinterpret_cast<mirror::Object*>(static_cast<uintptr_t>(field_value.value));
-      if (obj != nullptr) {
-        callback(&obj, arg, RootInfo(kRootUnknown));
-        field_value.value = reinterpret_cast<uintptr_t>(obj);
-      }
+      visitor->VisitRootIfNonNull(reinterpret_cast<mirror::Object**>(&field_value.value),
+                                  RootInfo(kRootUnknown));
     }
   }
 }
@@ -472,8 +468,8 @@
   }
 }
 
-void Transaction::InternStringLog::VisitRoots(RootCallback* callback, void* arg) {
-  callback(reinterpret_cast<mirror::Object**>(&str_), arg, RootInfo(kRootInternedString));
+void Transaction::InternStringLog::VisitRoots(RootVisitor* visitor) {
+  visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&str_), RootInfo(kRootInternedString));
 }
 
 void Transaction::ArrayLog::LogValue(size_t index, uint64_t value) {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 1419a38..4d85662 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -100,7 +100,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(log_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg)
+  void VisitRoots(RootVisitor* visitor)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -116,7 +116,7 @@
     void LogReferenceValue(MemberOffset offset, mirror::Object* obj, bool is_volatile);
 
     void Undo(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    void VisitRoots(RootCallback* callback, void* arg);
+    void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
     size_t Size() const {
       return field_values_.size();
@@ -184,7 +184,7 @@
     void Undo(InternTable* intern_table)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-    void VisitRoots(RootCallback* callback, void* arg);
+    void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
    private:
     mirror::String* str_;
@@ -207,13 +207,13 @@
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitObjectLogs(RootCallback* callback, void* arg)
+  void VisitObjectLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitArrayLogs(RootCallback* callback, void* arg)
+  void VisitArrayLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitStringLogs(RootCallback* callback, void* arg)
+  void VisitStringLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1d04192..c6db7e5 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -4351,12 +4351,12 @@
   verifier::RegTypeCache::ShutDown();
 }
 
-void MethodVerifier::VisitStaticRoots(RootCallback* callback, void* arg) {
-  RegTypeCache::VisitStaticRoots(callback, arg);
+void MethodVerifier::VisitStaticRoots(RootVisitor* visitor) {
+  RegTypeCache::VisitStaticRoots(visitor);
 }
 
-void MethodVerifier::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  reg_types_.VisitRoots(callback, arg, root_info);
+void MethodVerifier::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  reg_types_.VisitRoots(visitor, root_info);
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 6b813ef..c813634 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -225,9 +225,9 @@
   // Describe VRegs at the given dex pc.
   std::vector<int32_t> DescribeVRegs(uint32_t dex_pc);
 
-  static void VisitStaticRoots(RootCallback* callback, void* arg)
+  static void VisitStaticRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& roots)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& roots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Accessors used by the compiler via CompilerCallback
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 97d0cbe..c8aa4fd 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -778,8 +778,8 @@
   }
 }
 
-void RegType::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const {
-  klass_.VisitRootIfNonNull(callback, arg, root_info);
+void RegType::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) const {
+  klass_.VisitRootIfNonNull(visitor, root_info);
 }
 
 void UninitializedThisReferenceType::CheckInvariants() const {
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index d260650..e4d2c3e 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -262,7 +262,7 @@
 
   virtual ~RegType() {}
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 6e57857..b371d7e 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -557,33 +557,33 @@
   }
 }
 
-void RegTypeCache::VisitStaticRoots(RootCallback* callback, void* arg) {
+void RegTypeCache::VisitStaticRoots(RootVisitor* visitor) {
   // Visit the primitive types; this is required since, if there are no active verifiers, they won't
   // be in the entries array and therefore will not be visited as roots.
   if (primitive_initialized_) {
     RootInfo ri(kRootUnknown);
-    UndefinedType::GetInstance()->VisitRoots(callback, arg, ri);
-    ConflictType::GetInstance()->VisitRoots(callback, arg, ri);
-    BooleanType::GetInstance()->VisitRoots(callback, arg, ri);
-    ByteType::GetInstance()->VisitRoots(callback, arg, ri);
-    ShortType::GetInstance()->VisitRoots(callback, arg, ri);
-    CharType::GetInstance()->VisitRoots(callback, arg, ri);
-    IntegerType::GetInstance()->VisitRoots(callback, arg, ri);
-    LongLoType::GetInstance()->VisitRoots(callback, arg, ri);
-    LongHiType::GetInstance()->VisitRoots(callback, arg, ri);
-    FloatType::GetInstance()->VisitRoots(callback, arg, ri);
-    DoubleLoType::GetInstance()->VisitRoots(callback, arg, ri);
-    DoubleHiType::GetInstance()->VisitRoots(callback, arg, ri);
+    UndefinedType::GetInstance()->VisitRoots(visitor, ri);
+    ConflictType::GetInstance()->VisitRoots(visitor, ri);
+    BooleanType::GetInstance()->VisitRoots(visitor, ri);
+    ByteType::GetInstance()->VisitRoots(visitor, ri);
+    ShortType::GetInstance()->VisitRoots(visitor, ri);
+    CharType::GetInstance()->VisitRoots(visitor, ri);
+    IntegerType::GetInstance()->VisitRoots(visitor, ri);
+    LongLoType::GetInstance()->VisitRoots(visitor, ri);
+    LongHiType::GetInstance()->VisitRoots(visitor, ri);
+    FloatType::GetInstance()->VisitRoots(visitor, ri);
+    DoubleLoType::GetInstance()->VisitRoots(visitor, ri);
+    DoubleHiType::GetInstance()->VisitRoots(visitor, ri);
     for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
-      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(callback, arg, ri);
+      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(visitor, ri);
     }
   }
 }
 
-void RegTypeCache::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
+void RegTypeCache::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
   // Exclude the static roots that are visited by VisitStaticRoots().
   for (size_t i = primitive_count_; i < entries_.size(); ++i) {
-    entries_[i]->VisitRoots(callback, arg, root_info);
+    entries_[i]->VisitRoots(visitor, root_info);
   }
 }
 
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 01032a0..4b3105c 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -137,9 +137,9 @@
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void VisitStaticRoots(RootCallback* callback, void* arg)
+  static void VisitStaticRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private: