Refactor and improve GC root handling

Changed GcRoot to use compressed references. Changed root visiting to
use virtual functions instead of function pointers. Changed the root
visiting interface to take an array of roots instead of a single root
at a time. Added a buffered root marking helper to avoid virtual
dispatch overhead.
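
In outline, the interface change looks like this (a simplified sketch
drawn from the declarations added to runtime/gc_root.h and the usage
added to ClassLinker::VisitClassRoots below):

    // Old interface: one indirect call per root.
    typedef void (RootCallback)(mirror::Object** root, void* arg,
                                const RootInfo& root_info);

    // New interface: one virtual call per batch of roots.
    class RootVisitor {
     public:
      virtual ~RootVisitor() { }
      virtual void VisitRoots(mirror::Object*** roots, size_t count,
                              const RootInfo& info) = 0;
      virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                              size_t count, const RootInfo& info) = 0;
    };

    // Call sites with many roots batch them through BufferedRootVisitor,
    // which hands up to kBufferSize roots to the visitor per virtual call:
    BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootStickyClass));
    for (GcRoot<mirror::Class>& root : class_table_) {
      buffered_visitor.VisitRoot(root);
    }
    // Any roots still buffered are flushed by the destructor.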

Root marking seems a bit faster on EvaluateAndApplyChanges due to batch
marking. Pause times unaffected.

Mips64 is untested but might work.

Before:
MarkConcurrentRoots: Sum: 67.678ms 99% C.I. 2us-664.999us Avg: 161.138us Max: 671us

After:
MarkConcurrentRoots: Sum: 54.806ms 99% C.I. 2us-499.986us Avg: 136.333us Max: 602us
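
The entrypoint changes below shrink the callee-save method loads from
pointer-width to 32-bit loads (ldr x -> ldr w, ld -> lwu, movq -> movl)
because the Runtime::callee_save_methods_ slots now hold 4-byte
compressed references (see the COMPRESSED_REFERENCE_SIZE checks added
to asm_support.h). A minimal sketch of the idea, assuming heap
references fit in 32 bits; this is an illustration, not the actual
mirror::CompressedReference implementation:

    #include <cstdint>

    // Hypothetical simplification: a heap reference stored as a 32-bit
    // value regardless of pointer width (valid while the managed heap
    // is mapped in the low 4GB of the address space).
    template <class MirrorType>
    class CompressedReferenceSketch {
     public:
      MirrorType* AsMirrorPtr() const {
        return reinterpret_cast<MirrorType*>(static_cast<uintptr_t>(reference_));
      }
      void Assign(MirrorType* other) {
        reference_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(other));
      }
      bool IsNull() const { return reference_ == 0; }
     private:
      uint32_t reference_;  // COMPRESSED_REFERENCE_SIZE == 4 (asm_support.h).
    };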

Bug: 19264997

Change-Id: I0a71ebb5928f205b9b3f7945b25db6489d5657ca
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ff57603..b4de879 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -32,7 +32,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveAll]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
@@ -97,7 +97,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsOnly]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #96
     .cfi_adjust_cfa_offset 96
@@ -266,7 +266,7 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr wIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 697bf00..3d502e6 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -77,7 +77,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -120,7 +120,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -237,7 +237,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -248,7 +248,7 @@
     ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $v0, 0($v0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld      $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0)
+    lwu     $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0)
     sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 3a448a5..ce21f01 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -67,7 +67,7 @@
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -110,7 +110,7 @@
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -170,7 +170,7 @@
     CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
     THIS_LOAD_REQUIRES_READ_BARRIER
-    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10d
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 0d0017d..dba4af8 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -57,6 +57,11 @@
 #define STACK_REFERENCE_SIZE 4
 ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
 
+// Size of heap references
+#define COMPRESSED_REFERENCE_SIZE 4
+ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
+            sizeof(art::mirror::CompressedReference<art::mirror::Object>))
+
 // Note: these callee save method loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
@@ -64,12 +69,12 @@
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
 
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET __SIZEOF_POINTER__
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET COMPRESSED_REFERENCE_SIZE
 ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
 
 // Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * __SIZEOF_POINTER__)
+#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * COMPRESSED_REFERENCE_SIZE)
 ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
             art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cdd8e73..33d75d2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -242,7 +242,10 @@
       quick_generic_jni_trampoline_(nullptr),
       quick_to_interpreter_bridge_trampoline_(nullptr),
       image_pointer_size_(sizeof(void*)) {
-  memset(find_array_class_cache_, 0, kFindArrayCacheSize * sizeof(mirror::Class*));
+  CHECK(intern_table_ != nullptr);
+  for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
+    find_array_class_cache_[i] = GcRoot<mirror::Class>(nullptr);
+  }
 }
 
 void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
@@ -908,19 +911,20 @@
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
 }
 
-void ClassLinker::VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   if ((flags & kVisitRootFlagAllRoots) != 0) {
+    BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootStickyClass));
     for (GcRoot<mirror::Class>& root : class_table_) {
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      buffered_visitor.VisitRoot(root);
     }
     for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      buffered_visitor.VisitRoot(root);
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
-      root.VisitRoot(callback, arg, RootInfo(kRootStickyClass));
+      root.VisitRoot(visitor, RootInfo(kRootStickyClass));
       mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
       if (UNLIKELY(new_ref != old_ref)) {
        // Uh oh, GC moved a root in the log. Need to search the class_table and update the
@@ -947,18 +951,18 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  class_roots_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  Thread* self = Thread::Current();
+void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  class_roots_.VisitRoot(visitor, RootInfo(kRootVMInternal));
+  Thread* const self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
     if ((flags & kVisitRootFlagAllRoots) != 0) {
       for (GcRoot<mirror::DexCache>& dex_cache : dex_caches_) {
-        dex_cache.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+        dex_cache.VisitRoot(visitor, RootInfo(kRootVMInternal));
       }
     } else if ((flags & kVisitRootFlagNewRoots) != 0) {
       for (size_t index : new_dex_cache_roots_) {
-        dex_caches_[index].VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+        dex_caches_[index].VisitRoot(visitor, RootInfo(kRootVMInternal));
       }
     }
     if ((flags & kVisitRootFlagClearRootLog) != 0) {
@@ -970,11 +974,10 @@
       log_new_dex_caches_roots_ = false;
     }
   }
-  VisitClassRoots(callback, arg, flags);
-  array_iftable_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  DCHECK(!array_iftable_.IsNull());
+  VisitClassRoots(visitor, flags);
+  array_iftable_.VisitRoot(visitor, RootInfo(kRootVMInternal));
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
-    find_array_class_cache_[i].VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+    find_array_class_cache_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   }
 }
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 69a5337..577fec2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -299,10 +299,10 @@
   void VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3e727e7..3f6c5a0 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -358,7 +358,8 @@
       const char* descriptor = dex.GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
-    class_linker_->VisitRoots(TestRootVisitor, nullptr, kVisitRootFlagAllRoots);
+    TestRootVisitor visitor;
+    class_linker_->VisitRoots(&visitor, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
     mirror::DexCache* dex_cache = class_linker_->FindDexCache(dex);
     mirror::ObjectArray<mirror::ArtMethod>* resolved_methods = dex_cache->GetResolvedMethods();
@@ -367,9 +368,12 @@
     }
   }
 
-  static void TestRootVisitor(mirror::Object** root, void*, const RootInfo&) {
-    EXPECT_TRUE(*root != nullptr);
-  }
+  class TestRootVisitor : public SingleRootVisitor {
+   public:
+    void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE {
+      EXPECT_TRUE(root != nullptr);
+    }
+  };
 };
 
 struct CheckOffset {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a767cf0..3f67f9e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -344,16 +344,14 @@
 // Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
 
-void DebugInvokeReq::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  receiver.VisitRootIfNonNull(callback, arg, root_info);  // null for static method call.
-  klass.VisitRoot(callback, arg, root_info);
-  method.VisitRoot(callback, arg, root_info);
+void DebugInvokeReq::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  receiver.VisitRootIfNonNull(visitor, root_info);  // null for static method call.
+  klass.VisitRoot(visitor, root_info);
+  method.VisitRoot(visitor, root_info);
 }
 
-void SingleStepControl::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  if (method_ != nullptr) {
-    callback(reinterpret_cast<mirror::Object**>(&method_), arg, root_info);
-  }
+void SingleStepControl::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  visitor->VisitRootIfNonNull(reinterpret_cast<mirror::Object**>(&method_), root_info);
 }
 
 void SingleStepControl::AddDexPc(uint32_t dex_pc) {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 4f4a781..62eda62 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -81,7 +81,7 @@
   Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable cond GUARDED_BY(lock);
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
@@ -117,7 +117,7 @@
     return dex_pcs_;
   }
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AddDexPc(uint32_t dex_pc);
@@ -648,7 +648,7 @@
   static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index db7a4ef..56919bd 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -174,7 +174,7 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(ConcurrentCopying::ProcessRootCallback, concurrent_copying_);
+    thread->VisitRoots(concurrent_copying_);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
@@ -208,7 +208,7 @@
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
-      Runtime::Current()->VisitTransactionRoots(ConcurrentCopying::ProcessRootCallback, cc);
+      Runtime::Current()->VisitTransactionRoots(cc);
     }
   }
 
@@ -332,22 +332,20 @@
   }
   {
     TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitConstantRoots(this);
   }
   {
     TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
-    Runtime::Current()->VisitNonThreadRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitNonThreadRoots(this);
   }
 
   // Immune spaces.
@@ -486,7 +484,7 @@
 
 // The following visitors are used to verify that there are no
 // references to the from-space left after marking.
-class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor {
+class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
@@ -516,16 +514,14 @@
     }
   }
 
-  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector);
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != nullptr);
-    visitor(*root);
+    operator()(root);
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor {
@@ -594,8 +590,8 @@
   // Roots.
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    Runtime::Current()->VisitRoots(
-        ConcurrentCopyingVerifyNoFromSpaceRefsVisitor::RootCallback, this);
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    Runtime::Current()->VisitRoots(&ref_visitor);
   }
   // The to-space.
   region_space_->WalkToSpace(ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor::ObjectCallback,
@@ -1087,11 +1083,6 @@
   }
 }
 
-void ConcurrentCopying::ProcessRootCallback(mirror::Object** root, void* arg,
-                                            const RootInfo& /*root_info*/) {
-  reinterpret_cast<ConcurrentCopying*>(arg)->Process(root);
-}
-
 // Used to scan ref fields of an object.
 class ConcurrentCopyingRefFieldsVisitor {
  public:
@@ -1144,25 +1135,54 @@
       offset, expected_ref, new_ref));
 }
 
-// Process a root.
-void ConcurrentCopying::Process(mirror::Object** root) {
-  mirror::Object* ref = *root;
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
-  mirror::Object* to_ref = Mark(ref);
-  if (to_ref == ref) {
-    return;
-  }
-  Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-  mirror::Object* expected_ref = ref;
-  mirror::Object* new_ref = to_ref;
-  do {
-    if (expected_ref != addr->LoadRelaxed()) {
-      // It was updated by the mutator.
-      break;
+// Process some roots.
+void ConcurrentCopying::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::Object** root = roots[i];
+    mirror::Object* ref = *root;
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
     }
-  } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+    mirror::Object* expected_ref = ref;
+    mirror::Object* new_ref = to_ref;
+    do {
+      if (expected_ref != addr->LoadRelaxed()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
+}
+
+void ConcurrentCopying::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::CompressedReference<mirror::Object>* root = roots[i];
+    mirror::Object* ref = root->AsMirrorPtr();
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
+    }
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
+    auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
+    auto new_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(to_ref);
+    do {
+      if (ref != addr->LoadRelaxed().AsMirrorPtr()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
 }
 
 // Fill the given memory block with a dummy object. Used to fill in a
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index bbb551a..a87053d 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -192,9 +192,11 @@
   void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Process(mirror::Object** root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void ProcessRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index ed5207a..c5a8d5d 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -22,6 +22,7 @@
 #include "base/timing_logger.h"
 #include "gc/collector_type.h"
 #include "gc/gc_cause.h"
+#include "gc_root.h"
 #include "gc_type.h"
 #include <stdint.h>
 #include <vector>
@@ -112,7 +113,7 @@
   DISALLOW_COPY_AND_ASSIGN(Iteration);
 };
 
-class GarbageCollector {
+class GarbageCollector : public RootVisitor {
  public:
   class SCOPED_LOCKABLE ScopedPause {
    public:
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index d1ce0bc..8902df8 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -309,19 +309,57 @@
   reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void MarkCompact::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkCompact*>(arg)->MarkObject(*root);
-}
-
-void MarkCompact::UpdateRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  mirror::Object* obj = *root;
-  mirror::Object* new_obj = reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
-  if (obj != new_obj) {
-    *root = new_obj;
-    DCHECK(new_obj != nullptr);
+void MarkCompact::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(*roots[i]);
   }
 }
 
+void MarkCompact::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]->AsMirrorPtr());
+  }
+}
+
+class UpdateRootVisitor : public RootVisitor {
+ public:
+  explicit UpdateRootVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = *roots[i];
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        *roots[i] = new_obj;
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = roots[i]->AsMirrorPtr();
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        roots[i]->Assign(new_obj);
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
 class UpdateObjectReferencesVisitor {
  public:
   explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
@@ -339,7 +377,8 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime* runtime = Runtime::Current();
   // Update roots.
-  runtime->VisitRoots(UpdateRootCallback, this);
+  UpdateRootVisitor update_root_visitor(this);
+  runtime->VisitRoots(&update_root_visitor);
   // Update object references in mod union tables and spaces.
   for (const auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
@@ -396,7 +435,7 @@
 // Marks all objects in the root set.
 void MarkCompact::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 06304bf..4337644 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -114,8 +114,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -245,6 +249,8 @@
   friend class MoveObjectVisitor;
   friend class UpdateObjectReferencesVisitor;
   friend class UpdateReferenceVisitor;
+  friend class UpdateRootVisitor;
+
   DISALLOW_COPY_AND_ASSIGN(MarkCompact);
 };
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index ee4e752..79d1034 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -462,42 +462,66 @@
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
+class VerifyRootMarkedVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootMarkedVisitor(MarkSweep* collector) : collector_(collector) { }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    CHECK(collector_->IsMarked(root)) << info.ToString();
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
+
+void MarkSweep::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(*roots[i]);
+  }
 }
 
-void MarkSweep::VerifyRootMarked(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  CHECK(reinterpret_cast<MarkSweep*>(arg)->IsMarked(*root));
+void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(roots[i]->AsMirrorPtr());
+  }
 }
 
-void MarkSweep::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(*root);
-}
+class VerifyRootVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootVisitor(MarkSweep* collector) : collector_(collector) { }
 
-void MarkSweep::VerifyRootCallback(Object** root, void* arg, const RootInfo& root_info) {
-  reinterpret_cast<MarkSweep*>(arg)->VerifyRoot(*root, root_info);
-}
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->VerifyRoot(root, info);
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
 
 void MarkSweep::VerifyRoot(const Object* root, const RootInfo& root_info) {
   // See if the root is on any space bitmap.
   if (heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
     if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-      LOG(ERROR) << "Found invalid root: " << root << " ";
-      root_info.Describe(LOG(ERROR));
+      LOG(ERROR) << "Found invalid root: " << root << " " << root_info;
     }
   }
 }
 
 void MarkSweep::VerifyRoots() {
-  Runtime::Current()->GetThreadList()->VisitRoots(VerifyRootCallback, this);
+  VerifyRootVisitor visitor(this);
+  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
-    Runtime::Current()->VisitRoots(MarkRootCallback, this);
+    Runtime::Current()->VisitRoots(this);
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
     MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
@@ -510,13 +534,13 @@
 
 void MarkSweep::MarkNonThreadRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitNonThreadRoots(this);
 }
 
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
+  Runtime::Current()->VisitConcurrentRoots(this, flags);
 }
 
 class ScanObjectVisitor {
@@ -932,13 +956,12 @@
 void MarkSweep::ReMarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  Runtime::Current()->VisitRoots(
-      MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
-                                                          kVisitRootFlagStopLoggingNewRoots |
-                                                          kVisitRootFlagClearRootLog));
+  Runtime::Current()->VisitRoots(this, static_cast<VisitRootFlags>(
+      kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots | kVisitRootFlagClearRootLog));
   if (kVerifyRootsMarked) {
     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
-    Runtime::Current()->VisitRoots(VerifyRootMarked, this);
+    VerifyRootMarkedVisitor visitor(this);
+    Runtime::Current()->VisitRoots(&visitor);
   }
 }
 
@@ -968,7 +991,7 @@
   Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
-class CheckpointMarkThreadRoots : public Closure {
+class CheckpointMarkThreadRoots : public Closure, public RootVisitor {
  public:
   explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep,
                                      bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
@@ -977,13 +1000,30 @@
             revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
   }
 
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(*roots[i]);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(roots[i]->AsMirrorPtr());
+    }
+  }
+
   virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
     ATRACE_BEGIN("Marking thread roots");
     // Note: self is not necessarily equal to thread since thread may be suspended.
-    Thread* self = Thread::Current();
+    Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
+    thread->VisitRoots(this);
     ATRACE_END();
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
       ATRACE_BEGIN("RevokeRosAllocThreadLocalBuffers");
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3f99e21..31cea17 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -185,11 +185,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void VerifyRootMarked(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -197,9 +198,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void MarkRootParallelCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object.
   void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -250,9 +248,8 @@
   // whether or not we care about pauses.
   size_t GetThreadCount(bool paused) const;
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info);
-
-  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Push a single reference on a mark stack.
   void PushOnMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -326,18 +323,21 @@
   friend class CardScanTask;
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
+  friend class CheckpointMarkThreadRoots;
   friend class art::gc::Heap;
+  friend class FifoMarkStackChunk;
   friend class MarkObjectVisitor;
+  template<bool kUseFinger> friend class MarkStackTask;
+  friend class MarkSweepMarkObjectSlowPath;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
-  friend class ModUnionVisitor;
+  friend class ModUnionScanImageRootVisitor;
   friend class ModUnionTableBitmap;
   friend class ModUnionTableReferenceCache;
-  friend class ModUnionScanImageRootVisitor;
-  template<bool kUseFinger> friend class MarkStackTask;
-  friend class FifoMarkStackChunk;
-  friend class MarkSweepMarkObjectSlowPath;
+  friend class ModUnionVisitor;
+  friend class VerifyRootMarkedVisitor;
+  friend class VerifyRootVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(MarkSweep);
 };
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index b3d59f2..dbf01d8 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -603,18 +603,29 @@
   reinterpret_cast<SemiSpace*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void SemiSpace::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
-  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
-  if (*root != ref.AsMirrorPtr()) {
-    *root = ref.AsMirrorPtr();
+void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    auto* root = roots[i];
+    auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
+    MarkObject(&ref);
+    if (*root != ref.AsMirrorPtr()) {
+      *root = ref.AsMirrorPtr();
+    }
+  }
+}
+
+void SemiSpace::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]);
   }
 }
 
 // Marks all objects in the root set.
 void SemiSpace::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 192fb14..61fbead 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -98,7 +98,7 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Returns the new address of the object.
+  // Updates obj_ptr if the object has moved.
   template<bool kPoisonReferences>
   void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -133,8 +133,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index be7344a..d80bba6 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2395,13 +2395,21 @@
   gc_complete_cond_->Broadcast(self);
 }
 
-static void RootMatchesObjectVisitor(mirror::Object** root, void* arg,
-                                     const RootInfo& /*root_info*/) {
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
-  if (*root == obj) {
-    LOG(INFO) << "Object " << obj << " is a root";
+class RootMatchesObjectVisitor : public SingleRootVisitor {
+ public:
+  explicit RootMatchesObjectVisitor(const mirror::Object* obj) : obj_(obj) { }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (root == obj_) {
+      LOG(INFO) << "Object " << obj_ << " is a root " << info.ToString();
+    }
   }
-}
+
+ private:
+  const mirror::Object* const obj_;
+};
+
 
 class ScanVisitor {
  public:
@@ -2411,7 +2419,7 @@
 };
 
 // Verify a reference from an object.
-class VerifyReferenceVisitor {
+class VerifyReferenceVisitor : public SingleRootVisitor {
  public:
   explicit VerifyReferenceVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
@@ -2438,11 +2446,12 @@
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  void VisitRoot(mirror::Object* root, const RootInfo& root_info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    if (!visitor->VerifyReference(nullptr, *root, MemberOffset(0))) {
-      LOG(ERROR) << "Root " << *root << " is dead with type " << PrettyTypeOf(*root)
+    if (root == nullptr) {
+      LOG(ERROR) << "Root is null with info " << root_info.GetType();
+    } else if (!VerifyReference(nullptr, root, MemberOffset(0))) {
+      LOG(ERROR) << "Root " << root << " is dead with type " << PrettyTypeOf(root)
           << " thread_id= " << root_info.GetThreadId() << " root_type= " << root_info.GetType();
     }
   }
@@ -2534,12 +2543,11 @@
       }
 
       // Search to see if any of the roots reference our object.
-      void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
-
+      RootMatchesObjectVisitor visitor1(obj);
+      Runtime::Current()->VisitRoots(&visitor1);
       // Search to see if any of the roots reference our reference.
-      arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
+      RootMatchesObjectVisitor visitor2(ref);
+      Runtime::Current()->VisitRoots(&visitor2);
     }
     return false;
   }
@@ -2571,6 +2579,13 @@
     visitor->operator()(obj);
   }
 
+  void VerifyRoots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    VerifyReferenceVisitor visitor(heap_, fail_count_, verify_referent_);
+    Runtime::Current()->VisitRoots(&visitor);
+  }
+
   size_t GetFailureCount() const {
     return fail_count_->LoadSequentiallyConsistent();
   }
@@ -2637,7 +2652,7 @@
   // pointing to dead objects if they are not reachable.
   VisitObjectsPaused(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRootCallback, &visitor);
+  visitor.VerifyRoots();
   if (visitor.GetFailureCount() > 0) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
diff --git a/runtime/gc_root-inl.h b/runtime/gc_root-inl.h
index a42ec08..57d5689 100644
--- a/runtime/gc_root-inl.h
+++ b/runtime/gc_root-inl.h
@@ -19,6 +19,8 @@
 
 #include "gc_root.h"
 
+#include <sstream>
+
 #include "read_barrier-inl.h"
 
 namespace art {
@@ -26,7 +28,17 @@
 template<class MirrorType>
 template<ReadBarrierOption kReadBarrierOption>
 inline MirrorType* GcRoot<MirrorType>::Read() const {
-  return ReadBarrier::BarrierForRoot<MirrorType, kReadBarrierOption>(&root_);
+  return down_cast<MirrorType*>(
+      ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&root_));
+}
+template<class MirrorType>
+inline GcRoot<MirrorType>::GcRoot(MirrorType* ref)
+    : root_(mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref)) { }
+
+inline std::string RootInfo::ToString() const {
+  std::ostringstream oss;
+  Describe(oss);
+  return oss.str();
 }
 
 }  // namespace art
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index c5feda5..4164bbd 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -19,6 +19,7 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"       // For Locks::mutator_lock_.
+#include "mirror/object_reference.h"
 
 namespace art {
 
@@ -26,6 +27,9 @@
 class Object;
 }  // namespace mirror
 
+template <size_t kBufferSize>
+class BufferedRootVisitor;
+
 enum RootType {
   kRootUnknown = 0,
   kRootJNIGlobal,
@@ -43,6 +47,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const RootType& root_type);
 
+// tid and root_type are only used by hprof.
 class RootInfo {
  public:
   // Thread id 0 is for non thread roots.
@@ -60,15 +65,64 @@
   virtual void Describe(std::ostream& os) const {
     os << "Type=" << type_ << " thread_id=" << thread_id_;
   }
+  std::string ToString() const;
 
  private:
   const RootType type_;
   const uint32_t thread_id_;
 };
 
-// Returns the new address of the object, returns root if it has not moved. tid and root_type are
-// only used by hprof.
-typedef void (RootCallback)(mirror::Object** root, void* arg, const RootInfo& root_info);
+inline std::ostream& operator<<(std::ostream& os, const RootInfo& root_info) {
+  root_info.Describe(os);
+  return os;
+}
+
+class RootVisitor {
+ public:
+  virtual ~RootVisitor() { }
+
+  // Single root versions, not overridable.
+  ALWAYS_INLINE void VisitRoot(mirror::Object** roots, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VisitRoots(&roots, 1, info);
+  }
+
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::Object** roots, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (*roots != nullptr) {
+      VisitRoot(roots, info);
+    }
+  }
+
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+};
+
+// Only visits roots one at a time, doesn't handle updating roots. Used when performance isn't
+// critical.
+class SingleRootVisitor : public RootVisitor {
+ private:
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      VisitRoot(*roots[i], info);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      VisitRoot(roots[i]->AsMirrorPtr(), info);
+    }
+  }
+
+  virtual void VisitRoot(mirror::Object* root, const RootInfo& info) = 0;
+};
 
 template<class MirrorType>
 class GcRoot {
@@ -76,37 +130,92 @@
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE MirrorType* Read() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoot(RootCallback* callback, void* arg, const RootInfo& info) const {
+  void VisitRoot(RootVisitor* visitor, const RootInfo& info) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsNull());
-    callback(reinterpret_cast<mirror::Object**>(&root_), arg, info);
+    mirror::CompressedReference<mirror::Object>* roots[1] = { &root_ };
+    visitor->VisitRoots(roots, 1u, info);
     DCHECK(!IsNull());
   }
 
-  void VisitRootIfNonNull(RootCallback* callback, void* arg, const RootInfo& info) const {
+  void VisitRootIfNonNull(RootVisitor* visitor, const RootInfo& info) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (!IsNull()) {
-      VisitRoot(callback, arg, info);
+      VisitRoot(visitor, info);
     }
   }
 
-  // This is only used by IrtIterator.
-  ALWAYS_INLINE MirrorType** AddressWithoutBarrier() {
+  ALWAYS_INLINE mirror::CompressedReference<mirror::Object>* AddressWithoutBarrier() {
     return &root_;
   }
 
-  bool IsNull() const {
+  ALWAYS_INLINE bool IsNull() const {
     // It's safe to null-check it without a read barrier.
-    return root_ == nullptr;
+    return root_.IsNull();
   }
 
-  ALWAYS_INLINE explicit GcRoot<MirrorType>() : root_(nullptr) {
+  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  mutable mirror::CompressedReference<mirror::Object> root_;
+
+  template <size_t kBufferSize> friend class BufferedRootVisitor;
+};
+
+// Simple data structure for buffered root visiting to avoid virtual dispatch overhead. Currently
+// only for CompressedReferences since these are more common than Object** roots, which are only
+// used for thread local roots.
+template <size_t kBufferSize>
+class BufferedRootVisitor {
+ public:
+  BufferedRootVisitor(RootVisitor* visitor, const RootInfo& root_info)
+      : visitor_(visitor), root_info_(root_info), buffer_pos_(0) {
   }
 
-  ALWAYS_INLINE explicit GcRoot<MirrorType>(MirrorType* ref)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : root_(ref) {
+  ~BufferedRootVisitor() {
+    Flush();
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(GcRoot<MirrorType>& root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!root.IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  void VisitRoot(GcRoot<MirrorType>& root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VisitRoot(root.AddressWithoutBarrier());
+  }
+
+  template <class MirrorType>
+  void VisitRoot(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(buffer_pos_ >= kBufferSize)) {
+      Flush();
+    }
+    roots_[buffer_pos_++] = root;
+  }
+
+  void Flush() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    visitor_->VisitRoots(roots_, buffer_pos_, root_info_);
+    buffer_pos_ = 0;
   }
 
  private:
-  mutable MirrorType* root_;
+  RootVisitor* const visitor_;
+  RootInfo root_info_;
+  mirror::CompressedReference<mirror::Object>* roots_[kBufferSize];
+  size_t buffer_pos_;
 };
 
 }  // namespace art
diff --git a/runtime/handle.h b/runtime/handle.h
index 6af3220..3ebb2d5 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -70,6 +70,16 @@
     return reinterpret_cast<jobject>(reference_);
   }
 
+  StackReference<mirror::Object>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    return reference_;
+  }
+
+  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reference_;
+  }
+
  protected:
   template<typename S>
   explicit Handle(StackReference<S>* reference)
@@ -80,14 +90,6 @@
       : reference_(handle.reference_) {
   }
 
-  StackReference<mirror::Object>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
-    return reference_;
-  }
-  ALWAYS_INLINE const StackReference<mirror::Object>* GetReference() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return reference_;
-  }
-
   StackReference<mirror::Object>* reference_;
 
  private:
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 656569c..df1e0d2 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -403,9 +403,9 @@
   JDWP::JdwpNetStateBase* net_state_;
 };
 
-#define __ output->
+#define __ output_->
 
-class Hprof {
+class Hprof : public SingleRootVisitor {
  public:
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
@@ -426,9 +426,11 @@
     size_t max_length;
     {
       EndianOutput count_output;
-      ProcessHeap(&count_output, false);
+      output_ = &count_output;
+      ProcessHeap(false);
       overall_size = count_output.SumLength();
       max_length = count_output.MaxLength();
+      output_ = nullptr;
     }
 
     bool okay;
@@ -451,86 +453,70 @@
   }
 
  private:
-  struct Env {
-    Hprof* hprof;
-    EndianOutput* output;
-  };
-
-  static void RootVisitor(mirror::Object** obj, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(arg != nullptr);
-    DCHECK(obj != nullptr);
-    DCHECK(*obj != nullptr);
-    Env* env = reinterpret_cast<Env*>(arg);
-    env->hprof->VisitRoot(*obj, root_info, env->output);
-  }
-
   static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(obj != nullptr);
     DCHECK(arg != nullptr);
-    Env* env = reinterpret_cast<Env*>(arg);
-    env->hprof->DumpHeapObject(obj, env->output);
+    reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
   }
 
-  void DumpHeapObject(mirror::Object* obj, EndianOutput* output)
+  void DumpHeapObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapClass(mirror::Class* klass, EndianOutput* output)
+  void DumpHeapClass(mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output)
+  void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass, EndianOutput* output)
+  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void ProcessHeap(EndianOutput* output, bool header_first)
+  void ProcessHeap(bool header_first)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Reset current heap and object count.
     current_heap_ = HPROF_HEAP_DEFAULT;
     objects_in_segment_ = 0;
 
     if (header_first) {
-      ProcessHeader(output);
-      ProcessBody(output);
+      ProcessHeader();
+      ProcessBody();
     } else {
-      ProcessBody(output);
-      ProcessHeader(output);
+      ProcessBody();
+      ProcessHeader();
     }
   }
 
-  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Runtime* runtime = Runtime::Current();
+  void ProcessBody() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* const runtime = Runtime::Current();
     // Walk the roots and the heap.
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
 
-    Env env = { this, output };
-    runtime->VisitRoots(RootVisitor, &env);
-    runtime->VisitImageRoots(RootVisitor, &env);
-    runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, &env);
+    runtime->VisitRoots(this);
+    runtime->VisitImageRoots(this);
+    runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, this);
 
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);
-    output->EndRecord();
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);
+    output_->EndRecord();
   }
 
-  void ProcessHeader(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Write the header.
-    WriteFixedHeader(output);
+    WriteFixedHeader();
     // Write the string and class tables, and any stack traces, to the header.
     // (jhat requires that these appear before any of the data in the body that refers to them.)
-    WriteStringTable(output);
-    WriteClassTable(output);
-    WriteStackTraces(output);
-    output->EndRecord();
+    WriteStringTable();
+    WriteClassTable();
+    WriteStackTraces();
+    output_->EndRecord();
   }
 
-  void WriteClassTable(EndianOutput* output) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t nextSerialNumber = 1;
 
     for (mirror::Class* c : classes_) {
       CHECK(c != nullptr);
-      output->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime);
+      output_->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime);
       // LOAD CLASS format:
       // U4: class serial number (always > 0)
       // ID: class object ID. We use the address of the class object structure as its ID.
@@ -543,12 +529,12 @@
     }
   }
 
-  void WriteStringTable(EndianOutput* output) {
+  void WriteStringTable() {
     for (const std::pair<std::string, HprofStringId>& p : strings_) {
       const std::string& string = p.first;
       const size_t id = p.second;
 
-      output->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
+      output_->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
       // STRING format:
       // ID:  ID for this string
@@ -559,24 +545,24 @@
     }
   }
 
-  void StartNewHeapDumpSegment(EndianOutput* output) {
+  void StartNewHeapDumpSegment() {
     // This flushes the old segment and starts a new one.
-    output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
     objects_in_segment_ = 0;
     // Starting a new HEAP_DUMP resets the heap to default.
     current_heap_ = HPROF_HEAP_DEFAULT;
   }
 
-  void CheckHeapSegmentConstraints(EndianOutput* output) {
-    if (objects_in_segment_ >= kMaxObjectsPerSegment || output->Length() >= kMaxBytesPerSegment) {
-      StartNewHeapDumpSegment(output);
+  void CheckHeapSegmentConstraints() {
+    if (objects_in_segment_ >= kMaxObjectsPerSegment || output_->Length() >= kMaxBytesPerSegment) {
+      StartNewHeapDumpSegment();
     }
   }
 
-  void VisitRoot(const mirror::Object* obj, const RootInfo& root_info, EndianOutput* output)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoot(mirror::Object* obj, const RootInfo& root_info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
-                      uint32_t thread_serial, EndianOutput* output);
+                      uint32_t thread_serial);
 
   HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (c != nullptr) {
@@ -611,7 +597,7 @@
     return LookupStringId(PrettyDescriptor(c));
   }
 
-  void WriteFixedHeader(EndianOutput* output) {
+  void WriteFixedHeader() {
     // Write the file header.
     // U1: NUL-terminated magic string.
     const char magic[] = "JAVA PROFILE 1.0.3";
@@ -635,9 +621,9 @@
     __ AddU4(static_cast<uint32_t>(nowMs & 0xFFFFFFFF));
   }
 
-  void WriteStackTraces(EndianOutput* output) {
+  void WriteStackTraces() {
     // Write a dummy stack trace record so the analysis tools don't freak out.
-    output->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime);
+    output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime);
     __ AddU4(kHprofNullStackTrace);
     __ AddU4(kHprofNullThread);
     __ AddU4(0);    // no frames
@@ -679,13 +665,15 @@
     bool okay;
     {
       FileEndianOutput file_output(file.get(), max_length);
-      ProcessHeap(&file_output, true);
+      output_ = &file_output;
+      ProcessHeap(true);
       okay = !file_output.Errors();
 
       if (okay) {
         // Check for expected size.
         CHECK_EQ(file_output.SumLength(), overall_size);
       }
+      output_ = nullptr;
     }
 
     if (okay) {
@@ -721,13 +709,15 @@
 
     // Prepare the output and send the chunk header.
     NetStateEndianOutput net_output(net_state, max_length);
+    output_ = &net_output;
     net_output.AddU1List(chunk_header, kChunkHeaderSize);
 
     // Write the dump.
-    ProcessHeap(&net_output, true);
+    ProcessHeap(true);
 
     // Check for expected size.
     CHECK_EQ(net_output.SumLength(), overall_size + kChunkHeaderSize);
+    output_ = nullptr;
 
     return true;
   }
@@ -741,6 +731,8 @@
 
   uint64_t start_ns_;
 
+  EndianOutput* output_;
+
   HprofHeapId current_heap_;  // Which heap we're currently dumping.
   size_t objects_in_segment_;
 
@@ -811,12 +803,12 @@
 // only true when marking the root set or unreachable
 // objects.  Used to add rootset references to obj.
 void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeapTag heap_tag,
-                           uint32_t thread_serial, EndianOutput* output) {
+                           uint32_t thread_serial) {
   if (heap_tag == 0) {
     return;
   }
 
-  CheckHeapSegmentConstraints(output);
+  CheckHeapSegmentConstraints();
 
   switch (heap_tag) {
     // ID: object ID
@@ -892,7 +884,7 @@
   return kHprofNullStackTrace;
 }
 
-void Hprof::DumpHeapObject(mirror::Object* obj, EndianOutput* output) {
+void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
   if (obj->IsClass() && obj->AsClass()->IsRetired()) {
     return;
@@ -908,7 +900,7 @@
       heap_type = HPROF_HEAP_IMAGE;
     }
   }
-  CheckHeapSegmentConstraints(output);
+  CheckHeapSegmentConstraints();
 
   if (heap_type != current_heap_) {
     HprofStringId nameId;
@@ -945,18 +937,18 @@
     // allocated which hasn't been initialized yet.
   } else {
     if (obj->IsClass()) {
-      DumpHeapClass(obj->AsClass(), output);
+      DumpHeapClass(obj->AsClass());
     } else if (c->IsArrayClass()) {
-      DumpHeapArray(obj->AsArray(), c, output);
+      DumpHeapArray(obj->AsArray(), c);
     } else {
-      DumpHeapInstanceObject(obj, c, output);
+      DumpHeapInstanceObject(obj, c);
     }
   }
 
   ++objects_in_segment_;
 }
 
-void Hprof::DumpHeapClass(mirror::Class* klass, EndianOutput* output) {
+void Hprof::DumpHeapClass(mirror::Class* klass) {
   size_t sFieldCount = klass->NumStaticFields();
   if (sFieldCount != 0) {
     int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
@@ -1049,7 +1041,7 @@
   }
 }
 
-void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass, EndianOutput* output) {
+void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) {
   uint32_t length = obj->GetLength();
 
   if (obj->IsObjectArray()) {
@@ -1089,8 +1081,7 @@
   }
 }
 
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass,
-                                   EndianOutput* output) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
   // obj is an instance object.
   __ AddU1(HPROF_INSTANCE_DUMP);
   __ AddObjectId(obj);
@@ -1099,7 +1090,7 @@
 
   // Reserve some space for the length of the instance data, which we won't
   // know until we're done writing it.
-  size_t size_patch_offset = output->Length();
+  size_t size_patch_offset = output_->Length();
   __ AddU4(0x77777777);
 
   // Write the instance data;  fields for this class, followed by super class fields,
@@ -1139,10 +1130,10 @@
   }
 
   // Patch the instance field length.
-  __ UpdateU4(size_patch_offset, output->Length() - (size_patch_offset + 4));
+  __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
 }
 
-void Hprof::VisitRoot(const mirror::Object* obj, const RootInfo& info, EndianOutput* output) {
+void Hprof::VisitRoot(mirror::Object* obj, const RootInfo& info) {
   static const HprofHeapTag xlate[] = {
     HPROF_ROOT_UNKNOWN,
     HPROF_ROOT_JNI_GLOBAL,
@@ -1164,7 +1155,7 @@
   if (obj == nullptr) {
     return;
   }
-  MarkRootObject(obj, 0, xlate[info.GetType()], info.GetThreadId(), output);
+  MarkRootObject(obj, 0, xlate[info.GetType()], info.GetThreadId());
 }
 
 // If "direct_to_ddms" is true, the other arguments are ignored, and data is
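Note: Hprof (and the other ad-hoc visitors later in this change) now receives
roots by subclassing SingleRootVisitor instead of passing a RootCallback
function pointer plus a void* baton through an Env struct. SingleRootVisitor
itself lives in gc_root.h and is not repeated here; a minimal sketch of the
shape the call sites above assume (the derivation from RootVisitor is implied
by Hprof being handed directly to Runtime::VisitRoots(RootVisitor*)):

    // Sketch only; see gc_root.h for the real declarations.
    class SingleRootVisitor : public RootVisitor {
     public:
      virtual ~SingleRootVisitor() {}
      // Called once per root; adapts the batched RootVisitor interface down
      // to one virtual call per root. The visitor cannot update the root.
      virtual void VisitRoot(mirror::Object* root, const RootInfo& info) = 0;
    };
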
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 1a3f107..a3aa1de 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -242,16 +242,10 @@
   madvise(release_start, release_end - release_start, MADV_DONTNEED);
 }
 
-void IndirectReferenceTable::VisitRoots(RootCallback* callback, void* arg,
-                                        const RootInfo& root_info) {
+void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  BufferedRootVisitor<128> root_visitor(visitor, root_info);
   for (auto ref : *this) {
-    if (*ref == nullptr) {
-      // Need to skip null entries to make it possible to do the
-      // non-null check after the call back.
-      continue;
-    }
-    callback(ref, arg, root_info);
-    DCHECK(*ref != nullptr);
+    root_visitor.VisitRootIfNonNull(*ref);
   }
 }
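
Note: BufferedRootVisitor<128>, used here and in several files below, is the
buffered root marking helper from the commit message: it accumulates up to 128
roots and flushes them to the underlying RootVisitor as a single array, paying
the virtual-dispatch cost once per batch instead of once per root. A hedged
sketch of the idea (the real helper lives in gc_root.h; the array-taking
RootVisitor::VisitRoots entry point and the CompressedReference return type of
GcRoot::AddressWithoutBarrier() are assumptions here):

    // Sketch of the batching idea, not the real implementation.
    // Thread-safety annotations omitted for brevity.
    template <size_t kBufferSize>
    class BufferedRootVisitor {
     public:
      BufferedRootVisitor(RootVisitor* visitor, const RootInfo& root_info)
          : visitor_(visitor), root_info_(root_info), buffer_pos_(0) {}

      ~BufferedRootVisitor() {
        Flush();  // don't drop a partially filled batch
      }

      void VisitRootIfNonNull(GcRoot<mirror::Object>& root) {
        if (!root.IsNull()) {
          VisitRoot(root);
        }
      }

      void VisitRoot(GcRoot<mirror::Object>& root) {
        if (UNLIKELY(buffer_pos_ >= kBufferSize)) {
          Flush();
        }
        roots_[buffer_pos_++] = root.AddressWithoutBarrier();
      }

      void Flush() {
        if (buffer_pos_ > 0) {
          visitor_->VisitRoots(roots_, buffer_pos_, root_info_);
          buffer_pos_ = 0;
        }
      }

     private:
      RootVisitor* const visitor_;
      const RootInfo root_info_;
      size_t buffer_pos_;
      mirror::CompressedReference<mirror::Object>* roots_[kBufferSize];
    };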
 
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 576a604..25b0281 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -218,7 +218,7 @@
   uint32_t serial_;
   GcRoot<mirror::Object> references_[kIRTPrevCount];
 };
-static_assert(sizeof(IrtEntry) == (1 + kIRTPrevCount) * sizeof(uintptr_t),
+static_assert(sizeof(IrtEntry) == (1 + kIRTPrevCount) * sizeof(uint32_t),
               "Unexpected sizeof(IrtEntry)");
 
 class IrtIterator {
@@ -233,9 +233,9 @@
     return *this;
   }
 
-  mirror::Object** operator*() {
+  GcRoot<mirror::Object>* operator*() {
     // This does not have a read barrier, as it is used to visit roots.
-    return table_[i_].GetReference()->AddressWithoutBarrier();
+    return table_[i_].GetReference();
   }
 
   bool equals(const IrtIterator& rhs) const {
@@ -320,7 +320,7 @@
     return IrtIterator(table_, Capacity(), Capacity());
   }
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uint32_t GetSegmentState() const {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9adb4ac..dea157a 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1077,13 +1077,14 @@
   }
 }
 
-void Instrumentation::VisitRoots(RootCallback* callback, void* arg) {
+void Instrumentation::VisitRoots(RootVisitor* visitor) {
   WriterMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
   if (IsDeoptimizedMethodsEmpty()) {
     return;
   }
+  BufferedRootVisitor<128> roots(visitor, RootInfo(kRootVMInternal));
   for (auto pair : deoptimized_methods_) {
-    pair.second.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
+    roots.VisitRoot(pair.second);
   }
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8972f3a..77314c60 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -345,7 +345,7 @@
   void InstallStubsForMethod(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
 
  private:
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 19bfc4e..8e85435 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -53,14 +53,14 @@
   os << "Intern table: " << StrongSize() << " strong; " << WeakSize() << " weak\n";
 }
 
-void InternTable::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+void InternTable::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   if ((flags & kVisitRootFlagAllRoots) != 0) {
-    strong_interns_.VisitRoots(callback, arg);
+    strong_interns_.VisitRoots(visitor);
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_strong_intern_roots_) {
       mirror::String* old_ref = root.Read<kWithoutReadBarrier>();
-      root.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+      root.VisitRoot(visitor, RootInfo(kRootInternedString));
       mirror::String* new_ref = root.Read<kWithoutReadBarrier>();
       if (new_ref != old_ref) {
         // The GC moved a root in the log. Need to search the strong interns and update the
@@ -335,12 +335,13 @@
   post_zygote_table_.Insert(GcRoot<mirror::String>(s));
 }
 
-void InternTable::Table::VisitRoots(RootCallback* callback, void* arg) {
+void InternTable::Table::VisitRoots(RootVisitor* visitor) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootInternedString));
   for (auto& intern : pre_zygote_table_) {
-    intern.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+    buffered_visitor.VisitRoot(intern);
   }
   for (auto& intern : post_zygote_table_) {
-    intern.VisitRoot(callback, arg, RootInfo(kRootInternedString));
+    buffered_visitor.VisitRoot(intern);
   }
 }
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 2e31b7e..200a764 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -80,7 +80,7 @@
   // Total number of weakly live interned strings.
   size_t WeakSize() const LOCKS_EXCLUDED(Locks::intern_table_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os) const;
@@ -125,7 +125,7 @@
     void Remove(mirror::String* s)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-    void VisitRoots(RootCallback* callback, void* arg)
+    void VisitRoots(RootVisitor* visitor)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
     void SweepWeaks(IsMarkedCallback* callback, void* arg)
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 09bfbf3..b795d72 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -748,19 +748,18 @@
 
 void JavaVMExt::SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), weak_globals_lock_);
-  for (mirror::Object** entry : weak_globals_) {
-    // Since this is called by the GC, we don't need a read barrier.
-    mirror::Object* obj = *entry;
-    if (obj == nullptr) {
-      // Need to skip null here to distinguish between null entries
-      // and cleared weak ref entries.
-      continue;
+  Runtime* const runtime = Runtime::Current();
+  for (auto* entry : weak_globals_) {
+    // Need to skip null here to distinguish between null entries and cleared weak ref entries.
+    if (!entry->IsNull()) {
+      // Since this is called by the GC, we don't need a read barrier.
+      mirror::Object* obj = entry->Read<kWithoutReadBarrier>();
+      mirror::Object* new_obj = callback(obj, arg);
+      if (new_obj == nullptr) {
+        new_obj = runtime->GetClearedJniWeakGlobal();
+      }
+      *entry = GcRoot<mirror::Object>(new_obj);
     }
-    mirror::Object* new_obj = callback(obj, arg);
-    if (new_obj == nullptr) {
-      new_obj = Runtime::Current()->GetClearedJniWeakGlobal();
-    }
-    *entry = new_obj;
   }
 }
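
Note: the sweep above keeps a three-way distinction per weak-global slot:
never-set entries stay null, live entries are rewritten through GcRoot so the
compressed representation is preserved, and entries whose referent died get
the shared cleared-weak sentinel rather than null. A tiny hypothetical helper
(not part of this change) to spell out the convention:

    // Hypothetical: true iff a weak-global slot was swept because its
    // referent died, as opposed to never having been set (which is null).
    static bool IsClearedJniWeak(mirror::Object* obj)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      return obj == Runtime::Current()->GetClearedJniWeakGlobal();
    }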
 
@@ -769,10 +768,10 @@
   globals_.Trim();
 }
 
-void JavaVMExt::VisitRoots(RootCallback* callback, void* arg) {
+void JavaVMExt::VisitRoots(RootVisitor* visitor) {
   Thread* self = Thread::Current();
   ReaderMutexLock mu(self, globals_lock_);
-  globals_.VisitRoots(callback, arg, RootInfo(kRootJNIGlobal));
+  globals_.VisitRoots(visitor, RootInfo(kRootJNIGlobal));
   // The weak_globals table is visited by the GC itself (because sweeping it mutates the table).
 }
 
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 037fbe5..deec6a9 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -103,7 +103,7 @@
 
   bool SetCheckJniEnabled(bool enabled);
 
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DisallowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 7f04992..6452f31 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -196,8 +196,8 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) {
-  array_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+inline void PrimitiveArray<T>::VisitRoots(RootVisitor* visitor) {
+  array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 template<typename T>
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 83e3688..115fcf2 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -166,8 +166,7 @@
     array_class_ = GcRoot<Class>(nullptr);
   }
 
-  static void VisitRoots(RootCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   static GcRoot<Class> array_class_;
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 4c36753..83602d4 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -55,8 +55,8 @@
   SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), num_bytes.Uint32Value());
 }
 
-void ArtField::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_reflect_ArtField_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void ArtField::VisitRoots(RootVisitor* visitor) {
+  java_lang_reflect_ArtField_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 // TODO: we could speed up the search if fields are ordered by offsets.
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index d640165..9d95cb9 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -138,7 +138,7 @@
 
   static void SetClass(Class* java_lang_reflect_ArtField);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index c1f7594..edbbb4a 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -61,8 +61,8 @@
 }
 
 
-void ArtMethod::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_reflect_ArtMethod_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void ArtMethod::VisitRoots(RootVisitor* visitor) {
+  java_lang_reflect_ArtMethod_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 82e5d00..22481ce 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -488,7 +488,7 @@
 
   static void ResetClass();
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 29851a9..8fb8147 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -51,8 +51,8 @@
   java_lang_Class_ = GcRoot<Class>(nullptr);
 }
 
-void Class::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_Class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Class::VisitRoots(RootVisitor* visitor) {
+  java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2dff383..b82a58f 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -971,7 +971,7 @@
   // Can't call this SetClass or else it gets called instead of Object::SetClass in places.
   static void SetClassClass(Class* java_lang_Class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // When class is verified, set the kAccPreverified flag on each method.
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index 1724682..82cc26e 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -48,9 +48,9 @@
   array_class_ = GcRoot<Class>(nullptr);
 }
 
-void Field::VisitRoots(RootCallback* callback, void* arg) {
-  static_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
-  array_class_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Field::VisitRoots(RootVisitor* visitor) {
+  static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+  array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 ArtField* Field::GetArtField() {
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index f54340a..cea06f5 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -89,7 +89,7 @@
 
   static void ResetArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Slow, try to use only for PrettyField and such.
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index b63d13d..5edda8b 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -43,6 +43,11 @@
 
   void Clear() {
     reference_ = 0;
+    DCHECK(IsNull());
+  }
+
+  bool IsNull() const {
+    return reference_ == 0;
   }
 
   uint32_t AsVRegValue() const {
@@ -86,6 +91,23 @@
       : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
 };
 
+// Standard compressed reference used in the runtime. Used for StackReference and GC roots.
+template<class MirrorType>
+class MANAGED CompressedReference : public mirror::ObjectReference<false, MirrorType> {
+ public:
+  CompressedReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
+
+  static CompressedReference<MirrorType> FromMirrorPtr(MirrorType* p)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return CompressedReference<MirrorType>(p);
+  }
+
+ private:
+  CompressedReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(p) {}
+};
+
 }  // namespace mirror
 }  // namespace art
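
Note: CompressedReference is what lets GcRoot shrink to 32 bits (see the
sizeof(uint32_t) static_assert on IrtEntry earlier in this change): an
ObjectReference with heap poisoning disabled and no read barrier of its own,
now shared by StackReference and GC roots. An illustrative round-trip,
assuming only the members visible above plus ObjectReference::AsMirrorPtr():

    // Sketch: a compressed reference is four bytes and round-trips the
    // pointer it was built from.
    void CompressedReferenceExample(mirror::Object* obj)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      auto ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(obj);
      static_assert(sizeof(ref) == sizeof(uint32_t), "must stay 32 bits");
      CHECK_EQ(ref.AsMirrorPtr(), obj);  // decompression recovers the pointer
    }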
 
diff --git a/runtime/mirror/reference.cc b/runtime/mirror/reference.cc
index 35130e8..70bcf92 100644
--- a/runtime/mirror/reference.cc
+++ b/runtime/mirror/reference.cc
@@ -16,6 +16,9 @@
 
 #include "reference.h"
 
+#include "gc_root-inl.h"
+#include "mirror/art_method.h"
+
 namespace art {
 namespace mirror {
 
@@ -32,8 +35,8 @@
   java_lang_ref_Reference_ = GcRoot<Class>(nullptr);
 }
 
-void Reference::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_ref_Reference_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Reference::VisitRoots(RootVisitor* visitor) {
+  java_lang_ref_Reference_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 69ef69c..c11d79d 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -100,7 +100,7 @@
   }
   static void SetClass(Class* klass);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg);
+  static void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   // Note: This avoids a read barrier, it should only be used by the GC.
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index c2a67e8..ec2b495 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -67,8 +67,8 @@
                                  line_number);
 }
 
-void StackTraceElement::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_StackTraceElement_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void StackTraceElement::VisitRoots(RootVisitor* visitor) {
+  java_lang_StackTraceElement_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 70acd1c..dc7131e 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -54,7 +54,7 @@
 
   static void SetClass(Class* java_lang_StackTraceElement);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static Class* GetStackTraceElement() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_StackTraceElement_.IsNull());
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index e7c88c5..bd6a63c 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -253,8 +253,8 @@
   return countDiff;
 }
 
-void String::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_String_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void String::VisitRoots(RootVisitor* visitor) {
+  java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6c22b9b..0670d0b 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -127,7 +127,7 @@
 
   static void SetClass(Class* java_lang_String);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: Make this private. It's only used on ObjectTest at the moment.
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index fdfeb47..b564649 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -144,8 +144,8 @@
   java_lang_Throwable_ = GcRoot<Class>(nullptr);
 }
 
-void Throwable::VisitRoots(RootCallback* callback, void* arg) {
-  java_lang_Throwable_.VisitRootIfNonNull(callback, arg, RootInfo(kRootStickyClass));
+void Throwable::VisitRoots(RootVisitor* visitor) {
+  java_lang_Throwable_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index c22475b..9cc0b6f 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -55,7 +55,7 @@
 
   static void SetClass(Class* java_lang_Throwable);
   static void ResetClass();
-  static void VisitRoots(RootCallback* callback, void* arg)
+  static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 6e3f1bc..760038a 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -248,13 +248,20 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static void PreloadDexCachesStringsCallback(mirror::Object** root, void* arg,
-                                            const RootInfo& /*root_info*/)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  StringTable& table = *reinterpret_cast<StringTable*>(arg);
-  mirror::String* string = const_cast<mirror::Object*>(*root)->AsString();
-  table[string->ToModifiedUtf8()] = string;
-}
+class PreloadDexCachesStringsVisitor : public SingleRootVisitor {
+ public:
+  explicit PreloadDexCachesStringsVisitor(StringTable* table) : table_(table) {
+  }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::String* string = root->AsString();
+    (*table_)[string->ToModifiedUtf8()] = string;
+  }
+
+ private:
+  StringTable* const table_;
+};
 
 // Based on ClassLinker::ResolveString.
 static void PreloadDexCachesResolveString(Handle<mirror::DexCache> dex_cache, uint32_t string_idx,
@@ -469,8 +476,8 @@
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
   if (kPreloadDexCachesStrings) {
-    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings,
-                                          kVisitRootFlagAllRoots);
+    PreloadDexCachesStringsVisitor visitor(&strings);
+    runtime->GetInternTable()->VisitRoots(&visitor, kVisitRootFlagAllRoots);
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index c74fded..5631ff4 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -111,6 +111,48 @@
   }
 }
 
+// TODO: Reduce copy-paste.
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root) {
+  MirrorType* ref = root->AsMirrorPtr();
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    // TODO: separate the read barrier code from the collector code more.
+    if (Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking()) {
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+      auto old_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      auto new_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
+      // Update the field atomically. This may fail if the mutator updates it first, which is fine.
+      auto* atomic_root =
+          reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
+      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 inline bool ReadBarrier::IsDuringStartup() {
   gc::Heap* heap = Runtime::Current()->GetHeap();
   if (heap == nullptr) {
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 474b46f..471b37c 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -20,6 +20,7 @@
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "jni.h"
+#include "mirror/object_reference.h"
 #include "offsets.h"
 #include "read_barrier_c.h"
 
@@ -58,6 +59,13 @@
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // It's up to the implementation whether the given root gets updated in
+  // place; the returned reference, however, is always up to date.
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            bool kMaybeDuringStartup = false>
+  ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static bool IsDuringStartup();
 
   // Without the holder object.
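
Note: the table-lookup barrier above publishes the marked reference into the
root with a lossy compare-and-swap: if a mutator stores a newer value first,
the CAS fails and the mutator's value is kept, while the barrier still returns
the marked reference to its caller. Reduced to its essentials as a
hypothetical standalone helper mirroring the inline code:

    // Sketch of the lossy publish used by BarrierForRoot above.
    template <typename MirrorType>
    void PublishMarkedRoot(mirror::CompressedReference<MirrorType>* root,
                           mirror::CompressedReference<MirrorType> old_ref,
                           mirror::CompressedReference<MirrorType> new_ref) {
      auto* atomic_root =
          reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
      // A failed exchange means the mutator already wrote something newer;
      // keeping the mutator's value is correct, so the result is ignored.
      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
    }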
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 357d454..ac36447 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -237,9 +237,10 @@
   DumpSummaryLine(os, prev, GetElementCount(prev), identical, equiv);
 }
 
-void ReferenceTable::VisitRoots(RootCallback* visitor, void* arg, const RootInfo& root_info) {
+void ReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, root_info);
   for (GcRoot<mirror::Object>& root : entries_) {
-    root.VisitRoot(visitor, arg, root_info);
+    buffered_visitor.VisitRoot(root);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 22cf1cd..94f16b6 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -49,7 +49,8 @@
 
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* visitor, void* arg, const RootInfo& root_info);
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   typedef std::vector<GcRoot<mirror::Object>,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 497123b..1cd0a96 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1291,67 +1291,67 @@
   return ncdfe;
 }
 
-void Runtime::VisitConstantRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitConstantRoots(RootVisitor* visitor) {
   // Visit the classes held as static in mirror classes; these can be visited concurrently and only
   // need to be visited once per GC since they never change.
-  mirror::ArtField::VisitRoots(callback, arg);
-  mirror::ArtMethod::VisitRoots(callback, arg);
-  mirror::Class::VisitRoots(callback, arg);
-  mirror::Reference::VisitRoots(callback, arg);
-  mirror::StackTraceElement::VisitRoots(callback, arg);
-  mirror::String::VisitRoots(callback, arg);
-  mirror::Throwable::VisitRoots(callback, arg);
-  mirror::Field::VisitRoots(callback, arg);
+  mirror::ArtField::VisitRoots(visitor);
+  mirror::ArtMethod::VisitRoots(visitor);
+  mirror::Class::VisitRoots(visitor);
+  mirror::Reference::VisitRoots(visitor);
+  mirror::StackTraceElement::VisitRoots(visitor);
+  mirror::String::VisitRoots(visitor);
+  mirror::Throwable::VisitRoots(visitor);
+  mirror::Field::VisitRoots(visitor);
   // Visit all the primitive array types classes.
-  mirror::PrimitiveArray<uint8_t>::VisitRoots(callback, arg);   // BooleanArray
-  mirror::PrimitiveArray<int8_t>::VisitRoots(callback, arg);    // ByteArray
-  mirror::PrimitiveArray<uint16_t>::VisitRoots(callback, arg);  // CharArray
-  mirror::PrimitiveArray<double>::VisitRoots(callback, arg);    // DoubleArray
-  mirror::PrimitiveArray<float>::VisitRoots(callback, arg);     // FloatArray
-  mirror::PrimitiveArray<int32_t>::VisitRoots(callback, arg);   // IntArray
-  mirror::PrimitiveArray<int64_t>::VisitRoots(callback, arg);   // LongArray
-  mirror::PrimitiveArray<int16_t>::VisitRoots(callback, arg);   // ShortArray
+  mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
+  mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
+  mirror::PrimitiveArray<uint16_t>::VisitRoots(visitor);  // CharArray
+  mirror::PrimitiveArray<double>::VisitRoots(visitor);    // DoubleArray
+  mirror::PrimitiveArray<float>::VisitRoots(visitor);     // FloatArray
+  mirror::PrimitiveArray<int32_t>::VisitRoots(visitor);   // IntArray
+  mirror::PrimitiveArray<int64_t>::VisitRoots(visitor);   // LongArray
+  mirror::PrimitiveArray<int16_t>::VisitRoots(visitor);   // ShortArray
 }
 
-void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  intern_table_->VisitRoots(callback, arg, flags);
-  class_linker_->VisitRoots(callback, arg, flags);
+void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  intern_table_->VisitRoots(visitor, flags);
+  class_linker_->VisitRoots(visitor, flags);
   if ((flags & kVisitRootFlagNewRoots) == 0) {
     // Guaranteed to have no new roots in the constant roots.
-    VisitConstantRoots(callback, arg);
+    VisitConstantRoots(visitor);
   }
 }
 
-void Runtime::VisitTransactionRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitTransactionRoots(RootVisitor* visitor) {
   if (preinitialization_transaction_ != nullptr) {
-    preinitialization_transaction_->VisitRoots(callback, arg);
+    preinitialization_transaction_->VisitRoots(visitor);
   }
 }
 
-void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
-  java_vm_->VisitRoots(callback, arg);
-  sentinel_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  pre_allocated_OutOfMemoryError_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  resolution_method_.VisitRoot(callback, arg, RootInfo(kRootVMInternal));
-  pre_allocated_NoClassDefFoundError_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  imt_conflict_method_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  imt_unimplemented_method_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
-  default_imt_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+void Runtime::VisitNonThreadRoots(RootVisitor* visitor) {
+  java_vm_->VisitRoots(visitor);
+  sentinel_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  pre_allocated_OutOfMemoryError_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  resolution_method_.VisitRoot(visitor, RootInfo(kRootVMInternal));
+  pre_allocated_NoClassDefFoundError_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  imt_conflict_method_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  imt_unimplemented_method_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  default_imt_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i].VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
+    callee_save_methods_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   }
-  verifier::MethodVerifier::VisitStaticRoots(callback, arg);
-  VisitTransactionRoots(callback, arg);
-  instrumentation_.VisitRoots(callback, arg);
+  verifier::MethodVerifier::VisitStaticRoots(visitor);
+  VisitTransactionRoots(visitor);
+  instrumentation_.VisitRoots(visitor);
 }
 
-void Runtime::VisitNonConcurrentRoots(RootCallback* callback, void* arg) {
-  thread_list_->VisitRoots(callback, arg);
-  VisitNonThreadRoots(callback, arg);
+void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor) {
+  thread_list_->VisitRoots(visitor);
+  VisitNonThreadRoots(visitor);
 }
 
-void Runtime::VisitThreadRoots(RootCallback* callback, void* arg) {
-  thread_list_->VisitRoots(callback, arg);
+void Runtime::VisitThreadRoots(RootVisitor* visitor) {
+  thread_list_->VisitRoots(visitor);
 }
 
 size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
@@ -1359,12 +1359,12 @@
   return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector);
 }
 
-void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  VisitNonConcurrentRoots(callback, arg);
-  VisitConcurrentRoots(callback, arg, flags);
+void Runtime::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  VisitNonConcurrentRoots(visitor);
+  VisitConcurrentRoots(visitor, flags);
 }
 
-void Runtime::VisitImageRoots(RootCallback* callback, void* arg) {
+void Runtime::VisitImageRoots(RootVisitor* visitor) {
   for (auto* space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsImageSpace()) {
       auto* image_space = space->AsImageSpace();
@@ -1373,7 +1373,7 @@
         auto* obj = image_header.GetImageRoot(static_cast<ImageHeader::ImageRoot>(i));
         if (obj != nullptr) {
           auto* after_obj = obj;
-          callback(&after_obj, arg, RootInfo(kRootStickyClass));
+          visitor->VisitRoot(&after_obj, RootInfo(kRootStickyClass));
           CHECK_EQ(after_obj, obj);
         }
       }
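
Note: with the interface virtual, one-off consumers subclass a visitor instead
of threading a void* baton through every call. A minimal hypothetical client
that counts the runtime's roots, using only the API shown in this change:

    class CountRootsVisitor : public SingleRootVisitor {
     public:
      void VisitRoot(mirror::Object* root ATTRIBUTE_UNUSED,
                     const RootInfo& info ATTRIBUTE_UNUSED)
          OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        ++count_;
      }
      size_t count_ = 0;
    };

    // Usage, with the mutator lock held as VisitRoots requires:
    //   CountRootsVisitor visitor;
    //   Runtime::Current()->VisitRoots(&visitor);
    //   LOG(INFO) << "Visited " << visitor.count_ << " roots";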
diff --git a/runtime/runtime.h b/runtime/runtime.h
index af6abbd..baa4d18 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -296,27 +296,27 @@
 
   // Visit all the roots. The flags select which subset of the roots gets visited (see
   // VisitRootFlags); by default all roots are visited.
-  void VisitRoots(RootCallback* visitor, void* arg, VisitRootFlags flags = kVisitRootFlagAllRoots)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit image roots, only used for hprof since the GC uses the image space mod union table
   // instead.
-  void VisitImageRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitImageRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the roots that we can safely visit concurrently.
-  void VisitConcurrentRoots(RootCallback* visitor, void* arg,
+  void VisitConcurrentRoots(RootVisitor* visitor,
                             VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the non-thread roots; we can do this with mutators unpaused.
-  void VisitNonThreadRoots(RootCallback* visitor, void* arg)
+  void VisitNonThreadRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitTransactionRoots(RootCallback* visitor, void* arg)
+  void VisitTransactionRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the thread roots.
-  void VisitThreadRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitThreadRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Flip thread roots from from-space refs to to-space refs.
   size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
@@ -324,7 +324,7 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Visit all other roots; this must be done with mutators suspended.
-  void VisitNonConcurrentRoots(RootCallback* visitor, void* arg)
+  void VisitNonConcurrentRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweep system weaks; a system weak is deleted if the visitor returns nullptr. Otherwise, the
@@ -334,7 +334,7 @@
 
   // Constant roots are the roots which never change after the runtime is initialized; they only
   // need to be visited once per GC cycle.
-  void VisitConstantRoots(RootCallback* callback, void* arg)
+  void VisitConstantRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
diff --git a/runtime/stack.h b/runtime/stack.h
index aab54ba..fbb0aa4 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -59,19 +59,7 @@
 
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
-class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
- public:
-  StackReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
-
-  static StackReference<MirrorType> FromMirrorPtr(MirrorType* p)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return StackReference<MirrorType>(p);
-  }
-
- private:
-  StackReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : mirror::ObjectReference<false, MirrorType>(p) {}
+class MANAGED StackReference : public mirror::CompressedReference<MirrorType> {
 };
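
Note: StackReference keeps its name at the call sites but is now an empty
subclass, so stack slots, handle scopes, and GC roots all share the same
32-bit CompressedReference representation. A compile-time check one could add
to document that invariant (hypothetical, not part of this change):

    static_assert(sizeof(StackReference<mirror::Object>) ==
                      sizeof(mirror::CompressedReference<mirror::Object>),
                  "StackReference must not add state to CompressedReference");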
 
 // ShadowFrame has 2 possible layouts:
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8a6422d..79d2b13 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1196,26 +1196,37 @@
   }
 }
 
-static void MonitorExitVisitor(mirror::Object** object, void* arg, const RootInfo& /*root_info*/)
-    NO_THREAD_SAFETY_ANALYSIS {
-  Thread* self = reinterpret_cast<Thread*>(arg);
-  mirror::Object* entered_monitor = *object;
-  if (self->HoldsLock(entered_monitor)) {
-    LOG(WARNING) << "Calling MonitorExit on object "
-                 << object << " (" << PrettyTypeOf(entered_monitor) << ")"
-                 << " left locked by native thread "
-                 << *Thread::Current() << " which is detaching";
-    entered_monitor->MonitorExit(self);
+class MonitorExitVisitor : public SingleRootVisitor {
+ public:
+  explicit MonitorExitVisitor(Thread* self) : self_(self) { }
+
+  // NO_THREAD_SAFETY_ANALYSIS due to MonitorExit.
+  void VisitRoot(mirror::Object* entered_monitor, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    if (self_->HoldsLock(entered_monitor)) {
+      LOG(WARNING) << "Calling MonitorExit on object "
+                   << entered_monitor << " (" << PrettyTypeOf(entered_monitor) << ")"
+                   << " left locked by native thread "
+                   << *Thread::Current() << " which is detaching";
+      entered_monitor->MonitorExit(self_);
+    }
   }
-}
+
+ private:
+  Thread* const self_;
+};
 
 void Thread::Destroy() {
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
 
   if (tlsPtr_.jni_env != nullptr) {
-    // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
-    tlsPtr_.jni_env->monitors.VisitRoots(MonitorExitVisitor, self, RootInfo(kRootVMInternal));
+    {
+      ScopedObjectAccess soa(self);
+      MonitorExitVisitor visitor(self);
+      // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
+      tlsPtr_.jni_env->monitors.VisitRoots(&visitor, RootInfo(kRootVMInternal));
+    }
     // Release locally held global references; releasing them may require the mutator lock.
     if (tlsPtr_.jpeer != nullptr) {
       // If pthread_create fails we don't have a jni env here.
@@ -1373,18 +1384,11 @@
   return tlsPtr_.managed_stack.ShadowFramesContain(hs_entry);
 }
 
-void Thread::HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id) {
+void Thread::HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id) {
+  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootNativeStack, thread_id));
   for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
-    size_t num_refs = cur->NumberOfReferences();
-    for (size_t j = 0; j < num_refs; ++j) {
-      mirror::Object* object = cur->GetReference(j);
-      if (object != nullptr) {
-        mirror::Object* old_obj = object;
-        visitor(&object, arg, RootInfo(kRootNativeStack, thread_id));
-        if (old_obj != object) {
-          cur->SetReference(j, object);
-        }
-      }
+    for (size_t j = 0, count = cur->NumberOfReferences(); j < count; ++j) {
+      buffered_visitor.VisitRootIfNonNull(cur->GetHandle(j).GetReference());
     }
   }
 }
@@ -2084,7 +2088,7 @@
 template <typename RootVisitor>
 class ReferenceMapVisitor : public StackVisitor {
  public:
-  ReferenceMapVisitor(Thread* thread, Context* context, const RootVisitor& visitor)
+  ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(thread, context), visitor_(visitor) {}
 
@@ -2248,55 +2252,50 @@
   }
 
   // Visitor for when we visit a root.
-  const RootVisitor& visitor_;
+  RootVisitor& visitor_;
 };
 
 class RootCallbackVisitor {
  public:
-  RootCallbackVisitor(RootCallback* callback, void* arg, uint32_t tid)
-     : callback_(callback), arg_(arg), tid_(tid) {}
+  RootCallbackVisitor(RootVisitor* visitor, uint32_t tid) : visitor_(visitor), tid_(tid) {}
 
-  void operator()(mirror::Object** obj, size_t vreg, const StackVisitor* stack_visitor) const {
-    callback_(obj, arg_, JavaFrameRootInfo(tid_, stack_visitor, vreg));
+  void operator()(mirror::Object** obj, size_t vreg, const StackVisitor* stack_visitor) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    visitor_->VisitRoot(obj, JavaFrameRootInfo(tid_, stack_visitor, vreg));
   }
 
  private:
-  RootCallback* const callback_;
-  void* const arg_;
+  RootVisitor* const visitor_;
   const uint32_t tid_;
 };
 
-void Thread::VisitRoots(RootCallback* visitor, void* arg) {
-  uint32_t thread_id = GetThreadId();
-  if (tlsPtr_.opeer != nullptr) {
-    visitor(&tlsPtr_.opeer, arg, RootInfo(kRootThreadObject, thread_id));
-  }
+void Thread::VisitRoots(RootVisitor* visitor) {
+  const uint32_t thread_id = GetThreadId();
+  visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
   if (tlsPtr_.exception != nullptr && tlsPtr_.exception != GetDeoptimizationException()) {
-    visitor(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception), arg,
-            RootInfo(kRootNativeStack, thread_id));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
+                       RootInfo(kRootNativeStack, thread_id));
   }
-  if (tlsPtr_.monitor_enter_object != nullptr) {
-    visitor(&tlsPtr_.monitor_enter_object, arg, RootInfo(kRootNativeStack, thread_id));
-  }
-  tlsPtr_.jni_env->locals.VisitRoots(visitor, arg, RootInfo(kRootJNILocal, thread_id));
-  tlsPtr_.jni_env->monitors.VisitRoots(visitor, arg, RootInfo(kRootJNIMonitor, thread_id));
-  HandleScopeVisitRoots(visitor, arg, thread_id);
+  visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
+  tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
+  tlsPtr_.jni_env->monitors.VisitRoots(visitor, RootInfo(kRootJNIMonitor, thread_id));
+  HandleScopeVisitRoots(visitor, thread_id);
   if (tlsPtr_.debug_invoke_req != nullptr) {
-    tlsPtr_.debug_invoke_req->VisitRoots(visitor, arg, RootInfo(kRootDebugger, thread_id));
+    tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
   if (tlsPtr_.single_step_control != nullptr) {
-    tlsPtr_.single_step_control->VisitRoots(visitor, arg, RootInfo(kRootDebugger, thread_id));
+    tlsPtr_.single_step_control->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id));
   }
   if (tlsPtr_.deoptimization_shadow_frame != nullptr) {
-    RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
-    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitorToCallback);
+    RootCallbackVisitor visitor_to_callback(visitor, thread_id);
+    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
     for (ShadowFrame* shadow_frame = tlsPtr_.deoptimization_shadow_frame; shadow_frame != nullptr;
         shadow_frame = shadow_frame->GetLink()) {
       mapper.VisitShadowFrame(shadow_frame);
     }
   }
   if (tlsPtr_.shadow_frame_under_construction != nullptr) {
-    RootCallbackVisitor visitor_to_callback(visitor, arg, thread_id);
+    RootCallbackVisitor visitor_to_callback(visitor, thread_id);
     ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
     for (ShadowFrame* shadow_frame = tlsPtr_.shadow_frame_under_construction;
         shadow_frame != nullptr;
@@ -2305,33 +2304,34 @@
     }
   }
   if (tlsPtr_.method_verifier != nullptr) {
-    tlsPtr_.method_verifier->VisitRoots(visitor, arg, RootInfo(kRootNativeStack, thread_id));
+    tlsPtr_.method_verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
-  RootCallbackVisitor visitor_to_callback(visitor, arg, thread_id);
+  RootCallbackVisitor visitor_to_callback(visitor, thread_id);
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context, visitor_to_callback);
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
-    if (frame.this_object_ != nullptr) {
-      visitor(&frame.this_object_, arg, RootInfo(kRootVMInternal, thread_id));
-    }
-    DCHECK(frame.method_ != nullptr);
-    visitor(reinterpret_cast<mirror::Object**>(&frame.method_), arg,
-            RootInfo(kRootVMInternal, thread_id));
+    visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&frame.method_),
+                       RootInfo(kRootVMInternal, thread_id));
   }
 }
 
-static void VerifyRoot(mirror::Object** root, void* /*arg*/, const RootInfo& /*root_info*/)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  VerifyObject(*root);
-}
+class VerifyRootVisitor : public SingleRootVisitor {
+ public:
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VerifyObject(root);
+  }
+};
 
 void Thread::VerifyStackImpl() {
+  VerifyRootVisitor visitor;
   std::unique_ptr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetThreadId());
-  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
+  RootCallbackVisitor visitor_to_callback(&visitor, GetThreadId());
+  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitor_to_callback);
   mapper.WalkStack();
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 9d4d89d..f89e46b 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -485,7 +485,7 @@
       jobjectArray output_array = nullptr, int* stack_depth = nullptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -686,7 +686,7 @@
   // Is the given obj in this thread's stack indirect reference table?
   bool HandleScopeContains(jobject obj) const;
 
-  void HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id)
+  void HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   HandleScope* GetTopHandleScope() {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 1ab0093..560bcc1 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1156,10 +1156,10 @@
   }
 }
 
-void ThreadList::VisitRoots(RootCallback* callback, void* arg) const {
+void ThreadList::VisitRoots(RootVisitor* visitor) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VisitRoots(callback, arg);
+    thread->VisitRoots(visitor);
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index c18e285..fa747b8 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -136,7 +136,7 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
   void Unregister(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg) const
+  void VisitRoots(RootVisitor* visitor) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Return a copy of the thread list.
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 9b205c3..cc0f15f 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -221,24 +221,24 @@
   intern_string_logs_.clear();
 }
 
-void Transaction::VisitRoots(RootCallback* callback, void* arg) {
+void Transaction::VisitRoots(RootVisitor* visitor) {
   MutexLock mu(Thread::Current(), log_lock_);
-  VisitObjectLogs(callback, arg);
-  VisitArrayLogs(callback, arg);
-  VisitStringLogs(callback, arg);
+  VisitObjectLogs(visitor);
+  VisitArrayLogs(visitor);
+  VisitStringLogs(visitor);
 }
 
-void Transaction::VisitObjectLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitObjectLogs(RootVisitor* visitor) {
   // List of moving roots.
   typedef std::pair<mirror::Object*, mirror::Object*> ObjectPair;
   std::list<ObjectPair> moving_roots;
 
   // Visit roots.
   for (auto it : object_logs_) {
-    it.second.VisitRoots(callback, arg);
+    it.second.VisitRoots(visitor);
     mirror::Object* old_root = it.first;
     mirror::Object* new_root = old_root;
-    callback(&new_root, arg, RootInfo(kRootUnknown));
+    visitor->VisitRoot(&new_root, RootInfo(kRootUnknown));
     if (new_root != old_root) {
       moving_roots.push_back(std::make_pair(old_root, new_root));
     }
@@ -256,7 +256,7 @@
   }
 }
 
-void Transaction::VisitArrayLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitArrayLogs(RootVisitor* visitor) {
   // List of moving roots.
   typedef std::pair<mirror::Array*, mirror::Array*> ArrayPair;
   std::list<ArrayPair> moving_roots;
@@ -265,7 +265,7 @@
     mirror::Array* old_root = it.first;
     CHECK(!old_root->IsObjectArray());
     mirror::Array* new_root = old_root;
-    callback(reinterpret_cast<mirror::Object**>(&new_root), arg, RootInfo(kRootUnknown));
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&new_root), RootInfo(kRootUnknown));
     if (new_root != old_root) {
       moving_roots.push_back(std::make_pair(old_root, new_root));
     }
@@ -283,9 +283,9 @@
   }
 }
 
-void Transaction::VisitStringLogs(RootCallback* callback, void* arg) {
+void Transaction::VisitStringLogs(RootVisitor* visitor) {
   for (InternStringLog& log : intern_string_logs_) {
-    log.VisitRoots(callback, arg);
+    log.VisitRoots(visitor);
   }
 }
 
@@ -421,16 +421,12 @@
   }
 }
 
-void Transaction::ObjectLog::VisitRoots(RootCallback* callback, void* arg) {
+void Transaction::ObjectLog::VisitRoots(RootVisitor* visitor) {
   for (auto it : field_values_) {
     FieldValue& field_value = it.second;
     if (field_value.kind == ObjectLog::kReference) {
-      mirror::Object* obj =
-          reinterpret_cast<mirror::Object*>(static_cast<uintptr_t>(field_value.value));
-      if (obj != nullptr) {
-        callback(&obj, arg, RootInfo(kRootUnknown));
-        field_value.value = reinterpret_cast<uintptr_t>(obj);
-      }
+      visitor->VisitRootIfNonNull(reinterpret_cast<mirror::Object**>(&field_value.value),
+                                  RootInfo(kRootUnknown));
     }
   }
 }
@@ -472,8 +468,8 @@
   }
 }
 
-void Transaction::InternStringLog::VisitRoots(RootCallback* callback, void* arg) {
-  callback(reinterpret_cast<mirror::Object**>(&str_), arg, RootInfo(kRootInternedString));
+void Transaction::InternStringLog::VisitRoots(RootVisitor* visitor) {
+  visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&str_), RootInfo(kRootInternedString));
 }
 
 void Transaction::ArrayLog::LogValue(size_t index, uint64_t value) {
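
Note: the object- and array-log hunks above follow a read/compare/reinsert idiom. Visiting a root may relocate the object under a moving collector, but both logs are maps keyed by the old object address, and keys cannot be rewritten while iterating; moved pairs are therefore parked in a list and re-keyed in a second pass (the re-insertion halves of those hunks fall outside the context shown). A distilled sketch of the idiom, using a hypothetical log_ map in place of object_logs_:

    // Pass 1: visit each root; a moving collector may rewrite new_root in place.
    std::list<std::pair<mirror::Object*, mirror::Object*>> moving_roots;
    for (auto& entry : log_) {
      mirror::Object* old_root = entry.first;
      mirror::Object* new_root = old_root;
      visitor->VisitRoot(&new_root, RootInfo(kRootUnknown));
      if (new_root != old_root) {
        moving_roots.push_back(std::make_pair(old_root, new_root));  // Defer the key change.
      }
    }
    // Pass 2: re-key the entries whose roots moved.
    for (const auto& pair : moving_roots) {
      auto it = log_.find(pair.first);
      CHECK(it != log_.end());
      log_.insert(std::make_pair(pair.second, it->second));  // Same log entry, new key.
      log_.erase(it);
    }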
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 1419a38..4d85662 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -100,7 +100,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(log_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg)
+  void VisitRoots(RootVisitor* visitor)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -116,7 +116,7 @@
     void LogReferenceValue(MemberOffset offset, mirror::Object* obj, bool is_volatile);
 
     void Undo(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    void VisitRoots(RootCallback* callback, void* arg);
+    void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
     size_t Size() const {
       return field_values_.size();
@@ -184,7 +184,7 @@
     void Undo(InternTable* intern_table)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-    void VisitRoots(RootCallback* callback, void* arg);
+    void VisitRoots(RootVisitor* visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
    private:
     mirror::String* str_;
@@ -207,13 +207,13 @@
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitObjectLogs(RootCallback* callback, void* arg)
+  void VisitObjectLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitArrayLogs(RootCallback* callback, void* arg)
+  void VisitArrayLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitStringLogs(RootCallback* callback, void* arg)
+  void VisitStringLogs(RootVisitor* visitor)
       EXCLUSIVE_LOCKS_REQUIRED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1d04192..c6db7e5 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -4351,12 +4351,12 @@
   verifier::RegTypeCache::ShutDown();
 }
 
-void MethodVerifier::VisitStaticRoots(RootCallback* callback, void* arg) {
-  RegTypeCache::VisitStaticRoots(callback, arg);
+void MethodVerifier::VisitStaticRoots(RootVisitor* visitor) {
+  RegTypeCache::VisitStaticRoots(visitor);
 }
 
-void MethodVerifier::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  reg_types_.VisitRoots(callback, arg, root_info);
+void MethodVerifier::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
+  reg_types_.VisitRoots(visitor, root_info);
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 6b813ef..c813634 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -225,9 +225,9 @@
   // Describe VRegs at the given dex pc.
   std::vector<int32_t> DescribeVRegs(uint32_t dex_pc);
 
-  static void VisitStaticRoots(RootCallback* callback, void* arg)
+  static void VisitStaticRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& roots)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& roots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Accessors used by the compiler via CompilerCallback
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 97d0cbe..c8aa4fd 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -778,8 +778,8 @@
   }
 }
 
-void RegType::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const {
-  klass_.VisitRootIfNonNull(callback, arg, root_info);
+void RegType::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) const {
+  klass_.VisitRootIfNonNull(visitor, root_info);
 }
 
 void UninitializedThisReferenceType::CheckInvariants() const {
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index d260650..e4d2c3e 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -262,7 +262,7 @@
 
   virtual ~RegType() {}
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 6e57857..b371d7e 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -557,33 +557,33 @@
   }
 }
 
-void RegTypeCache::VisitStaticRoots(RootCallback* callback, void* arg) {
+void RegTypeCache::VisitStaticRoots(RootVisitor* visitor) {
   // Visit the primitive types; this is required because, if there are no active verifiers,
   // they won't be in the entries array and therefore won't be visited as roots.
   if (primitive_initialized_) {
     RootInfo ri(kRootUnknown);
-    UndefinedType::GetInstance()->VisitRoots(callback, arg, ri);
-    ConflictType::GetInstance()->VisitRoots(callback, arg, ri);
-    BooleanType::GetInstance()->VisitRoots(callback, arg, ri);
-    ByteType::GetInstance()->VisitRoots(callback, arg, ri);
-    ShortType::GetInstance()->VisitRoots(callback, arg, ri);
-    CharType::GetInstance()->VisitRoots(callback, arg, ri);
-    IntegerType::GetInstance()->VisitRoots(callback, arg, ri);
-    LongLoType::GetInstance()->VisitRoots(callback, arg, ri);
-    LongHiType::GetInstance()->VisitRoots(callback, arg, ri);
-    FloatType::GetInstance()->VisitRoots(callback, arg, ri);
-    DoubleLoType::GetInstance()->VisitRoots(callback, arg, ri);
-    DoubleHiType::GetInstance()->VisitRoots(callback, arg, ri);
+    UndefinedType::GetInstance()->VisitRoots(visitor, ri);
+    ConflictType::GetInstance()->VisitRoots(visitor, ri);
+    BooleanType::GetInstance()->VisitRoots(visitor, ri);
+    ByteType::GetInstance()->VisitRoots(visitor, ri);
+    ShortType::GetInstance()->VisitRoots(visitor, ri);
+    CharType::GetInstance()->VisitRoots(visitor, ri);
+    IntegerType::GetInstance()->VisitRoots(visitor, ri);
+    LongLoType::GetInstance()->VisitRoots(visitor, ri);
+    LongHiType::GetInstance()->VisitRoots(visitor, ri);
+    FloatType::GetInstance()->VisitRoots(visitor, ri);
+    DoubleLoType::GetInstance()->VisitRoots(visitor, ri);
+    DoubleHiType::GetInstance()->VisitRoots(visitor, ri);
     for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
-      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(callback, arg, ri);
+      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(visitor, ri);
     }
   }
 }
 
-void RegTypeCache::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
+void RegTypeCache::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
   // Exclude the static roots that are visited by VisitStaticRoots().
   for (size_t i = primitive_count_; i < entries_.size(); ++i) {
-    entries_[i]->VisitRoots(callback, arg, root_info);
+    entries_[i]->VisitRoots(visitor, root_info);
   }
 }
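
Note: RegType::VisitRoots (reg_type.cc above) forwards through GcRoot<>::VisitRootIfNonNull rather than exposing a raw mirror::Object** slot. With GcRoot now storing a compressed reference, the visitor is handed the address of the compressed slot and may repoint it in place. A sketch of that path, consistent with the call sites in this diff (the real declaration lives in runtime/gc_root.h):

    // Sketch: GcRoot holds a 32-bit compressed reference; visiting passes the
    // compressed slot itself so a moving collector can update it in place.
    template<class MirrorType>
    class GcRoot {
     public:
      void VisitRoot(RootVisitor* visitor, const RootInfo& info) const
          SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        DCHECK(!IsNull());
        mirror::CompressedReference<mirror::Object>* roots[1] = { &root_ };
        visitor->VisitRoots(roots, 1u, info);  // Compressed-reference batch overload.
      }
      void VisitRootIfNonNull(RootVisitor* visitor, const RootInfo& info) const
          SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        if (!IsNull()) {
          VisitRoot(visitor, info);
        }
      }
      bool IsNull() const { return root_.IsNull(); }
     private:
      // Mutable so a const GcRoot can still be repointed by the collector.
      mutable mirror::CompressedReference<mirror::Object> root_;
    };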
 
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 01032a0..4b3105c 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -137,9 +137,9 @@
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+  void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void VisitStaticRoots(RootCallback* callback, void* arg)
+  static void VisitStaticRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private: