Refactor and improve GC root handling

Changed GcRoot to use compressed references. Changed root visiting to
use virtual functions instead of function pointers. Changed the root
visiting interface to take an array of roots instead of a single root
at a time. Added a buffered root marking helper to amortize the virtual
dispatch overhead per root.
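
For reference, the batched visiting interface below matches the
overrides in the diff; the buffered helper is only sketched from its
description here (the name BufferedRootVisitor, kBufferSize, and the
exact methods are illustrative, see gc_root.h for the real
definitions):

  class RootVisitor {
   public:
    virtual ~RootVisitor() {}
    // One virtual call covers a whole batch of roots.
    virtual void VisitRoots(mirror::Object*** roots, size_t count,
                            const RootInfo& info) = 0;
    virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                            size_t count, const RootInfo& info) = 0;
  };

  // Sketch of the buffered helper: single roots are accumulated and
  // flushed to the visitor in batches, so the per-root virtual
  // dispatch is amortized.
  class BufferedRootVisitor {
   public:
    BufferedRootVisitor(RootVisitor* visitor, const RootInfo& info)
        : visitor_(visitor), info_(info) {}
    ~BufferedRootVisitor() { Flush(); }
    void VisitRoot(mirror::CompressedReference<mirror::Object>* root) {
      if (index_ == kBufferSize) {
        Flush();
      }
      roots_[index_++] = root;
    }
    void Flush() {
      if (index_ != 0) {
        visitor_->VisitRoots(roots_, index_, info_);
        index_ = 0;
      }
    }
   private:
    static constexpr size_t kBufferSize = 64;  // Illustrative.
    RootVisitor* const visitor_;
    const RootInfo info_;
    mirror::CompressedReference<mirror::Object>* roots_[kBufferSize];
    size_t index_ = 0;
  };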

Root marking is a bit faster on EvaluateAndApplyChanges due to batch
marking. Pause times are unaffected.

Mips64 is untested but may work.

Before:
MarkConcurrentRoots: Sum: 67.678ms 99% C.I. 2us-664.999us Avg: 161.138us Max: 671us

After:
MarkConcurrentRoots: Sum: 54.806ms 99% C.I. 2us-499.986us Avg: 136.333us Max: 602us

Bug: 19264997

Change-Id: I0a71ebb5928f205b9b3f7945b25db6489d5657ca
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index db7a4ef..56919bd 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -174,7 +174,7 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(ConcurrentCopying::ProcessRootCallback, concurrent_copying_);
+    thread->VisitRoots(concurrent_copying_);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
@@ -208,7 +208,7 @@
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
-      Runtime::Current()->VisitTransactionRoots(ConcurrentCopying::ProcessRootCallback, cc);
+      Runtime::Current()->VisitTransactionRoots(cc);
     }
   }
 
@@ -332,22 +332,20 @@
   }
   {
     TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitConstantRoots(this);
   }
   {
     TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(ProcessRootCallback,
-                                                     this, kVisitRootFlagAllRoots);
+    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
-    Runtime::Current()->VisitNonThreadRoots(ProcessRootCallback, this);
+    Runtime::Current()->VisitNonThreadRoots(this);
   }
 
   // Immune spaces.
@@ -486,7 +484,7 @@
 
 // The following visitors are used to verify that there are no
 // references to the from-space left after marking.
-class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor {
+class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
@@ -516,16 +514,14 @@
     }
   }
 
-  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector);
+  void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != nullptr);
-    visitor(*root);
+    operator()(root);
   }
 
  private:
-  ConcurrentCopying* collector_;
+  ConcurrentCopying* const collector_;
 };
 
 class ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor {
@@ -594,8 +590,8 @@
   // Roots.
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    Runtime::Current()->VisitRoots(
-        ConcurrentCopyingVerifyNoFromSpaceRefsVisitor::RootCallback, this);
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    Runtime::Current()->VisitRoots(&ref_visitor);
   }
   // The to-space.
   region_space_->WalkToSpace(ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor::ObjectCallback,
@@ -1087,11 +1083,6 @@
   }
 }
 
-void ConcurrentCopying::ProcessRootCallback(mirror::Object** root, void* arg,
-                                            const RootInfo& /*root_info*/) {
-  reinterpret_cast<ConcurrentCopying*>(arg)->Process(root);
-}
-
 // Used to scan ref fields of an object.
 class ConcurrentCopyingRefFieldsVisitor {
  public:
@@ -1144,25 +1135,54 @@
       offset, expected_ref, new_ref));
 }
 
-// Process a root.
-void ConcurrentCopying::Process(mirror::Object** root) {
-  mirror::Object* ref = *root;
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
-  mirror::Object* to_ref = Mark(ref);
-  if (to_ref == ref) {
-    return;
-  }
-  Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-  mirror::Object* expected_ref = ref;
-  mirror::Object* new_ref = to_ref;
-  do {
-    if (expected_ref != addr->LoadRelaxed()) {
-      // It was updated by the mutator.
-      break;
+// Process some roots.
+void ConcurrentCopying::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::Object** root = roots[i];
+    mirror::Object* ref = *root;
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
     }
-  } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+    mirror::Object* expected_ref = ref;
+    mirror::Object* new_ref = to_ref;
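+    // Weak CAS: install to_ref unless a mutator has already updated the slot.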
+    do {
+      if (expected_ref != addr->LoadRelaxed()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
+}
+
+void ConcurrentCopying::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    mirror::CompressedReference<mirror::Object>* root = roots[i];
+    mirror::Object* ref = root->AsMirrorPtr();
+    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
+      continue;
+    }
+    mirror::Object* to_ref = Mark(ref);
+    if (to_ref == ref) {
+      continue;
+    }
+    auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
+    auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
+    auto new_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(to_ref);
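+    // Same weak CAS, performed on the compressed reference slot.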
+    do {
+      if (ref != addr->LoadRelaxed().AsMirrorPtr()) {
+        // It was updated by the mutator.
+        break;
+      }
+    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+  }
 }
 
 // Fill the given memory block with a dummy object. Used to fill in a
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index bbb551a..a87053d 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -192,9 +192,11 @@
   void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Process(mirror::Object** root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void ProcessRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index ed5207a..c5a8d5d 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -22,6 +22,7 @@
 #include "base/timing_logger.h"
 #include "gc/collector_type.h"
 #include "gc/gc_cause.h"
+#include "gc_root.h"
 #include "gc_type.h"
 #include <stdint.h>
 #include <vector>
@@ -112,7 +113,7 @@
   DISALLOW_COPY_AND_ASSIGN(Iteration);
 };
 
-class GarbageCollector {
+class GarbageCollector : public RootVisitor {
  public:
   class SCOPED_LOCKABLE ScopedPause {
    public:
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index d1ce0bc..8902df8 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -309,19 +309,57 @@
   reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void MarkCompact::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkCompact*>(arg)->MarkObject(*root);
-}
-
-void MarkCompact::UpdateRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  mirror::Object* obj = *root;
-  mirror::Object* new_obj = reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
-  if (obj != new_obj) {
-    *root = new_obj;
-    DCHECK(new_obj != nullptr);
+void MarkCompact::VisitRoots(
+    mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(*roots[i]);
   }
 }
 
+void MarkCompact::VisitRoots(
+    mirror::CompressedReference<mirror::Object>** roots, size_t count,
+    const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]->AsMirrorPtr());
+  }
+}
+
+class UpdateRootVisitor : public RootVisitor {
+ public:
+  explicit UpdateRootVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = *roots[i];
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        *roots[i] = new_obj;
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object* obj = roots[i]->AsMirrorPtr();
+      mirror::Object* new_obj = collector_->GetMarkedForwardAddress(obj);
+      if (obj != new_obj) {
+        roots[i]->Assign(new_obj);
+        DCHECK(new_obj != nullptr);
+      }
+    }
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
 class UpdateObjectReferencesVisitor {
  public:
   explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
@@ -339,7 +377,8 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime* runtime = Runtime::Current();
   // Update roots.
-  runtime->VisitRoots(UpdateRootCallback, this);
+  UpdateRootVisitor update_root_visitor(this);
+  runtime->VisitRoots(&update_root_visitor);
   // Update object references in mod union tables and spaces.
   for (const auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
@@ -396,7 +435,7 @@
 // Marks all objects in the root set.
 void MarkCompact::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 06304bf..4337644 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -114,8 +114,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -245,6 +249,8 @@
   friend class MoveObjectVisitor;
   friend class UpdateObjectReferencesVisitor;
   friend class UpdateReferenceVisitor;
+  friend class UpdateRootVisitor;
+
   DISALLOW_COPY_AND_ASSIGN(MarkCompact);
 };
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index ee4e752..79d1034 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -462,42 +462,66 @@
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
+class VerifyRootMarkedVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootMarkedVisitor(MarkSweep* collector) : collector_(collector) { }
+
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    CHECK(collector_->IsMarked(root)) << info.ToString();
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
+
+void MarkSweep::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(*roots[i]);
+  }
 }
 
-void MarkSweep::VerifyRootMarked(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  CHECK(reinterpret_cast<MarkSweep*>(arg)->IsMarked(*root));
+void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObjectNonNull(roots[i]->AsMirrorPtr());
+  }
 }
 
-void MarkSweep::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(*root);
-}
+class VerifyRootVisitor : public SingleRootVisitor {
+ public:
+  explicit VerifyRootVisitor(MarkSweep* collector) : collector_(collector) { }
 
-void MarkSweep::VerifyRootCallback(Object** root, void* arg, const RootInfo& root_info) {
-  reinterpret_cast<MarkSweep*>(arg)->VerifyRoot(*root, root_info);
-}
+  void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->VerifyRoot(root, info);
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
 
 void MarkSweep::VerifyRoot(const Object* root, const RootInfo& root_info) {
   // See if the root is on any space bitmap.
   if (heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
     if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-      LOG(ERROR) << "Found invalid root: " << root << " ";
-      root_info.Describe(LOG(ERROR));
+      LOG(ERROR) << "Found invalid root: " << root << " " << root_info;
     }
   }
 }
 
 void MarkSweep::VerifyRoots() {
-  Runtime::Current()->GetThreadList()->VisitRoots(VerifyRootCallback, this);
+  VerifyRootVisitor visitor(this);
+  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
-    Runtime::Current()->VisitRoots(MarkRootCallback, this);
+    Runtime::Current()->VisitRoots(this);
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
     MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
@@ -510,13 +534,13 @@
 
 void MarkSweep::MarkNonThreadRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitNonThreadRoots(this);
 }
 
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
+  Runtime::Current()->VisitConcurrentRoots(this, flags);
 }
 
 class ScanObjectVisitor {
@@ -932,13 +956,12 @@
 void MarkSweep::ReMarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  Runtime::Current()->VisitRoots(
-      MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
-                                                          kVisitRootFlagStopLoggingNewRoots |
-                                                          kVisitRootFlagClearRootLog));
+  Runtime::Current()->VisitRoots(this, static_cast<VisitRootFlags>(
+      kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots | kVisitRootFlagClearRootLog));
   if (kVerifyRootsMarked) {
     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
-    Runtime::Current()->VisitRoots(VerifyRootMarked, this);
+    VerifyRootMarkedVisitor visitor(this);
+    Runtime::Current()->VisitRoots(&visitor);
   }
 }
 
@@ -968,7 +991,7 @@
   Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
-class CheckpointMarkThreadRoots : public Closure {
+class CheckpointMarkThreadRoots : public Closure, public RootVisitor {
  public:
   explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep,
                                      bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
@@ -977,13 +1000,30 @@
             revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
   }
 
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(*roots[i]);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mark_sweep_->MarkObjectNonNullParallel(roots[i]->AsMirrorPtr());
+    }
+  }
+
   virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
     ATRACE_BEGIN("Marking thread roots");
     // Note: self is not necessarily equal to thread since thread may be suspended.
-    Thread* self = Thread::Current();
+    Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
+    thread->VisitRoots(this);
     ATRACE_END();
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
       ATRACE_BEGIN("RevokeRosAllocThreadLocalBuffers");
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3f99e21..31cea17 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -185,11 +185,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void VerifyRootMarked(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -197,9 +198,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void MarkRootParallelCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object.
   void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -250,9 +248,8 @@
   // whether or not we care about pauses.
   size_t GetThreadCount(bool paused) const;
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info);
-
-  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Push a single reference on a mark stack.
   void PushOnMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -326,18 +323,21 @@
   friend class CardScanTask;
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
+  friend class CheckpointMarkThreadRoots;
   friend class art::gc::Heap;
+  friend class FifoMarkStackChunk;
   friend class MarkObjectVisitor;
+  template<bool kUseFinger> friend class MarkStackTask;
+  friend class MarkSweepMarkObjectSlowPath;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
-  friend class ModUnionVisitor;
+  friend class ModUnionScanImageRootVisitor;
   friend class ModUnionTableBitmap;
   friend class ModUnionTableReferenceCache;
-  friend class ModUnionScanImageRootVisitor;
-  template<bool kUseFinger> friend class MarkStackTask;
-  friend class FifoMarkStackChunk;
-  friend class MarkSweepMarkObjectSlowPath;
+  friend class ModUnionVisitor;
+  friend class VerifyRootMarkedVisitor;
+  friend class VerifyRootVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(MarkSweep);
 };
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index b3d59f2..dbf01d8 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -603,18 +603,29 @@
   reinterpret_cast<SemiSpace*>(arg)->DelayReferenceReferent(klass, ref);
 }
 
-void SemiSpace::MarkRootCallback(Object** root, void* arg, const RootInfo& /*root_info*/) {
-  auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
-  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
-  if (*root != ref.AsMirrorPtr()) {
-    *root = ref.AsMirrorPtr();
+void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    auto* root = roots[i];
+    auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
+    MarkObject(&ref);
+    if (*root != ref.AsMirrorPtr()) {
+      *root = ref.AsMirrorPtr();
+    }
+  }
+}
+
+void SemiSpace::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                           const RootInfo& info ATTRIBUTE_UNUSED) {
+  for (size_t i = 0; i < count; ++i) {
+    MarkObject(roots[i]);
   }
 }
 
 // Marks all objects in the root set.
 void SemiSpace::MarkRoots() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+  Runtime::Current()->VisitRoots(this);
 }
 
 bool SemiSpace::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* object,
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 192fb14..61fbead 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -98,7 +98,7 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Returns the new address of the object.
+  // Updates obj_ptr if the object has moved.
   template<bool kPoisonReferences>
   void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -133,8 +133,12 @@
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);