Checkpoint root marking.

Added a thread list checkpoint function that goes through every thread and runs
the checkpoint on each one. Threads that are runnable run the checkpoint
callback themselves at their next suspend check, while suspended threads are
left suspended but have the callback called on them.

Added a checkpoint visitor member to each thread; this visitor is called when
the checkpoint request flag is set during transitions from runnable to
suspended.

Using the checkpoint to mark the roots reduces the first pause of partial /
full gc to around 1 ms.

Change-Id: I97239cc72ee0e4a3397e9138a62ee559268dce0a
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 980eed1..8637370 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -19,6 +19,7 @@
 #include <climits>
 #include <vector>
 
+#include "barrier.h"
 #include "card_table.h"
 #include "class_loader.h"
 #include "dex_cache.h"
@@ -37,10 +38,10 @@
 #include "thread.h"
 #include "thread_list.h"
 
-static const bool kUseMarkStackPrefetch = true;
-
 namespace art {
 
+static const bool kUseMarkStackPrefetch = true;
+
 class SetFingerVisitor {
  public:
   SetFingerVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {
@@ -68,7 +69,8 @@
       phantom_reference_list_(NULL),
       cleared_reference_list_(NULL),
       freed_bytes_(0), freed_objects_(0),
-      class_count_(0), array_count_(0), other_count_(0) {
+      class_count_(0), array_count_(0), other_count_(0),
+      gc_barrier_(new Barrier) {
   DCHECK(mark_stack_ != NULL);
 }
 
@@ -200,6 +202,10 @@
   Runtime::Current()->VisitNonConcurrentRoots(MarkObjectVisitor, this);
 }
 
+void MarkSweep::MarkNonThreadRoots() {
+  Runtime::Current()->VisitNonThreadRoots(MarkObjectVisitor, this);
+}
+
 void MarkSweep::MarkConcurrentRoots() {
   Runtime::Current()->VisitConcurrentRoots(MarkObjectVisitor, this);
 }
@@ -519,6 +525,38 @@
   Thread* self;
 };
 
+class CheckpointMarkThreadRoots : public Thread::CheckpointFunction {
+ public:
+  CheckpointMarkThreadRoots(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
+
+  }
+
+  virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+    // Note: self is not necessarily equal to thread since thread may be suspended.
+    Thread* self = Thread::Current();
+    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc);
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    thread->VisitRoots(MarkSweep::MarkObjectVisitor, mark_sweep_);
+    mark_sweep_->GetBarrier().Pass(self);
+  }
+
+ private:
+  MarkSweep* mark_sweep_;
+};
+
+Barrier& MarkSweep::GetBarrier() {
+  return *gc_barrier_;
+}
+
+void MarkSweep::MarkRootsCheckpoint() {
+  UniquePtr<CheckpointMarkThreadRoots> check_point(new CheckpointMarkThreadRoots(this));
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  // Increment the count of the barrier. If all of the checkpoints have already finished then the
+  // counter will hit 0 and we continue. Otherwise we are still waiting for some checkpoints, so
+  // the counter will go positive and we will unblock when it hits zero.
+  gc_barrier_->Increment(Thread::Current(), thread_list->RunCheckpoint(check_point.get()));
+}
+
 void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
   size_t freed_objects = num_ptrs;
   size_t freed_bytes = 0;
@@ -538,8 +576,7 @@
     freed_bytes += space->FreeList(self, num_ptrs, ptrs);
   } else {
     for (size_t i = 0; i < num_ptrs; ++i) {
-      Object* obj = static_cast<Object*>(ptrs[i]);
-      freed_bytes += space->Free(self, obj);
+      freed_bytes += space->Free(self, ptrs[i]);
     }
   }