Add concurrent reference processing.

Concurrent reference processing currently works by going into native
code from java.lang.ref.Reference.get(). From there, we have a fast
path, taken when references aren't being processed, which returns the
referent without acquiring any locks. In the slow path we block until
reference processing is complete. It may be possible to improve the
slow path if the referent is blackened.

TODO: Investigate doing the fast path in Java code by using racy reads
of a static volatile boolean. This will work as long as there are no
suspend points between the boolean read and the referent read.

Bug: 14381653

Change-Id: I1546b55be4691fe4ff4aa6d857b234cce7187d87
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index b8051c9..ff2eda0 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -31,6 +31,7 @@
 #include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -166,18 +167,9 @@
 void MarkSweep::ProcessReferences(Thread* self) {
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &IsMarkedCallback,
-                               &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
-}
-
-void MarkSweep::PreProcessReferences() {
-  if (IsConcurrent()) {
-    // No reason to do this for non-concurrent GC since pre processing soft references only helps
-    // pauses.
-    timings_.NewSplit("PreProcessReferences");
-    GetHeap()->ProcessSoftReferences(timings_, clear_soft_references_, &IsMarkedCallback,
-                                     &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
-  }
+  GetHeap()->GetReferenceProcessor()->ProcessReferences(
+      true, &timings_, clear_soft_references_, &IsMarkedCallback, &MarkObjectCallback,
+      &ProcessMarkStackCallback, this);
 }
 
 void MarkSweep::PausePhase() {
@@ -192,7 +184,6 @@
     // Scan dirty objects, this is only required if we are not doing concurrent GC.
     RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty);
   }
-  ProcessReferences(self);
   {
     TimingLogger::ScopedSplit split("SwapStacks", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -210,6 +201,9 @@
   // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
   // reference to a string that is about to be swept.
   Runtime::Current()->DisallowNewSystemWeaks();
+  // Enable the reference processing slow path, needs to be done with mutators paused since there
+  // is no lock in the GetReferent fast path.
+  GetHeap()->GetReferenceProcessor()->EnableSlowPath();
 }
 
 void MarkSweep::PreCleanCards() {
@@ -265,7 +259,6 @@
   MarkReachableObjects();
   // Pre-clean dirtied cards to reduce pauses.
   PreCleanCards();
-  PreProcessReferences();
 }
 
 void MarkSweep::UpdateAndMarkModUnion() {
@@ -290,6 +283,8 @@
 void MarkSweep::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
+  // Process the references concurrently.
+  ProcessReferences(self);
   SweepSystemWeaks(self);
   Runtime::Current()->AllowNewSystemWeaks();
   {
@@ -1168,7 +1163,7 @@
   if (kCountJavaLangRefs) {
     ++reference_count_;
   }
-  heap_->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
@@ -1198,8 +1193,8 @@
   ScanObjectVisit(obj, mark_visitor, ref_visitor);
 }
 
-void MarkSweep::ProcessMarkStackPausedCallback(void* arg) {
-  reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(true);
+void MarkSweep::ProcessMarkStackCallback(void* arg) {
+  reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(false);
 }
 
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bfc70d1..3ebc0af 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -123,10 +123,6 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PreProcessReferences()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Update and mark references from immune spaces.
   void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -191,8 +187,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ProcessMarkStackPausedCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  static void ProcessMarkStackCallback(void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                        RootType root_type)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index f5d6299..cfe0489 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -30,6 +30,7 @@
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/reference_processor.h"
 #include "gc/space/bump_pointer_space.h"
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/image_space.h"
@@ -162,8 +163,9 @@
 void SemiSpace::ProcessReferences(Thread* self) {
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
-                               &MarkObjectCallback, &ProcessMarkStackCallback, this);
+  GetHeap()->GetReferenceProcessor()->ProcessReferences(
+      false, &timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+      &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
 void SemiSpace::MarkingPhase() {
@@ -698,7 +700,8 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
-  heap_->DelayReferenceReferent(klass, reference, MarkedForwardingAddressCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
+                                                         MarkedForwardingAddressCallback, this);
 }
 
 class SemiSpaceMarkObjectVisitor {