Add concurrent reference processing.

Concurrent reference processing currently works by going into native
code from java.lang.ref.Reference.get(). From there, we have a fast
path if the references aren't being processed which returns the
referent without needing to access any locks. In the slow path we
block until reference processing is complete. It may be possible to
improve the slow path if the referent is blackened.

TODO: Investigate doing the fast path in java code by using racy reads
of a static volatile boolean. This will work as long as there are no
suspend points inbetween the boolean read and referent read.

Bug: 14381653

Change-Id: I1546b55be4691fe4ff4aa6d857b234cce7187d87
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bfc70d1..3ebc0af 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -123,10 +123,6 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PreProcessReferences()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Update and mark references from immune spaces.
   void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -191,8 +187,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ProcessMarkStackPausedCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  static void ProcessMarkStackCallback(void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                        RootType root_type)