Concurrent class linker and intern table root marking

We now mark the class linker and intern table roots concurrently
(with mutators unpaused), and re-mark these roots in the second pause
only if they have been dirtied in the meantime.

Reduces root marking time by ~1ms for each pause.
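
The pattern, in miniature: each root container keeps a dirty flag that
mutators set whenever they publish a new root, and that the GC clears when
it walks the container. Below is a minimal sketch of that pattern; the
RootContainer class and the MarkConcurrentRoots driver are illustrative
stand-ins for ClassLinker/InternTable and the mark-sweep plumbing, not
real ART APIs.

    #include <mutex>
    #include <vector>

    struct Object;
    typedef void RootVisitor(const Object* root, void* arg);

    class RootContainer {
     public:
      RootContainer() : is_dirty_(false) {}

      // Mutator side: publishing a new root dirties the container so the
      // GC knows to re-scan it during the final pause.
      void Add(const Object* root) {
        std::lock_guard<std::mutex> lock(lock_);
        roots_.push_back(root);
        is_dirty_ = true;
      }

      // GC side: a full walk cleans the flag. Because Add() takes the
      // same lock, any root published after this walk re-dirties the
      // container and will be caught in the final pause.
      void VisitRoots(RootVisitor* visitor, void* arg) {
        std::lock_guard<std::mutex> lock(lock_);
        for (size_t i = 0; i < roots_.size(); ++i) {
          visitor(roots_[i], arg);
        }
        is_dirty_ = false;
      }

      // Only consulted while mutators are suspended (the second pause),
      // so an unlocked read is sufficient.
      bool IsDirty() const { return is_dirty_; }

      // Called at the start of a GC to force at least one visit.
      void Dirty() {
        std::lock_guard<std::mutex> lock(lock_);
        is_dirty_ = true;
      }

     private:
      std::mutex lock_;
      std::vector<const Object*> roots_;
      bool is_dirty_;
    };

    // Simplified GC driver: one pass with mutators running, then a
    // conditional re-mark inside the stop-the-world pause.
    void MarkConcurrentRoots(RootContainer* roots, RootVisitor* visitor,
                             void* arg, bool final_pause) {
      if (!final_pause || roots->IsDirty()) {
        roots->VisitRoots(visitor, arg);
      }
    }

In the actual patch, Runtime::DirtyRoots() called from the start of the GC
plays the role of Dirty() above: it guarantees every container is visited
at least once per collection.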

Change-Id: I833fc557bac9a2930868db715587318293fa4655
diff --git a/src/class_linker.cc b/src/class_linker.cc
index 83661cb..0903781 100644
--- a/src/class_linker.cc
+++ b/src/class_linker.cc
@@ -222,6 +222,7 @@
       class_roots_(NULL),
       array_iftable_(NULL),
       init_done_(false),
+      is_dirty_(false),
       intern_table_(intern_table) {
   CHECK_EQ(arraysize(class_roots_descriptors_), size_t(kClassRootsMax));
 }
@@ -1043,7 +1044,7 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
+void ClassLinker::VisitRoots(Heap::RootVisitor* visitor, void* arg) {
   visitor(class_roots_, arg);
   Thread* self = Thread::Current();
   {
@@ -1065,6 +1066,7 @@
   }
 
   visitor(array_iftable_, arg);
+  is_dirty_ = false;
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) const {
@@ -1746,6 +1748,7 @@
   CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()));
   dex_caches_.push_back(dex_cache.get());
   dex_cache->SetDexFile(&dex_file);
+  Dirty();
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file) {
@@ -1990,6 +1993,7 @@
     return existing;
   }
   classes.insert(std::make_pair(hash, klass));
+  Dirty();
   return NULL;
 }
 
diff --git a/src/class_linker.h b/src/class_linker.h
index 096d602..460fcd2 100644
--- a/src/class_linker.h
+++ b/src/class_linker.h
@@ -262,7 +262,7 @@
   void VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg) const
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_);
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
 
   DexCache* FindDexCache(const DexFile& dex_file) const
@@ -378,6 +378,14 @@
   pid_t GetClassesLockOwner(); // For SignalCatcher.
   pid_t GetDexLockOwner(); // For SignalCatcher.
 
+  bool IsDirty() const {
+    return is_dirty_;
+  }
+
+  void Dirty() {
+    is_dirty_ = true;
+  }
+
  private:
   explicit ClassLinker(InternTable*);
 
@@ -636,6 +644,7 @@
   IfTable* array_iftable_;
 
   bool init_done_;
+  bool is_dirty_;
 
   InternTable* intern_table_;
 
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 03bbb6a..e4cb4d6 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -79,6 +79,8 @@
   FindDefaultMarkBitmap();
   // TODO: if concurrent, enable card marking in compiler
   // TODO: check that the mark bitmap is entirely clear.
+  // Mark any concurrent roots as dirty since we need to scan them at least once during this GC.
+  Runtime::Current()->DirtyRoots();
 }
 
 void MarkSweep::FindDefaultMarkBitmap() {
@@ -195,7 +197,11 @@
 
 // Marks all objects in the root set.
 void MarkSweep::MarkRoots() {
-  Runtime::Current()->VisitRoots(MarkObjectVisitor, this);
+  Runtime::Current()->VisitNonConcurrentRoots(MarkObjectVisitor, this);
+}
+
+void MarkSweep::MarkConcurrentRoots() {
+  Runtime::Current()->VisitConcurrentRoots(MarkObjectVisitor, this);
 }
 
 class CheckObjectVisitor {
diff --git a/src/gc/mark_sweep.h b/src/gc/mark_sweep.h
index 76c5428..ed74f99 100644
--- a/src/gc/mark_sweep.h
+++ b/src/gc/mark_sweep.h
@@ -52,6 +52,9 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void MarkConcurrentRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // Verify that image roots point to only marked objects within the alloc space.
   void VerifyImageRoots() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
diff --git a/src/heap.cc b/src/heap.cc
index 98845d8..4ca80a6 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -1045,6 +1045,7 @@
     mark_sweep.FindDefaultMarkBitmap();
 
     mark_sweep.MarkRoots();
+    mark_sweep.MarkConcurrentRoots();
     timings.AddSplit("MarkRoots");
 
     // Roots are marked on the bitmap and the mark_stack is empty.
@@ -1615,6 +1616,10 @@
       root_end = NanoTime();
       timings.AddSplit("RootEnd");
 
+      // Mark the roots that can be marked concurrently.
+      mark_sweep.MarkConcurrentRoots();
+      timings.AddSplit("MarkConcurrentRoots");
+
       WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       UpdateAndMarkModUnion(&mark_sweep, timings, gc_type);
 
diff --git a/src/intern_table.cc b/src/intern_table.cc
index 5ad3958..817ce1e 100644
--- a/src/intern_table.cc
+++ b/src/intern_table.cc
@@ -21,7 +21,7 @@
 
 namespace art {
 
-InternTable::InternTable() : intern_table_lock_("InternTable lock") {
+InternTable::InternTable() : intern_table_lock_("InternTable lock"), is_dirty_(false) {
 }
 
 size_t InternTable::Size() const {
@@ -36,12 +36,13 @@
      << image_strong_interns_.size() << " image strong\n";
 }
 
-void InternTable::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
+void InternTable::VisitRoots(Heap::RootVisitor* visitor, void* arg) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   typedef Table::const_iterator It; // TODO: C++0x auto
   for (It it = strong_interns_.begin(), end = strong_interns_.end(); it != end; ++it) {
     visitor(it->second, arg);
   }
+  is_dirty_ = false;
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
@@ -97,6 +98,9 @@
       return image;
     }
 
+    // Mark as dirty so that we rescan the roots.
+    Dirty();
+
     // There is no match in the strong table, check the weak table.
     String* weak = Lookup(weak_interns_, s, hash_code);
     if (weak != NULL) {
diff --git a/src/intern_table.h b/src/intern_table.h
index 6f56773..93d20b2 100644
--- a/src/intern_table.h
+++ b/src/intern_table.h
@@ -65,10 +65,15 @@
 
   size_t Size() const;
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const;
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg);
 
   void DumpForSigQuit(std::ostream& os) const;
 
+  bool IsDirty() const { return is_dirty_; }
+  void Dirty() {
+    is_dirty_ = true;
+  }
+
  private:
   typedef std::multimap<int32_t, String*> Table;
 
@@ -81,6 +86,7 @@
   void Remove(Table& table, const String* s, uint32_t hash_code);
 
   mutable Mutex intern_table_lock_;
+  bool is_dirty_;
   Table image_strong_interns_ GUARDED_BY(intern_table_lock_);
   Table strong_interns_ GUARDED_BY(intern_table_lock_);
   Table weak_interns_ GUARDED_BY(intern_table_lock_);
diff --git a/src/runtime.cc b/src/runtime.cc
index f93d687..4b7338b 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -993,10 +993,17 @@
   thread_list_->Unregister(self);
 }
 
-void Runtime::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
+void Runtime::VisitConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
+  if (intern_table_->IsDirty()) {
+    intern_table_->VisitRoots(visitor, arg);
+  }
+  if (class_linker_->IsDirty()) {
+    class_linker_->VisitRoots(visitor, arg);
+  }
+}
+
+void Runtime::VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
   Dbg::VisitRoots(visitor, arg);
-  class_linker_->VisitRoots(visitor, arg);
-  intern_table_->VisitRoots(visitor, arg);
   java_vm_->VisitRoots(visitor, arg);
   thread_list_->VisitRoots(visitor, arg);
   if (pre_allocated_OutOfMemoryError_ != NULL) {
@@ -1013,6 +1020,16 @@
   }
 }
 
+void Runtime::DirtyRoots() {
+  intern_table_->Dirty();
+  class_linker_->Dirty();
+}
+
+void Runtime::VisitRoots(Heap::RootVisitor* visitor, void* arg) {
+  VisitConcurrentRoots(visitor, arg);
+  VisitNonConcurrentRoots(visitor, arg);
+}
+
 void Runtime::SetJniDlsymLookupStub(ByteArray* jni_stub_array) {
   CHECK(jni_stub_array != NULL)  << " jni_stub_array=" << jni_stub_array;
   CHECK(jni_stub_array_ == NULL || jni_stub_array_ == jni_stub_array)
diff --git a/src/runtime.h b/src/runtime.h
index a6c662c..44823a0 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -224,9 +224,20 @@
     return "2.0.0";
   }
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
+  // Force all the roots that can be marked concurrently to be dirty.
+  void DirtyRoots();
+
+  // Visit all the roots.
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Visit all of the roots we can safely visit concurrently.
+  void VisitConcurrentRoots(Heap::RootVisitor* visitor, void* arg);
+
+  // Visit all other roots, which must be visited with mutators suspended.
+  void VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool HasJniDlsymLookupStub() const {
     return jni_stub_array_ != NULL;
   }