Avoid marking old class linker and intern table roots during pause.

The new root visiting logic has a concept of a root log, which holds
the roots that were added since the start of the GC. This is an
optimization: during the pause (or pre-cleaning) we only need to mark
these newly added roots, since the other intern table and class linker
roots were already marked concurrently at the start of the GC.
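
To make the intent concrete, here is a minimal, self-contained sketch of
the flag-driven visiting scheme. The types (RootHolder, Add) are
illustrative placeholders, not the actual ART classes; the real
implementation is in the diff below:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  enum VisitRootFlags : uint8_t {
    kVisitRootFlagAllRoots = 0x1,
    kVisitRootFlagNewRoots = 0x2,
    kVisitRootFlagStartLoggingNewRoots = 0x4,
    kVisitRootFlagStopLoggingNewRoots = 0x8,
    kVisitRootFlagClearRootLog = 0x10,
  };

  using RootCallback = void (*)(int* root);

  class RootHolder {
   public:
    void Add(int root) {
      roots_.push_back(root);
      if (log_new_roots_) {
        new_root_indices_.push_back(roots_.size() - 1);  // Remember only the delta.
      }
    }

    void VisitRoots(RootCallback callback, VisitRootFlags flags) {
      if ((flags & kVisitRootFlagAllRoots) != 0) {
        for (int& root : roots_) {
          callback(&root);
        }
      } else if ((flags & kVisitRootFlagNewRoots) != 0) {
        for (size_t index : new_root_indices_) {
          callback(&roots_[index]);  // Only roots added since logging started.
        }
      }
      if ((flags & kVisitRootFlagClearRootLog) != 0) {
        new_root_indices_.clear();
      }
      if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
        log_new_roots_ = true;
      } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
        log_new_roots_ = false;
      }
    }

   private:
    bool log_new_roots_ = false;
    std::vector<int> roots_;
    std::vector<size_t> new_root_indices_;
  };

  int main() {
    RootHolder holder;
    holder.Add(1);
    // Concurrent phase (cf. MarkSweep::MarkRoots): mark everything, start logging.
    holder.VisitRoots([](int* r) { std::printf("mark %d\n", *r); },
                      static_cast<VisitRootFlags>(kVisitRootFlagAllRoots |
                                                  kVisitRootFlagStartLoggingNewRoots));
    holder.Add(2);  // Registered during the concurrent GC, so it lands in the log.
    // Pause (cf. MarkSweep::ReMarkRoots): revisit only the logged roots, clear the log.
    holder.VisitRoots([](int* r) { std::printf("remark %d\n", *r); },
                      static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
                                                  kVisitRootFlagStopLoggingNewRoots |
                                                  kVisitRootFlagClearRootLog));
    return 0;
  }

In the real patch the same pattern is applied to ClassLinker and
InternTable, with the additional wrinkle that a moving GC may relocate a
logged root; in that case the owning table entry is patched to the new
address (see the lower_bound loops in the diff).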

Before (EvaluateAndApplyChanges):
MarkConcurrentRoots:	Sum: 605.193ms
After:
MarkConcurrentRoots:	Sum: 271.858ms

This should also reduce the pathological GC pauses that could previously
occur when the intern table or class linker became "dirty" during the
concurrent GC.

Change-Id: I433fab021f2c339d50c35aaae7161a50a0901dec
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 8366e71..e5ca721 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -178,8 +178,8 @@
       array_iftable_(nullptr),
       find_array_class_cache_next_victim_(0),
       init_done_(false),
-      dex_caches_dirty_(false),
-      class_table_dirty_(false),
+      log_new_dex_caches_roots_(false),
+      log_new_class_table_roots_(false),
       intern_table_(intern_table),
       portable_resolution_trampoline_(nullptr),
       quick_resolution_trampoline_(nullptr),
@@ -1059,32 +1059,61 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
+void ClassLinker::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
   callback(reinterpret_cast<mirror::Object**>(&class_roots_), arg, 0, kRootVMInternal);
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
-    if (!only_dirty || dex_caches_dirty_) {
+    if ((flags & kVisitRootFlagAllRoots) != 0) {
       for (mirror::DexCache*& dex_cache : dex_caches_) {
         callback(reinterpret_cast<mirror::Object**>(&dex_cache), arg, 0, kRootVMInternal);
-        DCHECK(dex_cache != nullptr);
       }
-      if (clean_dirty) {
-        dex_caches_dirty_ = false;
+    } else if ((flags & kVisitRootFlagNewRoots) != 0) {
+      for (size_t index : new_dex_cache_roots_) {
+        callback(reinterpret_cast<mirror::Object**>(&dex_caches_[index]), arg, 0, kRootVMInternal);
       }
     }
+    if ((flags & kVisitRootFlagClearRootLog) != 0) {
+      new_dex_cache_roots_.clear();
+    }
+    if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
+      log_new_dex_caches_roots_ = true;
+    } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
+      log_new_dex_caches_roots_ = false;
+    }
   }
   {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    if (!only_dirty || class_table_dirty_) {
+    if ((flags & kVisitRootFlagAllRoots) != 0) {
       for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
         callback(reinterpret_cast<mirror::Object**>(&it.second), arg, 0, kRootStickyClass);
-        DCHECK(it.second != nullptr);
       }
-      if (clean_dirty) {
-        class_table_dirty_ = false;
+    } else if ((flags & kVisitRootFlagNewRoots) != 0) {
+      for (auto& pair : new_class_roots_) {
+        mirror::Object* old_ref = pair.second;
+        callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootStickyClass);
+        if (UNLIKELY(pair.second != old_ref)) {
+          // Uh oh, the GC moved a root in the log. We need to search class_table_ and update the
+          // corresponding object. This is slow, but luckily for us, this may only happen with a
+          // concurrent moving GC.
+          for (auto it = class_table_.lower_bound(pair.first), end = class_table_.end();
+              it != end && it->first == pair.first; ++it) {
+            // If the class stored matches the old class, update it to the new value.
+            if (old_ref == it->second) {
+              it->second = pair.second;
+            }
+          }
+        }
       }
     }
+    if ((flags & kVisitRootFlagClearRootLog) != 0) {
+      new_class_roots_.clear();
+    }
+    if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
+      log_new_class_table_roots_ = true;
+    } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
+      log_new_class_table_roots_ = false;
+    }
     // We deliberately ignore the class roots in the image since we
     // handle image roots by using the MS/CMS rescanning of dirty cards.
   }
@@ -1094,7 +1123,6 @@
     if (find_array_class_cache_[i] != nullptr) {
       callback(reinterpret_cast<mirror::Object**>(&find_array_class_cache_[i]), arg, 0,
                kRootVMInternal);
-      DCHECK(find_array_class_cache_[i] != nullptr);
     }
   }
 }
@@ -1998,7 +2026,10 @@
       << dex_cache->GetLocation()->ToModifiedUtf8() << " " << dex_file.GetLocation();
   dex_caches_.push_back(dex_cache.get());
   dex_cache->SetDexFile(&dex_file);
-  dex_caches_dirty_ = true;
+  if (log_new_dex_caches_roots_) {
+    // TODO: This is not safe if we can remove dex caches.
+    new_dex_cache_roots_.push_back(dex_caches_.size() - 1);
+  }
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file) {
@@ -2275,7 +2306,9 @@
   }
   VerifyObject(klass);
   class_table_.insert(std::make_pair(hash, klass));
-  class_table_dirty_ = true;
+  if (log_new_class_table_roots_) {
+    new_class_roots_.push_back(std::make_pair(hash, klass));
+  }
   return NULL;
 }
 
@@ -2384,11 +2417,13 @@
               << PrettyClassAndClassLoader(klass);
         } else {
           class_table_.insert(std::make_pair(hash, klass));
+          if (log_new_class_table_roots_) {
+            new_class_roots_.push_back(std::make_pair(hash, klass));
+          }
         }
       }
     }
   }
-  class_table_dirty_ = true;
   dex_cache_image_class_lookup_required_ = false;
   self->EndAssertNoThreadSuspension(old_no_suspend_cause);
 }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index e31a6cd..aad7cfc 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -51,6 +51,8 @@
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
 
+enum VisitRootFlags : uint8_t;
+
 class ClassLinker {
  public:
   // Interface method table size. Increasing this value reduces the chance of two interface methods
@@ -242,7 +244,7 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
@@ -542,6 +544,7 @@
   std::vector<const DexFile*> boot_class_path_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::vector<size_t> new_dex_cache_roots_ GUARDED_BY(dex_lock_);
   std::vector<mirror::DexCache*> dex_caches_ GUARDED_BY(dex_lock_);
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
@@ -551,6 +554,7 @@
   // Class::descriptor_ and Class::class_loader_.
   typedef std::multimap<size_t, mirror::Class*> Table;
   Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<std::pair<size_t, mirror::Class*> > new_class_roots_
+      GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // Do we need to search dex caches to find image classes?
   bool dex_cache_image_class_lookup_required_;
@@ -638,8 +642,8 @@
   size_t find_array_class_cache_next_victim_;
 
   bool init_done_;
-  bool dex_caches_dirty_ GUARDED_BY(dex_lock_);
-  bool class_table_dirty_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  bool log_new_dex_caches_roots_ GUARDED_BY(dex_lock_);
+  bool log_new_class_table_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   InternTable* intern_table_;
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 55c23f4..e6aa9c2 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -334,7 +334,7 @@
       const char* descriptor = dex->GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
-    class_linker_->VisitRoots(TestRootVisitor, NULL, false, false);
+    class_linker_->VisitRoots(TestRootVisitor, NULL, kVisitRootFlagAllRoots);
     // Verify the dex cache has resolution methods in all resolved method slots
     mirror::DexCache* dex_cache = class_linker_->FindDexCache(*dex);
     mirror::ObjectArray<mirror::ArtMethod>* resolved_methods = dex_cache->GetResolvedMethods();
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index c39e56f..4aff68a 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -86,6 +86,7 @@
 
 // Turn off kCheckLocks when profiling the GC since it slows the GC down by up to 40%.
 static constexpr bool kCheckLocks = kDebugLocking;
+static constexpr bool kVerifyRoots = kIsDebugBuild;
 
 void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
@@ -255,7 +256,8 @@
     MarkThreadRoots(self);
     // TODO: Only mark the dirty roots.
     MarkNonThreadRoots();
-    MarkConcurrentRoots();
+    MarkConcurrentRoots(
+        static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
     // Process the newly aged cards.
     RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
     // TODO: Empty allocation stack to reduce the number of objects we need to test / mark as live
@@ -286,17 +288,8 @@
   heap_->SwapStacks(self);
 
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-    // If we exclusively hold the mutator lock, all threads must be suspended.
-    MarkRoots();
-    RevokeAllThreadLocalAllocationStacks(self);
-  } else {
-    MarkThreadRoots(self);
-    // At this point the live stack should no longer have any mutators which push into it.
-    MarkNonThreadRoots();
-  }
+  MarkRoots(self);
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
-  MarkConcurrentRoots();
   UpdateAndMarkModUnion();
   MarkReachableObjects();
   // Pre-clean dirtied cards to reduce pauses.
@@ -583,17 +576,16 @@
   }
 }
 
-void MarkSweep::MarkRoot(const Object* obj) {
-  if (obj != NULL) {
-    MarkObjectNonNull(obj);
-  }
-}
-
 void MarkSweep::MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
                                          RootType /*root_type*/) {
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
 }
 
+void MarkSweep::VerifyRootMarked(Object** root, void* arg, uint32_t /*thread_id*/,
+                                 RootType /*root_type*/) {
+  CHECK(reinterpret_cast<MarkSweep*>(arg)->IsMarked(*root));
+}
+
 void MarkSweep::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
                                  RootType /*root_type*/) {
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(*root);
@@ -621,11 +613,20 @@
   Runtime::Current()->GetThreadList()->VerifyRoots(VerifyRootCallback, this);
 }
 
-// Marks all objects in the root set.
-void MarkSweep::MarkRoots() {
-  timings_.StartSplit("MarkRoots");
-  Runtime::Current()->VisitNonConcurrentRoots(MarkRootCallback, this);
-  timings_.EndSplit();
+void MarkSweep::MarkRoots(Thread* self) {
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // If we exclusively hold the mutator lock, all threads must be suspended.
+    timings_.StartSplit("MarkRoots");
+    Runtime::Current()->VisitRoots(MarkRootCallback, this);
+    timings_.EndSplit();
+    RevokeAllThreadLocalAllocationStacks(self);
+  } else {
+    MarkThreadRoots(self);
+    // At this point the live stack should no longer have any mutators which push into it.
+    MarkNonThreadRoots();
+    MarkConcurrentRoots(
+        static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
+  }
 }
 
 void MarkSweep::MarkNonThreadRoots() {
@@ -634,10 +635,10 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::MarkConcurrentRoots() {
+void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   timings_.StartSplit("MarkConcurrentRoots");
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, false, true);
+  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
   timings_.EndSplit();
 }
 
@@ -1003,9 +1004,18 @@
 }
 
 void MarkSweep::ReMarkRoots() {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
   timings_.StartSplit("(Paused)ReMarkRoots");
-  Runtime::Current()->VisitRoots(MarkRootCallback, this, true, true);
+  Runtime::Current()->VisitRoots(
+      MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
+                                                          kVisitRootFlagStopLoggingNewRoots |
+                                                          kVisitRootFlagClearRootLog));
   timings_.EndSplit();
+  if (kVerifyRoots) {
+    timings_.StartSplit("(Paused)VerifyRoots");
+    Runtime::Current()->VisitRoots(VerifyRootMarked, this);
+    timings_.EndSplit();
+  }
 }
 
 void MarkSweep::SweepSystemWeaks() {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index c55b2b2..5c0a233 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -36,6 +36,7 @@
 
 class StackVisitor;
 class Thread;
+enum VisitRootFlags : uint8_t;
 
 namespace gc {
 
@@ -85,8 +86,8 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Marks the root set at the start of a garbage collection.
-  void MarkRoots()
+  // Marks all objects in the root set at the start of a garbage collection.
+  void MarkRoots(Thread* self)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -94,7 +95,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void MarkConcurrentRoots()
+  void MarkConcurrentRoots(VisitRootFlags flags)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -193,6 +194,11 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static void VerifyRootMarked(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
+                               RootType /*root_type*/)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static void ProcessMarkStackPausedCallback(void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
@@ -205,10 +211,6 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void MarkRoot(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   Barrier& GetBarrier() {
     return *gc_barrier_;
   }
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 4668a19..a577f90 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -619,7 +619,7 @@
 void SemiSpace::MarkRoots() {
   timings_.StartSplit("MarkRoots");
   // TODO: Visit up image roots as well?
-  Runtime::Current()->VisitRoots(MarkRootCallback, this, false, true);
+  Runtime::Current()->VisitRoots(MarkRootCallback, this);
   timings_.EndSplit();
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index fc591e7..b97b9ec 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1899,11 +1899,11 @@
 
       // Search to see if any of the roots reference our object.
       void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
+      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
 
       // Search to see if any of the roots reference our reference.
       arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
-      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
+      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
     } else {
       LOG(ERROR) << "Root " << ref << " is dead with type " << PrettyTypeOf(ref);
     }
@@ -1975,7 +1975,7 @@
   // pointing to dead objects if they are not reachable.
   VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor, false, false);
+  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor);
   if (visitor.Failed()) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index c5a8328..fc8b594 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -428,7 +428,7 @@
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
     // Walk the roots and the heap.
     current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT, HPROF_TIME);
-    Runtime::Current()->VisitRoots(RootVisitor, this, false, false);
+    Runtime::Current()->VisitRoots(RootVisitor, this);
     Thread* self = Thread::Current();
     {
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 660efe4..524798d 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 InternTable::InternTable()
-    : is_dirty_(false), allow_new_interns_(true),
+    : log_new_roots_(false), allow_new_interns_(true),
       new_intern_condition_("New intern condition", *Locks::intern_table_lock_) {
 }
 
@@ -43,23 +43,45 @@
      << weak_interns_.size() << " weak\n";
 }
 
-void InternTable::VisitRoots(RootCallback* callback, void* arg,
-                             bool only_dirty, bool clean_dirty) {
+void InternTable::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  if (!only_dirty || is_dirty_) {
+  if ((flags & kVisitRootFlagAllRoots) != 0) {
     for (auto& strong_intern : strong_interns_) {
       callback(reinterpret_cast<mirror::Object**>(&strong_intern.second), arg, 0,
                kRootInternedString);
       DCHECK(strong_intern.second != nullptr);
     }
-    if (clean_dirty) {
-      is_dirty_ = false;
-    }
+  } else if ((flags & kVisitRootFlagNewRoots) != 0) {
+    for (auto& pair : new_strong_intern_roots_) {
+      mirror::String* old_ref = pair.second;
+      callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootInternedString);
+      if (UNLIKELY(pair.second != old_ref)) {
+        // Uh oh, the GC moved a root in the log. We need to search strong_interns_ and update the
+        // corresponding object. This is slow, but luckily for us, this may only happen with a
+        // concurrent moving GC.
+        for (auto it = strong_interns_.lower_bound(pair.first), end = strong_interns_.end();
+            it != end && it->first == pair.first; ++it) {
+          // If the stored string matches the old string, update it to the new value.
+          if (old_ref == it->second) {
+            it->second = pair.second;
+          }
+        }
+      }
+    }
+  }
+
+  if ((flags & kVisitRootFlagClearRootLog) != 0) {
+    new_strong_intern_roots_.clear();
+  }
+  if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
+    log_new_roots_ = true;
+  } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
+    log_new_roots_ = false;
   }
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::Lookup(Table& table, mirror::String* s, uint32_t hash_code) {
+mirror::String* InternTable::Lookup(Table& table, mirror::String* s, int32_t hash_code) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
     mirror::String* existing_string = it->second;
@@ -70,29 +92,28 @@
   return NULL;
 }
 
-mirror::String* InternTable::InsertStrong(mirror::String* s, uint32_t hash_code) {
+mirror::String* InternTable::InsertStrong(mirror::String* s, int32_t hash_code) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordStrongStringInsertion(s, hash_code);
   }
-  return Insert(strong_interns_, s, hash_code);
+  if (log_new_roots_) {
+    new_strong_intern_roots_.push_back(std::make_pair(hash_code, s));
+  }
+  strong_interns_.insert(std::make_pair(hash_code, s));
+  return s;
 }
 
-mirror::String* InternTable::InsertWeak(mirror::String* s, uint32_t hash_code) {
+mirror::String* InternTable::InsertWeak(mirror::String* s, int32_t hash_code) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringInsertion(s, hash_code);
   }
-  return Insert(weak_interns_, s, hash_code);
-}
-
-mirror::String* InternTable::Insert(Table& table, mirror::String* s, uint32_t hash_code) {
-  Locks::intern_table_lock_->AssertHeld(Thread::Current());
-  table.insert(std::make_pair(hash_code, s));
+  weak_interns_.insert(std::make_pair(hash_code, s));
   return s;
 }
 
-void InternTable::RemoveWeak(mirror::String* s, uint32_t hash_code) {
+void InternTable::RemoveWeak(mirror::String* s, int32_t hash_code) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringRemoval(s, hash_code);
@@ -100,8 +121,7 @@
   Remove(weak_interns_, s, hash_code);
 }
 
-void InternTable::Remove(Table& table, mirror::String* s, uint32_t hash_code) {
-  Locks::intern_table_lock_->AssertHeld(Thread::Current());
+void InternTable::Remove(Table& table, mirror::String* s, int32_t hash_code) {
   for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
     if (it->second == s) {
       table.erase(it);
@@ -111,19 +131,19 @@
 }
 
 // Insert/remove methods used to undo changes made during an aborted transaction.
-mirror::String* InternTable::InsertStrongFromTransaction(mirror::String* s, uint32_t hash_code) {
+mirror::String* InternTable::InsertStrongFromTransaction(mirror::String* s, int32_t hash_code) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertStrong(s, hash_code);
 }
-mirror::String* InternTable::InsertWeakFromTransaction(mirror::String* s, uint32_t hash_code) {
+mirror::String* InternTable::InsertWeakFromTransaction(mirror::String* s, int32_t hash_code) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertWeak(s, hash_code);
 }
-void InternTable::RemoveStrongFromTransaction(mirror::String* s, uint32_t hash_code) {
+void InternTable::RemoveStrongFromTransaction(mirror::String* s, int32_t hash_code) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   Remove(strong_interns_, s, hash_code);
 }
-void InternTable::RemoveWeakFromTransaction(mirror::String* s, uint32_t hash_code) {
+void InternTable::RemoveWeakFromTransaction(mirror::String* s, int32_t hash_code) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   Remove(weak_interns_, s, hash_code);
 }
@@ -184,9 +204,6 @@
       return strong;
     }
 
-    // Mark as dirty so that we rescan the roots.
-    is_dirty_ = true;
-
     // Check the image for a match.
     mirror::String* image = LookupStringFromImage(s);
     if (image != NULL) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index cc48480..fd921f3 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -24,6 +24,9 @@
 #include <map>
 
 namespace art {
+
+enum VisitRootFlags : uint8_t;
+
 namespace mirror {
 class String;
 }  // namespace mirror
@@ -63,7 +66,7 @@
 
   size_t Size() const;
 
-  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty);
+  void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags);
 
   void DumpForSigQuit(std::ostream& os) const;
 
@@ -77,34 +80,34 @@
       LOCKS_EXCLUDED(Locks::intern_table_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::String* Lookup(Table& table, mirror::String* s, uint32_t hash_code)
+  mirror::String* Lookup(Table& table, mirror::String* s, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::String* InsertStrong(mirror::String* s, uint32_t hash_code)
+  mirror::String* InsertStrong(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  mirror::String* InsertWeak(mirror::String* s, uint32_t hash_code)
+  mirror::String* InsertWeak(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  mirror::String* Insert(Table& table, mirror::String* s, uint32_t hash_code)
+  void RemoveWeak(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  void RemoveWeak(mirror::String* s, uint32_t hash_code)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  void Remove(Table& table, mirror::String* s, uint32_t hash_code)
+  void Remove(Table& table, mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
 
   // Transaction rollback access.
-  mirror::String* InsertStrongFromTransaction(mirror::String* s, uint32_t hash_code)
+  mirror::String* InsertStrongFromTransaction(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  mirror::String* InsertWeakFromTransaction(mirror::String* s, uint32_t hash_code)
+  mirror::String* InsertWeakFromTransaction(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  void RemoveStrongFromTransaction(mirror::String* s, uint32_t hash_code)
+  void RemoveStrongFromTransaction(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  void RemoveWeakFromTransaction(mirror::String* s, uint32_t hash_code)
+  void RemoveWeakFromTransaction(mirror::String* s, int32_t hash_code)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   friend class Transaction;
 
-  bool is_dirty_ GUARDED_BY(Locks::intern_table_lock_);
+  bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   Table strong_interns_ GUARDED_BY(Locks::intern_table_lock_);
+  std::vector<std::pair<int32_t, mirror::String*> > new_strong_intern_roots_
+      GUARDED_BY(Locks::intern_table_lock_);
   Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_);
 };
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index d9bb121..f48e8ad 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -31,6 +31,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "object_utils.h"
+#include "runtime.h"
 #include "scoped_fast_native_object_access.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -222,8 +223,7 @@
 }
 
 // Based on ClassLinker::ResolveString.
-static void PreloadDexCachesResolveString(SirtRef<mirror::DexCache>& dex_cache,
-                                          uint32_t string_idx,
+static void PreloadDexCachesResolveString(SirtRef<mirror::DexCache>& dex_cache, uint32_t string_idx,
                                           StringTable& strings)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::String* string = dex_cache->GetResolvedString(string_idx);
@@ -435,7 +435,8 @@
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
   if (kPreloadDexCachesStrings) {
-    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings, false, false);
+    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings,
+                                          kVisitRootFlagAllRoots);
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e80a473..7546729 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -797,18 +797,9 @@
   return pre_allocated_OutOfMemoryError_;
 }
 
-void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, bool only_dirty,
-                                   bool clean_dirty) {
-  intern_table_->VisitRoots(callback, arg, only_dirty, clean_dirty);
-  class_linker_->VisitRoots(callback, arg, only_dirty, clean_dirty);
-  // TODO: is it the right place ?
-  if (preinitialization_transaction != nullptr) {
-    preinitialization_transaction->VisitRoots(callback, arg);
-  }
-}
-
-void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
-  // Visit the classes held as static in mirror classes.
+void Runtime::VisitConstantRoots(RootCallback* callback, void* arg) {
+  // Visit the classes held as static in mirror classes, these can be visited concurrently and only
+  // need to be visited once since they never change.
   mirror::ArtField::VisitRoots(callback, arg);
   mirror::ArtMethod::VisitRoots(callback, arg);
   mirror::Class::VisitRoots(callback, arg);
@@ -824,6 +815,18 @@
   mirror::PrimitiveArray<int32_t>::VisitRoots(callback, arg);   // IntArray
   mirror::PrimitiveArray<int64_t>::VisitRoots(callback, arg);   // LongArray
   mirror::PrimitiveArray<int16_t>::VisitRoots(callback, arg);   // ShortArray
+}
+
+void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+  intern_table_->VisitRoots(callback, arg, flags);
+  class_linker_->VisitRoots(callback, arg, flags);
+  if ((flags & kVisitRootFlagNewRoots) == 0) {
+    // Guaranteed to have no new roots in the constant roots.
+    VisitConstantRoots(callback, arg);
+  }
+}
+
+void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
   java_vm_->VisitRoots(callback, arg);
   if (pre_allocated_OutOfMemoryError_ != nullptr) {
     callback(reinterpret_cast<mirror::Object**>(&pre_allocated_OutOfMemoryError_), arg, 0,
@@ -838,7 +841,6 @@
   if (HasDefaultImt()) {
     callback(reinterpret_cast<mirror::Object**>(&default_imt_), arg, 0, kRootVMInternal);
   }
-
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     if (callee_save_methods_[i] != nullptr) {
       callback(reinterpret_cast<mirror::Object**>(&callee_save_methods_[i]), arg, 0,
@@ -851,6 +853,9 @@
       verifier->VisitRoots(callback, arg);
     }
   }
+  if (preinitialization_transaction != nullptr) {
+    preinitialization_transaction->VisitRoots(callback, arg);
+  }
 }
 
 void Runtime::VisitNonConcurrentRoots(RootCallback* callback, void* arg) {
@@ -858,8 +863,8 @@
   VisitNonThreadRoots(callback, arg);
 }
 
-void Runtime::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
-  VisitConcurrentRoots(callback, arg, only_dirty, clean_dirty);
+void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+  VisitConcurrentRoots(callback, arg, flags);
   VisitNonConcurrentRoots(callback, arg);
 }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 2b3f100..f12c3d8 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -68,6 +68,14 @@
 class Trace;
 class Transaction;
 
+enum VisitRootFlags : uint8_t {
+  kVisitRootFlagAllRoots = 0x1,
+  kVisitRootFlagNewRoots = 0x2,
+  kVisitRootFlagStartLoggingNewRoots = 0x4,
+  kVisitRootFlagStopLoggingNewRoots = 0x8,
+  kVisitRootFlagClearRootLog = 0x10,
+};
+
 class Runtime {
  public:
   typedef std::vector<std::pair<std::string, const void*> > Options;
@@ -222,11 +230,12 @@
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
-  void VisitRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* visitor, void* arg, VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the roots we can do safely do concurrently.
-  void VisitConcurrentRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitConcurrentRoots(RootCallback* visitor, void* arg,
+                            VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
@@ -242,6 +251,11 @@
   void SweepSystemWeaks(IsMarkedCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Constant roots are the roots which never change after the runtime is initialized, they only
+  // need to be visited once per GC cycle.
+  void VisitConstantRoots(RootCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Returns a special method that calls into a trampoline for runtime method resolution
   mirror::ArtMethod* GetResolutionMethod() const {
     CHECK(HasResolutionMethod());