Replace ObjectSet with LargeObjectBitmap.

Speeds up large object marking since large objects no longer require
a lock. Changed the GCs to mark objects that miss the fast path
through the heap bitmap, which eliminates the need for a separate
MarkLargeObject function.
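
The win comes from lock-free marking: each marker does an atomic
test-and-set on a word of the mark bitmap, and only the thread that
flips the bit from 0 to 1 pushes the object on its mark stack. A
minimal sketch of the idiom, using a hypothetical MarkBitmapSketch
type (ART's actual SpaceBitmap differs in layout and templates):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class MarkBitmapSketch {
     public:
      // One bit per alignment-sized unit starting at begin. For the
      // large object space the alignment would be kPageSize.
      MarkBitmapSketch(uintptr_t begin, size_t capacity, size_t alignment)
          : begin_(begin), alignment_(alignment),
            words_((capacity / alignment + kBitsPerWord - 1) / kBitsPerWord) {
        for (auto& word : words_) {
          word.store(0, std::memory_order_relaxed);
        }
      }

      // Atomically sets the mark bit for addr and returns its previous
      // value. Exactly one of any racing threads observes false and
      // pushes the object on its mark stack; no mutex is required.
      bool AtomicTestAndSet(uintptr_t addr) {
        const size_t bit = (addr - begin_) / alignment_;
        const uintptr_t mask = static_cast<uintptr_t>(1) << (bit % kBitsPerWord);
        const uintptr_t old_word =
            words_[bit / kBitsPerWord].fetch_or(mask, std::memory_order_relaxed);
        return (old_word & mask) != 0;
      }

     private:
      static constexpr size_t kBitsPerWord = sizeof(uintptr_t) * 8;
      const uintptr_t begin_;
      const size_t alignment_;
      std::vector<std::atomic<uintptr_t>> words_;
    };

The old ObjectSet path serialized parallel large object marks on
large_object_lock_ (see the removed MarkLargeObject below); with a
bitmap the same test-and-set is a single lock-free fetch_or.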

Maps before (10 GC iterations):
Mean partial time: 180ms
Mean sticky time: 151ms

Maps after:
Mean partial time: 161ms
Mean sticky time: 101ms

Note: the GC durations are long due to recent GC ergonomics changes
and because fast bulk free hasn't been enabled yet. Over 50% of the
GC time is spent in RosAllocSpace::FreeList.

Bug: 13571028

Change-Id: Id8f94718aeaa13052672ccbae1e8edf77d653f62
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index d99136a..6380cba 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -185,12 +185,12 @@
     }
   }
   for (const auto& disc_space : GetHeap()->GetDiscontinuousSpaces()) {
-    space::LargeObjectSpace* space = down_cast<space::LargeObjectSpace*>(disc_space);
-    accounting::ObjectSet* live_set = space->GetLiveObjects();
-    accounting::ObjectSet* mark_set = space->GetMarkObjects();
-    heap_->GetLiveBitmap()->ReplaceObjectSet(live_set, mark_set);
-    heap_->GetMarkBitmap()->ReplaceObjectSet(mark_set, live_set);
-    down_cast<space::LargeObjectSpace*>(space)->SwapBitmaps();
+    space::LargeObjectSpace* space = disc_space->AsLargeObjectSpace();
+    accounting::LargeObjectBitmap* live_set = space->GetLiveBitmap();
+    accounting::LargeObjectBitmap* mark_set = space->GetMarkBitmap();
+    heap_->GetLiveBitmap()->ReplaceLargeObjectBitmap(live_set, mark_set);
+    heap_->GetMarkBitmap()->ReplaceLargeObjectBitmap(mark_set, live_set);
+    space->SwapBitmaps();
   }
 }
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f07e6f1..8af4fd8 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -99,7 +99,6 @@
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
       gc_barrier_(new Barrier(0)),
-      large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
       is_concurrent_(is_concurrent) {
 }
@@ -293,14 +292,20 @@
   TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap();
+    // Prefer the main space bitmap over the non-moving space bitmap when possible.
     if (bitmap != nullptr &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       current_space_bitmap_ = bitmap;
-      return;
+      // If this is not the non-moving space, exit early since this bitmap is good enough.
+      if (space != heap_->GetNonMovingSpace()) {
+        break;
+      }
     }
   }
-  GetHeap()->DumpSpaces();
-  LOG(FATAL) << "Could not find a default mark bitmap";
+  if (current_space_bitmap_ == nullptr) {
+    heap_->DumpSpaces();
+    LOG(FATAL) << "Could not find a default mark bitmap";
+  }
 }
 
 void MarkSweep::ExpandMarkStack() {
@@ -322,7 +327,7 @@
 }
 
 inline void MarkSweep::MarkObjectNonNullParallel(Object* obj) {
-  DCHECK(obj != NULL);
+  DCHECK(obj != nullptr);
   if (MarkObjectParallel(obj)) {
     MutexLock mu(Thread::Current(), mark_stack_lock_);
     if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
@@ -343,6 +348,31 @@
   reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
 }
 
+class MarkSweepMarkObjectSlowPath {
+ public:
+  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
+  }
+
+  void operator()(const Object* obj) const ALWAYS_INLINE {
+    if (kProfileLargeObjects) {
+      // TODO: Differentiate between marking and testing somehow.
+      ++mark_sweep_->large_object_test_;
+      ++mark_sweep_->large_object_mark_;
+    }
+    space::LargeObjectSpace* large_object_space = mark_sweep_->GetHeap()->GetLargeObjectsSpace();
+    if (UNLIKELY(!IsAligned<kPageSize>(obj) ||
+                 (kIsDebugBuild && !large_object_space->Contains(obj)))) {
+      LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces";
+      LOG(ERROR) << "Attempting see if it's a bad root";
+      mark_sweep_->VerifyRoots();
+      LOG(FATAL) << "Can't mark invalid object";
+    }
+  }
+
+ private:
+  MarkSweep* const mark_sweep_;
+};
+
 inline void MarkSweep::MarkObjectNonNull(Object* obj) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
@@ -353,27 +383,24 @@
     if (kCountMarkedObjects) {
       ++mark_immune_count_;
     }
-    DCHECK(IsMarked(obj));
-    return;
-  }
-  // Try to take advantage of locality of references within a space, failing this find the space
-  // the hard way.
-  accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
-  if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    object_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
+    DCHECK(mark_bitmap_->Test(obj));
+  } else if (LIKELY(current_space_bitmap_->HasAddress(obj))) {
+    if (kCountMarkedObjects) {
+      ++mark_fastpath_count_;
+    }
+    if (UNLIKELY(!current_space_bitmap_->Set(obj))) {
+      PushOnMarkStack(obj);  // This object was not previously marked.
+    }
+  } else {
     if (kCountMarkedObjects) {
       ++mark_slowpath_count_;
     }
-    if (UNLIKELY(object_bitmap == nullptr)) {
-      MarkLargeObject(obj, true);
-      return;
+    MarkSweepMarkObjectSlowPath visitor(this);
+    // TODO: We already know that the object is not in the current_space_bitmap_ but MarkBitmap::Set
+    // will check again.
+    if (!mark_bitmap_->Set(obj, visitor)) {
+      PushOnMarkStack(obj);  // Was not already marked, push.
     }
-  } else if (kCountMarkedObjects) {
-    ++mark_fastpath_count_;
-  }
-  // This object was not previously marked.
-  if (!object_bitmap->Set(obj)) {
-    PushOnMarkStack(obj);
   }
 }
 
@@ -387,34 +414,6 @@
   mark_stack_->PushBack(obj);
 }
 
-// Rare case, probably not worth inlining since it will increase instruction cache miss rate.
-bool MarkSweep::MarkLargeObject(const Object* obj, bool set) {
-  // TODO: support >1 discontinuous space.
-  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  accounting::ObjectSet* large_objects = large_object_space->GetMarkObjects();
-  if (kProfileLargeObjects) {
-    ++large_object_test_;
-  }
-  if (UNLIKELY(!large_objects->Test(obj))) {
-    if (!large_object_space->Contains(obj)) {
-      LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces";
-      LOG(ERROR) << "Attempting see if it's a bad root";
-      VerifyRoots();
-      LOG(FATAL) << "Can't mark bad root";
-    }
-    if (kProfileLargeObjects) {
-      ++large_object_mark_;
-    }
-    if (set) {
-      large_objects->Set(obj);
-    } else {
-      large_objects->Clear(obj);
-    }
-    return true;
-  }
-  return false;
-}
-
 inline bool MarkSweep::MarkObjectParallel(const Object* obj) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
@@ -428,19 +427,11 @@
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
   accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
-  if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::ContinuousSpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
-    if (new_bitmap != NULL) {
-      object_bitmap = new_bitmap;
-    } else {
-      // TODO: Remove the Thread::Current here?
-      // TODO: Convert this to some kind of atomic marking?
-      MutexLock mu(Thread::Current(), large_object_lock_);
-      return MarkLargeObject(obj, true);
-    }
+  if (LIKELY(object_bitmap->HasAddress(obj))) {
+    return !object_bitmap->AtomicTestAndSet(obj);
   }
-  // Return true if the object was not previously marked.
-  return !object_bitmap->AtomicTestAndSet(obj);
+  MarkSweepMarkObjectSlowPath visitor(this);
+  return !mark_bitmap_->AtomicTestAndSet(obj, visitor);
 }
 
 // Used to mark objects when processing the mark stack. If an object is null, it is not marked.
@@ -719,7 +710,7 @@
 
 size_t MarkSweep::GetThreadCount(bool paused) const {
   if (heap_->GetThreadPool() == nullptr || !heap_->CareAboutPauseTimes()) {
-    return 0;
+    return 1;
   }
   if (paused) {
     return heap_->GetParallelGCThreadCount() + 1;
@@ -733,7 +724,7 @@
   ThreadPool* thread_pool = GetHeap()->GetThreadPool();
   size_t thread_count = GetThreadCount(paused);
   // The parallel version with only one thread is faster for card scanning, TODO: fix.
-  if (kParallelCardScan && thread_count > 0) {
+  if (kParallelCardScan && thread_count > 1) {
     Thread* self = Thread::Current();
     // Can't have a different split for each space since multiple spaces can have their cards being
     // scanned at the same time.
@@ -944,14 +935,11 @@
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
   if (!heap_->GetLiveBitmap()->Test(obj)) {
-    space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
-    if (!large_object_space->GetLiveObjects()->Test(obj)) {
-      if (std::find(heap_->allocation_stack_->Begin(), heap_->allocation_stack_->End(), obj) ==
-          heap_->allocation_stack_->End()) {
-        // Object not found!
-        heap_->DumpSpaces();
-        LOG(FATAL) << "Found dead object " << obj;
-      }
+    if (std::find(heap_->allocation_stack_->Begin(), heap_->allocation_stack_->End(), obj) ==
+        heap_->allocation_stack_->End()) {
+      // Object not found!
+      heap_->DumpSpaces();
+      LOG(FATAL) << "Found dead object " << obj;
     }
   }
 }
@@ -1086,8 +1074,8 @@
   }
   // Handle the large object space.
   space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  accounting::ObjectSet* large_live_objects = large_object_space->GetLiveObjects();
-  accounting::ObjectSet* large_mark_objects = large_object_space->GetMarkObjects();
+  accounting::LargeObjectBitmap* large_live_objects = large_object_space->GetLiveBitmap();
+  accounting::LargeObjectBitmap* large_mark_objects = large_object_space->GetMarkBitmap();
   if (swap_bitmaps) {
     std::swap(large_live_objects, large_mark_objects);
   }
@@ -1131,7 +1119,6 @@
   timings_.EndSplit();
 
   DCHECK(mark_stack_->IsEmpty());
-  TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
@@ -1149,13 +1136,13 @@
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_);
   size_t freed_objects = 0;
   size_t freed_bytes = 0;
-  GetHeap()->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+  heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
   freed_large_objects_.FetchAndAdd(freed_objects);
   freed_large_object_bytes_.FetchAndAdd(freed_bytes);
-  GetHeap()->RecordFree(freed_objects, freed_bytes);
+  heap_->RecordFree(freed_objects, freed_bytes);
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 6dbb270..41a7764 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -227,11 +227,6 @@
   // Marks an object atomically, safe to use from multiple threads.
   void MarkObjectNonNullParallel(mirror::Object* obj);
 
-  // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
-  // mark, otherwise we unmark.
-  bool MarkLargeObject(const mirror::Object* obj, bool set)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) LOCKS_EXCLUDED(large_object_lock_);
-
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
@@ -315,7 +310,6 @@
   size_t live_stack_freeze_size_;
 
   UniquePtr<Barrier> gc_barrier_;
-  Mutex large_object_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   Mutex mark_stack_lock_ ACQUIRED_AFTER(Locks::classlinker_classes_lock_);
 
   const bool is_concurrent_;
@@ -326,8 +320,6 @@
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
   friend class art::gc::Heap;
-  friend class InternTableEntryIsUnmarked;
-  friend class MarkIfReachesAllocspaceVisitor;
   friend class MarkObjectVisitor;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
@@ -336,10 +328,9 @@
   friend class ModUnionTableBitmap;
   friend class ModUnionTableReferenceCache;
   friend class ModUnionScanImageRootVisitor;
-  friend class ScanBitmapVisitor;
-  friend class ScanImageRootVisitor;
   template<bool kUseFinger> friend class MarkStackTask;
   friend class FifoMarkStackChunk;
+  friend class MarkSweepMarkObjectSlowPath;
 
   DISALLOW_COPY_AND_ASSIGN(MarkSweep);
 };
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index d03baf1..55140f6 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -26,6 +26,21 @@
 namespace gc {
 namespace collector {
 
+class BitmapSetSlowPathVisitor {
+ public:
+  explicit BitmapSetSlowPathVisitor(SemiSpace* semi_space) : semi_space_(semi_space) {
+  }
+
+  void operator()(const mirror::Object* obj) const {
+    CHECK(!semi_space_->to_space_->HasAddress(obj)) << "Marking " << obj << " in to_space_";
+    // Marking a large object, make sure it's aligned as a sanity check.
+    CHECK(IsAligned<kPageSize>(obj));
+  }
+
+ private:
+  SemiSpace* const semi_space_;
+};
+
 inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object* obj) const {
   DCHECK(from_space_->HasAddress(obj));
   LockWord lock_word = obj->GetLockWord(false);
@@ -53,7 +68,7 @@
     if (from_space_->HasAddress(obj)) {
       mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
       // If the object has already been moved, return the new forward address.
-      if (forward_address == nullptr) {
+      if (UNLIKELY(forward_address == nullptr)) {
         forward_address = MarkNonForwardedObject(obj);
         DCHECK(forward_address != nullptr);
         // Make sure to only update the forwarding address AFTER you copy the object so that the
@@ -65,25 +80,17 @@
       }
       obj_ptr->Assign(forward_address);
     } else {
-      accounting::ContinuousSpaceBitmap* object_bitmap =
-          heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
-      if (LIKELY(object_bitmap != nullptr)) {
-        if (generational_) {
-          // If a bump pointer space only collection, we should not
-          // reach here as we don't/won't mark the objects in the
-          // non-moving space (except for the promoted objects.)  Note
-          // the non-moving space is added to the immune space.
-          DCHECK(whole_heap_collection_);
-        }
-        if (!object_bitmap->Set(obj)) {
-          // This object was not previously marked.
-          MarkStackPush(obj);
-        }
-      } else {
-        CHECK(!to_space_->HasAddress(obj)) << "Marking " << obj << " in to_space_";
-        if (MarkLargeObject(obj)) {
-          MarkStackPush(obj);
-        }
+      BitmapSetSlowPathVisitor visitor(this);
+      if (kIsDebugBuild && mark_bitmap_->GetContinuousSpaceBitmap(obj) != nullptr) {
+        // If this is a bump pointer space only collection, we should
+        // not reach here, as we don't/won't mark the objects in the
+        // non-moving space (except for promoted objects). Note that
+        // the non-moving space is added to the immune space.
+        DCHECK(!generational_ || whole_heap_collection_);
+      }
+      if (!mark_bitmap_->Set(obj, visitor)) {
+        // This object was not previously marked.
+        MarkStackPush(obj);
       }
     }
   }
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 4a1bf18..b67bbb1 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -126,6 +126,11 @@
   CHECK(from_space_->CanMoveObjects()) << "Attempting to move from " << *from_space_;
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
+  {
+    // TODO: I don't think we should need the heap bitmap lock to get the mark bitmap.
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    mark_bitmap_ = heap_->GetMarkBitmap();
+  }
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
@@ -314,8 +319,8 @@
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
   heap_->MarkAllocStackAsLive(live_stack);
   live_stack->Reset();
-  timings_.EndSplit();
 
+  timings_.NewSplit("UpdateAndMarkRememberedSets");
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune and has no mod union table (the
     // non-moving space when the bump pointer space only collection is
@@ -353,6 +358,7 @@
   }
 
   if (is_large_object_space_immune_) {
+    timings_.NewSplit("VisitLargeObjects");
     DCHECK(generational_ && !whole_heap_collection_);
     // Delay copying the live set to the marked set until here from
     // BindBitmaps() as the large objects on the allocation stack may
@@ -364,13 +370,13 @@
     // classes (primitive array classes) that could move though they
     // don't contain any other references.
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-    accounting::ObjectSet* large_live_objects = large_object_space->GetLiveObjects();
+    accounting::LargeObjectBitmap* large_live_bitmap = large_object_space->GetLiveBitmap();
     SemiSpaceScanObjectVisitor visitor(this);
-    for (const Object* obj : large_live_objects->GetObjects()) {
-      visitor(const_cast<Object*>(obj));
-    }
+    large_live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(large_object_space->Begin()),
+                                        reinterpret_cast<uintptr_t>(large_object_space->End()),
+                                        visitor);
   }
-
+  timings_.EndSplit();
   // Recursively process the mark stack.
   ProcessMarkStack();
 }
@@ -452,19 +458,6 @@
   mark_stack_->PushBack(obj);
 }
 
-// Rare case, probably not worth inlining since it will increase instruction cache miss rate.
-bool SemiSpace::MarkLargeObject(const Object* obj) {
-  // TODO: support >1 discontinuous space.
-  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  DCHECK(large_object_space->Contains(obj));
-  accounting::ObjectSet* large_objects = large_object_space->GetMarkObjects();
-  if (UNLIKELY(!large_objects->Test(obj))) {
-    large_objects->Set(obj);
-    return true;
-  }
-  return false;
-}
-
 static inline size_t CopyAvoidingDirtyingPages(void* dest, const void* src, size_t size) {
   if (LIKELY(size <= static_cast<size_t>(kPageSize))) {
     // We will dirty the current page and somewhere in the middle of the next page. This means
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b6726b2..3d635f0 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -201,6 +201,8 @@
   // Cached live bitmap as an optimization.
   accounting::ContinuousSpaceBitmap* to_space_live_bitmap_;
   space::ContinuousMemMapAllocSpace* from_space_;
+  // Cached mark bitmap as an optimization.
+  accounting::HeapBitmap* mark_bitmap_;
 
   Thread* self_;
 
@@ -248,6 +250,7 @@
   static constexpr int kDefaultWholeHeapCollectionInterval = 5;
 
  private:
+  friend class BitmapSetSlowPathVisitor;
   DISALLOW_COPY_AND_ASSIGN(SemiSpace);
 };