Enable concurrent sweeping for non-concurrent GC.

Refactored the GarbageCollector so that all of the phases are run by
the collector's RunPhases virtual method. This lets each GC decide
which phases should be concurrent and reduces how much baked-in GC
logic resides in GarbageCollector.
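
As a rough sketch (simplified from the garbage_collector.h/.cc hunks
below; stats and timing bookkeeping elided), the reworked control flow
looks like:

  class GarbageCollector {
   public:
    // Suspends all mutator threads on construction, records the pause
    // and resumes them on destruction (see ScopedPause below).
    class ScopedPause {
     public:
      explicit ScopedPause(GarbageCollector* collector);  // SuspendAll().
      ~ScopedPause();  // RegisterPause() + ResumeAll().
    };

    void Run(GcCause gc_cause, bool clear_soft_references) {
      // ... reset per-GC statistics ...
      RunPhases();  // The concrete collector sequences its own phases
                    // and decides which run inside a ScopedPause.
      // ... accumulate timings, pauses and freed counts ...
    }

   protected:
    virtual void RunPhases() = 0;
  };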

Enabled concurrent sweeping in the semi-space and non-concurrent
mark-sweep GCs. Changed the semi-space collector to have a
swap-semi-spaces boolean that can be changed with a setter.
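
For example, a caller that wants to control the swap itself
(hypothetical usage; kGcCauseCollectorTransition is the existing GC
cause used during collector transitions):

  // Disable the automatic swap so the heap can exchange the semi
  // spaces at a point of its own choosing, then run the collection.
  semi_space_collector_->SetSwapSemiSpaces(false);
  semi_space_collector_->Run(kGcCauseCollectorTransition,
                             /*clear_soft_references*/ false);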

Fixed tests to pass with the GSS collector; there was an error related
to the large object space limit.

Before (EvaluateAndApplyChanges):
GSS paused GC time 7.81s/7.81s, score: 3920

After (EvaluateAndApplyChanges):
GSS paused GC time 6.94s/7.71s, score: 3900

The benchmark score doesn't go up since the GC happens in the
allocating thread, so total GC time is nearly unchanged. There is a
slight reduction in the pause times experienced by other threads
(0.8s total).

Added options for pre-sweeping GC heap verification and pre-sweeping
RosAlloc verification.
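
For reference, the rough ordering of the verification hooks around a
collection (all four calls appear in the RunPhases implementations
below; RosAlloc verification follows the same pattern):

  heap->PreGcVerification(collector);             // Before marking, mutators running.
  heap->PrePauseRosAllocVerification(collector);  // Inside the pause.
  heap->PreSweepingGcVerification(collector);     // After marking, before sweeping.
  heap->PostGcVerification(collector);            // After the GC completes.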

Bug: 14226004
Bug: 14250892
Bug: 14386356

Change-Id: Ib557d0590c1ed82a639d0f0281ba67cf8cae938c
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index ab26a9c..ce7c75a 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -33,10 +33,7 @@
 
   ~ConcurrentCopying() {}
 
-  virtual void InitializePhase() OVERRIDE {}
-  virtual void MarkingPhase() OVERRIDE {}
-  virtual void ReclaimPhase() OVERRIDE {}
-  virtual void FinishPhase() OVERRIDE {}
+  virtual void RunPhases() OVERRIDE {}
   virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 615ec98..f9a6abe 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -14,10 +14,7 @@
  * limitations under the License.
  */
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
 #include <stdio.h>
-#include <cutils/trace.h>
 
 #include "garbage_collector.h"
 
@@ -46,9 +43,6 @@
   ResetCumulativeStatistics();
 }
 
-void GarbageCollector::PausePhase() {
-}
-
 void GarbageCollector::RegisterPause(uint64_t nano_length) {
   pause_times_.push_back(nano_length);
 }
@@ -62,7 +56,6 @@
 }
 
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   timings_.Reset();
@@ -70,88 +63,12 @@
   duration_ns_ = 0;
   clear_soft_references_ = clear_soft_references;
   gc_cause_ = gc_cause;
-
   // Reset stats.
   freed_bytes_ = 0;
   freed_large_object_bytes_ = 0;
   freed_objects_ = 0;
   freed_large_objects_ = 0;
-
-  CollectorType collector_type = GetCollectorType();
-  switch (collector_type) {
-    case kCollectorTypeMS:      // Fall through.
-    case kCollectorTypeSS:      // Fall through.
-    case kCollectorTypeGSS: {
-      InitializePhase();
-      // Pause is the entire length of the GC.
-      uint64_t pause_start = NanoTime();
-      ATRACE_BEGIN("Application threads suspended");
-      // Mutator lock may be already exclusively held when we do garbage collections for changing
-      // the current collector / allocator during process state updates.
-      if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-        // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
-        RevokeAllThreadLocalBuffers();
-        MarkingPhase();
-        PausePhase();
-        ReclaimPhase();
-        // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
-      } else {
-        ATRACE_BEGIN("Suspending mutator threads");
-        thread_list->SuspendAll();
-        ATRACE_END();
-        GetHeap()->PreGcRosAllocVerification(&timings_);
-        RevokeAllThreadLocalBuffers();
-        MarkingPhase();
-        PausePhase();
-        ReclaimPhase();
-        GetHeap()->PostGcRosAllocVerification(&timings_);
-        ATRACE_BEGIN("Resuming mutator threads");
-        thread_list->ResumeAll();
-        ATRACE_END();
-      }
-      ATRACE_END();
-      RegisterPause(NanoTime() - pause_start);
-      FinishPhase();
-      break;
-    }
-    case kCollectorTypeCMS: {
-      InitializePhase();
-      CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
-      {
-        ReaderMutexLock mu(self, *Locks::mutator_lock_);
-        MarkingPhase();
-      }
-      uint64_t pause_start = NanoTime();
-      ATRACE_BEGIN("Suspending mutator threads");
-      thread_list->SuspendAll();
-      ATRACE_END();
-      ATRACE_BEGIN("All mutator threads suspended");
-      GetHeap()->PreGcRosAllocVerification(&timings_);
-      PausePhase();
-      RevokeAllThreadLocalBuffers();
-      GetHeap()->PostGcRosAllocVerification(&timings_);
-      ATRACE_END();
-      uint64_t pause_end = NanoTime();
-      ATRACE_BEGIN("Resuming mutator threads");
-      thread_list->ResumeAll();
-      ATRACE_END();
-      RegisterPause(pause_end - pause_start);
-      {
-        ReaderMutexLock mu(self, *Locks::mutator_lock_);
-        ReclaimPhase();
-      }
-      FinishPhase();
-      break;
-    }
-    case kCollectorTypeCC: {
-      // To be implemented.
-      break;
-    }
-    default: {
-      LOG(FATAL) << "Unreachable collector type=" << static_cast<size_t>(collector_type);
-      break;
-    }
-  }
+  RunPhases();  // Run all the GC phases.
   // Add the current timings to the cumulative timings.
   cumulative_timings_.AddLogger(timings_);
   // Update cumulative statistics with how many bytes the GC iteration freed.
@@ -159,6 +76,12 @@
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The entire GC was paused; clear the fake pauses which might be in the pause times and add
+    // the whole GC duration.
+    pause_times_.clear();
+    RegisterPause(duration_ns_);
+  }
   total_time_ns_ += GetDurationNs();
   for (uint64_t pause_time : pause_times_) {
     pause_histogram_.AddValue(pause_time / 1000);
@@ -213,6 +136,16 @@
   total_freed_bytes_ = 0;
 }
 
+GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector)
+    : start_time_(NanoTime()), collector_(collector) {
+  Runtime::Current()->GetThreadList()->SuspendAll();
+}
+
+GarbageCollector::ScopedPause::~ScopedPause() {
+  collector_->RegisterPause(NanoTime() - start_time_);
+  Runtime::Current()->GetThreadList()->ResumeAll();
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index b19ac3f..ca4a1d5 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -35,6 +35,16 @@
 
 class GarbageCollector {
  public:
+  class SCOPED_LOCKABLE ScopedPause {
+   public:
+    explicit ScopedPause(GarbageCollector* collector) EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_);
+    ~ScopedPause() UNLOCK_FUNCTION();
+
+   private:
+    const uint64_t start_time_;
+    GarbageCollector* const collector_;
+  };
+
   GarbageCollector(Heap* heap, const std::string& name);
   virtual ~GarbageCollector() { }
 
@@ -125,20 +135,8 @@
   }
 
  protected:
-  // The initial phase. Done without mutators paused.
-  virtual void InitializePhase() = 0;
-
-  // Mark all reachable objects, done concurrently.
-  virtual void MarkingPhase() = 0;
-
-  // Phase of the GC which is run with mutator lock exclusively held.
-  virtual void PausePhase();
-
-  // Called with mutators running.
-  virtual void ReclaimPhase() = 0;
-
-  // Called after the GC is finished. Done without mutators paused.
-  virtual void FinishPhase() = 0;
+  // Run all of the GC phases.
+  virtual void RunPhases() = 0;
 
   // Revoke all the thread-local buffers.
   virtual void RevokeAllThreadLocalBuffers() = 0;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 007eb23..9cd740e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -130,9 +130,37 @@
     // Always clear soft references if a non-sticky collection.
     clear_soft_references_ = GetGcType() != collector::kGcTypeSticky;
   }
-  // Do any pre GC verification.
-  timings_.NewSplit("PreGcVerification");
-  heap_->PreGcVerification(this);
+}
+
+void MarkSweep::RunPhases() {
+  Thread* self = Thread::Current();
+  InitializePhase();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  if (IsConcurrent()) {
+    GetHeap()->PreGcVerification(this);
+    {
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      MarkingPhase();
+    }
+    ScopedPause pause(this);
+    GetHeap()->PrePauseRosAllocVerification(this);
+    PausePhase();
+    RevokeAllThreadLocalBuffers();
+  } else {
+    ScopedPause pause(this);
+    GetHeap()->PreGcVerificationPaused(this);
+    MarkingPhase();
+    GetHeap()->PrePauseRosAllocVerification(this);
+    PausePhase();
+    RevokeAllThreadLocalBuffers();
+  }
+  {
+    // Sweeping is always done concurrently, even for non-concurrent mark sweep.
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    ReclaimPhase();
+  }
+  GetHeap()->PostGcVerification(this);
+  FinishPhase();
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
@@ -166,7 +194,7 @@
   }
   ProcessReferences(self);
   {
-    timings_.NewSplit("SwapStacks");
+    TimingLogger::ScopedSplit split("SwapStacks", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap_->SwapStacks(self);
     live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
@@ -177,13 +205,11 @@
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
-  if (IsConcurrent()) {
-    // Disallow new system weaks to prevent a race which occurs when someone adds a new system
-    // weak before we sweep them. Since this new system weak may not be marked, the GC may
-    // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
-    // reference to a string that is about to be swept.
-    Runtime::Current()->DisallowNewSystemWeaks();
-  }
+  // Disallow new system weaks to prevent a race which occurs when someone adds a new system
+  // weak before we sweep them. Since this new system weak may not be marked, the GC may
+  // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
+  // reference to a string that is about to be swept.
+  Runtime::Current()->DisallowNewSystemWeaks();
 }
 
 void MarkSweep::PreCleanCards() {
@@ -265,9 +291,7 @@
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
   SweepSystemWeaks(self);
-  if (IsConcurrent()) {
-    Runtime::Current()->AllowNewSystemWeaks();
-  }
+  Runtime::Current()->AllowNewSystemWeaks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
@@ -1256,9 +1280,6 @@
 
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  // Can't enqueue references if we hold the mutator lock.
-  timings_.NewSplit("PostGcVerification");
-  heap_->PostGcVerification(this);
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
              << " other=" << other_count_;
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 41a7764..0c5a0da 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -56,11 +56,12 @@
 
   ~MarkSweep() {}
 
-  virtual void InitializePhase() OVERRIDE;
-  virtual void MarkingPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void PausePhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ReclaimPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void FinishPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  void InitializePhase();
+  void MarkingPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PausePhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ReclaimPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FinishPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MarkReachableObjects()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 3b9e853..65bbbd2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -106,7 +106,37 @@
       bytes_promoted_since_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
       whole_heap_collection_interval_counter_(0),
-      collector_name_(name_) {
+      collector_name_(name_),
+      swap_semi_spaces_(true) {
+}
+
+void SemiSpace::RunPhases() {
+  Thread* self = Thread::Current();
+  InitializePhase();
+  // Semi-space collector is special since it is sometimes called with the mutators suspended
+  // during zygote creation and collector transitions. If we already exclusively hold the
+  // mutator lock, then we can't lock it again since it will cause a deadlock.
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    GetHeap()->PreGcVerificationPaused(this);
+    GetHeap()->PrePauseRosAllocVerification(this);
+    MarkingPhase();
+    ReclaimPhase();
+    GetHeap()->PostGcVerificationPaused(this);
+  } else {
+    Locks::mutator_lock_->AssertNotHeld(self);
+    {
+      ScopedPause pause(this);
+      GetHeap()->PreGcVerificationPaused(this);
+      GetHeap()->PrePauseRosAllocVerification(this);
+      MarkingPhase();
+    }
+    {
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      ReclaimPhase();
+    }
+    GetHeap()->PostGcVerification(this);
+  }
+  FinishPhase();
 }
 
 void SemiSpace::InitializePhase() {
@@ -119,9 +149,6 @@
   bytes_moved_ = 0;
   objects_moved_ = 0;
   self_ = Thread::Current();
-  // Do any pre GC verification.
-  timings_.NewSplit("PreGcVerification");
-  heap_->PreGcVerification(this);
   CHECK(from_space_->CanMoveObjects()) << "Attempting to move from " << *from_space_;
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
@@ -140,6 +167,7 @@
 }
 
 void SemiSpace::MarkingPhase() {
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self_));
   if (kStoreStackTraces) {
     Locks::mutator_lock_->AssertExclusiveHeld(self_);
     // Store the stack traces into the runtime fault string in case we get a heap corruption
@@ -214,12 +242,51 @@
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
   }
   heap_->SwapStacks(self_);
-  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
-  MarkRoots();
-  // Mark roots of immune spaces.
-  UpdateAndMarkModUnion();
-  // Recursively mark remaining objects.
-  MarkReachableObjects();
+  {
+    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
+    MarkRoots();
+    // Mark roots of immune spaces.
+    UpdateAndMarkModUnion();
+    // Recursively mark remaining objects.
+    MarkReachableObjects();
+  }
+  ProcessReferences(self_);
+  {
+    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  timings_.NewSplit("RecordFree");
+  // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
+  // before the moved objects can be properly counted.
+  RevokeAllThreadLocalBuffers();
+  // Record freed memory.
+  uint64_t from_bytes = from_space_->GetBytesAllocated();
+  uint64_t to_bytes = bytes_moved_;
+  uint64_t from_objects = from_space_->GetObjectsAllocated();
+  uint64_t to_objects = objects_moved_;
+  CHECK_LE(to_objects, from_objects);
+  int64_t freed_bytes = from_bytes - to_bytes;
+  int64_t freed_objects = from_objects - to_objects;
+  freed_bytes_.FetchAndAdd(freed_bytes);
+  freed_objects_.FetchAndAdd(freed_objects);
+  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
+  // space.
+  heap_->RecordFree(freed_objects, freed_bytes);
+
+  // Clear and protect the from space.
+  from_space_->Clear();
+  VLOG(heap) << "Protecting space " << *from_space_;
+  if (kProtectFromSpace) {
+    from_space_->GetMemMap()->Protect(PROT_NONE);
+  } else {
+    from_space_->GetMemMap()->Protect(PROT_READ);
+  }
+  if (swap_semi_spaces_) {
+    heap_->SwapSemiSpaces();
+  }
+  timings_.StartSplit("PreSweepingGcVerification");
+  heap_->PreSweepingGcVerification(this);
+  timings_.EndSplit();
 }
 
 void SemiSpace::UpdateAndMarkModUnion() {
@@ -383,28 +450,6 @@
 
 void SemiSpace::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
-  ProcessReferences(self_);
-  {
-    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
-    SweepSystemWeaks();
-  }
-  // Record freed memory.
-  uint64_t from_bytes = from_space_->GetBytesAllocated();
-  uint64_t to_bytes = bytes_moved_;
-  uint64_t from_objects = from_space_->GetObjectsAllocated();
-  uint64_t to_objects = objects_moved_;
-  CHECK_LE(to_objects, from_objects);
-  int64_t freed_bytes = from_bytes - to_bytes;
-  int64_t freed_objects = from_objects - to_objects;
-  freed_bytes_.FetchAndAdd(freed_bytes);
-  freed_objects_.FetchAndAdd(freed_objects);
-  // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
-  // space.
-  heap_->RecordFree(freed_objects, freed_bytes);
-
-  timings_.StartSplit("PreSweepingGcVerification");
-  heap_->PreSweepingGcVerification(this);
-  timings_.EndSplit();
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
@@ -419,16 +464,6 @@
     TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
     GetHeap()->UnBindBitmaps();
   }
-  // TODO: Do this before doing verification since the from space may have objects which weren't
-  // moved and point to dead objects.
-  from_space_->Clear();
-  // Protect the from space.
-  VLOG(heap) << "Protecting space " << *from_space_;
-  if (kProtectFromSpace) {
-    from_space_->GetMemMap()->Protect(PROT_NONE);
-  } else {
-    from_space_->GetMemMap()->Protect(PROT_READ);
-  }
   if (saved_bytes_ > 0) {
     VLOG(heap) << "Avoided dirtying " << PrettySize(saved_bytes_);
   }
@@ -765,9 +800,6 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  Heap* heap = GetHeap();
-  timings_.NewSplit("PostGcVerification");
-  heap->PostGcVerification(this);
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 51b0869..d468561 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -61,12 +61,13 @@
 
   ~SemiSpace() {}
 
-  virtual void InitializePhase() OVERRIDE;
-  virtual void MarkingPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  virtual void InitializePhase();
+  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  virtual void ReclaimPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  virtual void FinishPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MarkReachableObjects()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
   virtual GcType GetGcType() const OVERRIDE {
@@ -82,6 +83,12 @@
   // Set the space where we copy objects from.
   void SetFromSpace(space::ContinuousMemMapAllocSpace* from_space);
 
+  // Set whether or not we swap the semi spaces in the heap. This needs to be done with mutators
+  // suspended.
+  void SetSwapSemiSpaces(bool swap_semi_spaces) {
+    swap_semi_spaces_ = swap_semi_spaces;
+  }
+
   // Initializes internal structures.
   void Init();
 
@@ -253,6 +260,9 @@
   // collections.
   static constexpr int kDefaultWholeHeapCollectionInterval = 5;
 
+  // Whether or not we swap the semi spaces in the heap during the marking phase.
+  bool swap_semi_spaces_;
+
  private:
   friend class BitmapSetSlowPathVisitor;
   DISALLOW_COPY_AND_ASSIGN(SemiSpace);