Background compaction support.

When the process state changes to one in which the user does not
perceive jank, we copy from the main free-list backed allocation space
to the bump pointer space and enable the semispace allocator.

When we transition back to foreground, we copy back to a free-list
backed space.
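
As a rough sketch of the intended flow (the helper and enum names here
are illustrative assumptions, not necessarily the exact APIs in this
change):

    // Sketch only: pick the collector/allocator from the process state.
    // TransitionCollector and kCollectorType* are assumed names.
    void Heap::UpdateProcessState(ProcessState state) {
      if (state == kProcessStateJankImperceptible) {
        // Background: compact into the bump pointer space and allocate
        // with the semispace (bump pointer) allocator.
        TransitionCollector(kCollectorTypeSS);
      } else {
        // Foreground: copy back into a free-list backed space.
        TransitionCollector(kCollectorTypeMS);
      }
    }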

Create a separate non-moving space which only holds non-movable
objects. This enables us to quickly wipe the current alloc space
(DlMalloc / RosAlloc) when we transition to background.
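
A minimal sketch of the allocation split this enables (the method
signature and field names below are assumptions for illustration):

    // Sketch only: non-movable objects (e.g. classes) go to the
    // dedicated non-moving space, so the main alloc space holds only
    // movable objects and can be evacuated and wiped on backgrounding.
    mirror::Object* Heap::AllocObject(Thread* self, size_t num_bytes,
                                      bool movable) {
      size_t bytes_allocated = 0;
      if (!movable) {
        return non_moving_space_->Alloc(self, num_bytes, &bytes_allocated);
      }
      // Movable objects go to the current alloc space: DlMalloc/RosAlloc
      // in the foreground, the bump pointer space in the background.
      return current_alloc_space_->Alloc(self, num_bytes, &bytes_allocated);
    }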

Added multiple alloc space support to the sticky mark sweep GC.

Added a -XX:BackgroundGC option which lets you specify which GC to use
for background apps. Passing in -XX:BackgroundGC=SS makes the heap
compact itself for apps which do not perceive jank.
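
A sketch of how the option value could be consumed (the parser and
field names are assumptions; only the flag and the SS value come from
this change):

    // Sketch only: map -XX:BackgroundGC=<value> to a background collector.
    void Runtime::ParseBackgroundGc(const std::string& value) {
      if (value == "SS") {
        // Compact the heap with the semispace collector while backgrounded.
        background_collector_type_ = kCollectorTypeSS;
      } else {
        // Otherwise keep the foreground (free-list backed) collector.
        background_collector_type_ = foreground_collector_type_;
      }
    }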

Results:
Simple background foreground test:
0. Reboot phone, unlock.
1. Open browser, click on home.
2. Open calculator, click on home.
3. Open calendar, click on home.
4. Open camera, click on home.
5. Open clock, click on home.
6. adb shell dumpsys meminfo

PSS Normal ART:
Sample 1:
    88468 kB: Dalvik
     3188 kB: Dalvik Other
Sample 2:
    81125 kB: Dalvik
     3080 kB: Dalvik Other

PSS Dalvik:
Total PSS by category:
Sample 1:
    81033 kB: Dalvik
    27787 kB: Dalvik Other
Sample 2:
    81901 kB: Dalvik
    28869 kB: Dalvik Other

PSS ART + Background Compaction:
Sample 1:
    71014 kB: Dalvik
     1412 kB: Dalvik Other
Sample 2:
    73859 kB: Dalvik
     1400 kB: Dalvik Other

The Dalvik Other reduction can be explained by shallower allocation
stacks, fewer live bitmaps, and fewer dirty cards.

TODO improvements: Recycle mem-maps which are unused in the current
state. Don't hardcode the 64 MB capacity of the non-moving space (avoid
a return of the linear alloc nightmares). Figure out ways to deal with
low virtual address space problems.

Bug: 8981901

Change-Id: Ib235d03f45548ffc08a06b8ae57bf5bada49d6f3
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 6baee54..4822e64 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -65,6 +65,7 @@
 
 void GarbageCollector::Run(bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   pause_times_.clear();
   duration_ns_ = 0;
@@ -82,14 +83,23 @@
     // Pause is the entire length of the GC.
     uint64_t pause_start = NanoTime();
     ATRACE_BEGIN("Application threads suspended");
-    thread_list->SuspendAll();
-    GetHeap()->RevokeAllThreadLocalBuffers();
-    MarkingPhase();
-    ReclaimPhase();
-    thread_list->ResumeAll();
+    // The mutator lock may already be exclusively held when we run a garbage collection to
+    // change the current collector / allocator during a process state update.
+    if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+    } else {
+      thread_list->SuspendAll();
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+      thread_list->ResumeAll();
+    }
     ATRACE_END();
     RegisterPause(NanoTime() - pause_start);
   } else {
+    CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     Thread* self = Thread::Current();
     {
       ReaderMutexLock mu(self, *Locks::mutator_lock_);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index a6fb35d..937ff6d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1043,66 +1043,88 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
-  // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
-  // going to free.
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
-  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-    std::swap(large_live_objects, large_mark_objects);
-  }
-
+  Thread* self = Thread::Current();
+  mirror::Object* chunk_free_buffer[kSweepArrayChunkFreeSize];
+  size_t chunk_free_pos = 0;
   size_t freed_bytes = 0;
   size_t freed_large_object_bytes = 0;
   size_t freed_objects = 0;
   size_t freed_large_objects = 0;
-  size_t count = allocations->Size();
+  // How many objects are left in the array, modified after each space is swept.
   Object** objects = const_cast<Object**>(allocations->Begin());
-  Object** out = objects;
-  Object** objects_to_chunk_free = out;
-
-  // Empty the allocation stack.
-  Thread* self = Thread::Current();
+  size_t count = allocations->Size();
+  // Change the order to ensure that the non-moving space is swept last as an optimization.
+  std::vector<space::ContinuousSpace*> sweep_spaces;
+  space::ContinuousSpace* non_moving_space = nullptr;
+  for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
+    if (space->IsAllocSpace() && !IsImmuneSpace(space) && space->GetLiveBitmap() != nullptr) {
+      if (space == heap_->GetNonMovingSpace()) {
+        non_moving_space = space;
+      } else {
+        sweep_spaces.push_back(space);
+      }
+    }
+  }
+  // We are unlikely to sweep a significant number of non-movable objects, so we sweep that
+  // space after the other alloc spaces as an optimization.
+  if (non_moving_space != nullptr) {
+    sweep_spaces.push_back(non_moving_space);
+  }
+  // Start by sweeping the continuous spaces.
+  for (space::ContinuousSpace* space : sweep_spaces) {
+    space::AllocSpace* alloc_space = space->AsAllocSpace();
+    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    if (swap_bitmaps) {
+      std::swap(live_bitmap, mark_bitmap);
+    }
+    Object** out = objects;
+    for (size_t i = 0; i < count; ++i) {
+      Object* obj = objects[i];
+      if (space->HasAddress(obj)) {
+        // This object is in the space, remove it from the array and add it to the sweep buffer
+        // if needed.
+        if (!mark_bitmap->Test(obj)) {
+          if (chunk_free_pos >= kSweepArrayChunkFreeSize) {
+            timings_.StartSplit("FreeList");
+            freed_objects += chunk_free_pos;
+            freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+            timings_.EndSplit();
+            chunk_free_pos = 0;
+          }
+          chunk_free_buffer[chunk_free_pos++] = obj;
+        }
+      } else {
+        *(out++) = obj;
+      }
+    }
+    if (chunk_free_pos > 0) {
+      timings_.StartSplit("FreeList");
+      freed_objects += chunk_free_pos;
+      freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+      timings_.EndSplit();
+      chunk_free_pos = 0;
+    }
+    // All of the references which the space contained are no longer in the allocation stack;
+    // update the count.
+    count = out - objects;
+  }
+  // Handle the large object space.
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
+  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
+  if (swap_bitmaps) {
+    std::swap(large_live_objects, large_mark_objects);
+  }
   for (size_t i = 0; i < count; ++i) {
     Object* obj = objects[i];
-    // There should only be objects in the AllocSpace/LargeObjectSpace in the allocation stack.
-    if (LIKELY(mark_bitmap->HasAddress(obj))) {
-      if (!mark_bitmap->Test(obj)) {
-        // Don't bother un-marking since we clear the mark bitmap anyways.
-        *(out++) = obj;
-        // Free objects in chunks.
-        DCHECK_GE(out, objects_to_chunk_free);
-        DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-        if (static_cast<size_t>(out - objects_to_chunk_free) == kSweepArrayChunkFreeSize) {
-          timings_.StartSplit("FreeList");
-          size_t chunk_freed_objects = out - objects_to_chunk_free;
-          freed_objects += chunk_freed_objects;
-          freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-          objects_to_chunk_free = out;
-          timings_.EndSplit();
-        }
-      }
-    } else if (!large_mark_objects->Test(obj)) {
+    // Handle large objects.
+    if (!large_mark_objects->Test(obj)) {
       ++freed_large_objects;
       freed_large_object_bytes += large_object_space->Free(self, obj);
     }
   }
-  // Free the remaining objects in chunks.
-  DCHECK_GE(out, objects_to_chunk_free);
-  DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-  if (out - objects_to_chunk_free > 0) {
-    timings_.StartSplit("FreeList");
-    size_t chunk_freed_objects = out - objects_to_chunk_free;
-    freed_objects += chunk_freed_objects;
-    freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-    timings_.EndSplit();
-  }
-  CHECK_EQ(count, allocations->Size());
   timings_.EndSplit();
 
   timings_.StartSplit("RecordFree");
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0dd8792..0150609 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -99,9 +99,13 @@
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      ImmuneSpace(space);
+    if (space->GetLiveBitmap() != nullptr) {
+      if (space == to_space_) {
+        BindLiveToMarkBitmap(to_space_);
+      } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+          || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+        ImmuneSpace(space);
+      }
     }
   }
   timings_.EndSplit();
@@ -115,11 +119,6 @@
       immune_end_(nullptr),
       to_space_(nullptr),
       from_space_(nullptr),
-      soft_reference_list_(nullptr),
-      weak_reference_list_(nullptr),
-      finalizer_reference_list_(nullptr),
-      phantom_reference_list_(nullptr),
-      cleared_reference_list_(nullptr),
       self_(nullptr),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0) {
@@ -132,15 +131,12 @@
   DCHECK(mark_stack_ != nullptr);
   immune_begin_ = nullptr;
   immune_end_ = nullptr;
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   self_ = Thread::Current();
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
+  // Cache the to space's live bitmap (may be null) as an optimization.
+  to_space_live_bitmap_ = to_space_->GetLiveBitmap();
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
@@ -229,17 +225,18 @@
     SweepSystemWeaks();
   }
   // Record freed memory.
-  int from_bytes = from_space_->GetBytesAllocated();
-  int to_bytes = to_space_->GetBytesAllocated();
-  int from_objects = from_space_->GetObjectsAllocated();
-  int to_objects = to_space_->GetObjectsAllocated();
-  int freed_bytes = from_bytes - to_bytes;
-  int freed_objects = from_objects - to_objects;
-  CHECK_GE(freed_bytes, 0);
+  uint64_t from_bytes = from_space_->GetBytesAllocated();
+  uint64_t to_bytes = to_space_->GetBytesAllocated();
+  uint64_t from_objects = from_space_->GetObjectsAllocated();
+  uint64_t to_objects = to_space_->GetObjectsAllocated();
+  CHECK_LE(to_objects, from_objects);
+  int64_t freed_bytes = from_bytes - to_bytes;
+  int64_t freed_objects = from_objects - to_objects;
   freed_bytes_.FetchAndAdd(freed_bytes);
   freed_objects_.FetchAndAdd(freed_objects);
-  heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes));
-
+  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
+  // space.
+  heap_->RecordFree(freed_objects, freed_bytes);
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
@@ -356,6 +353,9 @@
         // Make sure to only update the forwarding address AFTER you copy the object so that the
         // monitor word doesn't get stomped over.
         obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
+        if (to_space_live_bitmap_ != nullptr) {
+          to_space_live_bitmap_->Set(forward_address);
+        }
         MarkStackPush(forward_address);
       } else {
         DCHECK(to_space_->HasAddress(forward_address) ||
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b0724f9..b76ef5f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -131,10 +131,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // TODO: enable thread safety analysis when in use by multiple worker threads.
   template <typename MarkVisitor>
   void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
@@ -269,16 +265,12 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  // Destination and source spaces.
+  // Destination and source spaces (can be any type of ContinuousMemMapAllocSpace, with or
+  // without a live bitmap).
   space::ContinuousMemMapAllocSpace* to_space_;
+  accounting::SpaceBitmap* to_space_live_bitmap_;  // Cached live bitmap as an optimization.
   space::ContinuousMemMapAllocSpace* from_space_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   Thread* self_;
 
   // Used for kEnableSimplePromo. The end/top of the bump pointer
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index ee6077a..c562e8c 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,8 +56,7 @@
 }
 
 void StickyMarkSweep::Sweep(bool swap_bitmaps) {
-  accounting::ObjectStack* live_stack = GetHeap()->GetLiveStack();
-  SweepArray(live_stack, false);
+  SweepArray(GetHeap()->GetLiveStack(), false);
 }
 
 void StickyMarkSweep::MarkThreadRoots(Thread* self) {