Background compaction support.

When the process state changes to a state which does not perceive
jank, we copy from the main free-list backed allocation space to
the bump pointer space and enable the semispace allocator.

When we transition back to foreground, we copy back to a free-list
backed space.
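
A condensed sketch of the hook that drives both transitions, taken
from the Heap::UpdateProcessState / TransitionCollector hunks in
runtime/gc/heap.cc below (comments here are editorial; the actual
code also forces a full GC when the state does not change):

  void Heap::UpdateProcessState(ProcessState process_state) {
    if (process_state_ != process_state) {
      process_state_ = process_state;
      if (process_state_ == kProcessStateJankPerceptible) {
        // Foreground: return to the free-list backed collector.
        TransitionCollector(post_zygote_collector_type_);
      } else {
        // Background: e.g. the semispace collector compacting into
        // the bump pointer space.
        TransitionCollector(background_collector_type_);
      }
    }
  }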

Create a separate non-moving space which only holds non-movable
objects. This enables us to quickly wipe the current alloc space
(DlMalloc / RosAlloc) when we transition to background.
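
Non-movable objects are allocated through a dedicated allocator so
they never need to be copied; a minimal illustration based on the
AllocNonMovableObject helper added to runtime/gc/heap.h below (the
call site, klass and byte_count are hypothetical):

  // Hypothetical call site: request an object which must never move.
  mirror::Object* obj =
      heap->AllocNonMovableObject<true>(self, klass, byte_count);
  // Internally this uses GetCurrentNonMovingAllocator(), which stays
  // kAllocatorTypeNonMoving even while the main allocator flips
  // between the bump pointer and free-list allocators.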

Added multiple alloc space support to the sticky mark sweep GC.
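
SweepArray in mark_sweep.cc (see the hunk below) now iterates over
every alloc space instead of only the single malloc space; a
condensed sketch of the new loop structure, with chunk flushing and
timing splits elided:

  for (space::ContinuousSpace* space : sweep_spaces) {
    // The non-moving space is queued last as an optimization.
    space::AllocSpace* alloc_space = space->AsAllocSpace();
    Object** out = objects;
    for (size_t i = 0; i < count; ++i) {
      Object* obj = objects[i];
      if (space->HasAddress(obj)) {
        if (!mark_bitmap->Test(obj)) {
          // Freed in batches via alloc_space->FreeList().
          chunk_free_buffer[chunk_free_pos++] = obj;
        }
      } else {
        *(out++) = obj;  // Keep for the next space / large object pass.
      }
    }
    count = out - objects;  // Shrink the stack for the next space.
  }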

Added a -XX:BackgroundGC option which lets you specify which GC to
use for background apps. Passing in -XX:BackgroundGC=SS makes the
heap compact itself whenever an app transitions to a state which
does not perceive jank.
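
Example invocation (hedged: the dalvikvm launcher and the remaining
arguments are placeholders; only the -XX:BackgroundGC=SS flag comes
from this change):

  dalvikvm -XX:BackgroundGC=SS <other runtime options> <main class>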

Results:
Simple background foreground test:
0. Reboot phone, unlock.
1. Open browser, click on home.
2. Open calculator, click on home.
3. Open calendar, click on home.
4. Open camera, click on home.
5. Open clock, click on home.
6. adb shell dumpsys meminfo

PSS Normal ART:
Sample 1:
    88468 kB: Dalvik
     3188 kB: Dalvik Other
Sample 2:
    81125 kB: Dalvik
     3080 kB: Dalvik Other

PSS Dalvik:
Total PSS by category:
Sample 1:
    81033 kB: Dalvik
    27787 kB: Dalvik Other
Sample 2:
    81901 kB: Dalvik
    28869 kB: Dalvik Other

PSS ART + Background Compaction:
Sample 1:
    71014 kB: Dalvik
     1412 kB: Dalvik Other
Sample 2:
    73859 kB: Dalvik
     1400 kB: Dalvik Other

The Dalvik Other reduction can be explained by shallower allocation
stacks, fewer live bitmaps, and fewer dirty cards.

TODO improvements: Recycle mem-maps which are unused in the current
state. Don't hardcode the 64 MB capacity of the non-moving space
(avoid returning linear alloc nightmares). Figure out ways to deal
with low virtual address memory problems.

Bug: 8981901

Change-Id: Ib235d03f45548ffc08a06b8ae57bf5bada49d6f3
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index 5589461..6625b7b 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -55,11 +55,23 @@
   continuous_space_bitmaps_.push_back(bitmap);
 }
 
+void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+  auto it = std::find(continuous_space_bitmaps_.begin(), continuous_space_bitmaps_.end(), bitmap);
+  DCHECK(it != continuous_space_bitmaps_.end());
+  continuous_space_bitmaps_.erase(it);
+}
+
 void HeapBitmap::AddDiscontinuousObjectSet(SpaceSetMap* set) {
   DCHECK(set != NULL);
   discontinuous_space_sets_.push_back(set);
 }
 
+void HeapBitmap::RemoveDiscontinuousObjectSet(SpaceSetMap* set) {
+  auto it = std::find(discontinuous_space_sets_.begin(), discontinuous_space_sets_.end(), set);
+  DCHECK(it != discontinuous_space_sets_.end());
+  discontinuous_space_sets_.erase(it);
+}
+
 void HeapBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     bitmap->Walk(callback, arg);
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 24ebbaa..bed2c1e 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -105,7 +105,9 @@
   const Heap* const heap_;
 
   void AddContinuousSpaceBitmap(SpaceBitmap* bitmap);
+  void RemoveContinuousSpaceBitmap(SpaceBitmap* bitmap);
   void AddDiscontinuousObjectSet(SpaceSetMap* set);
+  void RemoveDiscontinuousObjectSet(SpaceSetMap* set);
 
   // Bitmaps covering continuous spaces.
   SpaceBitmapVector continuous_space_bitmaps_;
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index b428e74..6d9dde7 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,6 +82,8 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
+        // Use SetFieldPtr to avoid the card mark; this optimization reduces dirtied pages and
+        // improves performance.
         obj->SetFieldPtr(offset, new_ref, true);
       }
     }
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 6baee54..4822e64 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -65,6 +65,7 @@
 
 void GarbageCollector::Run(bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   pause_times_.clear();
   duration_ns_ = 0;
@@ -82,14 +83,23 @@
     // Pause is the entire length of the GC.
     uint64_t pause_start = NanoTime();
     ATRACE_BEGIN("Application threads suspended");
-    thread_list->SuspendAll();
-    GetHeap()->RevokeAllThreadLocalBuffers();
-    MarkingPhase();
-    ReclaimPhase();
-    thread_list->ResumeAll();
+    // The mutator lock may already be exclusively held when we run garbage collections to change
+    // the current collector / allocator during process state updates.
+    if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+    } else {
+      thread_list->SuspendAll();
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+      thread_list->ResumeAll();
+    }
     ATRACE_END();
     RegisterPause(NanoTime() - pause_start);
   } else {
+    CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     Thread* self = Thread::Current();
     {
       ReaderMutexLock mu(self, *Locks::mutator_lock_);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index a6fb35d..937ff6d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1043,66 +1043,88 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
-  // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
-  // going to free.
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
-  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-    std::swap(large_live_objects, large_mark_objects);
-  }
-
+  Thread* self = Thread::Current();
+  mirror::Object* chunk_free_buffer[kSweepArrayChunkFreeSize];
+  size_t chunk_free_pos = 0;
   size_t freed_bytes = 0;
   size_t freed_large_object_bytes = 0;
   size_t freed_objects = 0;
   size_t freed_large_objects = 0;
-  size_t count = allocations->Size();
+  // How many objects are left in the array, modified after each space is swept.
   Object** objects = const_cast<Object**>(allocations->Begin());
-  Object** out = objects;
-  Object** objects_to_chunk_free = out;
-
-  // Empty the allocation stack.
-  Thread* self = Thread::Current();
+  size_t count = allocations->Size();
+  // Change the order to ensure that the non-moving space is swept last as an optimization.
+  std::vector<space::ContinuousSpace*> sweep_spaces;
+  space::ContinuousSpace* non_moving_space = nullptr;
+  for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
+    if (space->IsAllocSpace() && !IsImmuneSpace(space) && space->GetLiveBitmap() != nullptr) {
+      if (space == heap_->GetNonMovingSpace()) {
+        non_moving_space = space;
+      } else {
+        sweep_spaces.push_back(space);
+      }
+    }
+  }
+  // Unlikely to sweep a significant number of non-movable objects, so we sweep those after the
+  // other alloc spaces as an optimization.
+  if (non_moving_space != nullptr) {
+    sweep_spaces.push_back(non_moving_space);
+  }
+  // Start by sweeping the continuous spaces.
+  for (space::ContinuousSpace* space : sweep_spaces) {
+    space::AllocSpace* alloc_space = space->AsAllocSpace();
+    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    if (swap_bitmaps) {
+      std::swap(live_bitmap, mark_bitmap);
+    }
+    Object** out = objects;
+    for (size_t i = 0; i < count; ++i) {
+      Object* obj = objects[i];
+      if (space->HasAddress(obj)) {
+        // This object is in the space, remove it from the array and add it to the sweep buffer
+        // if needed.
+        if (!mark_bitmap->Test(obj)) {
+          if (chunk_free_pos >= kSweepArrayChunkFreeSize) {
+            timings_.StartSplit("FreeList");
+            freed_objects += chunk_free_pos;
+            freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+            timings_.EndSplit();
+            chunk_free_pos = 0;
+          }
+          chunk_free_buffer[chunk_free_pos++] = obj;
+        }
+      } else {
+        *(out++) = obj;
+      }
+    }
+    if (chunk_free_pos > 0) {
+      timings_.StartSplit("FreeList");
+      freed_objects += chunk_free_pos;
+      freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+      timings_.EndSplit();
+      chunk_free_pos = 0;
+    }
+    // All of the references which the space contained are no longer in the allocation stack;
+    // update the count.
+    count = out - objects;
+  }
+  // Handle the large object space.
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
+  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
+  if (swap_bitmaps) {
+    std::swap(large_live_objects, large_mark_objects);
+  }
   for (size_t i = 0; i < count; ++i) {
     Object* obj = objects[i];
-    // There should only be objects in the AllocSpace/LargeObjectSpace in the allocation stack.
-    if (LIKELY(mark_bitmap->HasAddress(obj))) {
-      if (!mark_bitmap->Test(obj)) {
-        // Don't bother un-marking since we clear the mark bitmap anyways.
-        *(out++) = obj;
-        // Free objects in chunks.
-        DCHECK_GE(out, objects_to_chunk_free);
-        DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-        if (static_cast<size_t>(out - objects_to_chunk_free) == kSweepArrayChunkFreeSize) {
-          timings_.StartSplit("FreeList");
-          size_t chunk_freed_objects = out - objects_to_chunk_free;
-          freed_objects += chunk_freed_objects;
-          freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-          objects_to_chunk_free = out;
-          timings_.EndSplit();
-        }
-      }
-    } else if (!large_mark_objects->Test(obj)) {
+    // Handle large objects.
+    if (!large_mark_objects->Test(obj)) {
       ++freed_large_objects;
       freed_large_object_bytes += large_object_space->Free(self, obj);
     }
   }
-  // Free the remaining objects in chunks.
-  DCHECK_GE(out, objects_to_chunk_free);
-  DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-  if (out - objects_to_chunk_free > 0) {
-    timings_.StartSplit("FreeList");
-    size_t chunk_freed_objects = out - objects_to_chunk_free;
-    freed_objects += chunk_freed_objects;
-    freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-    timings_.EndSplit();
-  }
-  CHECK_EQ(count, allocations->Size());
   timings_.EndSplit();
 
   timings_.StartSplit("RecordFree");
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0dd8792..0150609 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -99,9 +99,13 @@
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      ImmuneSpace(space);
+    if (space->GetLiveBitmap() != nullptr) {
+      if (space == to_space_) {
+        BindLiveToMarkBitmap(to_space_);
+      } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+          || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+        ImmuneSpace(space);
+      }
     }
   }
   timings_.EndSplit();
@@ -115,11 +119,6 @@
       immune_end_(nullptr),
       to_space_(nullptr),
       from_space_(nullptr),
-      soft_reference_list_(nullptr),
-      weak_reference_list_(nullptr),
-      finalizer_reference_list_(nullptr),
-      phantom_reference_list_(nullptr),
-      cleared_reference_list_(nullptr),
       self_(nullptr),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0) {
@@ -132,15 +131,12 @@
   DCHECK(mark_stack_ != nullptr);
   immune_begin_ = nullptr;
   immune_end_ = nullptr;
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   self_ = Thread::Current();
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
+  // Set the initial bitmap.
+  to_space_live_bitmap_ = to_space_->GetLiveBitmap();
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
@@ -229,17 +225,18 @@
     SweepSystemWeaks();
   }
   // Record freed memory.
-  int from_bytes = from_space_->GetBytesAllocated();
-  int to_bytes = to_space_->GetBytesAllocated();
-  int from_objects = from_space_->GetObjectsAllocated();
-  int to_objects = to_space_->GetObjectsAllocated();
-  int freed_bytes = from_bytes - to_bytes;
-  int freed_objects = from_objects - to_objects;
-  CHECK_GE(freed_bytes, 0);
+  uint64_t from_bytes = from_space_->GetBytesAllocated();
+  uint64_t to_bytes = to_space_->GetBytesAllocated();
+  uint64_t from_objects = from_space_->GetObjectsAllocated();
+  uint64_t to_objects = to_space_->GetObjectsAllocated();
+  CHECK_LE(to_objects, from_objects);
+  int64_t freed_bytes = from_bytes - to_bytes;
+  int64_t freed_objects = from_objects - to_objects;
   freed_bytes_.FetchAndAdd(freed_bytes);
   freed_objects_.FetchAndAdd(freed_objects);
-  heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes));
-
+  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
+  // space.
+  heap_->RecordFree(freed_objects, freed_bytes);
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
@@ -356,6 +353,9 @@
         // Make sure to only update the forwarding address AFTER you copy the object so that the
         // monitor word doesn't get stomped over.
         obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
+        if (to_space_live_bitmap_ != nullptr) {
+          to_space_live_bitmap_->Set(forward_address);
+        }
         MarkStackPush(forward_address);
       } else {
         DCHECK(to_space_->HasAddress(forward_address) ||
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b0724f9..b76ef5f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -131,10 +131,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // TODO: enable thread safety analysis when in use by multiple worker threads.
   template <typename MarkVisitor>
   void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
@@ -269,16 +265,12 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  // Destination and source spaces.
+  // Destination and source spaces (can be any type of ContinuousMemMapAllocSpace, with or
+  // without a live bitmap).
   space::ContinuousMemMapAllocSpace* to_space_;
+  accounting::SpaceBitmap* to_space_live_bitmap_;  // Cached live bitmap as an optimization.
   space::ContinuousMemMapAllocSpace* from_space_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   Thread* self_;
 
   // Used for kEnableSimplePromo. The end/top of the bump pointer
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index ee6077a..c562e8c 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,8 +56,7 @@
 }
 
 void StickyMarkSweep::Sweep(bool swap_bitmaps) {
-  accounting::ObjectStack* live_stack = GetHeap()->GetLiveStack();
-  SweepArray(live_stack, false);
+  SweepArray(GetHeap()->GetLiveStack(), false);
 }
 
 void StickyMarkSweep::MarkThreadRoots(Thread* self) {
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index af1b26b..5e1136b 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -52,8 +52,14 @@
   size_t bytes_allocated;
   obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated);
   if (UNLIKELY(obj == nullptr)) {
+    bool is_current_allocator = allocator == GetCurrentAllocator();
     obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &klass);
     if (obj == nullptr) {
+      bool after_is_current_allocator = allocator == GetCurrentAllocator();
+      if (is_current_allocator && !after_is_current_allocator) {
+        // If the allocator changed, we need to restart the allocation.
+        return AllocObject<kInstrumented>(self, klass, byte_count);
+      }
       return nullptr;
     }
   }
@@ -120,14 +126,6 @@
   if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
-  if (kInstrumented) {
-    if (UNLIKELY(running_on_valgrind_ && allocator_type == kAllocatorTypeFreeList)) {
-      return non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
-    }
-  } else {
-    // If running on valgrind, we should be using the instrumented path.
-    DCHECK(!running_on_valgrind_);
-  }
   mirror::Object* ret;
   switch (allocator_type) {
     case kAllocatorTypeBumpPointer: {
@@ -139,16 +137,30 @@
       }
       break;
     }
-    case kAllocatorTypeFreeList: {
-      if (kUseRosAlloc) {
-        ret = reinterpret_cast<space::RosAllocSpace*>(non_moving_space_)->AllocNonvirtual(
-            self, alloc_size, bytes_allocated);
+    case kAllocatorTypeRosAlloc: {
+      if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
+        // If running on valgrind, we should be using the instrumented path.
+        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated);
       } else {
-        ret = reinterpret_cast<space::DlMallocSpace*>(non_moving_space_)->AllocNonvirtual(
-            self, alloc_size, bytes_allocated);
+        DCHECK(!running_on_valgrind_);
+        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
       }
       break;
     }
+    case kAllocatorTypeDlMalloc: {
+      if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
+        // If running on valgrind, we should be using the instrumented path.
+        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated);
+      } else {
+        DCHECK(!running_on_valgrind_);
+        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
+      }
+      break;
+    }
+    case kAllocatorTypeNonMoving: {
+      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
+      break;
+    }
     case kAllocatorTypeLOS: {
       ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated);
       // Note that the bump pointer spaces aren't necessarily next to
@@ -159,15 +171,15 @@
     }
     case kAllocatorTypeTLAB: {
       alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
-      if (UNLIKELY(self->TLABSize() < alloc_size)) {
+      if (UNLIKELY(self->TlabSize() < alloc_size)) {
         // Try allocating a new thread local buffer, if the allocaiton fails the space must be
         // full so return nullptr.
-        if (!bump_pointer_space_->AllocNewTLAB(self, alloc_size + kDefaultTLABSize)) {
+        if (!bump_pointer_space_->AllocNewTlab(self, alloc_size + kDefaultTLABSize)) {
           return nullptr;
         }
       }
       // The allocation can't fail.
-      ret = self->AllocTLAB(alloc_size);
+      ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
       *bytes_allocated = alloc_size;
       break;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e08106b..6e2bf91 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -75,13 +75,17 @@
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
-           CollectorType post_zygote_collector_type, size_t parallel_gc_threads,
-           size_t conc_gc_threads, bool low_memory_mode, size_t long_pause_log_threshold,
-           size_t long_gc_log_threshold, bool ignore_max_footprint, bool use_tlab)
+           CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+           size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
+           size_t long_pause_log_threshold, size_t long_gc_log_threshold,
+           bool ignore_max_footprint, bool use_tlab)
     : non_moving_space_(nullptr),
+      rosalloc_space_(nullptr),
+      dlmalloc_space_(nullptr),
       concurrent_gc_(false),
       collector_type_(kCollectorTypeNone),
       post_zygote_collector_type_(post_zygote_collector_type),
+      background_collector_type_(background_collector_type),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
       low_memory_mode_(low_memory_mode),
@@ -116,7 +120,7 @@
       verify_pre_gc_heap_(false),
       verify_post_gc_heap_(false),
       verify_mod_union_table_(false),
-      min_alloc_space_size_for_sticky_gc_(2 * MB),
+      min_alloc_space_size_for_sticky_gc_(1112 * MB),
       min_remaining_space_for_sticky_gc_(1 * MB),
       last_trim_time_ms_(0),
       allocation_rate_(0),
@@ -127,8 +131,8 @@
        */
       max_allocation_stack_size_(kGCALotMode ? kGcAlotInterval
           : (kDesiredHeapVerification > kVerifyAllFast) ? KB : MB),
-      current_allocator_(kMovingCollector ? kAllocatorTypeBumpPointer : kAllocatorTypeFreeList),
-      current_non_moving_allocator_(kAllocatorTypeFreeList),
+      current_allocator_(kAllocatorTypeDlMalloc),
+      current_non_moving_allocator_(kAllocatorTypeNonMoving),
       bump_pointer_space_(nullptr),
       temp_space_(nullptr),
       reference_referent_offset_(0),
@@ -150,7 +154,7 @@
   }
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
-  if (!Runtime::Current()->IsZygote()) {
+  if (!Runtime::Current()->IsZygote() || !kMovingCollector) {
     ChangeCollector(post_zygote_collector_type_);
   } else {
     // We are the zygote, use bump pointer allocation + semi space collector.
@@ -173,20 +177,23 @@
       requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     }
   }
-
   const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
-  if (!kUseRosAlloc) {
-    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                     requested_alloc_space_begin);
+  space::MallocSpace* malloc_space;
+  if (kUseRosAlloc) {
+    malloc_space = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                requested_alloc_space_begin, low_memory_mode_);
+    CHECK(malloc_space != nullptr) << "Failed to create rosalloc space";
   } else {
-    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                     requested_alloc_space_begin, low_memory_mode_);
+    malloc_space = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                requested_alloc_space_begin);
+    CHECK(malloc_space != nullptr) << "Failed to create dlmalloc space";
   }
+
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
     // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
     // issues.
-    const size_t bump_pointer_space_size = std::min(non_moving_space_->Capacity(), 128 * MB);
+    const size_t bump_pointer_space_size = std::min(malloc_space->Capacity(), 128 * MB);
     bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
                                                           bump_pointer_space_size, nullptr);
     CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
@@ -196,19 +203,18 @@
     CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(temp_space_);
   }
-
-  CHECK(non_moving_space_ != NULL) << "Failed to create non-moving space";
-  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
-  AddSpace(non_moving_space_);
+  non_moving_space_ = malloc_space;
+  malloc_space->SetFootprintLimit(malloc_space->Capacity());
+  AddSpace(malloc_space);
 
   // Allocate the large object space.
-  const bool kUseFreeListSpaceForLOS = false;
+  constexpr bool kUseFreeListSpaceForLOS = false;
   if (kUseFreeListSpaceForLOS) {
-    large_object_space_ = space::FreeListSpace::Create("large object space", NULL, capacity);
+    large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity);
   } else {
     large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
   }
-  CHECK(large_object_space_ != NULL) << "Failed to create large object space";
+  CHECK(large_object_space_ != nullptr) << "Failed to create large object space";
   AddSpace(large_object_space_);
 
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
@@ -278,7 +284,9 @@
 }
 
 void Heap::ChangeAllocator(AllocatorType allocator) {
+  // These two allocators are only used internally and don't have any entrypoints.
   DCHECK_NE(allocator, kAllocatorTypeLOS);
+  DCHECK_NE(allocator, kAllocatorTypeNonMoving);
   if (current_allocator_ != allocator) {
     current_allocator_ = allocator;
     SetQuickAllocEntryPointsAllocator(current_allocator_);
@@ -322,7 +330,16 @@
 }
 
 void Heap::UpdateProcessState(ProcessState process_state) {
-  process_state_ = process_state;
+  if (process_state_ != process_state) {
+    process_state_ = process_state;
+    if (process_state_ == kProcessStateJankPerceptible) {
+      TransitionCollector(post_zygote_collector_type_);
+    } else {
+      TransitionCollector(background_collector_type_);
+    }
+  } else {
+    CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+  }
 }
 
 void Heap::CreateThreadPool() {
@@ -351,15 +368,28 @@
 }
 
 void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
-  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(), stack);
+  space::ContinuousSpace* space1 = rosalloc_space_ != nullptr ? rosalloc_space_ : non_moving_space_;
+  space::ContinuousSpace* space2 = dlmalloc_space_ != nullptr ? dlmalloc_space_ : non_moving_space_;
+  // This is just logic to handle the case where the rosalloc or dlmalloc space is not present.
+  // TODO: Generalize this to n bitmaps?
+  if (space1 == nullptr) {
+    DCHECK(space2 != nullptr);
+    space1 = space2;
+  }
+  if (space2 == nullptr) {
+    DCHECK(space1 != nullptr);
+    space2 = space1;
+  }
+  MarkAllocStack(space1->GetLiveBitmap(), space2->GetLiveBitmap(),
+                 large_object_space_->GetLiveObjects(), stack);
 }
 
 void Heap::DeleteThreadPool() {
   thread_pool_.reset(nullptr);
 }
 
-void Heap::AddSpace(space::Space* space) {
-  DCHECK(space != NULL);
+void Heap::AddSpace(space::Space* space, bool set_as_default) {
+  DCHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
     DCHECK(!space->IsDiscontinuousSpace());
@@ -372,31 +402,19 @@
       live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
       mark_bitmap_->AddContinuousSpaceBitmap(mark_bitmap);
     }
-
     continuous_spaces_.push_back(continuous_space);
-    if (continuous_space->IsMallocSpace()) {
-      non_moving_space_ = continuous_space->AsMallocSpace();
+    if (set_as_default) {
+      if (continuous_space->IsDlMallocSpace()) {
+        dlmalloc_space_ = continuous_space->AsDlMallocSpace();
+      } else if (continuous_space->IsRosAllocSpace()) {
+        rosalloc_space_ = continuous_space->AsRosAllocSpace();
+      }
     }
-
     // Ensure that spaces remain sorted in increasing order of start address.
     std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
               [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
       return a->Begin() < b->Begin();
     });
-    // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
-    // avoid redundant marking.
-    bool seen_zygote = false, seen_alloc = false;
-    for (const auto& space : continuous_spaces_) {
-      if (space->IsImageSpace()) {
-        CHECK(!seen_zygote);
-        CHECK(!seen_alloc);
-      } else if (space->IsZygoteSpace()) {
-        CHECK(!seen_alloc);
-        seen_zygote = true;
-      } else if (space->IsMallocSpace()) {
-        seen_alloc = true;
-      }
-    }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
     space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
@@ -411,6 +429,47 @@
   }
 }
 
+void Heap::RemoveSpace(space::Space* space) {
+  DCHECK(space != nullptr);
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  if (space->IsContinuousSpace()) {
+    DCHECK(!space->IsDiscontinuousSpace());
+    space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
+    // Continuous spaces don't necessarily have bitmaps.
+    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    if (live_bitmap != nullptr) {
+      DCHECK(mark_bitmap != nullptr);
+      live_bitmap_->RemoveContinuousSpaceBitmap(live_bitmap);
+      mark_bitmap_->RemoveContinuousSpaceBitmap(mark_bitmap);
+    }
+    auto it = std::find(continuous_spaces_.begin(), continuous_spaces_.end(), continuous_space);
+    DCHECK(it != continuous_spaces_.end());
+    continuous_spaces_.erase(it);
+    if (continuous_space == dlmalloc_space_) {
+      dlmalloc_space_ = nullptr;
+    } else if (continuous_space == rosalloc_space_) {
+      rosalloc_space_ = nullptr;
+    }
+  } else {
+    DCHECK(space->IsDiscontinuousSpace());
+    space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
+    DCHECK(discontinuous_space->GetLiveObjects() != nullptr);
+    live_bitmap_->RemoveDiscontinuousObjectSet(discontinuous_space->GetLiveObjects());
+    DCHECK(discontinuous_space->GetMarkObjects() != nullptr);
+    mark_bitmap_->RemoveDiscontinuousObjectSet(discontinuous_space->GetMarkObjects());
+    auto it = std::find(discontinuous_spaces_.begin(), discontinuous_spaces_.end(),
+                        discontinuous_space);
+    DCHECK(it != discontinuous_spaces_.end());
+    discontinuous_spaces_.erase(it);
+  }
+  if (space->IsAllocSpace()) {
+    auto it = std::find(alloc_spaces_.begin(), alloc_spaces_.end(), space->AsAllocSpace());
+    DCHECK(it != alloc_spaces_.end());
+    alloc_spaces_.erase(it);
+  }
+}
+
 void Heap::RegisterGCAllocation(size_t bytes) {
   if (this != nullptr) {
     gc_memory_overhead_.FetchAndAdd(bytes);
@@ -845,10 +904,9 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
-  DCHECK_LE(freed_bytes, static_cast<size_t>(num_bytes_allocated_));
+void Heap::RecordFree(int64_t freed_objects, int64_t freed_bytes) {
+  DCHECK_LE(freed_bytes, num_bytes_allocated_.Load());
   num_bytes_allocated_.FetchAndSub(freed_bytes);
-
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
     thread_stats->freed_objects += freed_objects;
@@ -864,12 +922,19 @@
                                              size_t alloc_size, size_t* bytes_allocated,
                                              mirror::Class** klass) {
   mirror::Object* ptr = nullptr;
+  bool was_default_allocator = allocator == GetCurrentAllocator();
   DCHECK(klass != nullptr);
   SirtRef<mirror::Class> sirt_klass(self, *klass);
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
   collector::GcType last_gc = WaitForGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
+    // If we were the default allocator but the allocator changed while we were suspended,
+    // abort the allocation.
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
     ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
   }
@@ -880,7 +945,13 @@
       break;
     }
     // Attempt to run the collector, if we succeed, re-try the allocation.
-    if (CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone) {
+    bool gc_ran =
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
+    if (gc_ran) {
       // Did we free sufficient memory for the allocation to succeed?
       ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
     }
@@ -901,6 +972,10 @@
     // We don't need a WaitForGcToComplete here either.
     DCHECK(!gc_plan_.empty());
     CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
     ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated);
     if (ptr == nullptr) {
       ThrowOutOfMemoryError(self, alloc_size, false);
@@ -1065,6 +1140,92 @@
   CollectGarbageInternal(gc_plan_.back(), kGcCauseExplicit, clear_soft_references);
 }
 
+void Heap::TransitionCollector(CollectorType collector_type) {
+  if (collector_type == collector_type_) {
+    return;
+  }
+  uint64_t start_time = NanoTime();
+  int32_t before_size  = GetTotalMemory();
+  int32_t before_allocated = num_bytes_allocated_.Load();
+  ThreadList* tl = Runtime::Current()->GetThreadList();
+  Thread* self = Thread::Current();
+  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
+  Locks::mutator_lock_->AssertNotHeld(self);
+  // Busy wait until we can GC (StartGC can fail if we have a non-zero gc_disable_count_, this
+  // rarely occurs however).
+  while (!StartGC(self)) {
+    usleep(100);
+  }
+  tl->SuspendAll();
+  switch (collector_type) {
+    case kCollectorTypeSS: {
+      mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
+      space::MallocSpace* main_space;
+      if (rosalloc_space_ != nullptr) {
+        DCHECK(kUseRosAlloc);
+        main_space = rosalloc_space_;
+      } else {
+        DCHECK(dlmalloc_space_ != nullptr);
+        main_space = dlmalloc_space_;
+      }
+      Compact(temp_space_, main_space);
+      DCHECK(allocator_mem_map_.get() == nullptr);
+      allocator_mem_map_.reset(main_space->ReleaseMemMap());
+      madvise(main_space->Begin(), main_space->Size(), MADV_DONTNEED);
+      RemoveSpace(main_space);
+      break;
+    }
+    case kCollectorTypeMS:
+      // Fall through.
+    case kCollectorTypeCMS: {
+      if (collector_type_ == kCollectorTypeSS) {
+        // TODO: Use mem-map from temp space?
+        MemMap* mem_map = allocator_mem_map_.release();
+        CHECK(mem_map != nullptr);
+        size_t initial_size = kDefaultInitialSize;
+        mprotect(mem_map->Begin(), initial_size, PROT_READ | PROT_WRITE);
+        space::MallocSpace* malloc_space;
+        if (kUseRosAlloc) {
+          malloc_space =
+              space::RosAllocSpace::CreateFromMemMap(mem_map, "alloc space", kPageSize,
+                                                     initial_size, mem_map->Size(),
+                                                     mem_map->Size(), low_memory_mode_);
+        } else {
+          malloc_space =
+              space::DlMallocSpace::CreateFromMemMap(mem_map, "alloc space", kPageSize,
+                                                     initial_size, mem_map->Size(),
+                                                     mem_map->Size());
+        }
+        malloc_space->SetFootprintLimit(malloc_space->Capacity());
+        AddSpace(malloc_space);
+        Compact(malloc_space, bump_pointer_space_);
+      }
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Attempted to transition to invalid collector type";
+      break;
+    }
+  }
+  ChangeCollector(collector_type);
+  tl->ResumeAll();
+  // Can't call into java code with all threads suspended.
+  EnqueueClearedReferences();
+  uint64_t duration = NanoTime() - start_time;
+  GrowForUtilization(collector::kGcTypeFull, duration);
+  FinishGC(self, collector::kGcTypeFull);
+  int32_t after_size = GetTotalMemory();
+  int32_t delta_size = before_size - after_size;
+  int32_t after_allocated = num_bytes_allocated_.Load();
+  int32_t delta_allocated = before_allocated - after_allocated;
+  const std::string saved_bytes_str =
+      delta_size < 0 ? "-" + PrettySize(-delta_size) : PrettySize(delta_size);
+  LOG(INFO) << "Heap transition to " << process_state_ << " took "
+      << PrettyDuration(duration) << " " << PrettySize(before_size) << "->"
+      << PrettySize(after_size) << " from " << PrettySize(delta_allocated) << " to "
+      << PrettySize(delta_size) << " saved";
+}
+
 void Heap::ChangeCollector(CollectorType collector_type) {
   // TODO: Only do this with all mutators suspended to avoid races.
   if (collector_type != collector_type_) {
@@ -1086,7 +1247,7 @@
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
-        ChangeAllocator(kAllocatorTypeFreeList);
+        ChangeAllocator(kUseRosAlloc ? kAllocatorTypeRosAlloc : kAllocatorTypeDlMalloc);
         break;
       }
       case kCollectorTypeCMS: {
@@ -1094,7 +1255,7 @@
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
-        ChangeAllocator(kAllocatorTypeFreeList);
+        ChangeAllocator(kUseRosAlloc ? kAllocatorTypeRosAlloc : kAllocatorTypeDlMalloc);
         break;
       }
       default: {
@@ -1123,7 +1284,6 @@
     return;
   }
   VLOG(heap) << "Starting PreZygoteFork";
-  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
@@ -1152,7 +1312,13 @@
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
   // the remaining available heap memory.
   space::MallocSpace* zygote_space = non_moving_space_;
-  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space", low_memory_mode_);
+  non_moving_space_ = non_moving_space_->CreateZygoteSpace("alloc space", low_memory_mode_);
+  if (non_moving_space_->IsRosAllocSpace()) {
+    rosalloc_space_ = non_moving_space_->AsRosAllocSpace();
+  } else if (non_moving_space_->IsDlMallocSpace()) {
+    dlmalloc_space_ = non_moving_space_->AsDlMallocSpace();
+  }
+  // Can't use RosAlloc for non moving space due to thread local buffers.
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
@@ -1168,22 +1334,35 @@
   for (const auto& collector : garbage_collectors_) {
     collector->ResetCumulativeStatistics();
   }
+  // TODO: Avoid a hardcoded limit on the space for non-movable objects?
+  space::MallocSpace* new_non_moving_space
+      = space::DlMallocSpace::Create("Non moving dlmalloc space", 2 * MB, 64 * MB, 64 * MB,
+                                     nullptr);
+  AddSpace(new_non_moving_space, false);
+  CHECK(new_non_moving_space != nullptr) << "Failed to create new non-moving space";
+  new_non_moving_space->SetFootprintLimit(new_non_moving_space->Capacity());
+  non_moving_space_ = new_non_moving_space;
 }
 
 void Heap::FlushAllocStack() {
-  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
-                 allocation_stack_.get());
+  MarkAllocStackAsLive(allocation_stack_.get());
   allocation_stack_->Reset();
 }
 
-void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects,
+void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap1,
+                          accounting::SpaceBitmap* bitmap2,
+                          accounting::SpaceSetMap* large_objects,
                           accounting::ObjectStack* stack) {
+  DCHECK(bitmap1 != nullptr);
+  DCHECK(bitmap2 != nullptr);
   mirror::Object** limit = stack->End();
   for (mirror::Object** it = stack->Begin(); it != limit; ++it) {
     const mirror::Object* obj = *it;
-    DCHECK(obj != NULL);
-    if (LIKELY(bitmap->HasAddress(obj))) {
-      bitmap->Set(obj);
+    DCHECK(obj != nullptr);
+    if (bitmap1->HasAddress(obj)) {
+      bitmap1->Set(obj);
+    } else if (bitmap2->HasAddress(obj)) {
+      bitmap2->Set(obj);
     } else {
       large_objects->Set(obj);
     }
@@ -1223,14 +1402,6 @@
   Runtime* runtime = Runtime::Current();
   // If the heap can't run the GC, silently fail and return that no GC was run.
   switch (gc_type) {
-    case collector::kGcTypeSticky: {
-      const size_t alloc_space_size = non_moving_space_->Size();
-      if (alloc_space_size < min_alloc_space_size_for_sticky_gc_ ||
-        non_moving_space_->Capacity() - alloc_space_size < min_remaining_space_for_sticky_gc_) {
-        return collector::kGcTypeNone;
-      }
-      break;
-    }
     case collector::kGcTypePartial: {
       if (!have_zygote_space_) {
         return collector::kGcTypeNone;
@@ -1247,19 +1418,9 @@
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-  {
-    gc_complete_lock_->AssertNotHeld(self);
-    MutexLock mu(self, *gc_complete_lock_);
-    // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
-    // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-    //       Not doing at the moment to ensure soft references are cleared.
-    // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
-    if (gc_disable_count_ != 0) {
-      LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
-      return collector::kGcTypeNone;
-    }
-    is_gc_running_ = true;
+  gc_complete_lock_->AssertNotHeld(self);
+  if (!StartGC(self)) {
+    return collector::kGcTypeNone;
   }
   if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
     ++runtime->GetStats()->gc_for_alloc_count;
@@ -1290,7 +1451,8 @@
     mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
     collector = semi_space_collector_;
     gc_type = collector::kGcTypeFull;
-  } else if (current_allocator_ == kAllocatorTypeFreeList) {
+  } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
+      current_allocator_ == kAllocatorTypeDlMalloc) {
     for (const auto& cur_collector : garbage_collectors_) {
       if (cur_collector->IsConcurrent() == concurrent_gc_ &&
           cur_collector->GetGcType() == gc_type) {
@@ -1312,6 +1474,7 @@
   total_bytes_freed_ever_ += collector->GetFreedBytes();
 
   // Enqueue cleared references.
+  Locks::mutator_lock_->AssertNotHeld(self);
   EnqueueClearedReferences();
 
   // Grow the heap so that we know when to perform the next GC.
@@ -1322,7 +1485,7 @@
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
     // GC for alloc pauses the allocating thread, so consider it as a pause.
     bool was_slow = duration > long_gc_log_threshold_ ||
-            (gc_cause == kGcCauseForAlloc && duration > long_pause_log_threshold_);
+        (gc_cause == kGcCauseForAlloc && duration > long_pause_log_threshold_);
     if (!was_slow) {
       for (uint64_t pause : pauses) {
         was_slow = was_slow || pause > long_pause_log_threshold_;
@@ -1350,15 +1513,7 @@
         }
     }
   }
-
-  {
-      MutexLock mu(self, *gc_complete_lock_);
-      is_gc_running_ = false;
-      last_gc_type_ = gc_type;
-      // Wake anyone who may have been waiting for the GC to complete.
-      gc_complete_cond_->Broadcast(self);
-  }
-
+  FinishGC(self, gc_type);
   ATRACE_END();
 
   // Inform DDMS that a GC completed.
@@ -1366,6 +1521,29 @@
   return gc_type;
 }
 
+bool Heap::StartGC(Thread* self) {
+  MutexLock mu(self, *gc_complete_lock_);
+  // Ensure there is only one GC at a time.
+  WaitForGcToCompleteLocked(self);
+  // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
+  //       Not doing at the moment to ensure soft references are cleared.
+  // GC can be disabled if someone has used GetPrimitiveArrayCritical.
+  if (gc_disable_count_ != 0) {
+    LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
+    return false;
+  }
+  is_gc_running_ = true;
+  return true;
+}
+
+void Heap::FinishGC(Thread* self, collector::GcType gc_type) {
+  MutexLock mu(self, *gc_complete_lock_);
+  is_gc_running_ = false;
+  last_gc_type_ = gc_type;
+  // Wake anyone who may have been waiting for the GC to complete.
+  gc_complete_cond_->Broadcast(self);
+}
+
 static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
@@ -2046,14 +2224,18 @@
 }
 
 void Heap::RevokeThreadLocalBuffers(Thread* thread) {
-  non_moving_space_->RevokeThreadLocalBuffers(thread);
+  if (rosalloc_space_ != nullptr) {
+    rosalloc_space_->RevokeThreadLocalBuffers(thread);
+  }
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeThreadLocalBuffers(thread);
   }
 }
 
 void Heap::RevokeAllThreadLocalBuffers() {
-  non_moving_space_->RevokeAllThreadLocalBuffers();
+  if (rosalloc_space_ != nullptr) {
+    rosalloc_space_->RevokeAllThreadLocalBuffers();
+  }
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeAllThreadLocalBuffers();
   }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 832d5ec..1b221fa 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -90,10 +90,12 @@
 
 // Different types of allocators.
 enum AllocatorType {
-  kAllocatorTypeBumpPointer,
-  kAllocatorTypeTLAB,
-  kAllocatorTypeFreeList,  // ROSAlloc / dlmalloc
-  kAllocatorTypeLOS,  // Large object space.
+  kAllocatorTypeBumpPointer,  // Use BumpPointer allocator, has entrypoints.
+  kAllocatorTypeTLAB,  // Use TLAB allocator, has entrypoints.
+  kAllocatorTypeRosAlloc,  // Use RosAlloc allocator, has entrypoints.
+  kAllocatorTypeDlMalloc,  // Use dlmalloc allocator, has entrypoints.
+  kAllocatorTypeNonMoving,  // Special allocator for non moving objects, doesn't have entrypoints.
+  kAllocatorTypeLOS,  // Large object space, also doesn't have entrypoints.
 };
 
 // What caused the GC?
@@ -126,6 +128,7 @@
   kProcessStateJankPerceptible = 0,
   kProcessStateJankImperceptible = 1,
 };
+std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
 
 class Heap {
  public:
@@ -153,7 +156,8 @@
   // ImageWriter output.
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
                 size_t max_free, double target_utilization, size_t capacity,
-                const std::string& original_image_file_name, CollectorType collector_type_,
+                const std::string& original_image_file_name,
+                CollectorType post_zygote_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab);
@@ -162,14 +166,13 @@
 
   // Allocates and initializes storage for an object instance.
   template <bool kInstrumented>
-  inline mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentAllocator());
   }
   template <bool kInstrumented>
-  inline mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass,
-                                               size_t num_bytes)
+  mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentNonMovingAllocator());
@@ -204,6 +207,9 @@
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator);
 
+  // Transition the garbage collector during runtime, may copy objects from one space to another.
+  void TransitionCollector(CollectorType collector_type);
+
   // Change the collector to be one of the possible options (MS, CMS, SS).
   void ChangeCollector(CollectorType collector_type);
 
@@ -358,11 +364,14 @@
     return low_memory_mode_;
   }
 
-  void RecordFree(size_t freed_objects, size_t freed_bytes);
+  // Freed bytes can be negative in cases where we copy objects from a compacted space to a
+  // free-list backed space.
+  void RecordFree(int64_t freed_objects, int64_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
-  void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/, const mirror::Object* /*new_value*/) {
+  void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/,
+                         const mirror::Object* /*new_value*/) {
     card_table_->MarkCard(dst);
   }
 
@@ -458,8 +467,8 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark all the objects in the allocation stack in the specified bitmap.
-  void MarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects,
-                      accounting::ObjectStack* stack)
+  void MarkAllocStack(accounting::SpaceBitmap* bitmap1, accounting::SpaceBitmap* bitmap2,
+                      accounting::SpaceSetMap* large_objects, accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark the specified allocation stack as live.
@@ -470,6 +479,14 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
+  space::DlMallocSpace* GetDlMallocSpace() const {
+    return dlmalloc_space_;
+  }
+
+  space::RosAllocSpace* GetRosAllocSpace() const {
+    return rosalloc_space_;
+  }
+
   space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
   }
@@ -510,6 +527,9 @@
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space);
 
+  bool StartGC(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
+
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
     return
         allocator_type != kAllocatorTypeBumpPointer &&
@@ -614,7 +634,9 @@
 
   size_t GetPercentFree();
 
-  void AddSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space, bool set_as_default = true)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // No thread saftey analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
@@ -642,6 +664,12 @@
   // Classes, ArtMethods, ArtFields, and non moving objects.
   space::MallocSpace* non_moving_space_;
 
+  // Space which we use for the kAllocatorTypeRosAlloc.
+  space::RosAllocSpace* rosalloc_space_;
+
+  // Space which we use for the kAllocatorTypeDlMalloc.
+  space::DlMallocSpace* dlmalloc_space_;
+
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
 
@@ -651,6 +679,10 @@
   // A mod-union table remembers all of the references from the it's space to other spaces.
   SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
 
+  // Keep the free list allocator mem map lying around when we transition to background so that we
+  // don't have to worry about virtual address space fragmentation.
+  UniquePtr<MemMap> allocator_mem_map_;
+
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
   bool concurrent_gc_;
@@ -659,6 +691,8 @@
   CollectorType collector_type_;
   // Which collector we will switch to after zygote fork.
   CollectorType post_zygote_collector_type_;
+  // Which collector we will use when the app is notified of a transition to background.
+  CollectorType background_collector_type_;
 
   // How many GC threads we may use for paused parts of garbage collection.
   const size_t parallel_gc_threads_;
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index d5bc667..4dc17df 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -209,17 +209,17 @@
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
   objects_allocated_.FetchAndAdd(thread->thread_local_objects_);
   bytes_allocated_.FetchAndAdd(thread->thread_local_pos_ - thread->thread_local_start_);
-  thread->SetTLAB(nullptr, nullptr);
+  thread->SetTlab(nullptr, nullptr);
 }
 
-bool BumpPointerSpace::AllocNewTLAB(Thread* self, size_t bytes) {
+bool BumpPointerSpace::AllocNewTlab(Thread* self, size_t bytes) {
   MutexLock mu(Thread::Current(), block_lock_);
   RevokeThreadLocalBuffersLocked(self);
   byte* start = AllocBlock(bytes);
   if (start == nullptr) {
     return false;
   }
-  self->SetTLAB(start, start + bytes);
+  self->SetTlab(start, start + bytes);
   return true;
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 0a4be8a..3e25b6b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -90,12 +90,13 @@
   }
 
   // Clear the memory and reset the pointer to the start of the space.
-  void Clear();
+  void Clear() LOCKS_EXCLUDED(block_lock_);
 
   void Dump(std::ostream& os) const;
 
-  void RevokeThreadLocalBuffers(Thread* thread);
-  void RevokeAllThreadLocalBuffers();
+  void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
+  void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                    Locks::thread_list_lock_);
 
   uint64_t GetBytesAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint64_t GetObjectsAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -114,7 +115,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Allocate a new TLAB, returns false if the allocation failed.
-  bool AllocNewTLAB(Thread* self, size_t bytes);
+  bool AllocNewTlab(Thread* self, size_t bytes);
 
   virtual BumpPointerSpace* AsBumpPointerSpace() {
     return this;
@@ -147,7 +148,7 @@
   byte* growth_end_;
   AtomicInteger objects_allocated_;  // Accumulated from revoked thread local regions.
   AtomicInteger bytes_allocated_;  // Accumulated from revoked thread local regions.
-  Mutex block_lock_;
+  Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // The number of blocks in the space; if it is 0 then the space has one long continuous block
   // which doesn't have an updated header.
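The Tlab rename and the new lock annotations above describe the thread-local allocation path: SetTlab() hands a thread a private [start, end) window, the thread bumps thread_local_pos_ with no locking, and block_lock_ is only taken when AllocNewTlab() has to revoke the old buffer (folding its counts into objects_allocated_ / bytes_allocated_) and carve out a fresh block. A rough sketch of that fast/slow split; the helper, kTlabSize and the direct field access are assumptions for illustration only:

  // Sketch only: bump-allocate out of the current TLAB, refill on exhaustion.
  static constexpr size_t kTlabSize = 16 * KB;  // Hypothetical default TLAB size.

  mirror::Object* AllocFromTlabSketch(Thread* self, space::BumpPointerSpace* space,
                                      size_t num_bytes) {
    byte* pos = self->thread_local_pos_;
    if (LIKELY(pos + num_bytes <= self->thread_local_end_)) {
      self->thread_local_pos_ = pos + num_bytes;  // Lock-free fast path.
      return reinterpret_cast<mirror::Object*>(pos);
    }
    // Slow path: takes block_lock_, revokes the old buffer, installs a new one via SetTlab().
    if (!space->AllocNewTlab(self, std::max(num_bytes, kTlabSize))) {
      return nullptr;  // Block allocation failed; caller falls back to another allocator or GCs.
    }
    return AllocFromTlabSketch(self, space, num_bytes);  // Retry in the fresh TLAB.
  }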
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index a4e6eda..981af53 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -42,34 +42,15 @@
   CHECK(mspace != NULL);
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin) {
-  uint64_t start_time = 0;
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    start_time = NanoTime();
-    VLOG(startup) << "DlMallocSpace::Create entering " << name
-                  << " initial_size=" << PrettySize(initial_size)
-                  << " growth_limit=" << PrettySize(growth_limit)
-                  << " capacity=" << PrettySize(capacity)
-                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
-  }
-
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
-  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
-                                 requested_begin);
-  if (mem_map == NULL) {
-    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
-               << PrettySize(capacity);
-    return NULL;
-  }
+DlMallocSpace* DlMallocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                               size_t starting_size,
+                                               size_t initial_size, size_t growth_limit,
+                                               size_t capacity) {
+  DCHECK(mem_map != nullptr);
   void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
-  if (mspace == NULL) {
+  if (mspace == nullptr) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
-    return NULL;
+    return nullptr;
   }
 
   // Protect memory beyond the initial size.
@@ -79,14 +60,41 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  DlMallocSpace* space;
   byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindMallocSpace<DlMallocSpace, void*>(
+    return new ValgrindMallocSpace<DlMallocSpace, void*>(
         name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
+    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
+}
+
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    LOG(INFO) << "DlMallocSpace::Create entering " << name
+        << " initial_size=" << PrettySize(initial_size)
+        << " growth_limit=" << PrettySize(growth_limit)
+        << " capacity=" << PrettySize(capacity)
+        << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == nullptr) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return nullptr;
+  }
+  DlMallocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
+                                          growth_limit, capacity);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -102,7 +110,7 @@
   // morecore_start. Don't use an internal dlmalloc lock (as we already hold heap lock). When
   // morecore_start bytes of memory are exhausted, morecore will be called.
   void* msp = create_mspace_with_base(begin, morecore_start, false /*locked*/);
-  if (msp != NULL) {
+  if (msp != nullptr) {
     // Do not allow morecore requests to succeed beyond the initial size of the heap
     mspace_set_footprint_limit(msp, initial_size);
   } else {
@@ -202,9 +210,22 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
-  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
-  return heap->GetNonMovingSpace()->MoreCore(increment);
+  DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
+  // Support for multiple DlMalloc provided by a slow path.
+  if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) {
+    dlmalloc_space = nullptr;
+    for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+      if (space->IsDlMallocSpace()) {
+        DlMallocSpace* cur_dlmalloc_space = space->AsDlMallocSpace();
+        if (cur_dlmalloc_space->GetMspace() == mspace) {
+          dlmalloc_space = cur_dlmalloc_space;
+          break;
+        }
+      }
+    }
+    CHECK(dlmalloc_space != nullptr) << "Couldn't find DlMallocSpace with mspace=" << mspace;
+  }
+  return dlmalloc_space->MoreCore(increment);
 }
 
 size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) {
@@ -265,6 +286,12 @@
   return objects_allocated;
 }
 
+void DlMallocSpace::Clear() {
+  madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
+  GetLiveBitmap()->Clear();
+  GetMarkBitmap()->Clear();
+}
+
 #ifndef NDEBUG
 void DlMallocSpace::CheckMoreCoreForPrecondition() {
   lock_.AssertHeld(Thread::Current());
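Clear() above leans on the MADV_DONTNEED semantics for anonymous private mappings: the virtual mapping stays intact, the kernel reclaims the physical pages, and the next touch reads zero-filled memory, which is exactly the "wipe the space but keep the reservation" behaviour the background transition wants. A standalone (non-ART) demo of that behaviour:

  // Standalone illustration of madvise(MADV_DONTNEED) on an anonymous mapping (Linux).
  #include <sys/mman.h>
  #include <cassert>
  #include <cstddef>

  int main() {
    const size_t kSize = 16 * 4096;
    void* raw = mmap(nullptr, kSize, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(raw != MAP_FAILED);
    char* mem = static_cast<char*>(raw);
    mem[0] = 42;                         // Dirty a page.
    madvise(mem, kSize, MADV_DONTNEED);  // Drop physical pages, keep the mapping.
    assert(mem[0] == 0);                 // Anonymous pages read back zero-filled.
    munmap(raw, kSize);
    return 0;
  }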
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 73e65d4..671d2b2 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -33,6 +33,11 @@
 // An alloc space is a space where objects may be allocated and garbage collected.
 class DlMallocSpace : public MallocSpace {
  public:
+  // Create a DlMallocSpace from an existing mem_map.
+  static DlMallocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                         size_t starting_size, size_t initial_size,
+                                         size_t growth_limit, size_t capacity);
+
   // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
   // the caller should call Begin on the returned space to confirm the
@@ -90,6 +95,8 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  virtual void Clear();
+
   virtual void InvalidateAllocator() {
     mspace_for_alloc_ = nullptr;
   }
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 2727431..31d878c 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -81,10 +81,9 @@
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
                                          PROT_READ | PROT_WRITE, &error_msg);
-  if (mem_map == NULL) {
+  if (mem_map == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
                << PrettySize(*capacity) << ": " << error_msg;
-    return NULL;
   }
   return mem_map;
 }
@@ -190,9 +189,6 @@
   size_t size = RoundUp(Size(), kPageSize);
   // Trim the heap so that we minimize the size of the Zygote space.
   Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
   // Remaining size is for the new alloc space.
   const size_t growth_limit = growth_limit_ - size;
   const size_t capacity = Capacity() - size;
@@ -203,6 +199,10 @@
              << "Capacity " << Capacity();
   SetGrowthLimit(RoundUp(size, kPageSize));
   SetFootprintLimit(RoundUp(size, kPageSize));
+
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
   // FIXME: Do we need reference counted pointers here?
   // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
   VLOG(heap) << "Creating new AllocSpace: ";
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 80fdb6c..e5993f6 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -44,6 +44,36 @@
   CHECK(rosalloc != NULL);
 }
 
+RosAllocSpace* RosAllocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                               size_t starting_size,
+                                               size_t initial_size, size_t growth_limit,
+                                               size_t capacity, bool low_memory_mode) {
+  DCHECK(mem_map != nullptr);
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
+                                                 low_memory_mode);
+  if (rosalloc == nullptr) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return nullptr;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set so record in immutable structure and leave
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
+  if (RUNNING_ON_VALGRIND > 0) {
+    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
+        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  return space;
+}
+
 RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                      size_t capacity, byte* requested_begin, bool low_memory_mode) {
   uint64_t start_time = 0;
@@ -68,28 +98,9 @@
                << PrettySize(capacity);
     return NULL;
   }
-  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
-                                                 low_memory_mode);
-  if (rosalloc == NULL) {
-    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
-    return NULL;
-  }
 
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
-  }
-
-  // Everything is set so record in immutable structure and leave
-  RosAllocSpace* space;
-  byte* begin = mem_map->Begin();
-  if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
-        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
-  } else {
-    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
-  }
+  RosAllocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
+                                          growth_limit, capacity, low_memory_mode);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -114,7 +125,7 @@
     rosalloc->SetFootprintLimit(initial_size);
   } else {
     PLOG(ERROR) << "RosAlloc::Create failed";
-    }
+  }
   return rosalloc;
 }
 
@@ -203,9 +214,10 @@
 // Callback from rosalloc when it needs to increase the footprint
 extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
-  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
-  return heap->GetNonMovingSpace()->MoreCore(increment);
+  RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace();
+  DCHECK(rosalloc_space != nullptr);
+  DCHECK_EQ(rosalloc_space->GetRosAlloc(), rosalloc);
+  return rosalloc_space->MoreCore(increment);
 }
 
 size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
@@ -299,6 +311,12 @@
   rosalloc_->RevokeAllThreadLocalRuns();
 }
 
+void RosAllocSpace::Clear() {
+  madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
+  GetLiveBitmap()->Clear();
+  GetMarkBitmap()->Clear();
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
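Factoring Create() into CreateMemMap() plus CreateFromMemMap() is what lets the heap rebuild a free-list space over a recycled mapping instead of reserving fresh address space on every transition. A hypothetical caller-side sketch; the name, sizes and low-memory flag are placeholders rather than the actual Heap code, only the CreateFromMemMap() signature matches the patch:

  // Rebuild a RosAlloc-backed space over a MemMap stashed on an earlier transition.
  MemMap* mem_map = allocator_mem_map_.release();
  space::RosAllocSpace* main_space = space::RosAllocSpace::CreateFromMemMap(
      mem_map, "main rosalloc space",
      kPageSize,                 // starting_size: first committed chunk.
      2 * MB,                    // initial_size: footprint limit right after creation.
      growth_limit, capacity,    // Carried over from the space that was torn down.
      /*low_memory_mode=*/ false);
  CHECK(main_space != nullptr);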
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index b0c07fa..6720976 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -39,6 +39,10 @@
   // request was granted.
   static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin, bool low_memory_mode);
+  static RosAllocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                         size_t starting_size, size_t initial_size,
+                                         size_t growth_limit, size_t capacity,
+                                         bool low_memory_mode);
 
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
@@ -78,6 +82,7 @@
   size_t GetFootprintLimit();
   void SetFootprintLimit(size_t limit);
 
+  virtual void Clear();
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                               byte* begin, byte* end, byte* limit, size_t growth_limit);
 
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index db3aca9..31bbb7b 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -354,6 +354,10 @@
     return mem_map_.get();
   }
 
+  MemMap* ReleaseMemMap() {
+    return mem_map_.release();
+  }
+
  protected:
   MemMapSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end, byte* limit,
               GcRetentionPolicy gc_retention_policy)