Thread-local allocation stack.

With this change, Ritz MemAllocTest gets ~14% faster on N4.
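
The idea, roughly: each mutator thread claims a small segment of the
shared allocation stack and pushes newly allocated objects into it by
bumping a thread-local cursor, with no atomics on the fast path. Before
the collector swaps or sweeps the allocation stack it revokes every
thread-local segment so all pushed objects become visible; slots a
thread never filled stay null, which is why the sweep loops below skip
null entries. A minimal sketch of that scheme (simplified, with
hypothetical names, not the runtime's actual classes):

  #include <atomic>
  #include <cstddef>

  class Object;

  // Shared allocation stack. Mutators claim fixed-size segments with a
  // single atomic bump; everything inside a claimed segment is
  // thread-local after that.
  class AllocationStack {
   public:
    static constexpr size_t kCapacity = 4096;
    static constexpr size_t kSegmentSize = 128;

    // Claims kSegmentSize contiguous slots for one thread, or fails if
    // the stack is full (a real runtime would then trigger a GC).
    bool ClaimSegment(Object*** start, Object*** end) {
      size_t idx = back_.fetch_add(kSegmentSize, std::memory_order_relaxed);
      if (idx + kSegmentSize > kCapacity) {
        return false;
      }
      *start = &slots_[idx];
      *end = &slots_[idx + kSegmentSize];
      return true;
    }

    Object* slots_[kCapacity] = {};  // Unfilled slots stay nullptr.
    std::atomic<size_t> back_{0};
  };

  // Per-thread state: a cursor into the thread's claimed segment.
  struct ThreadAllocStackState {
    Object** pos = nullptr;
    Object** end = nullptr;

    // Fast path for recording a newly allocated object: no atomics.
    bool Push(Object* obj) {
      if (pos == end) {
        return false;  // Segment exhausted; claim a new one.
      }
      *pos++ = obj;
      return true;
    }

    // "Revoking" just drops the segment: objects already written are
    // visible in the shared stack, and the untouched tail stays null,
    // so the collector's sweep loop skips those entries.
    void Revoke() {
      pos = nullptr;
      end = nullptr;
    }
  };
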

Bug: 9986565
Change-Id: I2fb7d6f7c5daa63dd4fc73ba739e6ae4ed820617
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index de9f59e..dbbc115 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -206,6 +206,10 @@
     // This second sweep makes sure that we don't have any objects in the live stack which point to
     // freed objects. These cause problems since their references may be previously freed objects.
     SweepArray(GetHeap()->allocation_stack_.get(), false);
+    // Since SweepArray() above resets the (active) allocation stack,
+    // we need to revoke the thread-local allocation stacks that
+    // point into it.
+    GetHeap()->RevokeAllThreadLocalAllocationStacks(self);
   }
 
   timings_.StartSplit("PreSweepingGcVerification");
@@ -241,12 +245,15 @@
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
-  heap_->SwapStacks();
+  heap_->SwapStacks(self);
 
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
     MarkRoots();
+    if (kUseThreadLocalAllocationStack) {
+      heap_->RevokeAllThreadLocalAllocationStacks(self);
+    }
   } else {
     MarkThreadRoots(self);
     // At this point the live stack should no longer have any mutators which push into it.
@@ -995,6 +1002,9 @@
         << thread->GetState() << " thread " << thread << " self " << self;
     thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
     ATRACE_END();
+    if (kUseThreadLocalAllocationStack) {
+      thread->RevokeThreadLocalAllocationStack();
+    }
     mark_sweep_->GetBarrier().Pass(self);
   }
 
@@ -1062,6 +1072,9 @@
     Object** out = objects;
     for (size_t i = 0; i < count; ++i) {
       Object* obj = objects[i];
+      if (kUseThreadLocalAllocationStack && obj == nullptr) {
+        continue;
+      }
       if (space->HasAddress(obj)) {
         // This object is in the space, remove it from the array and add it to the sweep buffer
         // if needed.
@@ -1100,6 +1113,9 @@
   for (size_t i = 0; i < count; ++i) {
     Object* obj = objects[i];
     // Handle large objects.
+    if (kUseThreadLocalAllocationStack && obj == nullptr) {
+      continue;
+    }
     if (!large_mark_objects->Test(obj)) {
       ++freed_large_objects;
       freed_large_object_bytes += large_object_space->Free(self, obj);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index ac33cc7..b1122b9 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -210,7 +210,10 @@
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
-  heap_->SwapStacks();
+  if (kUseThreadLocalAllocationStack) {
+    heap_->RevokeAllThreadLocalAllocationStacks(self_);
+  }
+  heap_->SwapStacks(self_);
   WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   MarkRoots();
   // Mark roots of immune spaces.