Thread-local allocation stack.

With this change, Ritz MemAllocTest gets ~14% faster on N4.

Bug: 9986565
Change-Id: I2fb7d6f7c5daa63dd4fc73ba739e6ae4ed820617
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index ac33cc7..b1122b9 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -210,7 +210,10 @@
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
-  heap_->SwapStacks();
+  if (kUseThreadLocalAllocationStack) {
+    heap_->RevokeAllThreadLocalAllocationStacks(self_);
+  }
+  heap_->SwapStacks(self_);
   WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   MarkRoots();
   // Mark roots of immune spaces.