Thread-local allocation stack.

With this change, Ritz MemAllocTest gets ~14% faster on N4.

Bug: 9986565
Change-Id: I2fb7d6f7c5daa63dd4fc73ba739e6ae4ed820617
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 5e1136b..9c91b0e 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -82,11 +82,7 @@
     DCHECK(!Runtime::Current()->HasStatsEnabled());
   }
   if (AllocatorHasAllocationStack(allocator)) {
-    // This is safe to do since the GC will never free objects which are neither in the allocation
-    // stack or the live bitmap.
-    while (!allocation_stack_->AtomicPushBack(obj)) {
-      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-    }
+    PushOnAllocationStack(self, obj);
   }
   if (kInstrumented) {
     if (Dbg::IsAllocTrackingEnabled()) {
@@ -111,6 +107,35 @@
   return obj;
 }
 
+// Capacity of one thread-local allocation stack, counted in object references (not bytes).
+static constexpr size_t kThreadLocalAllocationStackSize = 128;
+
+inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object* obj) {
+  if (kUseThreadLocalAllocationStack) {
+    bool success = self->PushOnThreadLocalAllocationStack(obj);
+    if (UNLIKELY(!success)) {
+      // Slow path: the thread-local push failed, so reserve a new range from allocation_stack_.
+      mirror::Object** start_address;
+      mirror::Object** end_address;
+      while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize,
+                                                &start_address, &end_address)) {
+        CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+      }
+      self->SetThreadLocalAllocationStack(start_address, end_address);
+      // Retry the push, this time against the range that was just installed on the thread.
+      success = self->PushOnThreadLocalAllocationStack(obj);
+      // Must succeed: the range was reserved above and nothing has been pushed onto it yet.
+      CHECK(success);
+    }
+  } else {
+    // Running a GC here is safe: the GC never frees objects that are in neither the allocation
+    // stack nor the live bitmap. Keep collecting until the shared stack accepts the push.
+    while (!allocation_stack_->AtomicPushBack(obj)) {
+      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+    }
+  }
+}
+
 template <bool kInstrumented, typename PreFenceVisitor>
 inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class* klass,
                                               size_t byte_count,