Rosalloc thread local allocation path without a cas.

Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees  4101 -> 2929 (~26% reduction)

Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index dd45eca..db7a4ef 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1259,8 +1259,9 @@
   size_t region_space_bytes_allocated = 0U;
   size_t non_moving_space_bytes_allocated = 0U;
   size_t bytes_allocated = 0U;
+  size_t dummy;
   mirror::Object* to_ref = region_space_->AllocNonvirtual<true>(
-      region_space_alloc_size, &region_space_bytes_allocated, nullptr);
+      region_space_alloc_size, &region_space_bytes_allocated, nullptr, &dummy);
   bytes_allocated = region_space_bytes_allocated;
   if (to_ref != nullptr) {
     DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated);
@@ -1286,7 +1287,7 @@
       }
       fall_back_to_non_moving = true;
       to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
-                                               &non_moving_space_bytes_allocated, nullptr);
+                                               &non_moving_space_bytes_allocated, nullptr, &dummy);
       CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed";
       bytes_allocated = non_moving_space_bytes_allocated;
       // Mark it in the mark bitmap.
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 8be18be..eafcc45 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -48,6 +48,7 @@
   gc_cause_ = gc_cause;
   freed_ = ObjectBytePair();
   freed_los_ = ObjectBytePair();
+  freed_bytes_revoke_ = 0;
 }
 
 uint64_t Iteration::GetEstimatedThroughput() const {
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index b809469..ed5207a 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -75,6 +75,12 @@
   uint64_t GetFreedLargeObjects() const {
     return freed_los_.objects;
   }
+  uint64_t GetFreedRevokeBytes() const {
+    return freed_bytes_revoke_;
+  }
+  void SetFreedRevoke(uint64_t freed) {
+    freed_bytes_revoke_ = freed;
+  }
   void Reset(GcCause gc_cause, bool clear_soft_references);
   // Returns the estimated throughput of the iteration.
   uint64_t GetEstimatedThroughput() const;
@@ -99,6 +105,7 @@
   TimingLogger timings_;
   ObjectBytePair freed_;
   ObjectBytePair freed_los_;
+  uint64_t freed_bytes_revoke_;  // see Heap::num_bytes_freed_revoke_.
   std::vector<uint64_t> pause_times_;
 
   friend class GarbageCollector;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 8aac484..ee4e752 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -292,6 +292,7 @@
   Runtime::Current()->AllowNewSystemWeaks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    GetHeap()->RecordFreeRevoke();
     // Reclaim unmarked objects.
     Sweep(false);
     // Swap the live and mark bitmaps for each space which we modified space. This is an
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index c1ba5e3..b3d59f2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -242,6 +242,7 @@
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
+  GetHeap()->RecordFreeRevoke();  // this is for the non-moving rosalloc space used by GSS.
   // Record freed memory.
   const int64_t from_bytes = from_space_->GetBytesAllocated();
   const int64_t to_bytes = bytes_moved_;
@@ -489,17 +490,18 @@
 
 mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
   const size_t object_size = obj->SizeOf();
-  size_t bytes_allocated;
+  size_t bytes_allocated, dummy;
   mirror::Object* forward_address = nullptr;
   if (generational_ && reinterpret_cast<uint8_t*>(obj) < last_gc_to_space_end_) {
     // If it's allocated before the last GC (older), move
     // (pseudo-promote) it to the main free list space (as sort
     // of an old generation.)
     forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
-                                                           nullptr);
+                                                           nullptr, &dummy);
     if (UNLIKELY(forward_address == nullptr)) {
       // If out of space, fall back to the to-space.
-      forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
+      forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr,
+                                                     &dummy);
       // No logic for marking the bitmap, so it must be null.
       DCHECK(to_space_live_bitmap_ == nullptr);
     } else {
@@ -544,7 +546,8 @@
     }
   } else {
     // If it's allocated after the last GC (younger), copy it to the to-space.
-    forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
+    forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr,
+                                                   &dummy);
     if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) {
       to_space_live_bitmap_->Set(forward_address);
     }
@@ -552,7 +555,7 @@
   // If it's still null, attempt to use the fallback space.
   if (UNLIKELY(forward_address == nullptr)) {
     forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
-                                                         nullptr);
+                                                         nullptr, &dummy);
     CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space.";
     accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap();
     if (bitmap != nullptr) {