Revoke rosalloc thread-local buffers at the checkpoint.

In the mark sweep collector, rosalloc thread-local buffers used to be
revoked during the pause. Now, when the collector is concurrent, they
are revoked at the thread checkpoint instead, which appears to help
reduce the pause time. Non-concurrent collections still revoke them
during the pause.

In Ritz MemAllocTest, the average sticky pause time went down ~20%
(925 us -> 724 us).

Bug: 13394464
Bug: 9986565
Change-Id: I104992a11b46d59264c0b9aa2db82b1ccf2826bc
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index b190dab..65b5471 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -63,12 +63,6 @@
   total_freed_bytes_ = 0;
 }
 
-void GarbageCollector::RevokeAllThreadLocalBuffers() {
-  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
-  GetHeap()->RevokeAllThreadLocalBuffers();
-  timings_.EndSplit();
-}
-
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 2182430..93fd2ab 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -127,7 +127,8 @@
   // Called after the GC is finished. Done without mutators paused.
   virtual void FinishPhase() = 0;
 
-  void RevokeAllThreadLocalBuffers();
+  // Revoke all the thread-local buffers.
+  virtual void RevokeAllThreadLocalBuffers() = 0;
 
   static constexpr size_t kPauseBucketSize = 500;
   static constexpr size_t kPauseBucketCount = 32;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 9fe904c..579b781 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -89,6 +89,10 @@
 static constexpr bool kCheckLocks = kDebugLocking;
 static constexpr bool kVerifyRoots = kIsDebugBuild;
 
+// If true, revoke the rosalloc thread-local buffers at the
+// checkpoint, as opposed to during the pause.
+static constexpr bool kRevokeRosAllocThreadLocalBuffersAtCheckpoint = true;
+
 void MarkSweep::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -1028,6 +1032,9 @@
     if (kUseThreadLocalAllocationStack) {
       thread->RevokeThreadLocalAllocationStack();
     }
+    if (kRevokeRosAllocThreadLocalBuffersAtCheckpoint) {
+      mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
+    }
     mark_sweep_->GetBarrier().Pass(self);
   }
 
@@ -1360,6 +1367,19 @@
   large_objects->GetMarkObjects()->Clear();
 }
 
+void MarkSweep::RevokeAllThreadLocalBuffers() {
+  if (kRevokeRosAllocThreadLocalBuffersAtCheckpoint && IsConcurrent()) {
+    // If concurrent, rosalloc thread-local buffers are revoked at the
+    // thread checkpoint. Bump pointer space thread-local buffers must
+    // not be in use.
+    GetHeap()->AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
+  } else {
+    timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+    GetHeap()->RevokeAllThreadLocalBuffers();
+    timings_.EndSplit();
+  }
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index d88424d..b4dd8c7 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -338,6 +338,9 @@
   // IsExclusiveHeld.
   void RevokeAllThreadLocalAllocationStacks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
 
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 5b9c397..565966a 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -858,6 +858,12 @@
   }
 }
 
+void SemiSpace::RevokeAllThreadLocalBuffers() {
+  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  GetHeap()->RevokeAllThreadLocalBuffers();
+  timings_.EndSplit();
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 34cc1d3..7cc7f9b 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -246,6 +246,9 @@
 
   inline mirror::Object* GetForwardingAddressInFromSpace(mirror::Object* obj) const;
 
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
   accounting::ObjectStack* mark_stack_;