Add ScopedAllowThreadSuspension

Add an RAII helper that temporarily re-allows thread suspension inside
a ScopedAssertNoThreadSuspension region. This makes the pre-allocation
event and pre-fence visitor handling in heap-inl.h cleaner.
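
Usage sketch (simplified from the heap-inl.h change below; the
DoWorkThatMaySuspend() call is a hypothetical placeholder):

  // Inside a region that asserts no thread suspension:
  ScopedAssertNoThreadSuspension ants("Called PreObjectAllocated, no suspend until alloc");
  {
    // Temporarily drop the assertion around an operation that may
    // suspend, e.g. the large object allocation path. The destructor
    // re-establishes the no-suspension assertion on scope exit.
    ScopedAllowThreadSuspension ats;
    DoWorkThatMaySuspend();  // Hypothetical; may suspend the current thread.
  }
  // Thread suspension is disallowed again at this point.

In debug builds the scope pops and later restores the current
no-suspension cause (which requires exactly one open region); in
release builds it only toggles the Roles::uninterruptible_ capability,
which is a no-op.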

Test: test-art-host
Change-Id: Ifbb2725923df84e8e0bd073d415e99cb8090e681
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 04632ef..4499342 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -65,141 +65,136 @@
     HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
     self->PoisonObjectPointers();
   }
-  auto send_pre_object_allocated = [&]() REQUIRES_SHARED(Locks::mutator_lock_)
-                                         ACQUIRE(Roles::uninterruptible_) {
+  auto pre_object_allocated = [&]() REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_) {
     if constexpr (kInstrumented) {
-      AllocationListener* l = nullptr;
-      l = alloc_listener_.load(std::memory_order_seq_cst);
+      AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
       if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
         StackHandleScope<1> hs(self);
         HandleWrapperObjPtr<mirror::Class> h_klass(hs.NewHandleWrapper(&klass));
         l->PreObjectAllocated(self, h_klass, &byte_count);
       }
     }
-    return self->StartAssertNoThreadSuspension("Called PreObjectAllocated, no suspend until alloc");
   };
-  // Do the initial pre-alloc
-  const char* old_cause = send_pre_object_allocated();
-  // We shouldn't have any NoThreadSuspension here!
-  DCHECK(old_cause == nullptr) << old_cause;
-
-  // Need to check that we aren't the large object allocator since the large object allocation code
-  // path includes this function. If we didn't check we would have an infinite loop.
   ObjPtr<mirror::Object> obj;
-  if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
-    // AllocLargeObject can suspend and will recall PreObjectAllocated if needed.
-    self->EndAssertNoThreadSuspension(old_cause);
-    obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
-                                                           pre_fence_visitor);
-    if (obj != nullptr) {
-      return obj.Ptr();
-    } else {
-      // There should be an OOM exception, since we are retrying, clear it.
-      self->ClearException();
-    }
-    // If the large object allocation failed, try to use the normal spaces (main space,
-    // non moving space). This can happen if there is significant virtual address space
-    // fragmentation.
-    // We need to send the PreObjectAllocated again, we might have suspended during our failure.
-    old_cause = send_pre_object_allocated();
-  }
   // bytes allocated for the (individual) object.
   size_t bytes_allocated;
   size_t usable_size;
   size_t new_num_bytes_allocated = 0;
-  if (IsTLABAllocator(allocator)) {
-    byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
-  }
-  // If we have a thread local allocation we don't need to update bytes allocated.
-  if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
-    obj = self->AllocTlab(byte_count);
-    DCHECK(obj != nullptr) << "AllocTlab can't fail";
-    obj->SetClass(klass);
-    if (kUseBakerReadBarrier) {
-      obj->AssertReadBarrierState();
-    }
-    bytes_allocated = byte_count;
-    usable_size = bytes_allocated;
-    no_suspend_pre_fence_visitor(obj, usable_size);
-    QuasiAtomic::ThreadFenceForConstructor();
-    self->EndAssertNoThreadSuspension(old_cause);
-  } else if (
-      !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
-      (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) != nullptr &&
-      LIKELY(obj != nullptr)) {
-    DCHECK(!is_running_on_memory_tool_);
-    obj->SetClass(klass);
-    if (kUseBakerReadBarrier) {
-      obj->AssertReadBarrierState();
-    }
-    usable_size = bytes_allocated;
-    no_suspend_pre_fence_visitor(obj, usable_size);
-    QuasiAtomic::ThreadFenceForConstructor();
-    self->EndAssertNoThreadSuspension(old_cause);
-  } else {
-    // Bytes allocated that includes bulk thread-local buffer allocations in addition to direct
-    // non-TLAB object allocations.
-    size_t bytes_tl_bulk_allocated = 0u;
-    obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
-                                              &usable_size, &bytes_tl_bulk_allocated);
-    if (UNLIKELY(obj == nullptr)) {
-      // AllocateInternalWithGc can cause thread suspension, if someone instruments the entrypoints
-      // or changes the allocator in a suspend point here, we need to retry the allocation.
-      // It will send the pre-alloc event again.
-      self->EndAssertNoThreadSuspension(old_cause);
-      obj = AllocateInternalWithGc(self,
-                                   allocator,
-                                   kInstrumented,
-                                   byte_count,
-                                   &bytes_allocated,
-                                   &usable_size,
-                                   &bytes_tl_bulk_allocated,
-                                   &klass,
-                                   &old_cause);
-      if (obj == nullptr) {
-        self->EndAssertNoThreadSuspension(old_cause);
-        // The only way that we can get a null return if there is no pending exception is if the
-        // allocator or instrumentation changed.
-        if (!self->IsExceptionPending()) {
-          // AllocObject will pick up the new allocator type, and instrumented as true is the safe
-          // default.
-          return AllocObject</*kInstrumented=*/true>(self,
-                                                     klass,
-                                                     byte_count,
-                                                     pre_fence_visitor);
-        }
-        return nullptr;
+  {
+    // Do the initial pre-alloc
+    pre_object_allocated();
+    ScopedAssertNoThreadSuspension ants("Called PreObjectAllocated, no suspend until alloc");
+
+    // Need to check that we aren't already on the large object allocation path, since
+    // AllocLargeObject calls back into this function; without the check we would loop forever.
+    if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
+      // AllocLargeObject can suspend and will recall PreObjectAllocated if needed.
+      ScopedAllowThreadSuspension ats;
+      obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
+                                                             pre_fence_visitor);
+      if (obj != nullptr) {
+        return obj.Ptr();
       }
+      // There should be an OOM exception, since we are retrying, clear it.
+      self->ClearException();
+
+      // If the large object allocation failed, try to use the normal spaces (main space,
+      // non moving space). This can happen if there is significant virtual address space
+      // fragmentation.
+      pre_object_allocated();
     }
-    DCHECK_GT(bytes_allocated, 0u);
-    DCHECK_GT(usable_size, 0u);
-    obj->SetClass(klass);
-    if (kUseBakerReadBarrier) {
-      obj->AssertReadBarrierState();
+    if (IsTLABAllocator(allocator)) {
+      byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
     }
-    if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
-      // (Note this if statement will be constant folded away for the fast-path quick entry
-      // points.) Because SetClass() has no write barrier, the GC may need a write barrier in the
-      // case the object is non movable and points to a recently allocated movable class.
-      WriteBarrier::ForFieldWrite(obj, mirror::Object::ClassOffset(), klass);
-    }
-    no_suspend_pre_fence_visitor(obj, usable_size);
-    QuasiAtomic::ThreadFenceForConstructor();
-    self->EndAssertNoThreadSuspension(old_cause);
-    if (bytes_tl_bulk_allocated > 0) {
-      size_t num_bytes_allocated_before =
-          num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
-      new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
-      // Only trace when we get an increase in the number of bytes allocated. This happens when
-      // obtaining a new TLAB and isn't often enough to hurt performance according to golem.
-      if (region_space_) {
-        // With CC collector, during a GC cycle, the heap usage increases as
-        // there are two copies of evacuated objects. Therefore, add evac-bytes
-        // to the heap size. When the GC cycle is not running, evac-bytes
-        // are 0, as required.
-        TraceHeapSize(new_num_bytes_allocated + region_space_->EvacBytes());
-      } else {
-        TraceHeapSize(new_num_bytes_allocated);
+    // If we have a thread local allocation we don't need to update bytes allocated.
+    if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
+      obj = self->AllocTlab(byte_count);
+      DCHECK(obj != nullptr) << "AllocTlab can't fail";
+      obj->SetClass(klass);
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
+      }
+      bytes_allocated = byte_count;
+      usable_size = bytes_allocated;
+      no_suspend_pre_fence_visitor(obj, usable_size);
+      QuasiAtomic::ThreadFenceForConstructor();
+    } else if (
+        !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
+        (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) != nullptr &&
+        LIKELY(obj != nullptr)) {
+      DCHECK(!is_running_on_memory_tool_);
+      obj->SetClass(klass);
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
+      }
+      usable_size = bytes_allocated;
+      no_suspend_pre_fence_visitor(obj, usable_size);
+      QuasiAtomic::ThreadFenceForConstructor();
+    } else {
+      // Bytes allocated that includes bulk thread-local buffer allocations in addition to direct
+      // non-TLAB object allocations.
+      size_t bytes_tl_bulk_allocated = 0u;
+      obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
+                                                &usable_size, &bytes_tl_bulk_allocated);
+      if (UNLIKELY(obj == nullptr)) {
+        // AllocateInternalWithGc can cause thread suspension; if someone instruments the
+        // entrypoints or changes the allocator at a suspend point here, we need to retry the
+        // allocation. AllocateInternalWithGc will send the pre-alloc event again.
+        obj = AllocateInternalWithGc(self,
+                                     allocator,
+                                     kInstrumented,
+                                     byte_count,
+                                     &bytes_allocated,
+                                     &usable_size,
+                                     &bytes_tl_bulk_allocated,
+                                     &klass);
+        if (obj == nullptr) {
+          // The only way that we can get a null return if there is no pending exception is if the
+          // allocator or instrumentation changed.
+          if (!self->IsExceptionPending()) {
+            // Since we are restarting, allow thread suspension.
+            ScopedAllowThreadSuspension ats;
+            // AllocObject will pick up the new allocator type, and instrumented as true is the safe
+            // default.
+            return AllocObject</*kInstrumented=*/true>(self,
+                                                       klass,
+                                                       byte_count,
+                                                       pre_fence_visitor);
+          }
+          return nullptr;
+        }
+      }
+      DCHECK_GT(bytes_allocated, 0u);
+      DCHECK_GT(usable_size, 0u);
+      obj->SetClass(klass);
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
+      }
+      if (collector::SemiSpace::kUseRememberedSet &&
+          UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
+        // (Note this if statement will be constant folded away for the fast-path quick entry
+        // points.) Because SetClass() has no write barrier, the GC may need a write barrier in the
+        // case the object is non movable and points to a recently allocated movable class.
+        WriteBarrier::ForFieldWrite(obj, mirror::Object::ClassOffset(), klass);
+      }
+      no_suspend_pre_fence_visitor(obj, usable_size);
+      QuasiAtomic::ThreadFenceForConstructor();
+      if (bytes_tl_bulk_allocated > 0) {
+        size_t num_bytes_allocated_before =
+            num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
+        new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
+        // Only trace when we get an increase in the number of bytes allocated. This happens when
+        // obtaining a new TLAB and doesn't occur often enough to hurt performance, per golem.
+        if (region_space_) {
+          // With CC collector, during a GC cycle, the heap usage increases as
+          // there are two copies of evacuated objects. Therefore, add evac-bytes
+          // to the heap size. When the GC cycle is not running, evac-bytes
+          // are 0, as required.
+          TraceHeapSize(new_num_bytes_allocated + region_space_->EvacBytes());
+        } else {
+          TraceHeapSize(new_num_bytes_allocated);
+        }
       }
     }
   }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b462e29..4c069b3 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1724,8 +1724,7 @@
                                              size_t* bytes_allocated,
                                              size_t* usable_size,
                                              size_t* bytes_tl_bulk_allocated,
-                                             ObjPtr<mirror::Class>* klass,
-                                             /*out*/const char** old_no_thread_suspend_cause) {
+                                             ObjPtr<mirror::Class>* klass) {
   bool was_default_allocator = allocator == GetCurrentAllocator();
   // Make sure there is no pending exception since we may need to throw an OOME.
   self->AssertNoPendingException();
@@ -1734,24 +1733,17 @@
   StackHandleScope<1> hs(self);
   HandleWrapperObjPtr<mirror::Class> h_klass(hs.NewHandleWrapper(klass));
 
-  auto release_no_suspend = [&]() RELEASE(Roles::uninterruptible_) {
-    self->EndAssertNoThreadSuspension(*old_no_thread_suspend_cause);
-  };
-  auto send_object_pre_alloc = [&]() ACQUIRE(Roles::uninterruptible_)
-                                     REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (UNLIKELY(instrumented)) {
-    AllocationListener* l = nullptr;
-    l = alloc_listener_.load(std::memory_order_seq_cst);
-    if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
-      l->PreObjectAllocated(self, h_klass, &alloc_size);
+  auto send_object_pre_alloc = [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (UNLIKELY(instrumented)) {
+      AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
+      if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
+        l->PreObjectAllocated(self, h_klass, &alloc_size);
+      }
     }
-  }
-  *old_no_thread_suspend_cause =
-      self->StartAssertNoThreadSuspension("Called PreObjectAllocated, no suspend until alloc");
-};
+  };
 #define PERFORM_SUSPENDING_OPERATION(op)                                          \
   [&]() REQUIRES(Roles::uninterruptible_) REQUIRES_SHARED(Locks::mutator_lock_) { \
-    release_no_suspend();                                                         \
+    ScopedAllowThreadSuspension ats;                                              \
     auto res = (op);                                                              \
     send_object_pre_alloc();                                                      \
     return res;                                                                   \
@@ -1759,7 +1751,8 @@
 
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  collector::GcType last_gc = WaitForGcToComplete(kGcCauseForAlloc, self);
+  collector::GcType last_gc =
+      PERFORM_SUSPENDING_OPERATION(WaitForGcToComplete(kGcCauseForAlloc, self));
   // If we were the default allocator but the allocator changed while we were suspended,
   // abort the allocation.
   // We just waited, call the pre-alloc again.
@@ -1896,10 +1889,8 @@
 #undef PERFORM_SUSPENDING_OPERATION
   // If the allocation hasn't succeeded by this point, throw an OOM error.
   if (ptr == nullptr) {
-    release_no_suspend();
+    ScopedAllowThreadSuspension ats;
     ThrowOutOfMemoryError(self, alloc_size, allocator);
-    *old_no_thread_suspend_cause =
-        self->StartAssertNoThreadSuspension("Failed allocation fallback");
   }
   return ptr;
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6f6cfd1..f8bbe10 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -1011,10 +1011,9 @@
                                          size_t* bytes_allocated,
                                          size_t* usable_size,
                                          size_t* bytes_tl_bulk_allocated,
-                                         ObjPtr<mirror::Class>* klass,
-                                         /*out*/const char** old_no_thread_suspend_cause)
+                                         ObjPtr<mirror::Class>* klass)
       REQUIRES(!Locks::thread_suspend_count_lock_, !*gc_complete_lock_, !*pending_task_lock_)
-      ACQUIRE(Roles::uninterruptible_)
+      REQUIRES(Roles::uninterruptible_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Allocate into a specific space.
diff --git a/runtime/thread.h b/runtime/thread.h
index 16ca2aa..29375e5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -355,6 +355,21 @@
     Roles::uninterruptible_.Release();  // No-op.
   }
 
+  // End the region where no thread suspension is expected. Returns the cause of the currently
+  // open region, so the caller can reopen it later. Used by ScopedAllowThreadSuspension. Not
+  // supported if no_thread_suspension is larger than one.
+  const char* EndAssertNoThreadSuspension() RELEASE(Roles::uninterruptible_) WARN_UNUSED {
+    const char* ret = nullptr;
+    if (kIsDebugBuild) {
+      CHECK_EQ(tls32_.no_thread_suspension, 1u);
+      tls32_.no_thread_suspension--;
+      ret = tlsPtr_.last_no_thread_suspension_cause;
+      tlsPtr_.last_no_thread_suspension_cause = nullptr;
+    }
+    Roles::uninterruptible_.Release();  // No-op.
+    return ret;
+  }
+
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
   // Return true if thread suspension is allowable.
@@ -1910,6 +1925,29 @@
   const char* old_cause_;
 };
 
+class ScopedAllowThreadSuspension {
+ public:
+  ALWAYS_INLINE ScopedAllowThreadSuspension() RELEASE(Roles::uninterruptible_) {
+    if (kIsDebugBuild) {
+      self_ = Thread::Current();
+      old_cause_ = self_->EndAssertNoThreadSuspension();
+    } else {
+      Roles::uninterruptible_.Release();  // No-op.
+    }
+  }
+  ALWAYS_INLINE ~ScopedAllowThreadSuspension() ACQUIRE(Roles::uninterruptible_) {
+    if (kIsDebugBuild) {
+      CHECK(self_->StartAssertNoThreadSuspension(old_cause_) == nullptr);
+    } else {
+      Roles::uninterruptible_.Acquire();  // No-op.
+    }
+  }
+
+ private:
+  Thread* self_;
+  const char* old_cause_;
+};
+
 class ScopedStackedShadowFramePusher {
  public:
   ScopedStackedShadowFramePusher(Thread* self, ShadowFrame* sf, StackedShadowFrameType type)