Ensure that ConcurrentGC always increments GC num

ConcurrentGC() could end up waiting for a GC type like TrimSpaces()
that does not actually increment gcs_completed_. It would then
erroneously think it was done, leaving gcs_requested_ >
gcs_completed_ with no task running to perform the requested GCs,
while further requests were ignored until the next explicit or
heap-overflow GC.
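
For reference, a minimal standalone sketch of the wrap-safe counter
comparison this all hinges on (GCNumberLt in heap.cc, condensed, with
the parameter names introduced by this change):

    #include <cstdint>

    // "Less than" on wrapping 32-bit GC sequence numbers, assuming the
    // two counters never drift anywhere near 2^31 apart.
    static inline bool GCNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
      uint32_t difference = gc_num2 - gc_num1;
      return difference > 0 && difference <= 0x80000000;
    }

    // With the old code, a GC that bailed out without bumping
    // gcs_completed_ could leave GCNumberLt(gcs_completed_,
    // gcs_requested_) permanently true, so every later
    // RequestConcurrentGC() call concluded a GC was already pending
    // and scheduled nothing.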

Make ConcurrentGC() actually perform a GC unless the GC number has
already reached the requested value. Add a CHECK in
ConcurrentGCTask::Run() that can catch this failure. (Confirmed: it
did catch the problem before the fix was added.)
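
Condensed from the heap.cc hunk below (shutdown handling elided), the
fixed ConcurrentGC() now retries heavier collectors until the GC
number moves past the request:

    void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full,
                            uint32_t requested_gc_num) {
      // Waiting on a running GC may already push gcs_completed_ past the
      // requested number; in that case there is nothing left to do.
      WaitForGcToComplete(cause, self);
      if (!GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
        return;
      }
      collector::GcType next_gc_type = next_gc_type_;
      if (force_full && next_gc_type == collector::kGcTypeSticky) {
        next_gc_type = NonStickyGcType();
      }
      if (CollectGarbageInternal(next_gc_type, cause, false, requested_gc_num)
          == collector::kGcTypeNone) {
        // Fall back to heavier collectors until one actually runs and
        // increments gcs_completed_ (kGcTypeFull always does), or some
        // other thread completes the requested GC for us.
        for (collector::GcType gc_type : gc_plan_) {
          if (!GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
            break;
          }
          if (gc_type > next_gc_type &&
              CollectGarbageInternal(gc_type, cause, false, requested_gc_num)
              != collector::kGcTypeNone) {
            break;
          }
        }
      }
    }

The CHECK added to ConcurrentGCTask::Run() then asserts the resulting
post-condition: once the task has run, either the requested GC number
has completed or the runtime is shutting down.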

Have RequestConcurrentGC() return a bool indicating whether it did
anything. This makes another CHECK possible, and should eventually
allow us to sleep() again until a GC starts.
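
For example, the native-allocation check in heap.cc (last heap.cc hunk
below, logging elided) now keeps the return value so that, after
waiting, it can assert that the GC it asked for is at least scheduled:

    bool requested =
        RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true,
                            starting_gc_num);
    if (gc_urgency > kStopForNativeFactor
        && current_native_bytes > stop_for_native_allocs_) {
      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self)
          == collector::kGcTypeNone) {
        // If we made a request, max_gc_requested_ must already be past the
        // GC number we observed, i.e. a collection is scheduled or running.
        DCHECK(!requested
               || GCNumberLt(starting_gc_num,
                             max_gc_requested_.load(std::memory_order_relaxed)));
        // TODO: Eventually sleep here again.
      }
    }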

Bug: 186592536
Bug: 189150802

Test: Build and boot AOSP
Change-Id: Ib11734a9c87b9f9e19c5a3557eac9024f84cadf3
Merged-In: Ib11734a9c87b9f9e19c5a3557eac9024f84cadf3
(cherry picked from commit 20e77ff50047e62e90b3ce9b7849777ffcd55b0d)
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d9cd1f5..c9830cf 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -371,7 +371,7 @@
           min_interval_homogeneous_space_compaction_by_oom),
       last_time_homogeneous_space_compaction_by_oom_(NanoTime()),
       gcs_completed_(0u),
-      gcs_requested_(0u),
+      max_gc_requested_(0u),
       pending_collector_transition_(nullptr),
       pending_heap_trim_(nullptr),
       use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom),
@@ -2569,8 +2569,9 @@
   // other things. It seems risky to trigger GCs as a result of such changes.
 }
 
-static inline bool GCNumberLt(uint32_t gcs_completed, uint32_t gcs_requested) {
-  uint32_t difference = gcs_requested - gcs_completed;
+static inline bool GCNumberLt(uint32_t gc_num1, uint32_t gc_num2) {
+  // Unsigned comparison, assuming a non-huge difference, but dealing correctly with wrapping.
+  uint32_t difference = gc_num2 - gc_num1;
   bool completed_more_than_requested = difference > 0x80000000;
   return difference > 0 && !completed_more_than_requested;
 }
@@ -2586,6 +2587,7 @@
   switch (gc_type) {
     case collector::kGcTypePartial: {
       if (!HasZygoteSpace()) {
+        // Do not increment gcs_completed_. We should retry with kGcTypeFull.
         return collector::kGcTypeNone;
       }
       break;
@@ -3710,8 +3712,11 @@
   ConcurrentGCTask(uint64_t target_time, GcCause cause, bool force_full, uint32_t gc_num)
       : HeapTask(target_time), cause_(cause), force_full_(force_full), my_gc_num_(gc_num) {}
   void Run(Thread* self) override {
-    gc::Heap* heap = Runtime::Current()->GetHeap();
+    Runtime* runtime = Runtime::Current();
+    gc::Heap* heap = runtime->GetHeap();
+    DCHECK(GCNumberLt(my_gc_num_, heap->GetCurrentGcNum() + 2));  // <= current_gc_num + 1
     heap->ConcurrentGC(self, cause_, force_full_, my_gc_num_);
+    CHECK(!GCNumberLt(heap->GetCurrentGcNum(), my_gc_num_) || runtime->IsShuttingDown(self));
   }
 
  private:
@@ -3726,43 +3731,55 @@
       !self->IsHandlingStackOverflow();
 }
 
-void Heap::RequestConcurrentGC(Thread* self,
+bool Heap::RequestConcurrentGC(Thread* self,
                                GcCause cause,
                                bool force_full,
                                uint32_t observed_gc_num) {
-  uint32_t gcs_requested = gcs_requested_.load(std::memory_order_relaxed);
-  if (!GCNumberLt(observed_gc_num, gcs_requested)) {
-    // Nobody beat us to requesting the next gc after observed_gc_num.
-    if (CanAddHeapTask(self)
-        && gcs_requested_.CompareAndSetStrongRelaxed(gcs_requested, observed_gc_num + 1)) {
-      task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
-                                                          cause,
-                                                          force_full,
-                                                          observed_gc_num + 1));
+  uint32_t max_gc_requested = max_gc_requested_.load(std::memory_order_relaxed);
+  if (!GCNumberLt(observed_gc_num, max_gc_requested)) {
+    // observed_gc_num >= max_gc_requested: Nobody beat us to requesting the next gc.
+    if (CanAddHeapTask(self)) {
+      // Since observed_gc_num >= max_gc_requested, this increases max_gc_requested_, if successful.
+      if (max_gc_requested_.CompareAndSetStrongRelaxed(max_gc_requested, observed_gc_num + 1)) {
+        task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
+                                                            cause,
+                                                            force_full,
+                                                            observed_gc_num + 1));
+      }
+      DCHECK(GCNumberLt(observed_gc_num, max_gc_requested_.load(std::memory_order_relaxed)));
+      // If we increased max_gc_requested_, then we added a task that will eventually cause
+      // gcs_completed_ to be incremented (to at least observed_gc_num + 1).
+      // If the CAS failed, somebody else did.
+      return true;
     }
+    return false;
   }
+  return true;  // Vacuously.
 }
 
 void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t requested_gc_num) {
   if (!Runtime::Current()->IsShuttingDown(self)) {
-    // Wait for any GCs currently running to finish.
-    if (WaitForGcToComplete(cause, self) == collector::kGcTypeNone) {
-      // If we can't run the GC type we wanted to run, find the next appropriate one and try
-      // that instead. E.g. can't do partial, so do full instead.
+    // Wait for any GCs currently running to finish. If this incremented the GC number, we're done.
+    WaitForGcToComplete(cause, self);
+    if (GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
       collector::GcType next_gc_type = next_gc_type_;
       // If forcing full and next gc type is sticky, override with a non-sticky type.
       if (force_full && next_gc_type == collector::kGcTypeSticky) {
         next_gc_type = NonStickyGcType();
       }
+      // If we can't run the GC type we wanted to run, find the next appropriate one and try
+      // that instead. E.g. can't do partial, so do full instead.
+      // We must ensure that we run something that ends up incrementing gcs_completed_.
+      // In the kGcTypePartial case, the initial CollectGarbageInternal call may not have that
+      // effect, but the subsequent kGcTypeFull call will.
       if (CollectGarbageInternal(next_gc_type, cause, false, requested_gc_num)
           == collector::kGcTypeNone) {
         for (collector::GcType gc_type : gc_plan_) {
-          // Attempt to run the collector, if we succeed, we are done.
-          uint32_t gcs_completed = GetCurrentGcNum();
-          if (!GCNumberLt(gcs_completed, requested_gc_num)) {
+          if (!GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
             // Somebody did it for us.
             break;
           }
+          // Attempt to run the collector; if we succeed, we are done.
           if (gc_type > next_gc_type &&
               CollectGarbageInternal(gc_type, cause, false, requested_gc_num)
               != collector::kGcTypeNone) {
@@ -3968,14 +3985,19 @@
   float gc_urgency = NativeMemoryOverTarget(current_native_bytes, is_gc_concurrent);
   if (UNLIKELY(gc_urgency >= 1.0)) {
     if (is_gc_concurrent) {
-      RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true, starting_gc_num);
+      bool requested =
+          RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true, starting_gc_num);
       if (gc_urgency > kStopForNativeFactor
           && current_native_bytes > stop_for_native_allocs_) {
         // We're in danger of running out of memory due to rampant native allocation.
         if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
           LOG(INFO) << "Stopping for native allocation, urgency: " << gc_urgency;
         }
-        WaitForGcToComplete(kGcCauseForNativeAlloc, self);
+        if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) == collector::kGcTypeNone) {
+          DCHECK(!requested
+                 || GCNumberLt(starting_gc_num, max_gc_requested_.load(std::memory_order_relaxed)));
+          // TODO: Eventually sleep here again.
+        }
       }
     } else {
       CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false, starting_gc_num + 1);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 5cad3f1..09aa4e6 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -834,8 +834,10 @@
 
   // Request asynchronous GC. Observed_gc_num is the value of GetCurrentGcNum() when we started to
   // evaluate the GC triggering condition. If a GC has been completed since then, we consider our
-  // job done. Ensures that gcs_completed_ will eventually be incremented beyond observed_gc_num.
-  void RequestConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t observed_gc_num)
+  // job done. If we return true, then we ensured that gcs_completed_ will eventually be
+  // incremented beyond observed_gc_num. We return false only in corner cases in which we cannot
+  // ensure that.
+  bool RequestConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t observed_gc_num)
       REQUIRES(!*pending_task_lock_);
 
   // Whether or not we may use a garbage collector, used so that we only create collectors we need.
@@ -1585,9 +1587,10 @@
   // Increment is guarded by gc_complete_lock_.
   Atomic<uint32_t> gcs_completed_;
 
-  // The number of garbage collections we've scheduled. Normally either gcs_complete_ or
-  // gcs_complete + 1.
-  Atomic<uint32_t> gcs_requested_;
+  // The number of the last garbage collection that has been requested. A value of
+  // gcs_completed_ + 1 indicates that another collection is needed or in progress. A value of
+  // gcs_completed_ or (logically) less means that no new GC has been requested.
+  Atomic<uint32_t> max_gc_requested_;
 
   // Active tasks which we can modify (change target time, desired collector type, etc..).
   CollectorTransitionTask* pending_collector_transition_ GUARDED_BY(pending_task_lock_);