Make GetBytesAllocatedEver() monotonic

Make the *_freed_ever_ counters atomic, since they can be read
concurrently with the GC's updates. This removes the data race.
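
As a minimal standalone sketch of the single-writer pattern used below
(hypothetical names, not the ART code): the GC thread publishes with a
release store, and readers take either a relaxed load as a hint or an
acquire load when they must also observe the heap state the counter
describes.

  #include <atomic>
  #include <cstdint>

  std::atomic<uint64_t> bytes_freed_ever{0};  // hypothetical counter

  // Writer side: called only from the GC thread, so a plain load/store
  // pair suffices; no read-modify-write is needed. The release store
  // publishes the preceding per-object bookkeeping along with the sum.
  void AddFreedBytes(uint64_t n) {
    bytes_freed_ever.store(
        bytes_freed_ever.load(std::memory_order_relaxed) + n,
        std::memory_order_release);
  }

  // Reader side: may race with the writer, but is well-defined. Use an
  // acquire load when the caller also needs the freed-object info.
  uint64_t FreedBytesHint() {
    return bytes_freed_ever.load(std::memory_order_relaxed);
  }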

We enforce monotonicity in a brute-force way, by never returning less
than the maximum value we have already returned; that is probably good
enough here. The results remain approximate, as they always were.
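
The brute-force idiom in isolation looks like the following sketch
(hypothetical helper, not the exact heap code): remember the largest
value ever returned and never report anything smaller, with racing
callers reconciling through a CAS loop.

  #include <atomic>
  #include <cstdint>

  // Returns max(sample, every previously returned value), so results
  // never appear to decrease even when sample itself does.
  uint64_t MonotonicMax(std::atomic<uint64_t>& max_so_far, uint64_t sample) {
    uint64_t prev = max_so_far.load(std::memory_order_relaxed);
    while (sample > prev) {
      // On failure, compare_exchange_weak reloads prev, so the loop
      // retries against the freshest maximum from concurrent callers.
      if (max_so_far.compare_exchange_weak(prev, sample,
                                           std::memory_order_relaxed)) {
        return sample;
      }
    }
    return prev;
  }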

Bug: 142328050
Test: Built AOSP.

Change-Id: Ifb8520d0edee7cadadc62f59992c6d9b67251c32
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4b63138..85b79da 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1205,8 +1205,8 @@
   post_gc_last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
   post_gc_weighted_allocated_bytes_ = 0u;
 
-  total_bytes_freed_ever_ = 0;
-  total_objects_freed_ever_ = 0;
+  total_bytes_freed_ever_.store(0);
+  total_objects_freed_ever_.store(0);
   total_wait_time_ = 0;
   blocking_gc_count_ = 0;
   blocking_gc_time_ = 0;
@@ -1903,7 +1903,21 @@
 }
 
 uint64_t Heap::GetBytesAllocatedEver() const {
-  return GetBytesFreedEver() + GetBytesAllocated();
+  // Force the returned value to be monotonically increasing, in the sense that if this is called
+  // at A and B, such that A happens-before B, then the call at B returns a value no smaller than
+  // that at A. This is not otherwise guaranteed, since num_bytes_allocated_ is decremented first,
+  // and total_bytes_freed_ever_ is incremented later.
+  static std::atomic<uint64_t> max_bytes_so_far(0);
+  uint64_t so_far = max_bytes_so_far.load(std::memory_order_relaxed);
+  uint64_t current_bytes = GetBytesFreedEver(std::memory_order_acquire);
+  current_bytes += GetBytesAllocated();
+  do {
+    if (current_bytes <= so_far) {
+      return so_far;
+    }
+  } while (!max_bytes_so_far.compare_exchange_weak(so_far /* updated on failure */,
+                                                   current_bytes, std::memory_order_relaxed));
+  return current_bytes;
 }
 
 // Check whether the given object is an instance of the given class.
@@ -2239,6 +2253,19 @@
   }
 }
 
+void Heap::IncrementFreedEver() {
+  // Counters are updated only by us, but may be read concurrently.
+  // The updates should become visible after the corresponding live object info.
+  total_objects_freed_ever_.store(total_objects_freed_ever_.load(std::memory_order_relaxed)
+                                  + GetCurrentGcIteration()->GetFreedObjects()
+                                  + GetCurrentGcIteration()->GetFreedLargeObjects(),
+                                  std::memory_order_release);
+  total_bytes_freed_ever_.store(total_bytes_freed_ever_.load(std::memory_order_relaxed)
+                                + GetCurrentGcIteration()->GetFreedBytes()
+                                + GetCurrentGcIteration()->GetFreedLargeObjectBytes(),
+                                std::memory_order_release);
+}
+
 void Heap::PreZygoteFork() {
   if (!HasZygoteSpace()) {
     // We still want to GC in case there is some unreachable non moving objects that could cause a
@@ -2313,10 +2340,7 @@
     if (temp_space_ != nullptr) {
       CHECK(temp_space_->IsEmpty());
     }
-    total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects() +
-        GetCurrentGcIteration()->GetFreedLargeObjects();
-    total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes() +
-        GetCurrentGcIteration()->GetFreedLargeObjectBytes();
+    IncrementFreedEver();
     // Update the end and write out image.
     non_moving_space_->SetEnd(target_space.End());
     non_moving_space_->SetLimit(target_space.Limit());
@@ -2588,10 +2612,7 @@
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
-  total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects() +
-      GetCurrentGcIteration()->GetFreedLargeObjects();
-  total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes() +
-      GetCurrentGcIteration()->GetFreedLargeObjectBytes();
+  IncrementFreedEver();
   RequestTrim(self);
   // Collect cleared references.
   SelfDeletingTask* clear = reference_processor_->CollectClearedReferences(self);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6c3290f..9ef6af5 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -553,13 +553,15 @@
   uint64_t GetBytesAllocatedEver() const;
 
   // Returns the total number of objects freed since the heap was created.
-  uint64_t GetObjectsFreedEver() const {
-    return total_objects_freed_ever_;
+  // With default memory order, this should be viewed only as a hint.
+  uint64_t GetObjectsFreedEver(std::memory_order mo = std::memory_order_relaxed) const {
+    return total_objects_freed_ever_.load(mo);
   }
 
   // Returns the total number of bytes freed since the heap was created.
-  uint64_t GetBytesFreedEver() const {
-    return total_bytes_freed_ever_;
+  // With default memory order, this should be viewed only as a hint.
+  uint64_t GetBytesFreedEver(std::memory_order mo = std::memory_order_relaxed) const {
+    return total_bytes_freed_ever_.load(mo);
   }
 
   space::RegionSpace* GetRegionSpace() const {
@@ -1189,6 +1191,9 @@
 
   ALWAYS_INLINE void IncrementNumberOfBytesFreedRevoke(size_t freed_bytes_revoke);
 
+  // Update *_freed_ever_ counters to reflect current GC values.
+  void IncrementFreedEver();
+
   // Remove a vlog code from heap-inl.h which is transitively included in half the world.
   static void VlogHeapGrowth(size_t max_allowed_footprint, size_t new_footprint, size_t alloc_size);
 
@@ -1342,10 +1347,10 @@
   size_t concurrent_start_bytes_;
 
   // Since the heap was created, how many bytes have been freed.
-  uint64_t total_bytes_freed_ever_;
+  std::atomic<uint64_t> total_bytes_freed_ever_;
 
   // Since the heap was created, how many objects have been freed.
-  uint64_t total_objects_freed_ever_;
+  std::atomic<uint64_t> total_objects_freed_ever_;
 
   // Number of bytes currently allocated and not yet reclaimed. Includes active
   // TLABS in their entirety, even if they have not yet been parceled out.