Change sticky GC ergonomics to use GC throughput.

The old sticky ergonomics triggered a partial/full GC when the bytes
remaining until the footprint limit were less than min free. This was
suboptimal since it did not account for how productive the sticky
collections actually were. The new sticky GC ergonomics trigger a
partial/full GC when the throughput of the current sticky GC iteration
is <= the mean throughput of the partial/full GC.
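
In pseudocode, the new decision is roughly the sketch below. The helper
names (EstimateThroughput, ChooseNextGc) are illustrative only; the
actual logic lives in Heap::GrowForUtilization() and
GarbageCollector::GetEstimatedMeanThroughput() in the diff.

  #include <cstddef>
  #include <cstdint>

  enum class NextGc { kSticky, kNonSticky };

  // Throughput is estimated in bytes freed per second; the +1 ms keeps the
  // denominator from being zero, matching GetEstimatedMeanThroughput() below.
  inline uint64_t EstimateThroughput(uint64_t freed_bytes, uint64_t duration_ms) {
    return (freed_bytes * 1000) / (duration_ms + 1);
  }

  // Schedule another sticky GC only while it remains at least as productive
  // as the mean partial/full GC and we are still under the footprint limit.
  inline NextGc ChooseNextGc(uint64_t sticky_throughput,
                             uint64_t non_sticky_mean_throughput,
                             size_t non_sticky_iterations,
                             size_t bytes_allocated,
                             size_t max_allowed_footprint) {
    if (sticky_throughput >= non_sticky_mean_throughput &&
        non_sticky_iterations > 0 &&
        bytes_allocated <= max_allowed_footprint) {
      return NextGc::kSticky;
    }
    return NextGc::kNonSticky;  // partial if a zygote space exists, else full
  }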

Total GC time on FormulaEvaluationActions.EvaluateAndApplyChanges:
Before: 26.4s
After: 24.8s
No change in benchmark scores was measured.

Bug: 8788501

Change-Id: I90000305e93fd492a8ef5a06ec9620d830eaf90d
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 0fcec1f..fcec733 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -235,6 +235,28 @@
   return os;
 }
 
+template<typename T>
+class ConstDumpable {
+ public:
+  explicit ConstDumpable(const T& value) : value_(value) {
+  }
+
+  void Dump(std::ostream& os) const {
+    value_.Dump(os);
+  }
+
+ private:
+  const T& value_;
+
+  DISALLOW_COPY_AND_ASSIGN(ConstDumpable);
+};
+
+template<typename T>
+std::ostream& operator<<(std::ostream& os, const ConstDumpable<T>& rhs) {
+  rhs.Dump(os);
+  return os;
+}
+
 // Helps you use operator<< in a const char*-like context such as our various 'F' methods with
 // format strings.
 template<typename T>
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index fe18f66..c769f64 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -60,23 +60,10 @@
 void CumulativeLogger::Reset() {
   MutexLock mu(Thread::Current(), lock_);
   iterations_ = 0;
+  total_time_ = 0;
   STLDeleteElements(&histograms_);
 }
 
-uint64_t CumulativeLogger::GetTotalNs() const {
-  MutexLock mu(Thread::Current(), lock_);
-  return GetTotalTime() * kAdjust;
-}
-
-uint64_t CumulativeLogger::GetTotalTime() const {
-  MutexLock mu(Thread::Current(), lock_);
-  uint64_t total = 0;
-  for (Histogram<uint64_t>* histogram : histograms_) {
-    total += histogram->Sum();
-  }
-  return total;
-}
-
 void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
   const TimingLogger::SplitTimings& splits = logger.GetSplits();
@@ -93,7 +80,7 @@
   return iterations_;
 }
 
-void CumulativeLogger::Dump(std::ostream &os) {
+void CumulativeLogger::Dump(std::ostream &os) const {
   MutexLock mu(Thread::Current(), lock_);
   DumpHistogram(os);
 }
@@ -101,7 +88,7 @@
 void CumulativeLogger::AddPair(const std::string& label, uint64_t delta_time) {
   // Convert delta time to microseconds so that we don't overflow our counters.
   delta_time /= kAdjust;
-
+  total_time_ += delta_time;
   Histogram<uint64_t>* histogram;
   Histogram<uint64_t> dummy(label.c_str());
   auto it = histograms_.find(&dummy);
@@ -123,7 +110,7 @@
   }
 };
 
-void CumulativeLogger::DumpHistogram(std::ostream &os) {
+void CumulativeLogger::DumpHistogram(std::ostream &os) const {
   os << "Start Dumping histograms for " << iterations_ << " iterations"
      << " for " << name_ << "\n";
   std::set<Histogram<uint64_t>*, CompareHistorgramByTimeSpentDeclining>
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index b0bcf10..9b55898 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -35,8 +35,10 @@
   void Start();
   void End() LOCKS_EXCLUDED(lock_);
   void Reset() LOCKS_EXCLUDED(lock_);
-  void Dump(std::ostream& os) LOCKS_EXCLUDED(lock_);
-  uint64_t GetTotalNs() const;
+  void Dump(std::ostream& os) const LOCKS_EXCLUDED(lock_);
+  uint64_t GetTotalNs() const {
+    return GetTotalTime() * kAdjust;
+  }
   // Allow the name to be modified, particularly when the cumulative logger is a field within a
   // parent class that is unable to determine the "name" of a sub-class.
   void SetName(const std::string& name) LOCKS_EXCLUDED(lock_);
@@ -57,14 +59,17 @@
 
   void AddPair(const std::string &label, uint64_t delta_time)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void DumpHistogram(std::ostream &os) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  uint64_t GetTotalTime() const;
+  void DumpHistogram(std::ostream &os) const EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  uint64_t GetTotalTime() const {
+    return total_time_;
+  }
   static const uint64_t kAdjust = 1000;
   std::set<Histogram<uint64_t>*, HistogramComparator> histograms_ GUARDED_BY(lock_);
   std::string name_;
   const std::string lock_name_;
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   size_t iterations_ GUARDED_BY(lock_);
+  uint64_t total_time_;
 
   DISALLOW_COPY_AND_ASSIGN(CumulativeLogger);
 };
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 65b5471..0c7565c 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -39,9 +39,8 @@
       name_(name),
       gc_cause_(kGcCauseForAlloc),
       clear_soft_references_(false),
-      verbose_(VLOG_IS_ON(heap)),
       duration_ns_(0),
-      timings_(name_.c_str(), true, verbose_),
+      timings_(name_.c_str(), true, VLOG_IS_ON(heap)),
       pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
       cumulative_timings_(name) {
   ResetCumulativeStatistics();
@@ -186,6 +185,16 @@
   }
 }
 
+uint64_t GarbageCollector::GetEstimatedMeanThroughput() const {
+  // Add 1ms to prevent possible division by 0.
+  return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1);
+}
+
+uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const {
+  // Add 1ms to prevent possible division by 0.
+  return (freed_bytes_ * 1000) / (NsToMs(GetDurationNs()) + 1);
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 93fd2ab..f8c4579 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -68,8 +68,7 @@
   TimingLogger& GetTimings() {
     return timings_;
   }
-
-  CumulativeLogger& GetCumulativeTimings() {
+  const CumulativeLogger& GetCumulativeTimings() const {
     return cumulative_timings_;
   }
 
@@ -111,6 +110,17 @@
     return pause_histogram_;
   }
 
+  // Returns the estimated throughput in bytes / second.
+  uint64_t GetEstimatedMeanThroughput() const;
+
+  // Returns the estimated throughput of the last GC iteration.
+  uint64_t GetEstimatedLastIterationThroughput() const;
+
+  // Returns how many GC iterations have been run.
+  size_t GetIterations() const {
+    return GetCumulativeTimings().GetIterations();
+  }
+
  protected:
   // The initial phase. Done without mutators paused.
   virtual void InitializePhase() = 0;
@@ -140,8 +150,6 @@
   GcCause gc_cause_;
   bool clear_soft_references_;
 
-  const bool verbose_;
-
   uint64_t duration_ns_;
   TimingLogger timings_;
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 02e7e3f..ff4b4ce 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -658,9 +658,9 @@
   // Dump cumulative loggers for each GC type.
   uint64_t total_paused_time = 0;
   for (const auto& collector : garbage_collectors_) {
-    CumulativeLogger& logger = collector->GetCumulativeTimings();
+    const CumulativeLogger& logger = collector->GetCumulativeTimings();
     if (logger.GetTotalNs() != 0) {
-      os << Dumpable<CumulativeLogger>(logger);
+      os << ConstDumpable<CumulativeLogger>(logger);
       const uint64_t total_ns = logger.GetTotalNs();
       const uint64_t total_pause_ns = collector->GetTotalPausedTimeNs();
       double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
@@ -1440,7 +1440,7 @@
   // Can't call into java code with all threads suspended.
   EnqueueClearedReferences();
   uint64_t duration = NanoTime() - start_time;
-  GrowForUtilization(collector::kGcTypeFull, duration);
+  GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
   int32_t after_size = GetTotalMemory();
   int32_t delta_size = before_size - after_size;
@@ -1821,13 +1821,7 @@
     gc_type = collector::kGcTypeFull;
   } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
       current_allocator_ == kAllocatorTypeDlMalloc) {
-    for (const auto& cur_collector : garbage_collectors_) {
-      if (cur_collector->GetCollectorType() == collector_type_ &&
-          cur_collector->GetGcType() == gc_type) {
-        collector = cur_collector;
-        break;
-      }
-    }
+    collector = FindCollectorByGcType(gc_type);
   } else {
     LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
@@ -1838,14 +1832,14 @@
   if (!clear_soft_references) {
     clear_soft_references = gc_type != collector::kGcTypeSticky;  // TODO: GSS?
   }
-  collector->Run(gc_cause, clear_soft_references || Runtime::Current()->IsZygote());
+  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
   // Enqueue cleared references.
   EnqueueClearedReferences();
   // Grow the heap so that we know when to perform the next GC.
-  GrowForUtilization(gc_type, collector->GetDurationNs());
+  GrowForUtilization(collector);
   if (CareAboutPauseTimes()) {
     const size_t duration = collector->GetDurationNs();
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1874,9 +1868,7 @@
                   << percent_free << "% free, " << PrettySize(current_heap_size) << "/"
                   << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
                   << " total " << PrettyDuration((duration / 1000) * 1000);
-        if (VLOG_IS_ON(heap)) {
-            LOG(INFO) << Dumpable<TimingLogger>(collector->GetTimings());
-        }
+        VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings());
     }
   }
   FinishGC(self, gc_type);
@@ -2479,13 +2471,24 @@
   native_footprint_limit_ = 2 * target_size - native_size;
 }
 
-void Heap::GrowForUtilization(collector::GcType gc_type, uint64_t gc_duration) {
+collector::GarbageCollector* Heap::FindCollectorByGcType(collector::GcType gc_type) {
+  for (const auto& collector : garbage_collectors_) {
+    if (collector->GetCollectorType() == collector_type_ &&
+        collector->GetGcType() == gc_type) {
+      return collector;
+    }
+  }
+  return nullptr;
+}
+
+void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran) {
   // We know what our utilization is at this moment.
   // This doesn't actually resize any memory. It just lets the heap grow more when necessary.
   const size_t bytes_allocated = GetBytesAllocated();
   last_gc_size_ = bytes_allocated;
   last_gc_time_ns_ = NanoTime();
   size_t target_size;
+  collector::GcType gc_type = collector_ran->GetGcType();
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
     target_size = bytes_allocated / GetTargetHeapUtilization();
@@ -2497,12 +2500,22 @@
     native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
-    // Based on how close the current heap size is to the target size, decide
-    // whether or not to do a partial or sticky GC next.
-    if (bytes_allocated + min_free_ <= max_allowed_footprint_) {
+    collector::GcType non_sticky_gc_type =
+        have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
+    // Find what the next non sticky collector will be.
+    collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type);
+    // If the throughput of the current sticky GC >= throughput of the non sticky collector, then
+    // do another sticky collection next.
+    // We also check that the bytes allocated aren't over the footprint limit in order to prevent a
+    // pathological case where dead objects which aren't reclaimed by sticky could get accumulated
+    // if the sticky GC throughput always remained >= the full/partial throughput.
+    if (collector_ran->GetEstimatedLastIterationThroughput() >=
+        non_sticky_collector->GetEstimatedMeanThroughput() &&
+        non_sticky_collector->GetIterations() > 0 &&
+        bytes_allocated <= max_allowed_footprint_) {
       next_gc_type_ = collector::kGcTypeSticky;
     } else {
-      next_gc_type_ = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
+      next_gc_type_ = non_sticky_gc_type;
     }
     // If we have freed enough memory, shrink the heap back down.
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
@@ -2516,7 +2529,7 @@
     if (IsGcConcurrent()) {
       // Calculate when to perform the next ConcurrentGC.
       // Calculate the estimated GC duration.
-      const double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
+      const double gc_duration_seconds = NsToMs(collector_ran->GetDurationNs()) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
       size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
       remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 60b8450..a522750 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -658,10 +658,13 @@
   // bytes allocated and the target utilization ratio.
   void UpdateMaxNativeFootprint();
 
+  // Find a collector based on GC type.
+  collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
+
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
-  void GrowForUtilization(collector::GcType gc_type, uint64_t gc_duration);
+  void GrowForUtilization(collector::GarbageCollector* collector_ran);
 
   size_t GetPercentFree();
 
diff --git a/runtime/verify_object-inl.h b/runtime/verify_object-inl.h
index e211c83..39df375 100644
--- a/runtime/verify_object-inl.h
+++ b/runtime/verify_object-inl.h
@@ -36,9 +36,8 @@
       bool failed = !IsAligned<kObjectAlignment>(obj);
       if (!failed) {
         mirror::Class* c = obj->GetClass<kVerifyNone>();
-        failed = failed || c == nullptr;
-        failed = failed ||!IsAligned<kObjectAlignment>(c);
-        failed = failed ||!VerifyClassClass(c);
+        failed = failed || !IsAligned<kObjectAlignment>(c);
+        failed = failed || !VerifyClassClass(c);
       }
       if (UNLIKELY(failed)) {
         Runtime::Current()->GetHeap()->VerifyObjectBody(obj);