Change sticky GC ergonomics to use GC throughput.
The old sticky ergonomics used partial/full GC when the bytes until
the footprint limit was < min free. This was suboptimal. The new
sticky GC ergonomics do partial/full GC when the throughput
of the current sticky GC iteration is <= mean throughput of the
partial/full GC.
Total GC time on FormulaEvaluationActions.EvaluateAndApplyChanges.
Before: 26.4s
After: 24.8s
No benchmark score change measured.
Bug: 8788501
Change-Id: I90000305e93fd492a8ef5a06ec9620d830eaf90d
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 0fcec1f..fcec733 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -235,6 +235,28 @@
return os;
}
+template<typename T>
+class ConstDumpable {
+ public:
+ explicit ConstDumpable(const T& value) : value_(value) {
+ }
+
+ void Dump(std::ostream& os) const {
+ value_.Dump(os);
+ }
+
+ private:
+ const T& value_;
+
+ DISALLOW_COPY_AND_ASSIGN(ConstDumpable);
+};
+
+template<typename T>
+std::ostream& operator<<(std::ostream& os, const ConstDumpable<T>& rhs) {
+ rhs.Dump(os);
+ return os;
+}
+
// Helps you use operator<< in a const char*-like context such as our various 'F' methods with
// format strings.
template<typename T>
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index fe18f66..c769f64 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -60,23 +60,10 @@
void CumulativeLogger::Reset() {
MutexLock mu(Thread::Current(), lock_);
iterations_ = 0;
+ total_time_ = 0;
STLDeleteElements(&histograms_);
}
-uint64_t CumulativeLogger::GetTotalNs() const {
- MutexLock mu(Thread::Current(), lock_);
- return GetTotalTime() * kAdjust;
-}
-
-uint64_t CumulativeLogger::GetTotalTime() const {
- MutexLock mu(Thread::Current(), lock_);
- uint64_t total = 0;
- for (Histogram<uint64_t>* histogram : histograms_) {
- total += histogram->Sum();
- }
- return total;
-}
-
void CumulativeLogger::AddLogger(const TimingLogger &logger) {
MutexLock mu(Thread::Current(), lock_);
const TimingLogger::SplitTimings& splits = logger.GetSplits();
@@ -93,7 +80,7 @@
return iterations_;
}
-void CumulativeLogger::Dump(std::ostream &os) {
+void CumulativeLogger::Dump(std::ostream &os) const {
MutexLock mu(Thread::Current(), lock_);
DumpHistogram(os);
}
@@ -101,7 +88,7 @@
void CumulativeLogger::AddPair(const std::string& label, uint64_t delta_time) {
// Convert delta time to microseconds so that we don't overflow our counters.
delta_time /= kAdjust;
-
+ total_time_ += delta_time;
Histogram<uint64_t>* histogram;
Histogram<uint64_t> dummy(label.c_str());
auto it = histograms_.find(&dummy);
@@ -123,7 +110,7 @@
}
};
-void CumulativeLogger::DumpHistogram(std::ostream &os) {
+void CumulativeLogger::DumpHistogram(std::ostream &os) const {
os << "Start Dumping histograms for " << iterations_ << " iterations"
<< " for " << name_ << "\n";
std::set<Histogram<uint64_t>*, CompareHistorgramByTimeSpentDeclining>
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index b0bcf10..9b55898 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -35,8 +35,10 @@
void Start();
void End() LOCKS_EXCLUDED(lock_);
void Reset() LOCKS_EXCLUDED(lock_);
- void Dump(std::ostream& os) LOCKS_EXCLUDED(lock_);
- uint64_t GetTotalNs() const;
+ void Dump(std::ostream& os) const LOCKS_EXCLUDED(lock_);
+ uint64_t GetTotalNs() const {
+ return GetTotalTime() * kAdjust;
+ }
// Allow the name to be modified, particularly when the cumulative logger is a field within a
// parent class that is unable to determine the "name" of a sub-class.
void SetName(const std::string& name) LOCKS_EXCLUDED(lock_);
@@ -57,14 +59,17 @@
void AddPair(const std::string &label, uint64_t delta_time)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
- void DumpHistogram(std::ostream &os) EXCLUSIVE_LOCKS_REQUIRED(lock_);
- uint64_t GetTotalTime() const;
+ void DumpHistogram(std::ostream &os) const EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ uint64_t GetTotalTime() const {
+ return total_time_;
+ }
static const uint64_t kAdjust = 1000;
std::set<Histogram<uint64_t>*, HistogramComparator> histograms_ GUARDED_BY(lock_);
std::string name_;
const std::string lock_name_;
mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
size_t iterations_ GUARDED_BY(lock_);
+ uint64_t total_time_;
DISALLOW_COPY_AND_ASSIGN(CumulativeLogger);
};
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 65b5471..0c7565c 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -39,9 +39,8 @@
name_(name),
gc_cause_(kGcCauseForAlloc),
clear_soft_references_(false),
- verbose_(VLOG_IS_ON(heap)),
duration_ns_(0),
- timings_(name_.c_str(), true, verbose_),
+ timings_(name_.c_str(), true, VLOG_IS_ON(heap)),
pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
cumulative_timings_(name) {
ResetCumulativeStatistics();
@@ -186,6 +185,16 @@
}
}
+uint64_t GarbageCollector::GetEstimatedMeanThroughput() const {
+ // Add 1ms to prevent possible division by 0.
+ return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1);
+}
+
+uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const {
+ // Add 1ms to prevent possible division by 0.
+ return (freed_bytes_ * 1000) / (NsToMs(GetDurationNs()) + 1);
+}
+
} // namespace collector
} // namespace gc
} // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 93fd2ab..f8c4579 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -68,8 +68,7 @@
TimingLogger& GetTimings() {
return timings_;
}
-
- CumulativeLogger& GetCumulativeTimings() {
+ const CumulativeLogger& GetCumulativeTimings() const {
return cumulative_timings_;
}
@@ -111,6 +110,17 @@
return pause_histogram_;
}
+ // Returns the estimated throughput in bytes / second.
+ uint64_t GetEstimatedMeanThroughput() const;
+
+ // Returns the estimated throughput of the last GC iteration.
+ uint64_t GetEstimatedLastIterationThroughput() const;
+
+ // Returns how many GC iterations have been run.
+ size_t GetIterations() const {
+ return GetCumulativeTimings().GetIterations();
+ }
+
protected:
// The initial phase. Done without mutators paused.
virtual void InitializePhase() = 0;
@@ -140,8 +150,6 @@
GcCause gc_cause_;
bool clear_soft_references_;
- const bool verbose_;
-
uint64_t duration_ns_;
TimingLogger timings_;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 02e7e3f..ff4b4ce 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -658,9 +658,9 @@
// Dump cumulative loggers for each GC type.
uint64_t total_paused_time = 0;
for (const auto& collector : garbage_collectors_) {
- CumulativeLogger& logger = collector->GetCumulativeTimings();
+ const CumulativeLogger& logger = collector->GetCumulativeTimings();
if (logger.GetTotalNs() != 0) {
- os << Dumpable<CumulativeLogger>(logger);
+ os << ConstDumpable<CumulativeLogger>(logger);
const uint64_t total_ns = logger.GetTotalNs();
const uint64_t total_pause_ns = collector->GetTotalPausedTimeNs();
double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
@@ -1440,7 +1440,7 @@
// Can't call into java code with all threads suspended.
EnqueueClearedReferences();
uint64_t duration = NanoTime() - start_time;
- GrowForUtilization(collector::kGcTypeFull, duration);
+ GrowForUtilization(semi_space_collector_);
FinishGC(self, collector::kGcTypeFull);
int32_t after_size = GetTotalMemory();
int32_t delta_size = before_size - after_size;
@@ -1821,13 +1821,7 @@
gc_type = collector::kGcTypeFull;
} else if (current_allocator_ == kAllocatorTypeRosAlloc ||
current_allocator_ == kAllocatorTypeDlMalloc) {
- for (const auto& cur_collector : garbage_collectors_) {
- if (cur_collector->GetCollectorType() == collector_type_ &&
- cur_collector->GetGcType() == gc_type) {
- collector = cur_collector;
- break;
- }
- }
+ collector = FindCollectorByGcType(gc_type);
} else {
LOG(FATAL) << "Invalid current allocator " << current_allocator_;
}
@@ -1838,14 +1832,14 @@
if (!clear_soft_references) {
clear_soft_references = gc_type != collector::kGcTypeSticky; // TODO: GSS?
}
- collector->Run(gc_cause, clear_soft_references || Runtime::Current()->IsZygote());
+ collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
total_objects_freed_ever_ += collector->GetFreedObjects();
total_bytes_freed_ever_ += collector->GetFreedBytes();
RequestHeapTrim();
// Enqueue cleared references.
EnqueueClearedReferences();
// Grow the heap so that we know when to perform the next GC.
- GrowForUtilization(gc_type, collector->GetDurationNs());
+ GrowForUtilization(collector);
if (CareAboutPauseTimes()) {
const size_t duration = collector->GetDurationNs();
std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1874,9 +1868,7 @@
<< percent_free << "% free, " << PrettySize(current_heap_size) << "/"
<< PrettySize(total_memory) << ", " << "paused " << pause_string.str()
<< " total " << PrettyDuration((duration / 1000) * 1000);
- if (VLOG_IS_ON(heap)) {
- LOG(INFO) << Dumpable<TimingLogger>(collector->GetTimings());
- }
+ VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings());
}
}
FinishGC(self, gc_type);
@@ -2479,13 +2471,24 @@
native_footprint_limit_ = 2 * target_size - native_size;
}
-void Heap::GrowForUtilization(collector::GcType gc_type, uint64_t gc_duration) {
+collector::GarbageCollector* Heap::FindCollectorByGcType(collector::GcType gc_type) {
+ for (const auto& collector : garbage_collectors_) {
+ if (collector->GetCollectorType() == collector_type_ &&
+ collector->GetGcType() == gc_type) {
+ return collector;
+ }
+ }
+ return nullptr;
+}
+
+void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran) {
// We know what our utilization is at this moment.
// This doesn't actually resize any memory. It just lets the heap grow more when necessary.
const size_t bytes_allocated = GetBytesAllocated();
last_gc_size_ = bytes_allocated;
last_gc_time_ns_ = NanoTime();
size_t target_size;
+ collector::GcType gc_type = collector_ran->GetGcType();
if (gc_type != collector::kGcTypeSticky) {
// Grow the heap for non sticky GC.
target_size = bytes_allocated / GetTargetHeapUtilization();
@@ -2497,12 +2500,22 @@
native_need_to_run_finalization_ = true;
next_gc_type_ = collector::kGcTypeSticky;
} else {
- // Based on how close the current heap size is to the target size, decide
- // whether or not to do a partial or sticky GC next.
- if (bytes_allocated + min_free_ <= max_allowed_footprint_) {
+ collector::GcType non_sticky_gc_type =
+ have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
+ // Find what the next non sticky collector will be.
+ collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type);
+ // If the throughput of the current sticky GC >= throughput of the non sticky collector, then
+ // do another sticky collection next.
+ // We also check that the bytes allocated aren't over the footprint limit in order to prevent a
+ // pathological case where dead objects which aren't reclaimed by sticky could get accumulated
+ // if the sticky GC throughput always remained >= the full/partial throughput.
+ if (collector_ran->GetEstimatedLastIterationThroughput() >=
+ non_sticky_collector->GetEstimatedMeanThroughput() &&
+ non_sticky_collector->GetIterations() > 0 &&
+ bytes_allocated <= max_allowed_footprint_) {
next_gc_type_ = collector::kGcTypeSticky;
} else {
- next_gc_type_ = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
+ next_gc_type_ = non_sticky_gc_type;
}
// If we have freed enough memory, shrink the heap back down.
if (bytes_allocated + max_free_ < max_allowed_footprint_) {
@@ -2516,7 +2529,7 @@
if (IsGcConcurrent()) {
// Calculate when to perform the next ConcurrentGC.
// Calculate the estimated GC duration.
- const double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
+ const double gc_duration_seconds = NsToMs(collector_ran->GetDurationNs()) / 1000.0;
// Estimate how many remaining bytes we will have when we need to start the next GC.
size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 60b8450..a522750 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -658,10 +658,13 @@
// bytes allocated and the target utilization ratio.
void UpdateMaxNativeFootprint();
+ // Find a collector based on GC type.
+ collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
+
// Given the current contents of the alloc space, increase the allowed heap footprint to match
// the target utilization ratio. This should only be called immediately after a full garbage
// collection.
- void GrowForUtilization(collector::GcType gc_type, uint64_t gc_duration);
+ void GrowForUtilization(collector::GarbageCollector* collector_ran);
size_t GetPercentFree();
diff --git a/runtime/verify_object-inl.h b/runtime/verify_object-inl.h
index e211c83..39df375 100644
--- a/runtime/verify_object-inl.h
+++ b/runtime/verify_object-inl.h
@@ -36,9 +36,8 @@
bool failed = !IsAligned<kObjectAlignment>(obj);
if (!failed) {
mirror::Class* c = obj->GetClass<kVerifyNone>();
- failed = failed || c == nullptr;
- failed = failed ||!IsAligned<kObjectAlignment>(c);
- failed = failed ||!VerifyClassClass(c);
+ failed = failed || !IsAligned<kObjectAlignment>(c);
+ failed = failed || !VerifyClassClass(c);
}
if (UNLIKELY(failed)) {
Runtime::Current()->GetHeap()->VerifyObjectBody(obj);