ART: add weighted allocated bytes metrics

`process_cpu_start_time_` is moved from `art::Runtime` to `art::gc::Heap`
so that it can be reset when the app starts (i.e., when the current process
is forked from Zygote).

`process_cpu_end_time_` is removed from `art::Runtime` because it's only
used when the runtime is about to shut down, so it suffices to capture it
in a local variable.

Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I154fdb6acdf4e0c21dff835807bd4e2cf311e3d1
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ab79b9e..5323cee 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -278,7 +278,6 @@
       // Initially assume we perceive jank in case the process state is never updated.
       process_state_(kProcessStateJankPerceptible),
       zygote_no_threads_(false),
-      process_cpu_start_time_(ProcessCpuNanoTime()),
       verifier_logging_threshold_ms_(100) {
   static_assert(Runtime::kCalleeSaveSize ==
                     static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType), "Unexpected size");
@@ -322,20 +321,26 @@
   }
 
   if (dump_gc_performance_on_shutdown_) {
-    process_cpu_end_time_ = ProcessCpuNanoTime();
+    heap_->CalculateWeightedAllocatedBytes();
+    uint64_t process_cpu_end_time = ProcessCpuNanoTime();
     ScopedLogSeverity sls(LogSeverity::INFO);
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
     // to be still alive.
     heap_->DumpGcPerformanceInfo(LOG_STREAM(INFO));
 
-    uint64_t process_cpu_time = process_cpu_end_time_ - process_cpu_start_time_;
+    uint64_t process_cpu_time = process_cpu_end_time - heap_->GetProcessCpuStartTime();
     uint64_t gc_cpu_time = heap_->GetTotalGcCpuTime();
     float ratio = static_cast<float>(gc_cpu_time) / process_cpu_time;
     LOG_STREAM(INFO) << "GC CPU time " << PrettyDuration(gc_cpu_time)
         << " out of process CPU time " << PrettyDuration(process_cpu_time)
         << " (" << ratio << ")"
         << "\n";
+    float weighted_allocated_bytes =
+      static_cast<float>(heap_->GetWeightedAllocatedBytes()) / process_cpu_time;
+    LOG_STREAM(INFO) << "Weighted bytes allocated over CPU time: "
+        << " (" <<  PrettySize(weighted_allocated_bytes)  << ")"
+        << "\n";
   }
 
   if (jit_ != nullptr) {