Collect CPU frequency thermal throttling stats

This collects the max frequency every 30s and reports
it as a percentage of the unthrottled frequency.
The special value of 101% indicates that the CPU is
using turbo.

BUG=chromium:238890
TEST=ran manually, checked about:histograms

Change-Id: Ia1c8a2344b81b8274f9045b854d2e6d35cf49339
Reviewed-on: https://gerrit.chromium.org/gerrit/50387
Reviewed-by: Darin Petkov <petkov@chromium.org>
Commit-Queue: Luigi Semenzato <semenzato@chromium.org>
Tested-by: Luigi Semenzato <semenzato@chromium.org>
diff --git a/metrics/metrics_daemon.cc b/metrics/metrics_daemon.cc
index a786548..3b9e9db 100644
--- a/metrics/metrics_daemon.cc
+++ b/metrics/metrics_daemon.cc
@@ -11,6 +11,7 @@
 
 #include <base/file_util.h>
 #include <base/logging.h>
+#include <base/string_number_conversions.h>
 #include <base/string_util.h>
 #include <base/stringprintf.h>
 #include <chromeos/dbus/service_constants.h>
@@ -128,6 +129,11 @@
 const char MetricsDaemon::kMetricPageFaultsShortName[] =
     "Platform.PageFaultsShort";
 
+// Thermal CPU throttling: name of the metric carrying the scaled max CPU
+// frequency as a percentage of the unthrottled one (101 means turbo).
+const char MetricsDaemon::kMetricScaledCpuFrequencyName[] =
+    "Platform.CpuFrequencyThermalScaling";
+
 // persistent metrics path
 const char MetricsDaemon::kMetricsPath[] = "/var/log/metrics";
 
@@ -247,7 +253,10 @@
 
 void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib,
                          const string& diskstats_path,
-                         const string& vmstats_path) {
+                         const string& vmstats_path,
+                         const string& scaling_max_freq_path,
+                         const string& cpuinfo_max_freq_path
+                         ) {
   testing_ = testing;
   DCHECK(metrics_lib != NULL);
   metrics_lib_ = metrics_lib;
@@ -277,6 +286,8 @@
 
   diskstats_path_ = diskstats_path;
   vmstats_path_ = vmstats_path;
+  scaling_max_freq_path_ = scaling_max_freq_path;
+  cpuinfo_max_freq_path_ = cpuinfo_max_freq_path;
   StatsReporterInit();
 
   // Start collecting meminfo stats.
@@ -632,17 +643,16 @@
 bool MetricsDaemon::VmStatsParseStats(char* stats, long int* page_faults) {
   static const char kPageFaultSearchString[] = "\npgmajfault ";
   bool success = false;
-  /* Each line in the file has the form
-   * <ID> <VALUE>
-   * for instance:
-   * nr_free_pages 213427
-   */
+  // Each line in the file has the form
+  // <ID> <VALUE>
+  // for instance:
+  // nr_free_pages 213427
   char* s = strstr(stats, kPageFaultSearchString);
   if (s == NULL) {
     LOG(WARNING) << "cannot find page fault entry in vmstats";
   } else {
     char* endp;
-    /* Skip <ID> and space.  Don't count the terminating null. */
+    // Skip <ID> and space.  Don't count the terminating null.
     s += sizeof(kPageFaultSearchString) - 1;
     *page_faults = strtol(s, &endp, 10);
     if (*endp == '\n') {
@@ -681,6 +691,64 @@
   return success;
 }
 
+// Parses the integer stored in cpufreq sysfs file |sysfs_file_name| into
+// |value|.  Logs a warning and returns false on read or conversion failure.
+bool MetricsDaemon::ReadFreqToInt(const string& sysfs_file_name, int* value) {
+  // When testing, the file is looked up in the current directory instead.
+  const string dir(testing_ ? "./" : "/sys/devices/system/cpu/cpu0/cpufreq/");
+  const FilePath path(dir + sysfs_file_name);
+  string contents;
+  if (!file_util::ReadFileToString(path, &contents)) {
+    LOG(WARNING) << "cannot read " << path.value().c_str();
+    return false;
+  }
+  // A missing newline is suspicious, but carry on regardless.
+  if (!RemoveChars(contents, "\n", &contents))
+    LOG(WARNING) << "no newline in " << contents;
+  if (base::StringToInt(contents, value))
+    return true;
+  LOG(WARNING) << "cannot convert " << contents << " to int";
+  return false;
+}
+
+// Sends the scaled max CPU frequency as a percentage of the non-turbo maximum.
+void MetricsDaemon::SendCpuThrottleMetrics() {
+  // |max_freq| is 0 only the first time through, and -1 once we have given up.
+  static int max_freq = 0;
+  if (max_freq == -1)
+    return;  // Give up, as sysfs did not report max_freq correctly.
+  if (max_freq == 0 || testing_) {
+    // One-time initialization of max_freq.  (Every time when testing.)
+    if (!ReadFreqToInt(cpuinfo_max_freq_path_, &max_freq)) {
+      max_freq = -1;
+      return;
+    }
+    if (max_freq % 10000 == 1000) {
+      // Special case: system has turbo mode, and max non-turbo frequency is
+      // max_freq - 1000.  This relies on non-turbo frequencies being multiples
+      // of (at least) 10 MHz, which is not guaranteed but seems reasonable.
+      // Otherwise we should read scaling_available_frequencies, sort them, and
+      // check if the two highest differ by 1000 (kHz) (that's a hack too).
+      max_freq -= 1000;
+    }
+    if (max_freq <= 0) {
+      // Checked after the turbo adjustment: it can bring |max_freq| to 0.
+      LOG(WARNING) << "sysfs reports invalid max CPU frequency";
+      max_freq = -1;
+      return;
+    }
+  }
+  int scaled_freq = 0;
+  if (!ReadFreqToInt(scaling_max_freq_path_, &scaled_freq))
+    return;
+  // Frequencies are in kHz.  If scaled_freq > max_freq, turbo is on, but
+  // scaled_freq is not the actual turbo frequency.  We indicate this situation
+  // with a 101% value.  Multiply first, in 64 bits, to avoid truncation error.
+  int percent = scaled_freq > max_freq ?
+      101 : static_cast<int>(scaled_freq * 100LL / max_freq);
+  SendLinearMetric(kMetricScaledCpuFrequencyName, percent, 101, 102);
+}
+
 // static
 gboolean MetricsDaemon::StatsCallbackStatic(void* handle) {
   (static_cast<MetricsDaemon*>(handle))->StatsCallback();
@@ -758,6 +826,7 @@
                    kMetricPageFaultsBuckets);
         page_faults_ = page_faults_now;
       }
+      SendCpuThrottleMetrics();
       // Set start time for new cycle.
       stats_initial_time_ = time_now;
       // Schedule short callback.