Collect CPU frequency thermal throttling stats

This reads the CPU's maximum scaling frequency (scaling_max_freq)
every 30s and reports it as a percentage of the unthrottled maximum
frequency (cpuinfo_max_freq).  The special value of 101% indicates
that the CPU is using turbo.

BUG=chromium:238890
TEST=ran manually, checked about:histograms

Change-Id: Ia1c8a2344b81b8274f9045b854d2e6d35cf49339
Reviewed-on: https://gerrit.chromium.org/gerrit/50387
Reviewed-by: Darin Petkov <petkov@chromium.org>
Commit-Queue: Luigi Semenzato <semenzato@chromium.org>
Tested-by: Luigi Semenzato <semenzato@chromium.org>
diff --git a/metrics/metrics_daemon.cc b/metrics/metrics_daemon.cc
index a786548..3b9e9db 100644
--- a/metrics/metrics_daemon.cc
+++ b/metrics/metrics_daemon.cc
@@ -11,6 +11,7 @@
 
 #include <base/file_util.h>
 #include <base/logging.h>
+#include <base/string_number_conversions.h>
 #include <base/string_util.h>
 #include <base/stringprintf.h>
 #include <chromeos/dbus/service_constants.h>
@@ -128,6 +129,11 @@
 const char MetricsDaemon::kMetricPageFaultsShortName[] =
     "Platform.PageFaultsShort";
 
+// Thermal CPU throttling.
+
+const char MetricsDaemon::kMetricScaledCpuFrequencyName[] =
+    "Platform.CpuFrequencyThermalScaling";
+
 // persistent metrics path
 const char MetricsDaemon::kMetricsPath[] = "/var/log/metrics";
 
@@ -247,7 +253,10 @@
 
 void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib,
                          const string& diskstats_path,
-                         const string& vmstats_path) {
+                         const string& vmstats_path,
+                         const string& scaling_max_freq_path,
+                         const string& cpuinfo_max_freq_path
+                         ) {
   testing_ = testing;
   DCHECK(metrics_lib != NULL);
   metrics_lib_ = metrics_lib;
@@ -277,6 +286,8 @@
 
   diskstats_path_ = diskstats_path;
   vmstats_path_ = vmstats_path;
+  scaling_max_freq_path_ = scaling_max_freq_path;
+  cpuinfo_max_freq_path_ = cpuinfo_max_freq_path;
   StatsReporterInit();
 
   // Start collecting meminfo stats.
@@ -632,17 +643,16 @@
 bool MetricsDaemon::VmStatsParseStats(char* stats, long int* page_faults) {
   static const char kPageFaultSearchString[] = "\npgmajfault ";
   bool success = false;
-  /* Each line in the file has the form
-   * <ID> <VALUE>
-   * for instance:
-   * nr_free_pages 213427
-   */
+  // Each line in the file has the form
+  // <ID> <VALUE>
+  // for instance:
+  // nr_free_pages 213427
   char* s = strstr(stats, kPageFaultSearchString);
   if (s == NULL) {
     LOG(WARNING) << "cannot find page fault entry in vmstats";
   } else {
     char* endp;
-    /* Skip <ID> and space.  Don't count the terminating null. */
+    // Skip <ID> and space.  Don't count the terminating null.
     s += sizeof(kPageFaultSearchString) - 1;
     *page_faults = strtol(s, &endp, 10);
     if (*endp == '\n') {
@@ -681,6 +691,64 @@
   return success;
 }
 
+// Reads an integer from file |sysfs_file_name| into |value|; false on error.
+bool MetricsDaemon::ReadFreqToInt(const string& sysfs_file_name, int* value) {
+  // Callers already pass complete paths, so no directory is prepended.
+  const FilePath sysfs_path(sysfs_file_name);
+  string value_string;
+  if (!file_util::ReadFileToString(sysfs_path, &value_string)) {
+    LOG(WARNING) << "cannot read " << sysfs_path.value().c_str();
+    return false;
+  }
+  if (!RemoveChars(value_string, "\n", &value_string)) {
+    LOG(WARNING) << "no newline in " << value_string;
+    // Continue even though the lack of newline is suspicious.
+  }
+  if (!base::StringToInt(value_string, value)) {
+    LOG(WARNING) << "cannot convert " << value_string << " to int";
+    return false;
+  }
+  return true;
+}
+
+// Reports scaling_max_freq as a percentage of the non-turbo maximum CPU
+// frequency; 101 means the scaling limit is above it (turbo enabled).
+void MetricsDaemon::SendCpuThrottleMetrics() {
+  // Non-turbo max frequency in kHz: 0 = not read yet, -1 = unusable.
+  static int max_freq = 0;
+  if (max_freq == -1 && !testing_)
+    return;  // Give up, as sysfs did not report max_freq correctly.
+  if (max_freq == 0 || testing_) {
+    // One-time initialization of max_freq.  (Every time when testing.)
+    if (!ReadFreqToInt(cpuinfo_max_freq_path_, &max_freq)) {
+      max_freq = -1;
+      return;
+    }
+    if (max_freq % 10000 == 1000) {
+      // Special case: system has turbo mode, and max non-turbo frequency is
+      // max_freq - 1000.  This relies on "normal" (non-turbo) frequencies
+      // being multiples of (at least) 10 MHz, which is not guaranteed; the
+      // alternative is parsing scaling_available_frequencies (also a hack).
+      max_freq -= 1000;
+    }
+    if (max_freq <= 0) {
+      // Validated after the turbo adjustment: max_freq is a divisor below.
+      LOG(WARNING) << "sysfs reports invalid max CPU frequency";
+      max_freq = -1;
+      return;
+    }
+  }
+  int scaled_freq = 0;
+  if (!ReadFreqToInt(scaling_max_freq_path_, &scaled_freq))
+    return;
+  // Frequencies are in kHz.  If scaled_freq > max_freq, turbo is on, but
+  // scaled_freq is not the actual turbo frequency, so report 101% instead.
+  // Multiplying first (in 64 bits) avoids truncating max_freq / 100 to zero.
+  const int percent = scaled_freq > max_freq ?
+      101 : static_cast<int>(scaled_freq * 100LL / max_freq);
+  SendLinearMetric(kMetricScaledCpuFrequencyName, percent, 101, 102);
+}
+
 // static
 gboolean MetricsDaemon::StatsCallbackStatic(void* handle) {
   (static_cast<MetricsDaemon*>(handle))->StatsCallback();
@@ -758,6 +826,7 @@
                    kMetricPageFaultsBuckets);
         page_faults_ = page_faults_now;
       }
+      SendCpuThrottleMetrics();
       // Set start time for new cycle.
       stats_initial_time_ = time_now;
       // Schedule short callback.
diff --git a/metrics/metrics_daemon.h b/metrics/metrics_daemon.h
index 07eaa01..03c5f3c 100644
--- a/metrics/metrics_daemon.h
+++ b/metrics/metrics_daemon.h
@@ -31,7 +31,9 @@
   // Initializes.
   void Init(bool testing, MetricsLibraryInterface* metrics_lib,
             const std::string& diskstats_path,
-            const std::string& vmstats_path);
+            const std::string& vmstats_path,
+            const std::string& scaling_max_freq_path,
+            const std::string& cpuinfo_max_freq_path);
 
   // Does all the work. If |run_as_daemon| is true, daemonizes by
   // forking.
@@ -56,6 +58,7 @@
   FRIEND_TEST(MetricsDaemonTest, ProcessUncleanShutdown);
   FRIEND_TEST(MetricsDaemonTest, ProcessUserCrash);
   FRIEND_TEST(MetricsDaemonTest, ReportCrashesDailyFrequency);
+  FRIEND_TEST(MetricsDaemonTest, ReadFreqToInt);
   FRIEND_TEST(MetricsDaemonTest, ReportDailyUse);
   FRIEND_TEST(MetricsDaemonTest, ReportDiskStats);
   FRIEND_TEST(MetricsDaemonTest, ReportKernelCrashInterval);
@@ -63,6 +66,7 @@
   FRIEND_TEST(MetricsDaemonTest, ReportUserCrashInterval);
   FRIEND_TEST(MetricsDaemonTest, ScreenSaverStateChanged);
   FRIEND_TEST(MetricsDaemonTest, SendMetric);
+  FRIEND_TEST(MetricsDaemonTest, SendCpuThrottleMetrics);
   FRIEND_TEST(MetricsDaemonTest, SessionStateChanged);
   FRIEND_TEST(MetricsDaemonTest, SetUserActiveState);
   FRIEND_TEST(MetricsDaemonTest, SetUserActiveStateTimeJump);
@@ -148,6 +152,7 @@
   static const char kMetricWriteSectorsShortName[];
   static const char kMetricPageFaultsShortName[];
   static const char kMetricPageFaultsLongName[];
+  static const char kMetricScaledCpuFrequencyName[];
   static const int kMetricStatsShortInterval;
   static const int kMetricStatsLongInterval;
   static const int kMetricMeminfoInterval;
@@ -331,9 +336,15 @@
   // Reads /proc/meminfo and sends total anonymous memory usage to UMA.
   bool MemuseCallbackWork();
 
-  // Parse meminfo data and send to UMA.
+  // Parses meminfo data and sends it to UMA.
   bool ProcessMemuse(const std::string& meminfo_raw);
 
+  // Sends stats for thermal CPU throttling.
+  void SendCpuThrottleMetrics();
+
+  // Reads an integer CPU frequency value from sysfs.
+  bool ReadFreqToInt(const std::string& sysfs_file_name, int* value);
+
   // Test mode.
   bool testing_;
 
@@ -399,6 +410,8 @@
 
   std::string diskstats_path_;
   std::string vmstats_path_;
+  std::string scaling_max_freq_path_;
+  std::string cpuinfo_max_freq_path_;
 };
 
 #endif  // METRICS_DAEMON_H_
diff --git a/metrics/metrics_daemon_main.cc b/metrics/metrics_daemon_main.cc
index d194e54..643a2bf 100644
--- a/metrics/metrics_daemon_main.cc
+++ b/metrics/metrics_daemon_main.cc
@@ -10,6 +10,11 @@
 
 #include "metrics_daemon.h"
 
+const char kScalingMaxFreqPath[] =
+    "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq";
+const char kCpuinfoMaxFreqPath[] =
+    "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq";
+
 DEFINE_bool(daemon, true, "run as daemon (use -nodaemon for debugging)");
 
 // Returns the path to the disk stats in the sysfs.  Returns the null string if
@@ -42,6 +47,7 @@
   MetricsLibrary metrics_lib;
   metrics_lib.Init();
   MetricsDaemon daemon;
-  daemon.Init(false, &metrics_lib, MetricsMainDiskStatsPath(), "/proc/vmstat");
+  daemon.Init(false, &metrics_lib, MetricsMainDiskStatsPath(),
+      "/proc/vmstat", kScalingMaxFreqPath, kCpuinfoMaxFreqPath);
   daemon.Run(FLAGS_daemon);
 }
diff --git a/metrics/metrics_daemon_test.cc b/metrics/metrics_daemon_test.cc
index 370634c..92240a2 100644
--- a/metrics/metrics_daemon_test.cc
+++ b/metrics/metrics_daemon_test.cc
@@ -45,6 +45,8 @@
 static const int kFakeWriteSectors[] = {3000, 4000};
 
 static const char kFakeVmStatsPath[] = "fake-vm-stats";
+static const char kFakeScalingMaxFreqPath[] = "fake-scaling-max-freq";
+static const char kFakeCpuinfoMaxFreqPath[] = "fake-cpuinfo-max-freq";
 
 // This class allows a TimeTicks object to be initialized with seconds
 // (rather than microseconds) through the protected TimeTicks(int64)
@@ -72,7 +74,11 @@
     kFakeDiskStats[1] = StringPrintf(kFakeDiskStatsFormat,
                                      kFakeReadSectors[1], kFakeWriteSectors[1]);
     CreateFakeDiskStatsFile(kFakeDiskStats[0].c_str());
-    daemon_.Init(true, &metrics_lib_, kFakeDiskStatsPath, kFakeVmStatsPath);
+    CreateFakeCpuFrequencyFile(kFakeCpuinfoMaxFreqPath, 10000000);
+    CreateFakeCpuFrequencyFile(kFakeScalingMaxFreqPath, 10000000);
+
+    daemon_.Init(true, &metrics_lib_, kFakeDiskStatsPath, kFakeVmStatsPath,
+        kFakeScalingMaxFreqPath, kFakeCpuinfoMaxFreqPath);
 
     // Check configuration of a few histograms.
     FrequencyCounter* frequency_counter =
@@ -139,7 +145,9 @@
   }
 
   virtual void TearDown() {
-    EXPECT_EQ(unlink(kFakeDiskStatsPath), 0);
+    EXPECT_EQ(0, unlink(kFakeDiskStatsPath));
+    EXPECT_EQ(0, unlink(kFakeScalingMaxFreqPath));
+    EXPECT_EQ(0, unlink(kFakeCpuinfoMaxFreqPath));
   }
 
   const TaggedCounterReporter*
@@ -258,6 +266,17 @@
     EXPECT_EQ(0, fclose(f));
   }
 
+  // Creates or overwrites |filename| with |frequency| and a trailing newline.
+  void CreateFakeCpuFrequencyFile(const char* filename, int frequency) {
+    FilePath path(filename);
+    file_util::Delete(path, false);
+    std::string frequency_string = StringPrintf("%d\n", frequency);
+    int frequency_string_length = frequency_string.length();
+    EXPECT_EQ(frequency_string_length,  // int vs. int: WriteFile returns int.
+        file_util::WriteFile(path, frequency_string.c_str(),
+            frequency_string_length));
+  }
+
   // The MetricsDaemon under test.
   MetricsDaemon daemon_;
 
@@ -578,6 +597,7 @@
   EXPECT_CALL(metrics_lib_,
               SendToUMA(_, (kFakeWriteSectors[1] - kFakeWriteSectors[0]) / 30,
                         _, _, _));
+  EXPECT_CALL(metrics_lib_, SendEnumToUMA(_, _, _));  // SendCpuThrottleMetrics
   daemon_.StatsCallback();
   EXPECT_TRUE(s_state != daemon_.stats_state_);
 }
@@ -642,7 +662,7 @@
 MemTotal:        2000000 kB\n\
 MemFree:         1000000 kB\n\
 ";
-  /* Not enough fields */
+  // Not enough fields.
   EXPECT_FALSE(daemon_.ProcessMeminfo(meminfo));
 }
 
@@ -653,6 +673,32 @@
   EXPECT_EQ(page_faults, 42);
 }
 
+TEST_F(MetricsDaemonTest, ReadFreqToInt) {
+  const int expected_scaled = 1666999;
+  const int expected_max = 2000000;
+  CreateFakeCpuFrequencyFile(kFakeScalingMaxFreqPath, expected_scaled);
+  CreateFakeCpuFrequencyFile(kFakeCpuinfoMaxFreqPath, expected_max);
+  EXPECT_TRUE(daemon_.testing_);  // SetUp initializes the daemon in test mode.
+  int read_scaled = 0;
+  EXPECT_TRUE(daemon_.ReadFreqToInt(kFakeScalingMaxFreqPath, &read_scaled));
+  EXPECT_EQ(expected_scaled, read_scaled);
+  int read_max = 0;
+  EXPECT_TRUE(daemon_.ReadFreqToInt(kFakeCpuinfoMaxFreqPath, &read_max));
+  EXPECT_EQ(expected_max, read_max);
+}
+
+TEST_F(MetricsDaemonTest, SendCpuThrottleMetrics) {
+  CreateFakeCpuFrequencyFile(kFakeCpuinfoMaxFreqPath, 2001000);
+  // The max above ends in 1000 kHz (turbo), so the non-turbo max is 2000000.
+  CreateFakeCpuFrequencyFile(kFakeScalingMaxFreqPath, 2001000);
+  EXPECT_TRUE(daemon_.testing_);
+  EXPECT_CALL(metrics_lib_, SendEnumToUMA(_, 101, 101));  // Over max: 101%.
+  daemon_.SendCpuThrottleMetrics();
+  CreateFakeCpuFrequencyFile(kFakeScalingMaxFreqPath, 2000000);
+  EXPECT_CALL(metrics_lib_, SendEnumToUMA(_, 100, 101));  // At max: 100%.
+  daemon_.SendCpuThrottleMetrics();
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();