Merge e9c5f8b9401bd7d194a0c1bb46e990db6157b90d on remote branch

Change-Id: Ibba25d433a7c7af62a9942dad0e9f9d7ef1e4775
diff --git a/boottime_tools/bootanalyze/bootanalyze.py b/boottime_tools/bootanalyze/bootanalyze.py
index 5bed9f2..af16694 100755
--- a/boottime_tools/bootanalyze/bootanalyze.py
+++ b/boottime_tools/bootanalyze/bootanalyze.py
@@ -84,7 +84,7 @@
       value = float(kv[1])
       components_to_monitor[key] = value
 
-  cfg = yaml.load(args.config, Loader=yaml.FullLoader)
+  cfg = yaml.load(args.config, Loader=yaml.SafeLoader)
 
   if args.stressfs:
     if run_adb_cmd('install -r -g ' + args.stressfs) != 0:
diff --git a/boottime_tools/bootanalyze/bugreport_anayze.py b/boottime_tools/bootanalyze/bugreport_anayze.py
index 3ea7552..07c8b8d 100644
--- a/boottime_tools/bootanalyze/bugreport_anayze.py
+++ b/boottime_tools/bootanalyze/bugreport_anayze.py
@@ -138,7 +138,7 @@
     self.re_log_start = re.compile(LOG_START_PATTERN)
     self.re_log_end = re.compile(LOG_END_PATTERN)
     self.f = bugreport_file
-    cfg = yaml.load(config_file, Loader=yaml.FullLoader)
+    cfg = yaml.load(config_file, Loader=yaml.SafeLoader)
     self.event_patterns = {key: re.compile(pattern)
                          for key, pattern in cfg['events'].iteritems()}
     self.timing_patterns = {key: re.compile(pattern)
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index 40a6bb0..bd1bffb 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -141,8 +141,8 @@
     if (show_cpu) {
       fprintf(fp, "%d,", s.cpu);
     }
-    fprintf(fp, "%s,%s,%s,(%.0f%%)%s\n", s.readable_count.c_str(), s.Name().c_str(),
-            s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ","));
+    fprintf(fp, "%s,%s,%s,%s\n", s.readable_count.c_str(), s.Name().c_str(), s.comment.c_str(),
+            (s.auto_generated ? "(generated)," : ""));
   }
 }
 
@@ -157,7 +157,7 @@
   }
   titles.emplace_back("count");
   titles.emplace_back("event_name");
-  titles.emplace_back(" # count / runtime,  runtime / enabled_time");
+  titles.emplace_back(" # count / runtime");
 
   std::vector<size_t> width(titles.size(), 0);
 
@@ -206,10 +206,9 @@
     if (show_cpu) {
       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.cpu);
     }
-    fprintf(fp, "  %*s  %-*s   # %-*s  (%.0f%%)%s\n", static_cast<int>(width[i]),
-            s.readable_count.c_str(), static_cast<int>(width[i + 1]), s.Name().c_str(),
-            static_cast<int>(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100,
-            (s.auto_generated ? " (generated)" : ""));
+    fprintf(fp, "  %*s  %-*s   # %-*s%s\n", static_cast<int>(width[i]), s.readable_count.c_str(),
+            static_cast<int>(width[i + 1]), s.Name().c_str(), static_cast<int>(width[i + 2]),
+            s.comment.c_str(), (s.auto_generated ? " (generated)" : ""));
   }
 }
 
@@ -434,12 +433,13 @@
  private:
   bool ParseOptions(const std::vector<std::string>& args,
                     std::vector<std::string>* non_option_args);
-  bool PrintHardwareCounters();
+  void PrintHardwareCounters();
   bool AddDefaultMeasuredEventTypes();
   void SetEventSelectionFlags();
   void MonitorEachThread();
   void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
   bool ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec, FILE* fp);
+  void CheckHardwareCounterMultiplexing();
 
   bool verbose_mode_;
   bool system_wide_collection_;
@@ -480,7 +480,8 @@
     return false;
   }
   if (print_hw_counter_) {
-    return PrintHardwareCounters();
+    PrintHardwareCounters();
+    return true;
   }
   if (!app_package_name_.empty() && !in_app_context_) {
     if (!IsRoot()) {
@@ -536,9 +537,6 @@
   }
 
   // 3. Open perf_event_files and output file if defined.
-  if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) {
-    cpus_.push_back(-1);  // Get event count for each thread on all cpus.
-  }
   if (!event_selection_set_.OpenEventFiles(cpus_)) {
     return false;
   }
@@ -617,8 +615,15 @@
 
   // 6. Read and print counters.
   if (interval_in_ms_ == 0) {
-    return print_counters();
+    if (!print_counters()) {
+      return false;
+    }
   }
+
+  // 7. Print hardware counter multiplexing warning when needed.
+  event_selection_set_.CloseEventFiles();
+  CheckHardwareCounterMultiplexing();
+
   return true;
 }
 
@@ -739,52 +744,67 @@
   return true;
 }
 
-bool StatCommand::PrintHardwareCounters() {
-  size_t available_counters = 0;
+std::optional<bool> CheckHardwareCountersOnCpu(int cpu, size_t counters) {
   const EventType* event = FindEventTypeByName("cpu-cycles", true);
   if (event == nullptr) {
-    return false;
+    return std::nullopt;
   }
   perf_event_attr attr = CreateDefaultPerfEventAttr(*event);
+  auto workload = Workload::CreateWorkload({"sleep", "0.1"});
+  if (!workload || !workload->SetCpuAffinity(cpu)) {
+    return std::nullopt;
+  }
+  std::vector<std::unique_ptr<EventFd>> event_fds;
+  for (size_t i = 0; i < counters; i++) {
+    EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
+    auto event_fd =
+        EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, "cpu-cycles", false);
+    if (!event_fd) {
+      return false;
+    }
+    event_fds.emplace_back(std::move(event_fd));
+  }
+  if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
+    return std::nullopt;
+  }
+  for (auto& event_fd : event_fds) {
+    PerfCounter counter;
+    if (!event_fd->ReadCounter(&counter)) {
+      return std::nullopt;
+    }
+    if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
+      return false;
+    }
+  }
+  return true;
+}
+
+std::optional<size_t> GetHardwareCountersOnCpu(int cpu) {
+  size_t available_counters = 0;
   while (true) {
-    auto workload = Workload::CreateWorkload({"sleep", "0.1"});
-    if (!workload) {
-      return false;
+    std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, available_counters + 1);
+    if (!result.has_value()) {
+      return std::nullopt;
     }
-    std::vector<std::unique_ptr<EventFd>> event_fds;
-    for (size_t i = 0; i <= available_counters; i++) {
-      EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
-      auto event_fd =
-          EventFd::OpenEventFile(attr, workload->GetPid(), -1, group_event_fd, "cpu-cycles", false);
-      if (!event_fd) {
-        break;
-      }
-      event_fds.emplace_back(std::move(event_fd));
-    }
-    if (event_fds.size() != available_counters + 1) {
-      break;
-    }
-    if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
-      return false;
-    }
-    bool always_running = true;
-    for (auto& event_fd : event_fds) {
-      PerfCounter counter;
-      if (!event_fd->ReadCounter(&counter)) {
-        return false;
-      }
-      if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
-        always_running = false;
-        break;
-      }
-    }
-    if (!always_running) {
+    if (!result.value()) {
       break;
     }
     available_counters++;
   }
-  printf("There are %zu CPU PMU hardware counters available on this device.\n", available_counters);
-  return true;
+  return available_counters;
+}
+
+void StatCommand::PrintHardwareCounters() {
+  for (int cpu : GetOnlineCpus()) {
+    std::optional<size_t> counters = GetHardwareCountersOnCpu(cpu);
+    if (!counters) {
+      // When built as a 32-bit program, we can't set sched_affinity to a 64-bit only CPU. So we
+      // may not be able to get hardware counters on that CPU.
+      LOG(WARNING) << "Failed to get CPU PMU hardware counters on cpu " << cpu;
+      continue;
+    }
+    printf("There are %zu CPU PMU hardware counters available on cpu %d.\n", counters.value(), cpu);
+  }
 }
 
 bool StatCommand::AddDefaultMeasuredEventTypes() {
@@ -896,44 +916,45 @@
   summaries.GenerateComments(duration_in_sec);
   summaries.Show(fp);
 
-  if (csv_)
+  if (csv_) {
     fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
-  else
-    fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
-
-  const char* COUNTER_MULTIPLEX_INFO =
-      "probably caused by hardware counter multiplexing (less counters than events).\n"
-      "Try --use-devfreq-counters if on a rooted device.";
-
-  if (cpus_ == std::vector<int>(1, -1) ||
-      event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
-    // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
-    // if percentages < 100%, probably it is caused by hardware counter multiplexing.
-    bool counters_always_available = true;
-    for (const auto& summary : summaries.Summaries()) {
-      if (!summary.IsMonitoredAllTheTime()) {
-        counters_always_available = false;
-        break;
-      }
-    }
-    if (!counters_always_available) {
-      LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
-                   << COUNTER_MULTIPLEX_INFO;
-    }
-  } else if (report_per_thread_) {
-    // We monitor each thread on each cpu.
-    LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
-              << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
-              << COUNTER_MULTIPLEX_INFO;
   } else {
-    // We monitor some threads on each cpu.
-    LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
-              << "threads. If percentage sum < 99%, or report for an event is missing,\n"
-              << COUNTER_MULTIPLEX_INFO;
+    fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
   }
   return true;
 }
 
+void StatCommand::CheckHardwareCounterMultiplexing() {
+  size_t hardware_events = 0;
+  for (const EventType* event : event_selection_set_.GetEvents()) {
+    if (event->IsHardwareEvent()) {
+      hardware_events++;
+    }
+  }
+  if (hardware_events == 0) {
+    return;
+  }
+  std::vector<int> cpus = cpus_;
+  if (cpus.empty()) {
+    cpus = GetOnlineCpus();
+  }
+  for (int cpu : cpus) {
+    std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, hardware_events);
+    if (result.has_value() && !result.value()) {
+      LOG(WARNING) << "It seems the number of hardware events are more than the number of\n"
+                   << "available CPU PMU hardware counters. That will trigger hardware counter\n"
+                   << "multiplexing. As a result, events are not counted all the time processes\n"
+                   << "running, and event counts are smaller than what really happen.\n"
+                   << "Use --print-hw-counter to show available hardware counters.\n"
+#if defined(__ANDROID__)
+                   << "If on a rooted device, try --use-devfreq-counters to get more counters.\n"
+#endif
+          ;
+      break;
+    }
+  }
+}
+
 }  // namespace
 
 void RegisterStatCommand() {
diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h
index d2cd0c1..14863ca 100644
--- a/simpleperf/event_type.h
+++ b/simpleperf/event_type.h
@@ -24,6 +24,8 @@
 #include <string>
 #include <vector>
 
+#include "perf_event.h"
+
 namespace simpleperf {
 
 inline const std::string kETMEventName = "cs-etm";
@@ -50,6 +52,9 @@
 
   bool IsPmuEvent() const { return name.find('/') != std::string::npos; }
   bool IsEtmEvent() const { return name == kETMEventName; }
+  bool IsHardwareEvent() const {
+    return type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE || type == PERF_TYPE_RAW;
+  }
 
   std::vector<int> GetPmuCpumask();
 
diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp
index 4bb7c54..92c6537 100644
--- a/simpleperf/workload.cpp
+++ b/simpleperf/workload.cpp
@@ -18,6 +18,7 @@
 
 #include <errno.h>
 #include <fcntl.h>
+#include <sched.h>
 #include <sys/prctl.h>
 #include <sys/wait.h>
 #include <unistd.h>
@@ -160,6 +161,18 @@
   }
 }
 
+bool Workload::SetCpuAffinity(int cpu) {
+  CHECK_EQ(work_state_, NotYetStartNewProcess);
+  cpu_set_t mask;
+  CPU_ZERO(&mask);
+  CPU_SET(cpu, &mask);
+  if (sched_setaffinity(GetPid(), sizeof(mask), &mask) != 0) {
+    PLOG(WARNING) << "sched_setaffinity failed";
+    return false;
+  }
+  return true;
+}
+
 bool Workload::Start() {
   CHECK_EQ(work_state_, NotYetStartNewProcess);
   char start_signal = 1;
diff --git a/simpleperf/workload.h b/simpleperf/workload.h
index 2400ac8..86b2892 100644
--- a/simpleperf/workload.h
+++ b/simpleperf/workload.h
@@ -43,6 +43,7 @@
 
   ~Workload();
 
+  bool SetCpuAffinity(int cpu);
   bool Start();
   bool IsStarted() { return work_state_ == Started; }
   pid_t GetPid() { return work_pid_; }