shill: Add metrics for recording termination/suspend time

Added the following metrics:

 - Network.Shill.TerminationActionResult.OnSuspend,
 - Network.Shill.TerminationActionResult.OnTerminate,

which log success/failure;

 - Network.Shill.TerminationActionTime.OnSuspend,
 - Network.Shill.TerminationActionTime.OnTerminate,

which measure the time it takes to complete the termination actions.

BUG=chromium-os:35029
TEST=Manually suspended machine and terminated shill on separate
occasions and observed the output on chrome://histograms

Change-Id: I5183d7c3dd5d4633f661598b7a0cacab6d205d8b
Reviewed-on: https://gerrit.chromium.org/gerrit/34975
Reviewed-by: Ben Chan <benchan@chromium.org>
Commit-Ready: Arman Uguray <armansito@chromium.org>
Tested-by: Arman Uguray <armansito@chromium.org>
diff --git a/manager.cc b/manager.cc
index 7cf4ba7..a283438 100644
--- a/manager.cc
+++ b/manager.cc
@@ -34,7 +34,6 @@
 #include "shill/hook_table.h"
 #include "shill/key_file_store.h"
 #include "shill/logging.h"
-#include "shill/metrics.h"
 #include "shill/profile.h"
 #include "shill/property_accessor.h"
 #include "shill/proxy_factory.h"
@@ -61,7 +60,9 @@
 const char Manager::kErrorTypeRequired[] = "must specify service type";
 const char Manager::kErrorUnsupportedServiceType[] =
     "service type is unsupported";
-const int Manager::kTerminationActionsTimeoutMilliseconds = 3000;
+// This timeout should be less than the upstart job timeout, otherwise
+// stats for termination actions might be lost.
+const int Manager::kTerminationActionsTimeoutMilliseconds = 4500;
 const char Manager::kPowerManagerKey[] = "manager";
 
 Manager::Manager(ControlInterface *control_interface,
@@ -899,6 +900,17 @@
   termination_actions_.Run(kTerminationActionsTimeoutMilliseconds, done);
 }
 
+bool Manager::RunTerminationActionsAndNotifyMetrics(
+        const base::Callback<void(const Error &)> &done,
+        Metrics::TerminationActionReason reason) {
+  if (termination_actions_.IsEmpty())
+    return false;
+
+  metrics_->NotifyTerminationActionsStarted(reason);
+  RunTerminationActions(done);
+  return true;
+}
+
 int Manager::RegisterDefaultServiceCallback(const ServiceCallback &callback) {
   default_service_callbacks_[++default_service_callback_tag_] = callback;
   return default_service_callback_tag_;
@@ -948,13 +960,19 @@
 }
 
 void Manager::OnSuspendDelay(uint32 sequence_number) {
-  RunTerminationActions(
-      Bind(&Manager::OnSuspendActionsComplete, AsWeakPtr(), sequence_number));
+  if (!RunTerminationActionsAndNotifyMetrics(
+       Bind(&Manager::OnSuspendActionsComplete, AsWeakPtr(), sequence_number),
+       Metrics::kTerminationActionReasonSuspend)) {
+    LOG(INFO) << "No suspend actions were run.";
+    power_manager_->SuspendReady(sequence_number);
+  }
 }
 
 void Manager::OnSuspendActionsComplete(uint32 sequence_number,
                                        const Error &error) {
   LOG(INFO) << "Finished suspend actions.  Result: " << error;
+  metrics_->NotifyTerminationActionsCompleted(
+      Metrics::kTerminationActionReasonSuspend, error.IsSuccess());
   power_manager_->SuspendReady(sequence_number);
 }
 
diff --git a/manager.h b/manager.h
index f9aa35a..58ffba3 100644
--- a/manager.h
+++ b/manager.h
@@ -21,6 +21,7 @@
 #include "shill/device_info.h"
 #include "shill/event_dispatcher.h"
 #include "shill/hook_table.h"
+#include "shill/metrics.h"
 #include "shill/modem_info.h"
 #include "shill/power_manager.h"
 #include "shill/property_store.h"
@@ -37,7 +38,6 @@
 class Error;
 class EventDispatcher;
 class ManagerAdaptorInterface;
-class Metrics;
 class Resolver;
 
 class Manager : public base::SupportsWeakPtr<Manager> {
@@ -238,10 +238,17 @@
   // Removes the action associtated with |name|.
   void RemoveTerminationAction(const std::string &name);
 
-  // Runs the termination actions.  If all actions complete within
+  // Runs the termination actions and notifies the metrics framework
+  // that the termination actions started running, only if any termination
+  // actions have been registered. |reason| specifies whether this method was
+  // called due to termination or suspend. If all actions complete within
   // |kTerminationActionsTimeoutMilliseconds|, |done| is called with a value of
-  // Error::kSuccess.  Otherwise, it is called with Error::kOperationTimeout.
-  void RunTerminationActions(const base::Callback<void(const Error &)> &done);
+  // Error::kSuccess. Otherwise, it is called with Error::kOperationTimeout.
+  //
+  // Returns true, if termination actions were run.
+  bool RunTerminationActionsAndNotifyMetrics(
+      const base::Callback<void(const Error &)> &done,
+      Metrics::TerminationActionReason reason);
 
   // Registers a |callback| that's invoked whenever the default service
   // changes. Returns a unique tag that can be used to deregister the
@@ -250,10 +257,13 @@
   virtual void DeregisterDefaultServiceCallback(int tag);
 
  private:
+  friend class CellularTest;
   friend class ManagerAdaptorInterface;
   friend class ManagerTest;
   friend class WiFiObjectTest;
   friend class WiMaxProviderTest;
+
+  FRIEND_TEST(CellularTest, ConnectAddsTerminationAction);
   FRIEND_TEST(ManagerTest, AvailableTechnologies);
   FRIEND_TEST(ManagerTest, CalculateStateOffline);
   FRIEND_TEST(ManagerTest, CalculateStateOnline);
@@ -267,6 +277,7 @@
   FRIEND_TEST(ManagerTest, LinkMonitorEnabled);
   FRIEND_TEST(ManagerTest, NotifyDefaultServiceChanged);
   FRIEND_TEST(ManagerTest, PopProfileWithUnload);
+  FRIEND_TEST(ManagerTest, RunTerminationActions);
   FRIEND_TEST(ManagerTest, SortServices);
   FRIEND_TEST(ManagerTest, SortServicesWithConnection);
   FRIEND_TEST(ManagerTest, StartupPortalList);
@@ -334,6 +345,11 @@
 
   void NotifyDefaultServiceChanged(const ServiceRefPtr &service);
 
+  // Runs the termination actions.  If all actions complete within
+  // |kTerminationActionsTimeoutMilliseconds|, |done| is called with a value of
+  // Error::kSuccess.  Otherwise, it is called with Error::kOperationTimeout.
+  void RunTerminationActions(const base::Callback<void(const Error &)> &done);
+
   void OnPowerStateChanged(PowerManagerProxyDelegate::SuspendState power_state);
   void OnSuspendDelay(uint32 sequence_number);
 
diff --git a/metrics.cc b/metrics.cc
index 578f596..2d59720 100644
--- a/metrics.cc
+++ b/metrics.cc
@@ -87,8 +87,16 @@
 
 const char Metrics::kMetricPortalResult[] = "Network.Shill.%s.PortalResult";
 
-const char Metrics::kMetricTerminationActionResult[] =
-    "Network.Shill.TerminationActionResult";
+const char Metrics::kMetricTerminationActionTimeOnTerminate[] =
+    "Network.Shill.TerminationActionTime.OnTerminate";
+const char Metrics::kMetricTerminationActionResultOnTerminate[] =
+    "Network.Shill.TerminationActionResult.OnTerminate";
+const char Metrics::kMetricTerminationActionTimeOnSuspend[] =
+    "Network.Shill.TerminationActionTime.OnSuspend";
+const char Metrics::kMetricTerminationActionResultOnSuspend[] =
+    "Network.Shill.TerminationActionResult.OnSuspend";
+const int Metrics::kMetricTerminationActionTimeMillisecondsMax = 10000;
+const int Metrics::kMetricTerminationActionTimeMillisecondsMin = 1;
 
 // static
 const uint16 Metrics::kWiFiBandwidth5MHz = 5;
@@ -151,6 +159,7 @@
       time_online_timer_(new chromeos_metrics::Timer),
       time_to_drop_timer_(new chromeos_metrics::Timer),
       time_resume_to_ready_timer_(new chromeos_metrics::Timer),
+      time_termination_actions_timer(new chromeos_metrics::Timer),
       collect_bootstats_(true) {
   metrics_library_.Init();
   chromeos_metrics::TimerReporter::set_metrics_lib(library_);
@@ -421,6 +430,47 @@
   }
 }
 
+void Metrics::NotifyTerminationActionsStarted(
+    TerminationActionReason /*reason*/) {
+  if (time_termination_actions_timer->HasStarted())
+    return;
+  time_termination_actions_timer->Start();
+}
+
+void Metrics::NotifyTerminationActionsCompleted(
+    TerminationActionReason reason, bool success) {
+  if (!time_termination_actions_timer->HasStarted())
+    return;
+
+  int result = success ? kTerminationActionResultSuccess :
+                         kTerminationActionResultFailure;
+
+  base::TimeDelta elapsed_time;
+  time_termination_actions_timer->GetElapsedTime(&elapsed_time);
+  time_termination_actions_timer->Reset();
+  string time_metric, result_metric;
+  switch (reason) {
+  case kTerminationActionReasonSuspend:
+    time_metric = kMetricTerminationActionTimeOnSuspend;
+    result_metric = kMetricTerminationActionResultOnSuspend;
+    break;
+  case kTerminationActionReasonTerminate:
+    time_metric = kMetricTerminationActionTimeOnTerminate;
+    result_metric = kMetricTerminationActionResultOnTerminate;
+    break;
+  }
+
+  SendToUMA(time_metric,
+            elapsed_time.InMilliseconds(),
+            kMetricTerminationActionTimeMillisecondsMin,
+            kMetricTerminationActionTimeMillisecondsMax,
+            kTimerHistogramNumBuckets);
+
+  SendEnumToUMA(result_metric,
+                result,
+                kTerminationActionResultMax);
+}
+
 void Metrics::NotifyLinkMonitorFailure(
     Technology::Identifier technology,
     LinkMonitorFailure failure,
diff --git a/metrics.h b/metrics.h
index d0d070c..a6c8cf8 100644
--- a/metrics.h
+++ b/metrics.h
@@ -140,11 +140,16 @@
   };
 
   enum TerminationActionResult {
-    kTerminationActionSuccess,
-    kTerminationActionFailure,
+    kTerminationActionResultSuccess,
+    kTerminationActionResultFailure,
     kTerminationActionResultMax
   };
 
+  enum TerminationActionReason {
+    kTerminationActionReasonSuspend,
+    kTerminationActionReasonTerminate
+  };
+
   static const char kMetricDisconnect[];
   static const int kMetricDisconnectMax;
   static const int kMetricDisconnectMin;
@@ -220,7 +225,12 @@
   static const char kMetricLinkApDisconnectType[];
 
   // Shill termination action statistics.
-  static const char kMetricTerminationActionResult[];
+  static const char kMetricTerminationActionTimeOnTerminate[];
+  static const char kMetricTerminationActionResultOnTerminate[];
+  static const char kMetricTerminationActionTimeOnSuspend[];
+  static const char kMetricTerminationActionResultOnSuspend[];
+  static const int kMetricTerminationActionTimeMillisecondsMax;
+  static const int kMetricTerminationActionTimeMillisecondsMin;
 
   Metrics();
   virtual ~Metrics();
@@ -268,6 +278,14 @@
   // Notifies this object of a power management state change.
   void NotifyPowerStateChange(PowerManager::SuspendState new_state);
 
+  // Notifies this object that termination actions started executing.
+  void NotifyTerminationActionsStarted(TerminationActionReason reason);
+
+  // Notifies this object that termination actions have been completed.
+  // |success| is true, if the termination actions completed successfully.
+  void NotifyTerminationActionsCompleted(
+      TerminationActionReason reason, bool success);
+
   // Notifies this object of a failure in LinkMonitor.
   void NotifyLinkMonitorFailure(
       Technology::Identifier technology,
@@ -354,6 +372,10 @@
   void set_time_resume_to_ready_timer(chromeos_metrics::Timer *timer) {
     time_resume_to_ready_timer_.reset(timer);  // Passes ownership
   }
+  void set_time_termination_actions_timer(
+    chromeos_metrics::Timer *timer) {
+    time_termination_actions_timer.reset(timer);  // Passes ownership
+  }
 
   // |library_| points to |metrics_library_| when shill runs normally.
   // However, in order to allow for unit testing, we point |library_| to a
@@ -366,6 +388,7 @@
   scoped_ptr<chromeos_metrics::Timer> time_online_timer_;
   scoped_ptr<chromeos_metrics::Timer> time_to_drop_timer_;
   scoped_ptr<chromeos_metrics::Timer> time_resume_to_ready_timer_;
+  scoped_ptr<chromeos_metrics::Timer> time_termination_actions_timer;
   bool collect_bootstats_;
 
   DISALLOW_COPY_AND_ASSIGN(Metrics);
diff --git a/shill_daemon.cc b/shill_daemon.cc
index ce8a26a..ea17007 100644
--- a/shill_daemon.cc
+++ b/shill_daemon.cc
@@ -76,18 +76,18 @@
   // Stop() prevents autoconnect from attempting to immediately connect to
   // services after they have been disconnected.
   Stop();
-  manager_->RunTerminationActions(Bind(&Daemon::TerminationActionsCompleted,
-                                       Unretained(this)));
+  if (!manager_->RunTerminationActionsAndNotifyMetrics(
+      Bind(&Daemon::TerminationActionsCompleted, Unretained(this)),
+      Metrics::kTerminationActionReasonTerminate)) {
+    SLOG(Daemon, 1) << "No termination actions were run";
+    dispatcher_.PostTask(MessageLoop::QuitClosure());
+  }
 }
 
-void Daemon::TerminationActionsCompleted(const Error & error) {
+void Daemon::TerminationActionsCompleted(const Error &error) {
   SLOG(Daemon, 1) << "Finished termination actions.  Result: " << error;
-  Metrics::TerminationActionResult result = error.IsSuccess() ?
-      Metrics::kTerminationActionSuccess :
-      Metrics::kTerminationActionFailure;
-  metrics_.SendEnumToUMA(Metrics::kMetricTerminationActionResult,
-                         result,
-                         Metrics::kTerminationActionResultMax);
+  metrics_.NotifyTerminationActionsCompleted(
+      Metrics::kTerminationActionReasonTerminate, error.IsSuccess());
   dispatcher_.PostTask(MessageLoop::QuitClosure());
 }