shill: LinkMonitor: Add more metrics

LinkMonitorSecondsToFailure, LinkMonitorUnicastErrorsAtFailure,
and LinkMonitorBroadcastErrorsAtFailure.

BUG=chromium-os:32600
TEST=Unit tests.

Change-Id: I0a90deee5675ebfb665a76f8e7707eaaecc23ce9
Reviewed-on: https://gerrit.chromium.org/gerrit/29792
Reviewed-by: mukesh agrawal <quiche@chromium.org>
Commit-Ready: Paul Stewart <pstew@chromium.org>
Tested-by: Paul Stewart <pstew@chromium.org>
diff --git a/link_monitor.cc b/link_monitor.cc
index 28ebd5b..1d8dd1b 100644
--- a/link_monitor.cc
+++ b/link_monitor.cc
@@ -59,13 +59,16 @@
     connection_->interface_index(), &local_mac_address_)) {
     LOG(ERROR) << "Could not get local MAC address.";
     metrics_->NotifyLinkMonitorFailure(
-        connection_->technology(), Metrics::kLinkMonitorMacAddressNotFound);
+        connection_->technology(),
+        Metrics::kLinkMonitorMacAddressNotFound,
+        0, 0, 0);
     Stop();
     return false;
   }
   gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
   send_request_callback_.Reset(
       Bind(&LinkMonitor::SendRequestTask, Unretained(this)));
+  time_->GetTimeMonotonic(&started_monitoring_at_);
   return SendRequest();
 }
 
@@ -81,6 +84,7 @@
   response_sample_count_ = 0;
   receive_response_handler_.reset();
   send_request_callback_.Cancel();
+  timerclear(&started_monitoring_at_);
   timerclear(&sent_request_at_);
 }
 
@@ -146,10 +150,19 @@
                << unicast_failure_count_
                << " unicast failures.";
     failure_callback_.Run();
-    Stop();
+
+    struct timeval now, elapsed_time;
+    time_->GetTimeMonotonic(&now);
+    timersub(&now, &started_monitoring_at_, &elapsed_time);
+
     metrics_->NotifyLinkMonitorFailure(
         connection_->technology(),
-        Metrics::kLinkMonitorFailureThresholdReached);
+        Metrics::kLinkMonitorFailureThresholdReached,
+        elapsed_time.tv_sec,
+        broadcast_failure_count_,
+        unicast_failure_count_);
+
+    Stop();
     return true;
   }
   is_unicast_ = !is_unicast_;
@@ -216,7 +229,9 @@
       LOG(ERROR) << "Failed to start ARP client.";
       Stop();
       metrics_->NotifyLinkMonitorFailure(
-          connection_->technology(), Metrics::kLinkMonitorClientStartFailure);
+          connection_->technology(),
+          Metrics::kLinkMonitorClientStartFailure,
+          0, 0, 0);
       return false;
     }
   } else if (AddMissedResponse()) {
@@ -251,7 +266,8 @@
     LOG(ERROR) << "Failed to send ARP request.  Stopping.";
     Stop();
     metrics_->NotifyLinkMonitorFailure(
-        connection_->technology(), Metrics::kLinkMonitorTransmitFailure);
+        connection_->technology(), Metrics::kLinkMonitorTransmitFailure,
+        0, 0, 0);
     return false;
   }
 
diff --git a/link_monitor.h b/link_monitor.h
index a3e0a79..60a4057 100644
--- a/link_monitor.h
+++ b/link_monitor.h
@@ -32,6 +32,11 @@
  public:
   typedef base::Closure FailureCallback;
 
+  // When the sum of consecutive unicast and broadcast failures
+  // equals this value, the failure callback is called, the counters
+  // are reset, and the link monitoring quiesces.  Needed by Metrics.
+  static const unsigned int kFailureThreshold;
+
   // The number of milliseconds between ARP requests.  Needed by Metrics.
   static const unsigned int kTestPeriodMilliseconds;
 
@@ -57,11 +62,6 @@
   friend class LinkMonitorForTest;
   friend class LinkMonitorTest;
 
-  // When the sum of consecutive unicast and broadcast failures
-  // equals this value, the failure callback is called, the counters
-  // are reset, and the link monitoring quiesces.
-  static const unsigned int kFailureThreshold;
-
   // The number of samples to compute a "strict" average over.  When
   // more samples than this number arrive, this determines how "slow"
   // our simple low-pass filter works.
@@ -119,6 +119,9 @@
   // void callback function SendRequestTask().
   base::CancelableClosure send_request_callback_;
 
+  // The time at which the link monitor started.
+  struct timeval started_monitoring_at_;
+
   // The time at which the last ARP request was sent.
   struct timeval sent_request_at_;
   Time *time_;
diff --git a/link_monitor_unittest.cc b/link_monitor_unittest.cc
index 1aedc8f..27aaf52 100644
--- a/link_monitor_unittest.cc
+++ b/link_monitor_unittest.cc
@@ -96,6 +96,8 @@
       ScopeLogger::GetInstance()->set_verbose_level(4);
     }
     monitor_.time_ = &time_;
+    time_val_.tv_sec = 0;
+    time_val_.tv_usec = 0;
     EXPECT_CALL(time_, GetTimeMonotonic(_))
         .WillRepeatedly(DoAll(SetArgumentPointee<0>(time_val_), Return(0)));
     EXPECT_TRUE(local_ip_.SetAddressFromString(kLocalIPAddress));
@@ -345,8 +347,14 @@
       HasSubstr("LinkMonitorResponseTimeSample"), GetTestPeriodMilliseconds(),
       _, _, _)).Times(GetFailureThreshold());
   StartMonitor();
+  // This value doesn't match real life (the timer in this scenario should
+  // advance by LinkMonitor::kTestPeriodMilliseconds), but this demonstrates
+  // the LinkMonitorSecondsToFailure independent from the response-time
+  // figures.
+  const int kTimeIncrement = 1000;
   for (unsigned int i = 1; i < GetFailureThreshold(); ++i) {
     ExpectTransmit(false);
+    AdvanceTime(kTimeIncrement);
     TriggerRequestTimer();
     EXPECT_FALSE(IsUnicast());
     EXPECT_EQ(i, GetBroadcastFailureCount());
@@ -362,9 +370,17 @@
   EXPECT_CALL(metrics_, SendEnumToUMA(
       HasSubstr("LinkMonitorFailure"),
       Metrics::kLinkMonitorFailureThresholdReached, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("LinkMonitorSecondsToFailure"),
+      kTimeIncrement * GetFailureThreshold() / 1000, _, _, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("BroadcastErrorsAtFailure"), GetFailureThreshold(), _, _, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("UnicastErrorsAtFailure"), 0, _, _, _));
   EXPECT_FALSE(GetSendRequestCallback().IsCancelled());
   ExpectNoTransmit();
   EXPECT_CALL(monitor_, FailureCallback());
+  AdvanceTime(kTimeIncrement);
   TriggerRequestTimer();
   ExpectReset();
 }
@@ -405,6 +421,12 @@
   EXPECT_CALL(metrics_, SendEnumToUMA(
       HasSubstr("LinkMonitorFailure"),
       Metrics::kLinkMonitorFailureThresholdReached, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("LinkMonitorSecondsToFailure"), 0, _, _, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("BroadcastErrorsAtFailure"), 0, _, _, _));
+  EXPECT_CALL(metrics_, SendToUMA(
+      HasSubstr("UnicastErrorsAtFailure"), GetFailureThreshold(), _, _, _));
   EXPECT_FALSE(GetSendRequestCallback().IsCancelled());
   ExpectNoTransmit();
   EXPECT_CALL(monitor_, FailureCallback());
diff --git a/metrics.cc b/metrics.cc
index 725ef21..dc83a50 100644
--- a/metrics.cc
+++ b/metrics.cc
@@ -105,11 +105,24 @@
     "Network.Shill.%s.LinkMonitorFailure";
 const char Metrics::kMetricLinkMonitorResponseTimeSample[] =
     "Network.Shill.%s.LinkMonitorResponseTimeSample";
-const int Metrics::kMetricLinkMonitorResponseTimeSampleMin = 0;
-const int Metrics::kMetricLinkMonitorResponseTimeSampleMax =
+const unsigned int Metrics::kMetricLinkMonitorResponseTimeSampleMin = 0;
+const unsigned int Metrics::kMetricLinkMonitorResponseTimeSampleMax =
     LinkMonitor::kTestPeriodMilliseconds;
 const int Metrics::kMetricLinkMonitorResponseTimeSampleNumBuckets = 50;
-
+const char Metrics::kMetricLinkMonitorSecondsToFailure[] =
+    "Network.Shill.%s.LinkMonitorSecondsToFailure";
+const unsigned int Metrics::kMetricLinkMonitorSecondsToFailureMin = 0;
+const unsigned int Metrics::kMetricLinkMonitorSecondsToFailureMax = 7200;
+const int Metrics::kMetricLinkMonitorSecondsToFailureNumBuckets = 50;
+const char Metrics::kMetricLinkMonitorBroadcastErrorsAtFailure[] =
+    "Network.Shill.%s.LinkMonitorBroadcastErrorsAtFailure";
+const char Metrics::kMetricLinkMonitorUnicastErrorsAtFailure[] =
+    "Network.Shill.%s.LinkMonitorUnicastErrorsAtFailure";
+const unsigned int Metrics::kMetricLinkMonitorErrorCountMin = 0;
+const unsigned int Metrics::kMetricLinkMonitorErrorCountMax =
+    LinkMonitor::kFailureThreshold;
+const int Metrics::kMetricLinkMonitorErrorCountNumBuckets =
+    LinkMonitor::kFailureThreshold + 1;
 
 Metrics::Metrics()
     : library_(&metrics_library_),
@@ -389,10 +402,41 @@
 }
 
 void Metrics::NotifyLinkMonitorFailure(
-    Technology::Identifier technology, LinkMonitorFailure failure) {
+    Technology::Identifier technology,
+    LinkMonitorFailure failure,
+    unsigned int seconds_to_failure,
+    unsigned int broadcast_error_count,
+    unsigned int unicast_error_count) {
   string histogram = GetFullMetricName(kMetricLinkMonitorFailure,
                                        technology);
   SendEnumToUMA(histogram, failure, kLinkMonitorFailureMax);
+
+  if (failure == kLinkMonitorFailureThresholdReached) {
+    if (seconds_to_failure > kMetricLinkMonitorSecondsToFailureMax) {
+      seconds_to_failure = kMetricLinkMonitorSecondsToFailureMax;
+    }
+    histogram = GetFullMetricName(kMetricLinkMonitorSecondsToFailure,
+                                  technology);
+    SendToUMA(histogram,
+              seconds_to_failure,
+              kMetricLinkMonitorSecondsToFailureMin,
+              kMetricLinkMonitorSecondsToFailureMax,
+              kMetricLinkMonitorSecondsToFailureNumBuckets);
+    histogram = GetFullMetricName(kMetricLinkMonitorBroadcastErrorsAtFailure,
+                                  technology);
+    SendToUMA(histogram,
+              broadcast_error_count,
+              kMetricLinkMonitorErrorCountMin,
+              kMetricLinkMonitorErrorCountMax,
+              kMetricLinkMonitorErrorCountNumBuckets);
+    histogram = GetFullMetricName(kMetricLinkMonitorUnicastErrorsAtFailure,
+                                  technology);
+    SendToUMA(histogram,
+              unicast_error_count,
+              kMetricLinkMonitorErrorCountMin,
+              kMetricLinkMonitorErrorCountMax,
+              kMetricLinkMonitorErrorCountNumBuckets);
+  }
 }
 
 void Metrics::NotifyLinkMonitorResponseTimeSampleAdded(
diff --git a/metrics.h b/metrics.h
index d241249..9e7ea0e 100644
--- a/metrics.h
+++ b/metrics.h
@@ -177,9 +177,18 @@
   // LinkMonitor statistics.
   static const char kMetricLinkMonitorFailure[];
   static const char kMetricLinkMonitorResponseTimeSample[];
-  static const int kMetricLinkMonitorResponseTimeSampleMin;
-  static const int kMetricLinkMonitorResponseTimeSampleMax;
+  static const unsigned int kMetricLinkMonitorResponseTimeSampleMin;
+  static const unsigned int kMetricLinkMonitorResponseTimeSampleMax;
   static const int kMetricLinkMonitorResponseTimeSampleNumBuckets;
+  static const char kMetricLinkMonitorSecondsToFailure[];
+  static const unsigned int kMetricLinkMonitorSecondsToFailureMin;
+  static const unsigned int kMetricLinkMonitorSecondsToFailureMax;
+  static const int kMetricLinkMonitorSecondsToFailureNumBuckets;
+  static const char kMetricLinkMonitorBroadcastErrorsAtFailure[];
+  static const char kMetricLinkMonitorUnicastErrorsAtFailure[];
+  static const unsigned int kMetricLinkMonitorErrorCountMin;
+  static const unsigned int kMetricLinkMonitorErrorCountMax;
+  static const int kMetricLinkMonitorErrorCountNumBuckets;
 
   Metrics();
   virtual ~Metrics();
@@ -229,7 +238,11 @@
 
   // Notifies this object of a failure in LinkMonitor.
   void NotifyLinkMonitorFailure(
-      Technology::Identifier technology, LinkMonitorFailure failure);
+      Technology::Identifier technology,
+      LinkMonitorFailure failure,
+      unsigned int seconds_to_failure,
+      unsigned int broadcast_error_count,
+      unsigned int unicast_error_count);
 
   // Notifies this object that LinkMonitor has added a response time sample
   // for |connection| with a value of |response_time_milliseconds|.