shill: suppress reassociate attempt when network is unreliable

A network is considered unreliable when encountering multiple link
failures in a short period of time (currently set to 60 minutes). When
a network is unreliable, reassociate attempts will be skipped when link
failures are detected.

The unreliable status of the network is reset when the device resumes
from suspend, when the selected network changes, or after a period of
time without link failures following reconnection.

This will avoid unnecessary disconnect/reconnect attempts when the
network is unreliable due to other unforeseen factors.

BUG=chromium:472828
TEST=unittest

Change-Id: I2037d08692a9abec48afdb974a1e028c85eb2ad0
Reviewed-on: https://chromium-review.googlesource.com/263953
Trybot-Ready: Zeping Qiu <zqiu@chromium.org>
Tested-by: Zeping Qiu <zqiu@chromium.org>
Reviewed-by: Paul Stewart <pstew@chromium.org>
Commit-Queue: Zeping Qiu <zqiu@chromium.org>
diff --git a/device.cc b/device.cc
index 411e262..376722b 100644
--- a/device.cc
+++ b/device.cc
@@ -109,7 +109,7 @@
 
 // static
 const int Device::kDNSTimeoutMilliseconds = 5000;
-const int Device::kLinkMonitorFailureUnreliableThresholdSeconds = 5 * 60;
+const int Device::kLinkUnreliableThresholdSeconds = 60 * 60;
 
 Device::Device(ControlInterface *control_interface,
                EventDispatcher *dispatcher,
@@ -467,6 +467,10 @@
   }
   // Resume from sleep, could be in different location now.
   // Ignore previous link monitor failures.
+  if (selected_service_) {
+    selected_service_->set_unreliable(false);
+    reliable_link_callback_.Cancel();
+  }
   last_link_monitor_failed_time_ = 0;
 }
 
@@ -961,7 +965,17 @@
       Metrics::kMetricExpiredLeaseLengthSecondsNumBuckets);
 }
 
-void Device::OnConnected() {}
+void Device::OnConnected() {
+  if (selected_service_->unreliable()) {
+    // Post a delayed task to reset link back to reliable if no link
+    // failure is detected within kLinkUnreliableThresholdSeconds.
+    reliable_link_callback_.Reset(
+        base::Bind(&Device::OnReliableLink, base::Unretained(this)));
+    dispatcher_->PostDelayedTask(
+        reliable_link_callback_.callback(),
+        kLinkUnreliableThresholdSeconds * 1000);
+  }
+}
 
 void Device::OnConnectionUpdated() {
   if (selected_service_) {
@@ -1008,6 +1022,9 @@
     // Just in case the Device subclass has not already done so, make
     // sure the previously selected service has its connection removed.
     selected_service_->SetConnection(nullptr);
+    // Reset link status for the previously selected service.
+    selected_service_->set_unreliable(false);
+    reliable_link_callback_.Cancel();
     StopAllActivities();
   }
 
@@ -1220,10 +1237,21 @@
 }
 
 void Device::OnUnreliableLink() {
+  SLOG(this, 2) << "Device " << FriendlyName()
+                << ": Link is unreliable.";
+  selected_service_->set_unreliable(true);
+  reliable_link_callback_.Cancel();
   metrics_->NotifyUnreliableLinkSignalStrength(
       technology_, selected_service_->strength());
 }
 
+void Device::OnReliableLink() {
+  SLOG(this, 2) << "Device " << FriendlyName()
+                << ": Link is reliable.";
+  selected_service_->set_unreliable(false);
+  // TODO(zqiu): report signal strength to UMA.
+}
+
 void Device::OnLinkMonitorFailure() {
   SLOG(this, 2) << "Device " << FriendlyName()
                 << ": Link Monitor indicates failure.";
@@ -1234,8 +1262,8 @@
   time_t now;
   time_->GetSecondsBoottime(&now);
 
-  if (now - last_link_monitor_failed_time_ <
-          kLinkMonitorFailureUnreliableThresholdSeconds) {
+  if (last_link_monitor_failed_time_ != 0 &&
+      now - last_link_monitor_failed_time_ < kLinkUnreliableThresholdSeconds) {
     OnUnreliableLink();
   }
   last_link_monitor_failed_time_ = now;