blob: 1b925fa99e22fbb7b65e53803ae06b583e786cba [file] [log] [blame]
Paul Stewart3f43f432012-07-16 12:12:45 -07001// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "shill/link_monitor.h"
6
Paul Stewartf1961f82012-09-11 20:45:39 -07007#include <string>
Paul Stewart6c72c972012-07-27 11:29:20 -07008#include <vector>
Paul Stewart3f43f432012-07-16 12:12:45 -07009
Paul Stewart6c72c972012-07-27 11:29:20 -070010#include <base/bind.h>
Ben Chana0ddf462014-02-06 11:32:42 -080011#include <base/strings/stringprintf.h>
12#include <base/strings/string_util.h>
Paul Stewart6c72c972012-07-27 11:29:20 -070013
14#include "shill/arp_client.h"
15#include "shill/arp_packet.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070016#include "shill/connection.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070017#include "shill/device_info.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070018#include "shill/event_dispatcher.h"
Christopher Wileyb691efd2012-08-09 13:51:51 -070019#include "shill/logging.h"
Paul Stewartff845fc2012-08-07 07:28:44 -070020#include "shill/metrics.h"
Peter Qiu8d6b5972014-10-28 15:33:34 -070021#include "shill/net/ip_address.h"
22#include "shill/net/shill_time.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070023
24using base::Bind;
25using base::Unretained;
26using std::string;
Paul Stewart3f43f432012-07-16 12:12:45 -070027
28namespace shill {
29
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -070030namespace Logging {
31static auto kModuleLogScope = ScopeLogger::kLink;
32static string ObjectID(Connection *c) { return c->interface_name(); }
33}
34
mukesh agrawalbb2231c2013-07-17 16:32:24 -070035const int LinkMonitor::kDefaultTestPeriodMilliseconds = 5000;
Paul Stewart036dba02012-08-07 12:34:41 -070036const char LinkMonitor::kDefaultLinkMonitorTechnologies[] = "wifi";
Paul Stewartf1961f82012-09-11 20:45:39 -070037const int LinkMonitor::kFailureThreshold = 5;
mukesh agrawalbb2231c2013-07-17 16:32:24 -070038const int LinkMonitor::kFastTestPeriodMilliseconds = 200;
Paul Stewartf1961f82012-09-11 20:45:39 -070039const int LinkMonitor::kMaxResponseSampleFilterDepth = 5;
Paul Stewartb434ce52013-09-23 13:53:49 -070040const int LinkMonitor::kUnicastReplyReliabilityThreshold = 10;
Paul Stewart6c72c972012-07-27 11:29:20 -070041
Paul Stewart3f43f432012-07-16 12:12:45 -070042LinkMonitor::LinkMonitor(const ConnectionRefPtr &connection,
43 EventDispatcher *dispatcher,
Paul Stewartff845fc2012-08-07 07:28:44 -070044 Metrics *metrics,
Paul Stewart6c72c972012-07-27 11:29:20 -070045 DeviceInfo *device_info,
Peter Qiub5d124f2014-04-14 12:05:02 -070046 const FailureCallback &failure_callback,
47 const GatewayChangeCallback &gateway_change_callback)
Paul Stewart3f43f432012-07-16 12:12:45 -070048 : connection_(connection),
49 dispatcher_(dispatcher),
Paul Stewartff845fc2012-08-07 07:28:44 -070050 metrics_(metrics),
Paul Stewart6c72c972012-07-27 11:29:20 -070051 device_info_(device_info),
Paul Stewart3f43f432012-07-16 12:12:45 -070052 failure_callback_(failure_callback),
Peter Qiub5d124f2014-04-14 12:05:02 -070053 gateway_change_callback_(gateway_change_callback),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070054 test_period_milliseconds_(kDefaultTestPeriodMilliseconds),
Paul Stewart6c72c972012-07-27 11:29:20 -070055 broadcast_failure_count_(0),
56 unicast_failure_count_(0),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070057 broadcast_success_count_(0),
58 unicast_success_count_(0),
Paul Stewart6c72c972012-07-27 11:29:20 -070059 is_unicast_(false),
Paul Stewartb434ce52013-09-23 13:53:49 -070060 gateway_supports_unicast_arp_(false),
Paul Stewart6c72c972012-07-27 11:29:20 -070061 response_sample_count_(0),
62 response_sample_bucket_(0),
Paul Stewartf1961f82012-09-11 20:45:39 -070063 time_(Time::GetInstance()) {
64}
Paul Stewart3f43f432012-07-16 12:12:45 -070065
Paul Stewart6c72c972012-07-27 11:29:20 -070066LinkMonitor::~LinkMonitor() {
67 Stop();
68}
Paul Stewart3f43f432012-07-16 12:12:45 -070069
70bool LinkMonitor::Start() {
Paul Stewart6c72c972012-07-27 11:29:20 -070071 Stop();
mukesh agrawalbb2231c2013-07-17 16:32:24 -070072 return StartInternal(kDefaultTestPeriodMilliseconds);
73}
74
75bool LinkMonitor::StartInternal(int probe_period_milliseconds) {
76 test_period_milliseconds_ = probe_period_milliseconds;
77 if (test_period_milliseconds_ > kDefaultTestPeriodMilliseconds) {
78 LOG(WARNING) << "Long test period; UMA stats will be truncated.";
79 }
Paul Stewart6c72c972012-07-27 11:29:20 -070080
81 if (!device_info_->GetMACAddress(
mukesh agrawalbb2231c2013-07-17 16:32:24 -070082 connection_->interface_index(), &local_mac_address_)) {
Paul Stewart6c72c972012-07-27 11:29:20 -070083 LOG(ERROR) << "Could not get local MAC address.";
Paul Stewartff845fc2012-08-07 07:28:44 -070084 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -070085 connection_->technology(),
86 Metrics::kLinkMonitorMacAddressNotFound,
87 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -070088 Stop();
89 return false;
90 }
mukesh agrawalbb2231c2013-07-17 16:32:24 -070091 if (gateway_mac_address_.IsEmpty()) {
92 gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
93 }
Paul Stewart6c72c972012-07-27 11:29:20 -070094 send_request_callback_.Reset(
Paul Stewartf1961f82012-09-11 20:45:39 -070095 Bind(base::IgnoreResult(&LinkMonitor::SendRequest), Unretained(this)));
Paul Stewart0443aa52012-08-09 10:43:50 -070096 time_->GetTimeMonotonic(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -070097 return SendRequest();
Paul Stewart3f43f432012-07-16 12:12:45 -070098}
99
100void LinkMonitor::Stop() {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700101 SLOG(connection_, 2) << "In " << __func__ << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700102 local_mac_address_.Clear();
103 gateway_mac_address_.Clear();
104 arp_client_.reset();
105 broadcast_failure_count_ = 0;
106 unicast_failure_count_ = 0;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700107 broadcast_success_count_ = 0;
108 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700109 is_unicast_ = false;
Paul Stewartb434ce52013-09-23 13:53:49 -0700110 gateway_supports_unicast_arp_ = false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700111 response_sample_bucket_ = 0;
112 response_sample_count_ = 0;
113 receive_response_handler_.reset();
114 send_request_callback_.Cancel();
Paul Stewart0443aa52012-08-09 10:43:50 -0700115 timerclear(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700116 timerclear(&sent_request_at_);
117}
118
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700119void LinkMonitor::OnAfterResume() {
120 ByteString prior_gateway_mac_address(gateway_mac_address_);
Paul Stewartb434ce52013-09-23 13:53:49 -0700121 bool gateway_supports_unicast_arp = gateway_supports_unicast_arp_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700122 Stop();
123 gateway_mac_address_ = prior_gateway_mac_address;
Paul Stewartb434ce52013-09-23 13:53:49 -0700124 gateway_supports_unicast_arp_ = gateway_supports_unicast_arp;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700125 StartInternal(kFastTestPeriodMilliseconds);
126}
127
Paul Stewartf1961f82012-09-11 20:45:39 -0700128int LinkMonitor::GetResponseTimeMilliseconds() const {
Paul Stewart6c72c972012-07-27 11:29:20 -0700129 return response_sample_count_ ?
130 response_sample_bucket_ / response_sample_count_ : 0;
131}
132
Paul Stewartf1961f82012-09-11 20:45:39 -0700133void LinkMonitor::AddResponseTimeSample(int response_time_milliseconds) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700134 SLOG(connection_, 2) << "In " << __func__ << " with sample "
135 << response_time_milliseconds << ".";
Paul Stewartff845fc2012-08-07 07:28:44 -0700136 metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
137 connection_->technology(), response_time_milliseconds);
Paul Stewart6c72c972012-07-27 11:29:20 -0700138 response_sample_bucket_ += response_time_milliseconds;
139 if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
140 ++response_sample_count_;
141 } else {
142 response_sample_bucket_ =
143 response_sample_bucket_ * kMaxResponseSampleFilterDepth /
144 (kMaxResponseSampleFilterDepth + 1);
145 }
146}
147
148// static
149string LinkMonitor::HardwareAddressToString(const ByteString &address) {
150 std::vector<string> address_parts;
151 for (size_t i = 0; i < address.GetLength(); ++i) {
152 address_parts.push_back(
153 base::StringPrintf("%02x", address.GetConstData()[i]));
154 }
155 return JoinString(address_parts, ':');
156}
157
158bool LinkMonitor::CreateClient() {
159 arp_client_.reset(new ArpClient(connection_->interface_index()));
160
Paul Stewart417e5f02014-10-09 08:52:35 -0700161 if (!arp_client_->StartReplyListener()) {
Paul Stewartff845fc2012-08-07 07:28:44 -0700162 return false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700163 }
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700164 SLOG(connection_, 4) << "Created ARP client; listening on socket "
165 << arp_client_->socket() << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700166 receive_response_handler_.reset(
167 dispatcher_->CreateReadyHandler(
168 arp_client_->socket(),
169 IOHandler::kModeInput,
170 Bind(&LinkMonitor::ReceiveResponse, Unretained(this))));
171 return true;
172}
173
174bool LinkMonitor::AddMissedResponse() {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700175 SLOG(connection_, 2) << "In " << __func__ << ".";
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700176 AddResponseTimeSample(test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700177
178 if (is_unicast_) {
Paul Stewartb434ce52013-09-23 13:53:49 -0700179 if (gateway_supports_unicast_arp_) {
180 ++unicast_failure_count_;
181 }
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700182 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700183 } else {
184 ++broadcast_failure_count_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700185 broadcast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700186 }
187
188 if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
189 LOG(ERROR) << "Link monitor has reached the failure threshold with "
190 << broadcast_failure_count_
191 << " broadcast failures and "
192 << unicast_failure_count_
193 << " unicast failures.";
194 failure_callback_.Run();
Paul Stewart0443aa52012-08-09 10:43:50 -0700195
196 struct timeval now, elapsed_time;
197 time_->GetTimeMonotonic(&now);
198 timersub(&now, &started_monitoring_at_, &elapsed_time);
199
Paul Stewartff845fc2012-08-07 07:28:44 -0700200 metrics_->NotifyLinkMonitorFailure(
201 connection_->technology(),
Paul Stewart0443aa52012-08-09 10:43:50 -0700202 Metrics::kLinkMonitorFailureThresholdReached,
203 elapsed_time.tv_sec,
204 broadcast_failure_count_,
205 unicast_failure_count_);
206
207 Stop();
Paul Stewart6c72c972012-07-27 11:29:20 -0700208 return true;
209 }
210 is_unicast_ = !is_unicast_;
211 return false;
212}
213
Paul Stewart9f7823e2012-08-09 10:58:26 -0700214bool LinkMonitor::IsGatewayFound() const {
215 return !gateway_mac_address_.IsZero();
216}
217
Paul Stewart6c72c972012-07-27 11:29:20 -0700218void LinkMonitor::ReceiveResponse(int fd) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700219 SLOG(connection_, 2) << "In " << __func__ << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700220 ArpPacket packet;
221 ByteString sender;
Paul Stewart417e5f02014-10-09 08:52:35 -0700222 if (!arp_client_->ReceivePacket(&packet, &sender)) {
223 return;
224 }
225
226 if (!packet.IsReply()) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700227 SLOG(connection_, 4) << "This is not a reply packet. Ignoring.";
Paul Stewart6c72c972012-07-27 11:29:20 -0700228 return;
229 }
230
Paul Stewart9f7823e2012-08-09 10:58:26 -0700231 if (!connection_->local().address().Equals(
232 packet.remote_ip_address().address())) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700233 SLOG(connection_, 4) << "Response is not for our IP address.";
Paul Stewart6c72c972012-07-27 11:29:20 -0700234 return;
235 }
236
Paul Stewart9f7823e2012-08-09 10:58:26 -0700237 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700238 SLOG(connection_, 4) << "Response is not for our MAC address.";
Paul Stewart6c72c972012-07-27 11:29:20 -0700239 return;
240 }
241
Paul Stewart9f7823e2012-08-09 10:58:26 -0700242 if (!connection_->gateway().address().Equals(
243 packet.local_ip_address().address())) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700244 SLOG(connection_, 4) << "Response is not from the gateway IP address.";
Paul Stewart6c72c972012-07-27 11:29:20 -0700245 return;
246 }
247
248 struct timeval now, elapsed_time;
249 time_->GetTimeMonotonic(&now);
250 timersub(&now, &sent_request_at_, &elapsed_time);
251
252 AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
253 elapsed_time.tv_usec / 1000);
254
255 receive_response_handler_.reset();
256 arp_client_.reset();
257
258 if (is_unicast_) {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700259 ++unicast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700260 unicast_failure_count_ = 0;
Paul Stewartb434ce52013-09-23 13:53:49 -0700261 if (unicast_success_count_ >= kUnicastReplyReliabilityThreshold) {
262 SLOG_IF(Link, 2, !gateway_supports_unicast_arp_)
263 << "Gateway is now considered a reliable unicast responder. "
264 "Unicast failures will now count.";
265 gateway_supports_unicast_arp_ = true;
266 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700267 } else {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700268 ++broadcast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700269 broadcast_failure_count_ = 0;
270 }
271
Paul Stewart9f7823e2012-08-09 10:58:26 -0700272 if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
273 const ByteString &new_mac_address = packet.local_mac_address();
274 if (!IsGatewayFound()) {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700275 SLOG(connection_, 2) << "Found gateway at "
276 << HardwareAddressToString(new_mac_address);
Paul Stewart6c72c972012-07-27 11:29:20 -0700277 } else {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700278 SLOG(connection_, 2) << "Gateway MAC address changed.";
Paul Stewart6c72c972012-07-27 11:29:20 -0700279 }
280 gateway_mac_address_ = new_mac_address;
Peter Qiub5d124f2014-04-14 12:05:02 -0700281
282 // Notify device of the new gateway mac address.
283 gateway_change_callback_.Run();
Paul Stewart6c72c972012-07-27 11:29:20 -0700284 }
285
286 is_unicast_ = !is_unicast_;
Paul Stewartb434ce52013-09-23 13:53:49 -0700287 if ((unicast_success_count_ || !gateway_supports_unicast_arp_)
288 && broadcast_success_count_) {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700289 test_period_milliseconds_ = kDefaultTestPeriodMilliseconds;
290 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700291}
292
293bool LinkMonitor::SendRequest() {
Rebecca Silbersteinc9c31d82014-10-21 15:01:00 -0700294 SLOG(connection_, 2) << "In " << __func__ << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700295 if (!arp_client_.get()) {
296 if (!CreateClient()) {
297 LOG(ERROR) << "Failed to start ARP client.";
298 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700299 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700300 connection_->technology(),
301 Metrics::kLinkMonitorClientStartFailure,
302 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700303 return false;
304 }
305 } else if (AddMissedResponse()) {
306 // If an ARP client is still listening, this means we have timed
307 // out reception of the ARP reply.
308 return false;
309 } else {
310 // We already have an ArpClient instance running. These aren't
311 // bound sockets in the conventional sense, and we cannot distinguish
312 // which request (from which trial, or even from which component
313 // in the local system) an ARP reply was sent in response to.
314 // Therefore we keep the already open ArpClient in the case of
315 // a non-fatal timeout.
316 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700317 ByteString destination_mac_address(gateway_mac_address_.GetLength());
Paul Stewart9f7823e2012-08-09 10:58:26 -0700318 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700319 // The remote MAC addess is set by convention to be all-zeroes in the
320 // ARP header if not known. The ArpClient will translate an all-zeroes
321 // remote address into a send to the broadcast (all-ones) address in
322 // the Ethernet frame header.
323 SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
324 << "gateway MAC is unknown";
325 is_unicast_ = false;
326 } else if (is_unicast_) {
327 destination_mac_address = gateway_mac_address_;
328 }
329
330 ArpPacket request(connection_->local(), connection_->gateway(),
331 local_mac_address_, destination_mac_address);
332 if (!arp_client_->TransmitRequest(request)) {
333 LOG(ERROR) << "Failed to send ARP request. Stopping.";
334 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700335 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700336 connection_->technology(), Metrics::kLinkMonitorTransmitFailure,
337 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700338 return false;
339 }
340
341 time_->GetTimeMonotonic(&sent_request_at_);
342
343 dispatcher_->PostDelayedTask(send_request_callback_.callback(),
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700344 test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700345 return true;
346}
347
Paul Stewart3f43f432012-07-16 12:12:45 -0700348} // namespace shill