blob: 1047050446dd968af4c56533334dcecee3a647af [file] [log] [blame]
// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "shill/link_monitor.h"
6
Paul Stewartf1961f82012-09-11 20:45:39 -07007#include <string>
Paul Stewart6c72c972012-07-27 11:29:20 -07008#include <vector>
Paul Stewart3f43f432012-07-16 12:12:45 -07009
Paul Stewart6c72c972012-07-27 11:29:20 -070010#include <base/bind.h>
Ben Chana0ddf462014-02-06 11:32:42 -080011#include <base/strings/stringprintf.h>
12#include <base/strings/string_util.h>
Paul Stewart6c72c972012-07-27 11:29:20 -070013
14#include "shill/arp_client.h"
15#include "shill/arp_packet.h"
16#include "shill/byte_string.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070017#include "shill/connection.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070018#include "shill/device_info.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070019#include "shill/event_dispatcher.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070020#include "shill/ip_address.h"
Christopher Wileyb691efd2012-08-09 13:51:51 -070021#include "shill/logging.h"
Paul Stewartff845fc2012-08-07 07:28:44 -070022#include "shill/metrics.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070023#include "shill/shill_time.h"
24
25using base::Bind;
26using base::Unretained;
27using std::string;
Paul Stewart3f43f432012-07-16 12:12:45 -070028
29namespace shill {
30
mukesh agrawalbb2231c2013-07-17 16:32:24 -070031const int LinkMonitor::kDefaultTestPeriodMilliseconds = 5000;
Paul Stewart036dba02012-08-07 12:34:41 -070032const char LinkMonitor::kDefaultLinkMonitorTechnologies[] = "wifi";
Paul Stewartf1961f82012-09-11 20:45:39 -070033const int LinkMonitor::kFailureThreshold = 5;
mukesh agrawalbb2231c2013-07-17 16:32:24 -070034const int LinkMonitor::kFastTestPeriodMilliseconds = 200;
Paul Stewartf1961f82012-09-11 20:45:39 -070035const int LinkMonitor::kMaxResponseSampleFilterDepth = 5;
Paul Stewartb434ce52013-09-23 13:53:49 -070036const int LinkMonitor::kUnicastReplyReliabilityThreshold = 10;
Paul Stewart6c72c972012-07-27 11:29:20 -070037
Paul Stewart3f43f432012-07-16 12:12:45 -070038LinkMonitor::LinkMonitor(const ConnectionRefPtr &connection,
39 EventDispatcher *dispatcher,
Paul Stewartff845fc2012-08-07 07:28:44 -070040 Metrics *metrics,
Paul Stewart6c72c972012-07-27 11:29:20 -070041 DeviceInfo *device_info,
Peter Qiub5d124f2014-04-14 12:05:02 -070042 const FailureCallback &failure_callback,
43 const GatewayChangeCallback &gateway_change_callback)
Paul Stewart3f43f432012-07-16 12:12:45 -070044 : connection_(connection),
45 dispatcher_(dispatcher),
Paul Stewartff845fc2012-08-07 07:28:44 -070046 metrics_(metrics),
Paul Stewart6c72c972012-07-27 11:29:20 -070047 device_info_(device_info),
Paul Stewart3f43f432012-07-16 12:12:45 -070048 failure_callback_(failure_callback),
Peter Qiub5d124f2014-04-14 12:05:02 -070049 gateway_change_callback_(gateway_change_callback),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070050 test_period_milliseconds_(kDefaultTestPeriodMilliseconds),
Paul Stewart6c72c972012-07-27 11:29:20 -070051 broadcast_failure_count_(0),
52 unicast_failure_count_(0),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070053 broadcast_success_count_(0),
54 unicast_success_count_(0),
Paul Stewart6c72c972012-07-27 11:29:20 -070055 is_unicast_(false),
Paul Stewartb434ce52013-09-23 13:53:49 -070056 gateway_supports_unicast_arp_(false),
Paul Stewart6c72c972012-07-27 11:29:20 -070057 response_sample_count_(0),
58 response_sample_bucket_(0),
Paul Stewartf1961f82012-09-11 20:45:39 -070059 time_(Time::GetInstance()) {
60}
Paul Stewart3f43f432012-07-16 12:12:45 -070061
Paul Stewart6c72c972012-07-27 11:29:20 -070062LinkMonitor::~LinkMonitor() {
63 Stop();
64}
Paul Stewart3f43f432012-07-16 12:12:45 -070065
66bool LinkMonitor::Start() {
Paul Stewart6c72c972012-07-27 11:29:20 -070067 Stop();
mukesh agrawalbb2231c2013-07-17 16:32:24 -070068 return StartInternal(kDefaultTestPeriodMilliseconds);
69}
70
71bool LinkMonitor::StartInternal(int probe_period_milliseconds) {
72 test_period_milliseconds_ = probe_period_milliseconds;
73 if (test_period_milliseconds_ > kDefaultTestPeriodMilliseconds) {
74 LOG(WARNING) << "Long test period; UMA stats will be truncated.";
75 }
Paul Stewart6c72c972012-07-27 11:29:20 -070076
77 if (!device_info_->GetMACAddress(
mukesh agrawalbb2231c2013-07-17 16:32:24 -070078 connection_->interface_index(), &local_mac_address_)) {
Paul Stewart6c72c972012-07-27 11:29:20 -070079 LOG(ERROR) << "Could not get local MAC address.";
Paul Stewartff845fc2012-08-07 07:28:44 -070080 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -070081 connection_->technology(),
82 Metrics::kLinkMonitorMacAddressNotFound,
83 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -070084 Stop();
85 return false;
86 }
mukesh agrawalbb2231c2013-07-17 16:32:24 -070087 if (gateway_mac_address_.IsEmpty()) {
88 gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
89 }
Paul Stewart6c72c972012-07-27 11:29:20 -070090 send_request_callback_.Reset(
Paul Stewartf1961f82012-09-11 20:45:39 -070091 Bind(base::IgnoreResult(&LinkMonitor::SendRequest), Unretained(this)));
Paul Stewart0443aa52012-08-09 10:43:50 -070092 time_->GetTimeMonotonic(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -070093 return SendRequest();
Paul Stewart3f43f432012-07-16 12:12:45 -070094}
95
96void LinkMonitor::Stop() {
Paul Stewart6c72c972012-07-27 11:29:20 -070097 SLOG(Link, 2) << "In " << __func__ << ".";
98 local_mac_address_.Clear();
99 gateway_mac_address_.Clear();
100 arp_client_.reset();
101 broadcast_failure_count_ = 0;
102 unicast_failure_count_ = 0;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700103 broadcast_success_count_ = 0;
104 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700105 is_unicast_ = false;
Paul Stewartb434ce52013-09-23 13:53:49 -0700106 gateway_supports_unicast_arp_ = false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700107 response_sample_bucket_ = 0;
108 response_sample_count_ = 0;
109 receive_response_handler_.reset();
110 send_request_callback_.Cancel();
Paul Stewart0443aa52012-08-09 10:43:50 -0700111 timerclear(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700112 timerclear(&sent_request_at_);
113}
114
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700115void LinkMonitor::OnAfterResume() {
116 ByteString prior_gateway_mac_address(gateway_mac_address_);
Paul Stewartb434ce52013-09-23 13:53:49 -0700117 bool gateway_supports_unicast_arp = gateway_supports_unicast_arp_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700118 Stop();
119 gateway_mac_address_ = prior_gateway_mac_address;
Paul Stewartb434ce52013-09-23 13:53:49 -0700120 gateway_supports_unicast_arp_ = gateway_supports_unicast_arp;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700121 StartInternal(kFastTestPeriodMilliseconds);
122}
123
Paul Stewartf1961f82012-09-11 20:45:39 -0700124int LinkMonitor::GetResponseTimeMilliseconds() const {
Paul Stewart6c72c972012-07-27 11:29:20 -0700125 return response_sample_count_ ?
126 response_sample_bucket_ / response_sample_count_ : 0;
127}
128
Paul Stewartf1961f82012-09-11 20:45:39 -0700129void LinkMonitor::AddResponseTimeSample(int response_time_milliseconds) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700130 SLOG(Link, 2) << "In " << __func__ << " with sample "
131 << response_time_milliseconds << ".";
Paul Stewartff845fc2012-08-07 07:28:44 -0700132 metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
133 connection_->technology(), response_time_milliseconds);
Paul Stewart6c72c972012-07-27 11:29:20 -0700134 response_sample_bucket_ += response_time_milliseconds;
135 if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
136 ++response_sample_count_;
137 } else {
138 response_sample_bucket_ =
139 response_sample_bucket_ * kMaxResponseSampleFilterDepth /
140 (kMaxResponseSampleFilterDepth + 1);
141 }
142}
143
144// static
145string LinkMonitor::HardwareAddressToString(const ByteString &address) {
146 std::vector<string> address_parts;
147 for (size_t i = 0; i < address.GetLength(); ++i) {
148 address_parts.push_back(
149 base::StringPrintf("%02x", address.GetConstData()[i]));
150 }
151 return JoinString(address_parts, ':');
152}
153
154bool LinkMonitor::CreateClient() {
155 arp_client_.reset(new ArpClient(connection_->interface_index()));
156
157 if (!arp_client_->Start()) {
Paul Stewartff845fc2012-08-07 07:28:44 -0700158 return false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700159 }
Paul Stewart9f7823e2012-08-09 10:58:26 -0700160 SLOG(Link, 4) << "Created ARP client; listening on socket "
161 << arp_client_->socket() << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700162 receive_response_handler_.reset(
163 dispatcher_->CreateReadyHandler(
164 arp_client_->socket(),
165 IOHandler::kModeInput,
166 Bind(&LinkMonitor::ReceiveResponse, Unretained(this))));
167 return true;
168}
169
170bool LinkMonitor::AddMissedResponse() {
171 SLOG(Link, 2) << "In " << __func__ << ".";
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700172 AddResponseTimeSample(test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700173
174 if (is_unicast_) {
Paul Stewartb434ce52013-09-23 13:53:49 -0700175 if (gateway_supports_unicast_arp_) {
176 ++unicast_failure_count_;
177 }
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700178 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700179 } else {
180 ++broadcast_failure_count_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700181 broadcast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700182 }
183
184 if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
185 LOG(ERROR) << "Link monitor has reached the failure threshold with "
186 << broadcast_failure_count_
187 << " broadcast failures and "
188 << unicast_failure_count_
189 << " unicast failures.";
190 failure_callback_.Run();
Paul Stewart0443aa52012-08-09 10:43:50 -0700191
192 struct timeval now, elapsed_time;
193 time_->GetTimeMonotonic(&now);
194 timersub(&now, &started_monitoring_at_, &elapsed_time);
195
Paul Stewartff845fc2012-08-07 07:28:44 -0700196 metrics_->NotifyLinkMonitorFailure(
197 connection_->technology(),
Paul Stewart0443aa52012-08-09 10:43:50 -0700198 Metrics::kLinkMonitorFailureThresholdReached,
199 elapsed_time.tv_sec,
200 broadcast_failure_count_,
201 unicast_failure_count_);
202
203 Stop();
Paul Stewart6c72c972012-07-27 11:29:20 -0700204 return true;
205 }
206 is_unicast_ = !is_unicast_;
207 return false;
208}
209
Paul Stewart9f7823e2012-08-09 10:58:26 -0700210bool LinkMonitor::IsGatewayFound() const {
211 return !gateway_mac_address_.IsZero();
212}
213
Paul Stewart6c72c972012-07-27 11:29:20 -0700214void LinkMonitor::ReceiveResponse(int fd) {
215 SLOG(Link, 2) << "In " << __func__ << ".";
216 ArpPacket packet;
217 ByteString sender;
218 if (!arp_client_->ReceiveReply(&packet, &sender)) {
219 return;
220 }
221
Paul Stewart9f7823e2012-08-09 10:58:26 -0700222 if (!connection_->local().address().Equals(
223 packet.remote_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700224 SLOG(Link, 4) << "Response is not for our IP address.";
225 return;
226 }
227
Paul Stewart9f7823e2012-08-09 10:58:26 -0700228 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700229 SLOG(Link, 4) << "Response is not for our MAC address.";
230 return;
231 }
232
Paul Stewart9f7823e2012-08-09 10:58:26 -0700233 if (!connection_->gateway().address().Equals(
234 packet.local_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700235 SLOG(Link, 4) << "Response is not from the gateway IP address.";
236 return;
237 }
238
239 struct timeval now, elapsed_time;
240 time_->GetTimeMonotonic(&now);
241 timersub(&now, &sent_request_at_, &elapsed_time);
242
243 AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
244 elapsed_time.tv_usec / 1000);
245
246 receive_response_handler_.reset();
247 arp_client_.reset();
248
249 if (is_unicast_) {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700250 ++unicast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700251 unicast_failure_count_ = 0;
Paul Stewartb434ce52013-09-23 13:53:49 -0700252 if (unicast_success_count_ >= kUnicastReplyReliabilityThreshold) {
253 SLOG_IF(Link, 2, !gateway_supports_unicast_arp_)
254 << "Gateway is now considered a reliable unicast responder. "
255 "Unicast failures will now count.";
256 gateway_supports_unicast_arp_ = true;
257 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700258 } else {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700259 ++broadcast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700260 broadcast_failure_count_ = 0;
261 }
262
Paul Stewart9f7823e2012-08-09 10:58:26 -0700263 if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
264 const ByteString &new_mac_address = packet.local_mac_address();
265 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700266 SLOG(Link, 2) << "Found gateway at "
267 << HardwareAddressToString(new_mac_address);
268 } else {
269 SLOG(Link, 2) << "Gateway MAC address changed.";
270 }
271 gateway_mac_address_ = new_mac_address;
Peter Qiub5d124f2014-04-14 12:05:02 -0700272
273 // Notify device of the new gateway mac address.
274 gateway_change_callback_.Run();
Paul Stewart6c72c972012-07-27 11:29:20 -0700275 }
276
277 is_unicast_ = !is_unicast_;
Paul Stewartb434ce52013-09-23 13:53:49 -0700278 if ((unicast_success_count_ || !gateway_supports_unicast_arp_)
279 && broadcast_success_count_) {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700280 test_period_milliseconds_ = kDefaultTestPeriodMilliseconds;
281 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700282}
283
284bool LinkMonitor::SendRequest() {
285 SLOG(Link, 2) << "In " << __func__ << ".";
286 if (!arp_client_.get()) {
287 if (!CreateClient()) {
288 LOG(ERROR) << "Failed to start ARP client.";
289 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700290 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700291 connection_->technology(),
292 Metrics::kLinkMonitorClientStartFailure,
293 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700294 return false;
295 }
296 } else if (AddMissedResponse()) {
297 // If an ARP client is still listening, this means we have timed
298 // out reception of the ARP reply.
299 return false;
300 } else {
301 // We already have an ArpClient instance running. These aren't
302 // bound sockets in the conventional sense, and we cannot distinguish
303 // which request (from which trial, or even from which component
304 // in the local system) an ARP reply was sent in response to.
305 // Therefore we keep the already open ArpClient in the case of
306 // a non-fatal timeout.
307 }
308
309 ByteString destination_mac_address(gateway_mac_address_.GetLength());
Paul Stewart9f7823e2012-08-09 10:58:26 -0700310 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700311 // The remote MAC addess is set by convention to be all-zeroes in the
312 // ARP header if not known. The ArpClient will translate an all-zeroes
313 // remote address into a send to the broadcast (all-ones) address in
314 // the Ethernet frame header.
315 SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
316 << "gateway MAC is unknown";
317 is_unicast_ = false;
318 } else if (is_unicast_) {
319 destination_mac_address = gateway_mac_address_;
320 }
321
322 ArpPacket request(connection_->local(), connection_->gateway(),
323 local_mac_address_, destination_mac_address);
324 if (!arp_client_->TransmitRequest(request)) {
325 LOG(ERROR) << "Failed to send ARP request. Stopping.";
326 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700327 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700328 connection_->technology(), Metrics::kLinkMonitorTransmitFailure,
329 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700330 return false;
331 }
332
333 time_->GetTimeMonotonic(&sent_request_at_);
334
335 dispatcher_->PostDelayedTask(send_request_callback_.callback(),
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700336 test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700337 return true;
338}
339
Paul Stewart3f43f432012-07-16 12:12:45 -0700340} // namespace shill