blob: 4fa7a157a57fdfcdf00ce96eafcd8c0d66ed6f03 [file] [log] [blame]
// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "shill/link_monitor.h"
6
Paul Stewartf1961f82012-09-11 20:45:39 -07007#include <string>
Paul Stewart6c72c972012-07-27 11:29:20 -07008#include <vector>
Paul Stewart3f43f432012-07-16 12:12:45 -07009
Paul Stewart6c72c972012-07-27 11:29:20 -070010#include <base/bind.h>
Paul Stewart6c72c972012-07-27 11:29:20 -070011#include <base/stringprintf.h>
12#include <base/string_util.h>
13
14#include "shill/arp_client.h"
15#include "shill/arp_packet.h"
16#include "shill/byte_string.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070017#include "shill/connection.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070018#include "shill/device_info.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070019#include "shill/event_dispatcher.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070020#include "shill/ip_address.h"
Christopher Wileyb691efd2012-08-09 13:51:51 -070021#include "shill/logging.h"
Paul Stewartff845fc2012-08-07 07:28:44 -070022#include "shill/metrics.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070023#include "shill/shill_time.h"
24
25using base::Bind;
26using base::Unretained;
27using std::string;
Paul Stewart3f43f432012-07-16 12:12:45 -070028
29namespace shill {
30
mukesh agrawalbb2231c2013-07-17 16:32:24 -070031const int LinkMonitor::kDefaultTestPeriodMilliseconds = 5000;
Paul Stewart036dba02012-08-07 12:34:41 -070032const char LinkMonitor::kDefaultLinkMonitorTechnologies[] = "wifi";
Paul Stewartf1961f82012-09-11 20:45:39 -070033const int LinkMonitor::kFailureThreshold = 5;
mukesh agrawalbb2231c2013-07-17 16:32:24 -070034const int LinkMonitor::kFastTestPeriodMilliseconds = 200;
Paul Stewartf1961f82012-09-11 20:45:39 -070035const int LinkMonitor::kMaxResponseSampleFilterDepth = 5;
Paul Stewart6c72c972012-07-27 11:29:20 -070036
Paul Stewart3f43f432012-07-16 12:12:45 -070037LinkMonitor::LinkMonitor(const ConnectionRefPtr &connection,
38 EventDispatcher *dispatcher,
Paul Stewartff845fc2012-08-07 07:28:44 -070039 Metrics *metrics,
Paul Stewart6c72c972012-07-27 11:29:20 -070040 DeviceInfo *device_info,
Paul Stewart3f43f432012-07-16 12:12:45 -070041 const FailureCallback &failure_callback)
42 : connection_(connection),
43 dispatcher_(dispatcher),
Paul Stewartff845fc2012-08-07 07:28:44 -070044 metrics_(metrics),
Paul Stewart6c72c972012-07-27 11:29:20 -070045 device_info_(device_info),
Paul Stewart3f43f432012-07-16 12:12:45 -070046 failure_callback_(failure_callback),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070047 test_period_milliseconds_(kDefaultTestPeriodMilliseconds),
Paul Stewart6c72c972012-07-27 11:29:20 -070048 broadcast_failure_count_(0),
49 unicast_failure_count_(0),
mukesh agrawalbb2231c2013-07-17 16:32:24 -070050 broadcast_success_count_(0),
51 unicast_success_count_(0),
Paul Stewart6c72c972012-07-27 11:29:20 -070052 is_unicast_(false),
53 response_sample_count_(0),
54 response_sample_bucket_(0),
Paul Stewartf1961f82012-09-11 20:45:39 -070055 time_(Time::GetInstance()) {
56}
Paul Stewart3f43f432012-07-16 12:12:45 -070057
Paul Stewart6c72c972012-07-27 11:29:20 -070058LinkMonitor::~LinkMonitor() {
59 Stop();
60}
Paul Stewart3f43f432012-07-16 12:12:45 -070061
62bool LinkMonitor::Start() {
Paul Stewart6c72c972012-07-27 11:29:20 -070063 Stop();
mukesh agrawalbb2231c2013-07-17 16:32:24 -070064 return StartInternal(kDefaultTestPeriodMilliseconds);
65}
66
67bool LinkMonitor::StartInternal(int probe_period_milliseconds) {
68 test_period_milliseconds_ = probe_period_milliseconds;
69 if (test_period_milliseconds_ > kDefaultTestPeriodMilliseconds) {
70 LOG(WARNING) << "Long test period; UMA stats will be truncated.";
71 }
Paul Stewart6c72c972012-07-27 11:29:20 -070072
73 if (!device_info_->GetMACAddress(
mukesh agrawalbb2231c2013-07-17 16:32:24 -070074 connection_->interface_index(), &local_mac_address_)) {
Paul Stewart6c72c972012-07-27 11:29:20 -070075 LOG(ERROR) << "Could not get local MAC address.";
Paul Stewartff845fc2012-08-07 07:28:44 -070076 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -070077 connection_->technology(),
78 Metrics::kLinkMonitorMacAddressNotFound,
79 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -070080 Stop();
81 return false;
82 }
mukesh agrawalbb2231c2013-07-17 16:32:24 -070083 if (gateway_mac_address_.IsEmpty()) {
84 gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
85 }
Paul Stewart6c72c972012-07-27 11:29:20 -070086 send_request_callback_.Reset(
Paul Stewartf1961f82012-09-11 20:45:39 -070087 Bind(base::IgnoreResult(&LinkMonitor::SendRequest), Unretained(this)));
Paul Stewart0443aa52012-08-09 10:43:50 -070088 time_->GetTimeMonotonic(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -070089 return SendRequest();
Paul Stewart3f43f432012-07-16 12:12:45 -070090}
91
92void LinkMonitor::Stop() {
Paul Stewart6c72c972012-07-27 11:29:20 -070093 SLOG(Link, 2) << "In " << __func__ << ".";
94 local_mac_address_.Clear();
95 gateway_mac_address_.Clear();
96 arp_client_.reset();
97 broadcast_failure_count_ = 0;
98 unicast_failure_count_ = 0;
mukesh agrawalbb2231c2013-07-17 16:32:24 -070099 broadcast_success_count_ = 0;
100 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700101 is_unicast_ = false;
102 response_sample_bucket_ = 0;
103 response_sample_count_ = 0;
104 receive_response_handler_.reset();
105 send_request_callback_.Cancel();
Paul Stewart0443aa52012-08-09 10:43:50 -0700106 timerclear(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700107 timerclear(&sent_request_at_);
108}
109
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700110void LinkMonitor::OnAfterResume() {
111 ByteString prior_gateway_mac_address(gateway_mac_address_);
112 Stop();
113 gateway_mac_address_ = prior_gateway_mac_address;
114 StartInternal(kFastTestPeriodMilliseconds);
115}
116
Paul Stewartf1961f82012-09-11 20:45:39 -0700117int LinkMonitor::GetResponseTimeMilliseconds() const {
Paul Stewart6c72c972012-07-27 11:29:20 -0700118 return response_sample_count_ ?
119 response_sample_bucket_ / response_sample_count_ : 0;
120}
121
Paul Stewartf1961f82012-09-11 20:45:39 -0700122void LinkMonitor::AddResponseTimeSample(int response_time_milliseconds) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700123 SLOG(Link, 2) << "In " << __func__ << " with sample "
124 << response_time_milliseconds << ".";
Paul Stewartff845fc2012-08-07 07:28:44 -0700125 metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
126 connection_->technology(), response_time_milliseconds);
Paul Stewart6c72c972012-07-27 11:29:20 -0700127 response_sample_bucket_ += response_time_milliseconds;
128 if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
129 ++response_sample_count_;
130 } else {
131 response_sample_bucket_ =
132 response_sample_bucket_ * kMaxResponseSampleFilterDepth /
133 (kMaxResponseSampleFilterDepth + 1);
134 }
135}
136
137// static
138string LinkMonitor::HardwareAddressToString(const ByteString &address) {
139 std::vector<string> address_parts;
140 for (size_t i = 0; i < address.GetLength(); ++i) {
141 address_parts.push_back(
142 base::StringPrintf("%02x", address.GetConstData()[i]));
143 }
144 return JoinString(address_parts, ':');
145}
146
147bool LinkMonitor::CreateClient() {
148 arp_client_.reset(new ArpClient(connection_->interface_index()));
149
150 if (!arp_client_->Start()) {
Paul Stewartff845fc2012-08-07 07:28:44 -0700151 return false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700152 }
Paul Stewart9f7823e2012-08-09 10:58:26 -0700153 SLOG(Link, 4) << "Created ARP client; listening on socket "
154 << arp_client_->socket() << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700155 receive_response_handler_.reset(
156 dispatcher_->CreateReadyHandler(
157 arp_client_->socket(),
158 IOHandler::kModeInput,
159 Bind(&LinkMonitor::ReceiveResponse, Unretained(this))));
160 return true;
161}
162
163bool LinkMonitor::AddMissedResponse() {
164 SLOG(Link, 2) << "In " << __func__ << ".";
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700165 AddResponseTimeSample(test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700166
167 if (is_unicast_) {
168 ++unicast_failure_count_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700169 unicast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700170 } else {
171 ++broadcast_failure_count_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700172 broadcast_success_count_ = 0;
Paul Stewart6c72c972012-07-27 11:29:20 -0700173 }
174
175 if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
176 LOG(ERROR) << "Link monitor has reached the failure threshold with "
177 << broadcast_failure_count_
178 << " broadcast failures and "
179 << unicast_failure_count_
180 << " unicast failures.";
181 failure_callback_.Run();
Paul Stewart0443aa52012-08-09 10:43:50 -0700182
183 struct timeval now, elapsed_time;
184 time_->GetTimeMonotonic(&now);
185 timersub(&now, &started_monitoring_at_, &elapsed_time);
186
Paul Stewartff845fc2012-08-07 07:28:44 -0700187 metrics_->NotifyLinkMonitorFailure(
188 connection_->technology(),
Paul Stewart0443aa52012-08-09 10:43:50 -0700189 Metrics::kLinkMonitorFailureThresholdReached,
190 elapsed_time.tv_sec,
191 broadcast_failure_count_,
192 unicast_failure_count_);
193
194 Stop();
Paul Stewart6c72c972012-07-27 11:29:20 -0700195 return true;
196 }
197 is_unicast_ = !is_unicast_;
198 return false;
199}
200
Paul Stewart9f7823e2012-08-09 10:58:26 -0700201bool LinkMonitor::IsGatewayFound() const {
202 return !gateway_mac_address_.IsZero();
203}
204
Paul Stewart6c72c972012-07-27 11:29:20 -0700205void LinkMonitor::ReceiveResponse(int fd) {
206 SLOG(Link, 2) << "In " << __func__ << ".";
207 ArpPacket packet;
208 ByteString sender;
209 if (!arp_client_->ReceiveReply(&packet, &sender)) {
210 return;
211 }
212
Paul Stewart9f7823e2012-08-09 10:58:26 -0700213 if (!connection_->local().address().Equals(
214 packet.remote_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700215 SLOG(Link, 4) << "Response is not for our IP address.";
216 return;
217 }
218
Paul Stewart9f7823e2012-08-09 10:58:26 -0700219 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700220 SLOG(Link, 4) << "Response is not for our MAC address.";
221 return;
222 }
223
Paul Stewart9f7823e2012-08-09 10:58:26 -0700224 if (!connection_->gateway().address().Equals(
225 packet.local_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700226 SLOG(Link, 4) << "Response is not from the gateway IP address.";
227 return;
228 }
229
230 struct timeval now, elapsed_time;
231 time_->GetTimeMonotonic(&now);
232 timersub(&now, &sent_request_at_, &elapsed_time);
233
234 AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
235 elapsed_time.tv_usec / 1000);
236
237 receive_response_handler_.reset();
238 arp_client_.reset();
239
240 if (is_unicast_) {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700241 ++unicast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700242 unicast_failure_count_ = 0;
243 } else {
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700244 ++broadcast_success_count_;
Paul Stewart6c72c972012-07-27 11:29:20 -0700245 broadcast_failure_count_ = 0;
246 }
247
Paul Stewart9f7823e2012-08-09 10:58:26 -0700248 if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
249 const ByteString &new_mac_address = packet.local_mac_address();
250 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700251 SLOG(Link, 2) << "Found gateway at "
252 << HardwareAddressToString(new_mac_address);
253 } else {
254 SLOG(Link, 2) << "Gateway MAC address changed.";
255 }
256 gateway_mac_address_ = new_mac_address;
257 }
258
259 is_unicast_ = !is_unicast_;
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700260 if (unicast_success_count_ && broadcast_success_count_) {
261 test_period_milliseconds_ = kDefaultTestPeriodMilliseconds;
262 }
Paul Stewart6c72c972012-07-27 11:29:20 -0700263}
264
265bool LinkMonitor::SendRequest() {
266 SLOG(Link, 2) << "In " << __func__ << ".";
267 if (!arp_client_.get()) {
268 if (!CreateClient()) {
269 LOG(ERROR) << "Failed to start ARP client.";
270 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700271 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700272 connection_->technology(),
273 Metrics::kLinkMonitorClientStartFailure,
274 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700275 return false;
276 }
277 } else if (AddMissedResponse()) {
278 // If an ARP client is still listening, this means we have timed
279 // out reception of the ARP reply.
280 return false;
281 } else {
282 // We already have an ArpClient instance running. These aren't
283 // bound sockets in the conventional sense, and we cannot distinguish
284 // which request (from which trial, or even from which component
285 // in the local system) an ARP reply was sent in response to.
286 // Therefore we keep the already open ArpClient in the case of
287 // a non-fatal timeout.
288 }
289
290 ByteString destination_mac_address(gateway_mac_address_.GetLength());
Paul Stewart9f7823e2012-08-09 10:58:26 -0700291 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700292 // The remote MAC addess is set by convention to be all-zeroes in the
293 // ARP header if not known. The ArpClient will translate an all-zeroes
294 // remote address into a send to the broadcast (all-ones) address in
295 // the Ethernet frame header.
296 SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
297 << "gateway MAC is unknown";
298 is_unicast_ = false;
299 } else if (is_unicast_) {
300 destination_mac_address = gateway_mac_address_;
301 }
302
303 ArpPacket request(connection_->local(), connection_->gateway(),
304 local_mac_address_, destination_mac_address);
305 if (!arp_client_->TransmitRequest(request)) {
306 LOG(ERROR) << "Failed to send ARP request. Stopping.";
307 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700308 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700309 connection_->technology(), Metrics::kLinkMonitorTransmitFailure,
310 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700311 return false;
312 }
313
314 time_->GetTimeMonotonic(&sent_request_at_);
315
316 dispatcher_->PostDelayedTask(send_request_callback_.callback(),
mukesh agrawalbb2231c2013-07-17 16:32:24 -0700317 test_period_milliseconds_);
Paul Stewart6c72c972012-07-27 11:29:20 -0700318 return true;
319}
320
Paul Stewart3f43f432012-07-16 12:12:45 -0700321} // namespace shill