blob: 26cb0dd620526b6717275924e72ed786b253966a [file] [log] [blame]
// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "shill/link_monitor.h"
6
Paul Stewart6c72c972012-07-27 11:29:20 -07007#include <vector>
Paul Stewart3f43f432012-07-16 12:12:45 -07008
Paul Stewart6c72c972012-07-27 11:29:20 -07009#include <base/bind.h>
Paul Stewart6c72c972012-07-27 11:29:20 -070010#include <base/stringprintf.h>
11#include <base/string_util.h>
12
13#include "shill/arp_client.h"
14#include "shill/arp_packet.h"
15#include "shill/byte_string.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070016#include "shill/connection.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070017#include "shill/device_info.h"
Paul Stewart3f43f432012-07-16 12:12:45 -070018#include "shill/event_dispatcher.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070019#include "shill/ip_address.h"
Christopher Wileyb691efd2012-08-09 13:51:51 -070020#include "shill/logging.h"
Paul Stewartff845fc2012-08-07 07:28:44 -070021#include "shill/metrics.h"
Paul Stewart6c72c972012-07-27 11:29:20 -070022#include "shill/shill_time.h"
23
24using base::Bind;
25using base::Unretained;
26using std::string;
Paul Stewart3f43f432012-07-16 12:12:45 -070027
28namespace shill {
29
Paul Stewart6c72c972012-07-27 11:29:20 -070030const unsigned int LinkMonitor::kTestPeriodMilliseconds = 5000;
31const unsigned int LinkMonitor::kFailureThreshold = 5;
32const unsigned int LinkMonitor::kMaxResponseSampleFilterDepth = 5;
33
Paul Stewart3f43f432012-07-16 12:12:45 -070034LinkMonitor::LinkMonitor(const ConnectionRefPtr &connection,
35 EventDispatcher *dispatcher,
Paul Stewartff845fc2012-08-07 07:28:44 -070036 Metrics *metrics,
Paul Stewart6c72c972012-07-27 11:29:20 -070037 DeviceInfo *device_info,
Paul Stewart3f43f432012-07-16 12:12:45 -070038 const FailureCallback &failure_callback)
39 : connection_(connection),
40 dispatcher_(dispatcher),
Paul Stewartff845fc2012-08-07 07:28:44 -070041 metrics_(metrics),
Paul Stewart6c72c972012-07-27 11:29:20 -070042 device_info_(device_info),
Paul Stewart3f43f432012-07-16 12:12:45 -070043 failure_callback_(failure_callback),
Paul Stewart6c72c972012-07-27 11:29:20 -070044 broadcast_failure_count_(0),
45 unicast_failure_count_(0),
46 is_unicast_(false),
47 response_sample_count_(0),
48 response_sample_bucket_(0),
49 time_(Time::GetInstance()) {}
Paul Stewart3f43f432012-07-16 12:12:45 -070050
Paul Stewart6c72c972012-07-27 11:29:20 -070051LinkMonitor::~LinkMonitor() {
52 Stop();
53}
Paul Stewart3f43f432012-07-16 12:12:45 -070054
55bool LinkMonitor::Start() {
Paul Stewart6c72c972012-07-27 11:29:20 -070056 Stop();
57
58 if (!device_info_->GetMACAddress(
59 connection_->interface_index(), &local_mac_address_)) {
60 LOG(ERROR) << "Could not get local MAC address.";
Paul Stewartff845fc2012-08-07 07:28:44 -070061 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -070062 connection_->technology(),
63 Metrics::kLinkMonitorMacAddressNotFound,
64 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -070065 Stop();
66 return false;
67 }
68 gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
69 send_request_callback_.Reset(
70 Bind(&LinkMonitor::SendRequestTask, Unretained(this)));
Paul Stewart0443aa52012-08-09 10:43:50 -070071 time_->GetTimeMonotonic(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -070072 return SendRequest();
Paul Stewart3f43f432012-07-16 12:12:45 -070073}
74
75void LinkMonitor::Stop() {
Paul Stewart6c72c972012-07-27 11:29:20 -070076 SLOG(Link, 2) << "In " << __func__ << ".";
77 local_mac_address_.Clear();
78 gateway_mac_address_.Clear();
79 arp_client_.reset();
80 broadcast_failure_count_ = 0;
81 unicast_failure_count_ = 0;
82 is_unicast_ = false;
83 response_sample_bucket_ = 0;
84 response_sample_count_ = 0;
85 receive_response_handler_.reset();
86 send_request_callback_.Cancel();
Paul Stewart0443aa52012-08-09 10:43:50 -070087 timerclear(&started_monitoring_at_);
Paul Stewart6c72c972012-07-27 11:29:20 -070088 timerclear(&sent_request_at_);
89}
90
Paul Stewart9f7823e2012-08-09 10:58:26 -070091unsigned int LinkMonitor::GetResponseTimeMilliseconds() const {
Paul Stewart6c72c972012-07-27 11:29:20 -070092 return response_sample_count_ ?
93 response_sample_bucket_ / response_sample_count_ : 0;
94}
95
96void LinkMonitor::AddResponseTimeSample(
97 unsigned int response_time_milliseconds) {
98 SLOG(Link, 2) << "In " << __func__ << " with sample "
99 << response_time_milliseconds << ".";
Paul Stewartff845fc2012-08-07 07:28:44 -0700100 metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
101 connection_->technology(), response_time_milliseconds);
Paul Stewart6c72c972012-07-27 11:29:20 -0700102 response_sample_bucket_ += response_time_milliseconds;
103 if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
104 ++response_sample_count_;
105 } else {
106 response_sample_bucket_ =
107 response_sample_bucket_ * kMaxResponseSampleFilterDepth /
108 (kMaxResponseSampleFilterDepth + 1);
109 }
110}
111
112// static
113string LinkMonitor::HardwareAddressToString(const ByteString &address) {
114 std::vector<string> address_parts;
115 for (size_t i = 0; i < address.GetLength(); ++i) {
116 address_parts.push_back(
117 base::StringPrintf("%02x", address.GetConstData()[i]));
118 }
119 return JoinString(address_parts, ':');
120}
121
122bool LinkMonitor::CreateClient() {
123 arp_client_.reset(new ArpClient(connection_->interface_index()));
124
125 if (!arp_client_->Start()) {
Paul Stewartff845fc2012-08-07 07:28:44 -0700126 return false;
Paul Stewart6c72c972012-07-27 11:29:20 -0700127 }
Paul Stewart9f7823e2012-08-09 10:58:26 -0700128 SLOG(Link, 4) << "Created ARP client; listening on socket "
129 << arp_client_->socket() << ".";
Paul Stewart6c72c972012-07-27 11:29:20 -0700130 receive_response_handler_.reset(
131 dispatcher_->CreateReadyHandler(
132 arp_client_->socket(),
133 IOHandler::kModeInput,
134 Bind(&LinkMonitor::ReceiveResponse, Unretained(this))));
135 return true;
136}
137
138bool LinkMonitor::AddMissedResponse() {
139 SLOG(Link, 2) << "In " << __func__ << ".";
140 AddResponseTimeSample(kTestPeriodMilliseconds);
141
142 if (is_unicast_) {
143 ++unicast_failure_count_;
144 } else {
145 ++broadcast_failure_count_;
146 }
147
148 if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
149 LOG(ERROR) << "Link monitor has reached the failure threshold with "
150 << broadcast_failure_count_
151 << " broadcast failures and "
152 << unicast_failure_count_
153 << " unicast failures.";
154 failure_callback_.Run();
Paul Stewart0443aa52012-08-09 10:43:50 -0700155
156 struct timeval now, elapsed_time;
157 time_->GetTimeMonotonic(&now);
158 timersub(&now, &started_monitoring_at_, &elapsed_time);
159
Paul Stewartff845fc2012-08-07 07:28:44 -0700160 metrics_->NotifyLinkMonitorFailure(
161 connection_->technology(),
Paul Stewart0443aa52012-08-09 10:43:50 -0700162 Metrics::kLinkMonitorFailureThresholdReached,
163 elapsed_time.tv_sec,
164 broadcast_failure_count_,
165 unicast_failure_count_);
166
167 Stop();
Paul Stewart6c72c972012-07-27 11:29:20 -0700168 return true;
169 }
170 is_unicast_ = !is_unicast_;
171 return false;
172}
173
Paul Stewart9f7823e2012-08-09 10:58:26 -0700174bool LinkMonitor::IsGatewayFound() const {
175 return !gateway_mac_address_.IsZero();
176}
177
Paul Stewart6c72c972012-07-27 11:29:20 -0700178void LinkMonitor::ReceiveResponse(int fd) {
179 SLOG(Link, 2) << "In " << __func__ << ".";
180 ArpPacket packet;
181 ByteString sender;
182 if (!arp_client_->ReceiveReply(&packet, &sender)) {
183 return;
184 }
185
Paul Stewart9f7823e2012-08-09 10:58:26 -0700186 if (!connection_->local().address().Equals(
187 packet.remote_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700188 SLOG(Link, 4) << "Response is not for our IP address.";
189 return;
190 }
191
Paul Stewart9f7823e2012-08-09 10:58:26 -0700192 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700193 SLOG(Link, 4) << "Response is not for our MAC address.";
194 return;
195 }
196
Paul Stewart9f7823e2012-08-09 10:58:26 -0700197 if (!connection_->gateway().address().Equals(
198 packet.local_ip_address().address())) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700199 SLOG(Link, 4) << "Response is not from the gateway IP address.";
200 return;
201 }
202
203 struct timeval now, elapsed_time;
204 time_->GetTimeMonotonic(&now);
205 timersub(&now, &sent_request_at_, &elapsed_time);
206
207 AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
208 elapsed_time.tv_usec / 1000);
209
210 receive_response_handler_.reset();
211 arp_client_.reset();
212
213 if (is_unicast_) {
214 unicast_failure_count_ = 0;
215 } else {
216 broadcast_failure_count_ = 0;
217 }
218
Paul Stewart9f7823e2012-08-09 10:58:26 -0700219 if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
220 const ByteString &new_mac_address = packet.local_mac_address();
221 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700222 SLOG(Link, 2) << "Found gateway at "
223 << HardwareAddressToString(new_mac_address);
224 } else {
225 SLOG(Link, 2) << "Gateway MAC address changed.";
226 }
227 gateway_mac_address_ = new_mac_address;
228 }
229
230 is_unicast_ = !is_unicast_;
231}
232
233bool LinkMonitor::SendRequest() {
234 SLOG(Link, 2) << "In " << __func__ << ".";
235 if (!arp_client_.get()) {
236 if (!CreateClient()) {
237 LOG(ERROR) << "Failed to start ARP client.";
238 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700239 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700240 connection_->technology(),
241 Metrics::kLinkMonitorClientStartFailure,
242 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700243 return false;
244 }
245 } else if (AddMissedResponse()) {
246 // If an ARP client is still listening, this means we have timed
247 // out reception of the ARP reply.
248 return false;
249 } else {
250 // We already have an ArpClient instance running. These aren't
251 // bound sockets in the conventional sense, and we cannot distinguish
252 // which request (from which trial, or even from which component
253 // in the local system) an ARP reply was sent in response to.
254 // Therefore we keep the already open ArpClient in the case of
255 // a non-fatal timeout.
256 }
257
258 ByteString destination_mac_address(gateway_mac_address_.GetLength());
Paul Stewart9f7823e2012-08-09 10:58:26 -0700259 if (!IsGatewayFound()) {
Paul Stewart6c72c972012-07-27 11:29:20 -0700260 // The remote MAC addess is set by convention to be all-zeroes in the
261 // ARP header if not known. The ArpClient will translate an all-zeroes
262 // remote address into a send to the broadcast (all-ones) address in
263 // the Ethernet frame header.
264 SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
265 << "gateway MAC is unknown";
266 is_unicast_ = false;
267 } else if (is_unicast_) {
268 destination_mac_address = gateway_mac_address_;
269 }
270
271 ArpPacket request(connection_->local(), connection_->gateway(),
272 local_mac_address_, destination_mac_address);
273 if (!arp_client_->TransmitRequest(request)) {
274 LOG(ERROR) << "Failed to send ARP request. Stopping.";
275 Stop();
Paul Stewartff845fc2012-08-07 07:28:44 -0700276 metrics_->NotifyLinkMonitorFailure(
Paul Stewart0443aa52012-08-09 10:43:50 -0700277 connection_->technology(), Metrics::kLinkMonitorTransmitFailure,
278 0, 0, 0);
Paul Stewart6c72c972012-07-27 11:29:20 -0700279 return false;
280 }
281
282 time_->GetTimeMonotonic(&sent_request_at_);
283
284 dispatcher_->PostDelayedTask(send_request_callback_.callback(),
285 kTestPeriodMilliseconds);
286 return true;
287}
288
289void LinkMonitor::SendRequestTask() {
290 SendRequest();
Paul Stewart3f43f432012-07-16 12:12:45 -0700291}
292
293} // namespace shill