blob: 52ba738253b6e48ce51a4523dbe89bc969d3b1a1 [file] [log] [blame]
// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "shill/connection_health_checker.h"
6
7#include <arpa/inet.h>
8#include <netinet/in.h>
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -07009#include <stdlib.h>
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070010#include <sys/socket.h>
11#include <sys/types.h>
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070012#include <time.h>
13
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070014#include <vector>
15
16#include <base/bind.h>
17
18#include "shill/async_connection.h"
19#include "shill/connection.h"
20#include "shill/dns_client.h"
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070021#include "shill/dns_client_factory.h"
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070022#include "shill/error.h"
23#include "shill/http_url.h"
24#include "shill/ip_address.h"
25#include "shill/logging.h"
26#include "shill/sockets.h"
27#include "shill/socket_info.h"
28#include "shill/socket_info_reader.h"
29
30using base::Bind;
31using std::string;
32using std::vector;
33
34namespace shill {
35
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070036//static
37const int ConnectionHealthChecker::kDNSTimeoutMilliseconds = 5000;
38//static
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070039const int ConnectionHealthChecker::kMaxConnectionAttempts = 3;
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070040//static
41const int ConnectionHealthChecker::kNumDNSQueries = 5;
42//static
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070043const uint16 ConnectionHealthChecker::kRemotePort = 80;
44
45ConnectionHealthChecker::ConnectionHealthChecker(
46 ConnectionRefPtr connection,
47 EventDispatcher *dispatcher,
48 const base::Callback<void(Result)> &result_callback)
49 : connection_(connection),
50 dispatcher_(dispatcher),
51 result_callback_(result_callback),
52 socket_info_reader_(new SocketInfoReader()),
53 socket_(new Sockets()),
54 weak_ptr_factory_(this),
55 connection_complete_callback_(
56 Bind(&ConnectionHealthChecker::OnConnectionComplete,
57 weak_ptr_factory_.GetWeakPtr())),
58 dns_client_callback_(Bind(&ConnectionHealthChecker::GetDNSResult,
59 weak_ptr_factory_.GetWeakPtr())),
60 tcp_connection_(new AsyncConnection(connection_->interface_name(),
61 dispatcher_,
62 socket_.get(),
63 connection_complete_callback_)),
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070064 dns_client_factory_(DNSClientFactory::GetInstance()),
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070065 run_data_test_(true),
66 health_check_in_progress_(false),
67 num_connection_attempts_(0) {}
68
69ConnectionHealthChecker::~ConnectionHealthChecker() {
70 Stop();
71}
72
73void ConnectionHealthChecker::AddRemoteIP(IPAddress ip) {
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070074 remote_ips_.push_back(ip);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070075}
76
77void ConnectionHealthChecker::AddRemoteURL(const string &url_string) {
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070078 GarbageCollectDNSClients();
79
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070080 HTTPURL url;
81 if (!url.ParseFromString(url_string)) {
82 SLOG(Connection, 2) << __func__ << ": Malformed url: " << url_string << ".";
83 return;
84 }
85 if (url.port() != kRemotePort) {
86 SLOG(Connection, 2) << __func__ << ": Remote connections only supported "
87 << " to port 80, requested " << url.port() << ".";
88 return;
89 }
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070090 for (int i = 0; i < kNumDNSQueries; ++i) {
91 Error error;
92 DNSClient *dns_client =
93 dns_client_factory_->CreateDNSClient(IPAddress::kFamilyIPv4,
94 connection_->interface_name(),
95 connection_->dns_servers(),
96 kDNSTimeoutMilliseconds,
97 dispatcher_,
98 dns_client_callback_);
99 dns_clients_.push_back(dns_client);
100 if (!dns_clients_[i]->Start(url.host(), &error)) {
101 SLOG(Connection, 2) << __func__ << ": Failed to start DNS client "
102 << "(query #" << i << "): "
103 << error.message();
104 }
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700105 }
106}
107
108void ConnectionHealthChecker::Start() {
109 if (health_check_in_progress_) {
110 SLOG(Connection, 2) << __func__ << ": Health Check already in progress.";
111 return;
112 }
113 if (!connection_.get()) {
114 SLOG(Connection, 2) << __func__ << ": Connection not ready yet.";
115 result_callback_.Run(kResultUnknown);
116 return;
117 }
118
119 health_check_in_progress_ = true;
120 num_connection_attempts_ = 0;
121
122 // Initiate the first attempt.
123 if (remote_ips_.empty()) {
124 // Nothing to try.
125 Stop();
126 SLOG(Connection, 2) << __func__ << ": Not enough IPs.";
127 result_callback_.Run(kResultUnknown);
128 return;
129 }
130 SetupTcpConnection();
131}
132
133void ConnectionHealthChecker::Stop() {
134 if (tcp_connection_ != NULL)
135 tcp_connection_->Stop();
136 health_check_in_progress_ = false;
137}
138
Prathmesh Prabhu5489b7a2013-04-10 13:33:59 -0700139const char *ConnectionHealthChecker::ResultToString(
140 ConnectionHealthChecker::Result result) {
141 switch(result) {
142 case kResultUnknown:
143 return "Unknown";
144 case kResultInProgress:
145 return "InProgress";
146 case kResultConnectionFailure:
147 return "ConnectionFailure";
148 case kResultElongatedTimeWait:
149 return "ElongatedTimeWait";
150 case kResultCongestedTxQueue:
151 return "CongestedTxQueue";
152 case kResultSuccess:
153 return "Success";
154 default:
155 return "Invalid";
156 }
157}
158
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700159void ConnectionHealthChecker::SetupTcpConnection() {
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700160 // Pick a random IP from the set of IPs.
161 // This guards against
162 // (1) Repeated failed attempts for the same IP at start-up everytime.
163 // (2) All users attempting to connect to the same IP.
164 int next_ip_index = rand() % remote_ips_.size();
165 const IPAddress &ip = remote_ips_[next_ip_index];
Arman Ugurayf84a4242013-04-09 20:01:07 -0700166 SLOG(Connection, 3) << __func__ << ": Starting connection at "
167 << ip.ToString();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700168 if (tcp_connection_->Start(ip, kRemotePort)) {
169 // TCP connection successful, no need to try more.
170 return;
171 }
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700172 SLOG(Connection, 2) << __func__ << ": Connection attempt failed.";
173 TryNextIP();
174}
175
176void ConnectionHealthChecker::OnConnectionComplete(bool success, int sock_fd) {
177 if (!success) {
178 SLOG(Connection, 2) << __func__
179 << ": AsyncConnection connection attempt failed.";
180 TryNextIP(); // Make sure TryNextIP() is the last statement.
181 return;
182 }
183 // Transferred owndership of valid sock_fd.
184
185 // Check if the established connection is healthy.
186 Result result = run_data_test_ ? SendData(sock_fd) : ShutDown(sock_fd);
187
188 // The health check routine(s) may further indicate a problem requiring a
189 // reattempt.
190 if (result == kResultConnectionFailure || result == kResultUnknown) {
191 socket_->Close(sock_fd);
192 TryNextIP(); // Make sure TryNextIP() is the last statement.
193 } else {
194 socket_->Close(sock_fd);
195 Stop();
196 result_callback_.Run(result); // Make sure this is the last statement.
197 }
198}
199
200void ConnectionHealthChecker::GetDNSResult(const Error &error,
201 const IPAddress& ip) {
202 if (!error.IsSuccess()) {
203 SLOG(Connection, 2) << __func__ << "DNSClient returned failure: "
204 << error.message();
205 return;
206 }
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700207 // Insert ip into the list of cached IP addresses, if not already present.
208 for (IPAddresses::size_type i = 0; i < remote_ips_.size(); ++i)
209 if (remote_ips_[i].Equals(ip))
210 return;
211 remote_ips_.push_back(ip);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700212}
213
214void ConnectionHealthChecker::TryNextIP() {
215 ++num_connection_attempts_;
216 // Check if enough attempts have been made already.
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700217 if (num_connection_attempts_ >= kMaxConnectionAttempts) {
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700218 LOG(INFO) << __func__
219 << ": multiple failed attempts to established a TCP connection.";
220 // Give up. Clean up and notify client.
221 Stop();
222 result_callback_.Run(kResultConnectionFailure);
223 return;
224 }
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700225 SetupTcpConnection();
226}
227
228// Send data on the connection and observe the TxCount.
229ConnectionHealthChecker::Result ConnectionHealthChecker::SendData(int sock_fd) {
230 SocketInfo sock_info;
231 uint64 old_transmit_queue_value;
232 if (!GetSocketInfo(sock_fd, &sock_info) ||
233 sock_info.connection_state() !=
234 SocketInfo::kConnectionStateEstablished) {
235 SLOG(Connection, 2) << __func__
236 << ": Connection originally not in established state..";
237 // Count this as a failed connection attempt.
238 return kResultUnknown;
239 }
240 old_transmit_queue_value = sock_info.transmit_queue_value();
241
242 char buf;
243 if (socket_->Send(sock_fd, &buf, sizeof(buf), 0) == -1) {
244 SLOG(Connection, 2) << __func__ << ": " << socket_->ErrorString();
245 // Count this as a failed connection attempt.
246 return kResultConnectionFailure;
247 }
248
249 // Wait to give enough time for the TxCount to be updated.
250 // TODO(pprabhu) Check that this is reliable wrt timing effects.
251 if (!GetSocketInfo(sock_fd, &sock_info) ||
252 sock_info.connection_state() !=
253 SocketInfo::kConnectionStateEstablished) {
254 SLOG(Connection, 2) << __func__
255 << ": Connection not in established state after send.";
256 // Count this as a failed connection attempt.
257 return kResultUnknown;
258 }
259
260 if (sock_info.transmit_queue_value() > old_transmit_queue_value) {
261 return kResultCongestedTxQueue;
262 }
263
264 return kResultSuccess;
265}
266
267// Attempt to shutdown the connection and check if the connection is stuck in
268// the TIME_WAIT tcp state.
269ConnectionHealthChecker::Result ConnectionHealthChecker::ShutDown(int sock_fd) {
270 if (socket_->ShutDown(sock_fd, SHUT_RDWR) == -1) {
271 SLOG(Connection, 2) << __func__
272 << ": Failed to cleanly shut down the connection.";
273 // Count this as a failed connection attempt.
274 return kResultUnknown;
275 }
276 // Wait to give enough time for a normal TCP shutdown?
277 // TODO(pprabhu) Check that this is reliable wrt timing effects.
278
279 SocketInfo sock_info;
280 if (!GetSocketInfo(sock_fd, &sock_info)) {
281 // The TCP socket for the connection has been cleaned.
282 // This means ShutDown was successful.
283 return kResultSuccess;
284 }
285 if (sock_info.connection_state() == SocketInfo::kConnectionStateFinWait1 ||
286 sock_info.connection_state() == SocketInfo::kConnectionStateFinWait2 ||
287 sock_info.connection_state() == SocketInfo::kConnectionStateTimeWait)
288 return kResultElongatedTimeWait;
289
290 return kResultUnknown;
291}
292
293//TODO(pprabhu): Scrub IP address logging.
294bool ConnectionHealthChecker::GetSocketInfo(int sock_fd,
295 SocketInfo *sock_info) {
296 struct sockaddr_storage addr;
297 socklen_t addrlen = sizeof(addr);
298 memset(&addr, 0, sizeof(addr));
299 if (socket_->GetSockName(sock_fd,
300 reinterpret_cast<struct sockaddr *>(&addr),
301 &addrlen) != 0) {
302 SLOG(Connection, 2) << __func__
303 << ": Failed to get address of created socket.";
304 return false;
305 }
306 if (addr.ss_family != AF_INET) {
307 SLOG(Connection, 2) << __func__ << ": IPv6 socket address found.";
308 return false;
309 }
310
311 CHECK_EQ(sizeof(struct sockaddr_in), addrlen);
312 struct sockaddr_in *addr_in = reinterpret_cast<sockaddr_in *>(&addr);
313 uint16 local_port = ntohs(addr_in->sin_port);
314 char ipstr[INET_ADDRSTRLEN];
315 const char *res = inet_ntop(AF_INET, &addr_in->sin_addr,
316 ipstr, sizeof(ipstr));
317 if (res == NULL) {
318 SLOG(Connection, 2) << __func__
319 << ": Could not convert IP address to string.";
320 return false;
321 }
322
323 IPAddress local_ip_address(IPAddress::kFamilyIPv4);
324 CHECK(local_ip_address.SetAddressFromString(ipstr));
325 SLOG(Connection, 3) << "Local IP = " << local_ip_address.ToString()
326 << ":" << local_port;
327
328 vector<SocketInfo> info_list;
329 if (!socket_info_reader_->LoadTcpSocketInfo(&info_list)) {
330 SLOG(Connection, 2) << __func__ << ": Failed to load TCP socket info.";
331 return false;
332 }
333
334 for (vector<SocketInfo>::const_iterator info_list_it = info_list.begin();
335 info_list_it != info_list.end();
336 ++info_list_it) {
337 const SocketInfo &cur_sock_info = *info_list_it;
338
339 SLOG(Connection, 3)
340 << "Testing against IP = "
341 << cur_sock_info.local_ip_address().ToString()
342 << ":" << cur_sock_info.local_port()
343 << " (addresses equal:"
344 << cur_sock_info.local_ip_address().Equals(local_ip_address)
345 << ", ports equal:" << (cur_sock_info.local_port() == local_port)
346 << ")";
347
348 if (cur_sock_info.local_ip_address().Equals(local_ip_address) &&
349 cur_sock_info.local_port() == local_port) {
350 // Copy SocketInfo.
351 *sock_info = cur_sock_info;
352 return true;
353 }
354 }
355
356 SLOG(Connection, 2) << __func__ << ": No matching TCP socket info.";
357 return false;
358}
359
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700360void ConnectionHealthChecker::GarbageCollectDNSClients() {
361 ScopedVector<DNSClient> keep;
362 ScopedVector<DNSClient> discard;
363 for (size_t i = 0; i < dns_clients_.size(); ++i) {
364 if (dns_clients_[i]->IsActive())
365 keep.push_back(dns_clients_[i]);
366 else
367 discard.push_back(dns_clients_[i]);
368 }
369 dns_clients_.weak_clear();
370 dns_clients_ = keep.Pass(); // Passes ownership of contents.
371 discard.clear();
372}
373
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700374} // namespace shill