blob: 77261a178deb8417d057f55241158dc01efc1e53 [file] [log] [blame]
Prathmesh Prabhu40daa012013-04-03 10:35:03 -07001// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "shill/connection_health_checker.h"
6
7#include <arpa/inet.h>
8#include <netinet/in.h>
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -07009#include <stdlib.h>
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070010#include <sys/socket.h>
11#include <sys/types.h>
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070012#include <time.h>
13
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070014#include <vector>
15
16#include <base/bind.h>
17
18#include "shill/async_connection.h"
19#include "shill/connection.h"
20#include "shill/dns_client.h"
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070021#include "shill/dns_client_factory.h"
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070022#include "shill/error.h"
23#include "shill/http_url.h"
24#include "shill/ip_address.h"
Prathmesh Prabhuba99b592013-04-17 15:13:14 -070025#include "shill/ip_address_store.h"
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070026#include "shill/logging.h"
27#include "shill/sockets.h"
28#include "shill/socket_info.h"
29#include "shill/socket_info_reader.h"
30
31using base::Bind;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070032using base::Unretained;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070033using std::string;
34using std::vector;
35
36namespace shill {
37
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070038// static
Prathmesh Prabhuba99b592013-04-17 15:13:14 -070039const char *ConnectionHealthChecker::kDefaultRemoteIPPool[] = {
40 "74.125.224.47",
41 "74.125.224.79",
42 "74.125.224.111",
43 "74.125.224.143"
44};
45// static
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070046const int ConnectionHealthChecker::kDNSTimeoutMilliseconds = 5000;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070047// static
48const int ConnectionHealthChecker::kInvalidSocket = -1;
49// static
50const int ConnectionHealthChecker::kMaxFailedConnectionAttempts = 2;
51// static
52const int ConnectionHealthChecker::kMaxSentDataPollingAttempts = 2;
53// static
54const int ConnectionHealthChecker::kMinCongestedQueueAttempts = 2;
55// static
56const int ConnectionHealthChecker::kMinSuccessfulSendAttempts = 1;
57// static
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070058const int ConnectionHealthChecker::kNumDNSQueries = 5;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070059// static
60const int ConnectionHealthChecker::kTCPStateUpdateWaitMilliseconds = 5000;
61// static
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070062const uint16 ConnectionHealthChecker::kRemotePort = 80;
63
64ConnectionHealthChecker::ConnectionHealthChecker(
65 ConnectionRefPtr connection,
66 EventDispatcher *dispatcher,
Prathmesh Prabhuba99b592013-04-17 15:13:14 -070067 IPAddressStore *remote_ips,
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070068 const base::Callback<void(Result)> &result_callback)
69 : connection_(connection),
70 dispatcher_(dispatcher),
Prathmesh Prabhuba99b592013-04-17 15:13:14 -070071 remote_ips_(remote_ips),
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070072 result_callback_(result_callback),
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070073 socket_(new Sockets()),
74 weak_ptr_factory_(this),
75 connection_complete_callback_(
76 Bind(&ConnectionHealthChecker::OnConnectionComplete,
77 weak_ptr_factory_.GetWeakPtr())),
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070078 tcp_connection_(new AsyncConnection(connection_->interface_name(),
79 dispatcher_,
80 socket_.get(),
81 connection_complete_callback_)),
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070082 report_result_(
83 Bind(&ConnectionHealthChecker::ReportResult,
84 weak_ptr_factory_.GetWeakPtr())),
85 sock_fd_(kInvalidSocket),
86 socket_info_reader_(new SocketInfoReader()),
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -070087 dns_client_factory_(DNSClientFactory::GetInstance()),
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070088 dns_client_callback_(Bind(&ConnectionHealthChecker::GetDNSResult,
89 weak_ptr_factory_.GetWeakPtr())),
Prathmesh Prabhu40daa012013-04-03 10:35:03 -070090 health_check_in_progress_(false),
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -070091 num_connection_failures_(0),
92 num_congested_queue_detected_(0),
Prathmesh Prabhu81404c62013-05-08 17:04:28 -070093 num_successful_sends_(0),
94 tcp_state_update_wait_milliseconds_(kTCPStateUpdateWaitMilliseconds) {
Prathmesh Prabhuba99b592013-04-17 15:13:14 -070095 for (size_t i = 0; i < arraysize(kDefaultRemoteIPPool); ++i) {
96 const char *ip_string = kDefaultRemoteIPPool[i];
97 IPAddress ip(IPAddress::kFamilyIPv4);
98 ip.SetAddressFromString(ip_string);
99 remote_ips_->AddUnique(ip);
100 }
101}
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700102
103ConnectionHealthChecker::~ConnectionHealthChecker() {
104 Stop();
105}
106
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700107bool ConnectionHealthChecker::health_check_in_progress() const {
108 return health_check_in_progress_;
109}
110
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700111void ConnectionHealthChecker::AddRemoteIP(IPAddress ip) {
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700112 remote_ips_->AddUnique(ip);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700113}
114
115void ConnectionHealthChecker::AddRemoteURL(const string &url_string) {
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700116 GarbageCollectDNSClients();
117
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700118 HTTPURL url;
119 if (!url.ParseFromString(url_string)) {
120 SLOG(Connection, 2) << __func__ << ": Malformed url: " << url_string << ".";
121 return;
122 }
123 if (url.port() != kRemotePort) {
124 SLOG(Connection, 2) << __func__ << ": Remote connections only supported "
125 << " to port 80, requested " << url.port() << ".";
126 return;
127 }
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700128 for (int i = 0; i < kNumDNSQueries; ++i) {
129 Error error;
130 DNSClient *dns_client =
131 dns_client_factory_->CreateDNSClient(IPAddress::kFamilyIPv4,
132 connection_->interface_name(),
133 connection_->dns_servers(),
134 kDNSTimeoutMilliseconds,
135 dispatcher_,
136 dns_client_callback_);
137 dns_clients_.push_back(dns_client);
138 if (!dns_clients_[i]->Start(url.host(), &error)) {
139 SLOG(Connection, 2) << __func__ << ": Failed to start DNS client "
140 << "(query #" << i << "): "
141 << error.message();
142 }
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700143 }
144}
145
146void ConnectionHealthChecker::Start() {
147 if (health_check_in_progress_) {
148 SLOG(Connection, 2) << __func__ << ": Health Check already in progress.";
149 return;
150 }
151 if (!connection_.get()) {
152 SLOG(Connection, 2) << __func__ << ": Connection not ready yet.";
153 result_callback_.Run(kResultUnknown);
154 return;
155 }
156
157 health_check_in_progress_ = true;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700158 num_connection_failures_ = 0;
159 num_congested_queue_detected_ = 0;
160 num_successful_sends_ = 0;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700161
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700162 if (remote_ips_->Empty()) {
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700163 // Nothing to try.
164 Stop();
165 SLOG(Connection, 2) << __func__ << ": Not enough IPs.";
166 result_callback_.Run(kResultUnknown);
167 return;
168 }
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700169
170 // Initiate the first attempt.
171 NextHealthCheckSample();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700172}
173
174void ConnectionHealthChecker::Stop() {
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700175 if (tcp_connection_.get() != NULL)
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700176 tcp_connection_->Stop();
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700177 verify_sent_data_callback_.Cancel();
178 ClearSocketDescriptor();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700179 health_check_in_progress_ = false;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700180 num_connection_failures_ = 0;
181 num_congested_queue_detected_ = 0;
182 num_successful_sends_ = 0;
183 num_tx_queue_polling_attempts_ = 0;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700184}
185
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700186void ConnectionHealthChecker::SetConnection(ConnectionRefPtr connection) {
187 SLOG(Connection, 3) << __func__;
188 connection_ = connection;
189 tcp_connection_.reset(new AsyncConnection(connection_->interface_name(),
190 dispatcher_,
191 socket_.get(),
192 connection_complete_callback_));
193 dns_clients_.clear();
194 bool restart = health_check_in_progress();
195 Stop();
196 if (restart)
197 Start();
198}
199
Prathmesh Prabhu5489b7a2013-04-10 13:33:59 -0700200const char *ConnectionHealthChecker::ResultToString(
201 ConnectionHealthChecker::Result result) {
202 switch(result) {
203 case kResultUnknown:
204 return "Unknown";
Prathmesh Prabhu5489b7a2013-04-10 13:33:59 -0700205 case kResultConnectionFailure:
206 return "ConnectionFailure";
Prathmesh Prabhu5489b7a2013-04-10 13:33:59 -0700207 case kResultCongestedTxQueue:
208 return "CongestedTxQueue";
209 case kResultSuccess:
210 return "Success";
211 default:
212 return "Invalid";
213 }
214}
215
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700216void ConnectionHealthChecker::GetDNSResult(const Error &error,
217 const IPAddress& ip) {
218 if (!error.IsSuccess()) {
219 SLOG(Connection, 2) << __func__ << "DNSClient returned failure: "
220 << error.message();
221 return;
222 }
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700223 remote_ips_->AddUnique(ip);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700224}
225
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700226void ConnectionHealthChecker::GarbageCollectDNSClients() {
227 ScopedVector<DNSClient> keep;
228 ScopedVector<DNSClient> discard;
229 for (size_t i = 0; i < dns_clients_.size(); ++i) {
230 if (dns_clients_[i]->IsActive())
231 keep.push_back(dns_clients_[i]);
232 else
233 discard.push_back(dns_clients_[i]);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700234 }
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700235 dns_clients_.weak_clear();
236 dns_clients_ = keep.Pass(); // Passes ownership of contents.
237 discard.clear();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700238}
239
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700240void ConnectionHealthChecker::NextHealthCheckSample() {
241 // Finish conditions:
242 if (num_connection_failures_ == kMaxFailedConnectionAttempts) {
243 health_check_result_ = kResultConnectionFailure;
244 dispatcher_->PostTask(report_result_);
245 return;
246 }
247 if (num_congested_queue_detected_ == kMinCongestedQueueAttempts) {
248 health_check_result_ = kResultCongestedTxQueue;
249 dispatcher_->PostTask(report_result_);
250 return;
251 }
252 if (num_successful_sends_ == kMinSuccessfulSendAttempts) {
253 health_check_result_ = kResultSuccess;
254 dispatcher_->PostTask(report_result_);
255 return;
256 }
257
258 // Pick a random IP from the set of IPs.
259 // This guards against
260 // (1) Repeated failed attempts for the same IP at start-up everytime.
261 // (2) All users attempting to connect to the same IP.
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700262 IPAddress ip = remote_ips_->GetRandomIP();
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700263 SLOG(Connection, 3) << __func__ << ": Starting connection at "
264 << ip.ToString();
265 if (!tcp_connection_->Start(ip, kRemotePort)) {
266 SLOG(Connection, 2) << __func__ << ": Connection attempt failed.";
267 ++num_connection_failures_;
268 NextHealthCheckSample();
269 }
270}
271
272void ConnectionHealthChecker::OnConnectionComplete(bool success, int sock_fd) {
273 if (!success) {
274 SLOG(Connection, 2) << __func__
Prathmesh Prabhuba99b592013-04-17 15:13:14 -0700275 << ": AsyncConnection connection attempt failed "
276 << "with error: "
277 << tcp_connection_->error();
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700278 ++num_connection_failures_;
279 NextHealthCheckSample();
280 return;
281 }
282
283 SetSocketDescriptor(sock_fd);
284
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700285 SocketInfo sock_info;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700286 if (!GetSocketInfo(sock_fd_, &sock_info) ||
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700287 sock_info.connection_state() !=
288 SocketInfo::kConnectionStateEstablished) {
289 SLOG(Connection, 2) << __func__
290 << ": Connection originally not in established state..";
291 // Count this as a failed connection attempt.
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700292 ++num_connection_failures_;
293 ClearSocketDescriptor();
294 NextHealthCheckSample();
295 return;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700296 }
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700297
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700298 old_transmit_queue_value_ = sock_info.transmit_queue_value();
299 num_tx_queue_polling_attempts_ = 0;
300
301 // Send data on the connection and post a delayed task to check successful
302 // transfer.
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700303 char buf;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700304 if (socket_->Send(sock_fd_, &buf, sizeof(buf), 0) == -1) {
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700305 SLOG(Connection, 2) << __func__ << ": " << socket_->ErrorString();
306 // Count this as a failed connection attempt.
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700307 ++num_connection_failures_;
308 ClearSocketDescriptor();
309 NextHealthCheckSample();
310 return;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700311 }
312
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700313 verify_sent_data_callback_.Reset(
314 Bind(&ConnectionHealthChecker::VerifySentData, Unretained(this)));
315 dispatcher_->PostDelayedTask(verify_sent_data_callback_.callback(),
Prathmesh Prabhu81404c62013-05-08 17:04:28 -0700316 tcp_state_update_wait_milliseconds_);
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700317}
318
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700319void ConnectionHealthChecker::VerifySentData() {
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700320 SocketInfo sock_info;
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700321 bool sock_info_found = GetSocketInfo(sock_fd_, &sock_info);
322 // Acceptable TCP connection states after sending the data:
323 // kConnectionStateEstablished: No change in connection state since the send.
324 // kConnectionStateCloseWait: The remote host recieved the sent data and
325 // requested connection close.
326 if (!sock_info_found ||
327 (sock_info.connection_state() !=
328 SocketInfo::kConnectionStateEstablished &&
329 sock_info.connection_state() !=
330 SocketInfo::kConnectionStateCloseWait)) {
331 SLOG(Connection, 2) << __func__
332 << ": Connection not in acceptable state after send.";
333 if (sock_info_found)
334 SLOG(Connection, 3) << "Found socket info but in state: "
335 << sock_info.connection_state();
336 ++num_connection_failures_;
337 } else if (sock_info.transmit_queue_value() > old_transmit_queue_value_ &&
338 sock_info.timer_state() ==
339 SocketInfo::kTimerStateRetransmitTimerPending) {
340 if (num_tx_queue_polling_attempts_ < kMaxSentDataPollingAttempts) {
341 SLOG(Connection, 2) << __func__
342 << ": Polling again.";
343 ++num_tx_queue_polling_attempts_;
344 verify_sent_data_callback_.Reset(
345 Bind(&ConnectionHealthChecker::VerifySentData, Unretained(this)));
346 dispatcher_->PostDelayedTask(verify_sent_data_callback_.callback(),
Prathmesh Prabhu81404c62013-05-08 17:04:28 -0700347 tcp_state_update_wait_milliseconds_);
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700348 return;
349 }
350 SLOG(Connection, 2) << __func__ << ": Sampled congested Tx-Queue";
351 ++num_congested_queue_detected_;
352 } else {
353 SLOG(Connection, 2) << __func__ << ": Sampled successful send.";
354 ++num_successful_sends_;
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700355 }
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700356 ClearSocketDescriptor();
357 NextHealthCheckSample();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700358}
359
360//TODO(pprabhu): Scrub IP address logging.
361bool ConnectionHealthChecker::GetSocketInfo(int sock_fd,
362 SocketInfo *sock_info) {
363 struct sockaddr_storage addr;
364 socklen_t addrlen = sizeof(addr);
365 memset(&addr, 0, sizeof(addr));
366 if (socket_->GetSockName(sock_fd,
367 reinterpret_cast<struct sockaddr *>(&addr),
368 &addrlen) != 0) {
369 SLOG(Connection, 2) << __func__
370 << ": Failed to get address of created socket.";
371 return false;
372 }
373 if (addr.ss_family != AF_INET) {
374 SLOG(Connection, 2) << __func__ << ": IPv6 socket address found.";
375 return false;
376 }
377
378 CHECK_EQ(sizeof(struct sockaddr_in), addrlen);
379 struct sockaddr_in *addr_in = reinterpret_cast<sockaddr_in *>(&addr);
380 uint16 local_port = ntohs(addr_in->sin_port);
381 char ipstr[INET_ADDRSTRLEN];
382 const char *res = inet_ntop(AF_INET, &addr_in->sin_addr,
383 ipstr, sizeof(ipstr));
384 if (res == NULL) {
385 SLOG(Connection, 2) << __func__
386 << ": Could not convert IP address to string.";
387 return false;
388 }
389
390 IPAddress local_ip_address(IPAddress::kFamilyIPv4);
391 CHECK(local_ip_address.SetAddressFromString(ipstr));
392 SLOG(Connection, 3) << "Local IP = " << local_ip_address.ToString()
393 << ":" << local_port;
394
395 vector<SocketInfo> info_list;
396 if (!socket_info_reader_->LoadTcpSocketInfo(&info_list)) {
397 SLOG(Connection, 2) << __func__ << ": Failed to load TCP socket info.";
398 return false;
399 }
400
401 for (vector<SocketInfo>::const_iterator info_list_it = info_list.begin();
402 info_list_it != info_list.end();
403 ++info_list_it) {
404 const SocketInfo &cur_sock_info = *info_list_it;
405
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700406 SLOG(Connection, 4)
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700407 << "Testing against IP = "
408 << cur_sock_info.local_ip_address().ToString()
409 << ":" << cur_sock_info.local_port()
410 << " (addresses equal:"
411 << cur_sock_info.local_ip_address().Equals(local_ip_address)
412 << ", ports equal:" << (cur_sock_info.local_port() == local_port)
413 << ")";
414
415 if (cur_sock_info.local_ip_address().Equals(local_ip_address) &&
416 cur_sock_info.local_port() == local_port) {
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700417 SLOG(Connection, 3) << __func__ << ": Found matching TCP socket info.";
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700418 *sock_info = cur_sock_info;
419 return true;
420 }
421 }
422
423 SLOG(Connection, 2) << __func__ << ": No matching TCP socket info.";
424 return false;
425}
426
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700427void ConnectionHealthChecker::ReportResult() {
428 SLOG(Connection, 2) << __func__ << ": Result: "
429 << ResultToString(health_check_result_);
430 Stop();
431 result_callback_.Run(health_check_result_);
432}
433
434void ConnectionHealthChecker::SetSocketDescriptor(int sock_fd) {
435 if (sock_fd_ != kInvalidSocket) {
436 SLOG(Connection, 4) << "Closing socket";
437 socket_->Close(sock_fd_);
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700438 }
Prathmesh Prabhuade9b9a2013-04-22 18:01:19 -0700439 sock_fd_ = sock_fd;
440}
441
442void ConnectionHealthChecker::ClearSocketDescriptor() {
443 SetSocketDescriptor(kInvalidSocket);
Prathmesh Prabhu3e452f82013-04-10 16:31:44 -0700444}
445
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700446} // namespace shill