Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 1 | // Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "shill/connection_health_checker.h" |
| 6 | |
| 7 | #include <arpa/inet.h> |
| 8 | #include <netinet/in.h> |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 9 | #include <stdlib.h> |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 10 | #include <sys/socket.h> |
| 11 | #include <sys/types.h> |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 12 | #include <time.h> |
| 13 | |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 14 | #include <vector> |
| 15 | |
| 16 | #include <base/bind.h> |
| 17 | |
| 18 | #include "shill/async_connection.h" |
| 19 | #include "shill/connection.h" |
| 20 | #include "shill/dns_client.h" |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 21 | #include "shill/dns_client_factory.h" |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 22 | #include "shill/error.h" |
| 23 | #include "shill/http_url.h" |
| 24 | #include "shill/ip_address.h" |
| 25 | #include "shill/logging.h" |
| 26 | #include "shill/sockets.h" |
| 27 | #include "shill/socket_info.h" |
| 28 | #include "shill/socket_info_reader.h" |
| 29 | |
| 30 | using base::Bind; |
| 31 | using std::string; |
| 32 | using std::vector; |
| 33 | |
| 34 | namespace shill { |
| 35 | |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 36 | //static |
| 37 | const int ConnectionHealthChecker::kDNSTimeoutMilliseconds = 5000; |
| 38 | //static |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 39 | const int ConnectionHealthChecker::kMaxConnectionAttempts = 3; |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 40 | //static |
| 41 | const int ConnectionHealthChecker::kNumDNSQueries = 5; |
| 42 | //static |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 43 | const uint16 ConnectionHealthChecker::kRemotePort = 80; |
| 44 | |
| 45 | ConnectionHealthChecker::ConnectionHealthChecker( |
| 46 | ConnectionRefPtr connection, |
| 47 | EventDispatcher *dispatcher, |
| 48 | const base::Callback<void(Result)> &result_callback) |
| 49 | : connection_(connection), |
| 50 | dispatcher_(dispatcher), |
| 51 | result_callback_(result_callback), |
| 52 | socket_info_reader_(new SocketInfoReader()), |
| 53 | socket_(new Sockets()), |
| 54 | weak_ptr_factory_(this), |
| 55 | connection_complete_callback_( |
| 56 | Bind(&ConnectionHealthChecker::OnConnectionComplete, |
| 57 | weak_ptr_factory_.GetWeakPtr())), |
| 58 | dns_client_callback_(Bind(&ConnectionHealthChecker::GetDNSResult, |
| 59 | weak_ptr_factory_.GetWeakPtr())), |
| 60 | tcp_connection_(new AsyncConnection(connection_->interface_name(), |
| 61 | dispatcher_, |
| 62 | socket_.get(), |
| 63 | connection_complete_callback_)), |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 64 | dns_client_factory_(DNSClientFactory::GetInstance()), |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 65 | run_data_test_(true), |
| 66 | health_check_in_progress_(false), |
| 67 | num_connection_attempts_(0) {} |
| 68 | |
| 69 | ConnectionHealthChecker::~ConnectionHealthChecker() { |
| 70 | Stop(); |
| 71 | } |
| 72 | |
| 73 | void ConnectionHealthChecker::AddRemoteIP(IPAddress ip) { |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 74 | remote_ips_.push_back(ip); |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 75 | } |
| 76 | |
| 77 | void ConnectionHealthChecker::AddRemoteURL(const string &url_string) { |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 78 | GarbageCollectDNSClients(); |
| 79 | |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 80 | HTTPURL url; |
| 81 | if (!url.ParseFromString(url_string)) { |
| 82 | SLOG(Connection, 2) << __func__ << ": Malformed url: " << url_string << "."; |
| 83 | return; |
| 84 | } |
| 85 | if (url.port() != kRemotePort) { |
| 86 | SLOG(Connection, 2) << __func__ << ": Remote connections only supported " |
| 87 | << " to port 80, requested " << url.port() << "."; |
| 88 | return; |
| 89 | } |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 90 | for (int i = 0; i < kNumDNSQueries; ++i) { |
| 91 | Error error; |
| 92 | DNSClient *dns_client = |
| 93 | dns_client_factory_->CreateDNSClient(IPAddress::kFamilyIPv4, |
| 94 | connection_->interface_name(), |
| 95 | connection_->dns_servers(), |
| 96 | kDNSTimeoutMilliseconds, |
| 97 | dispatcher_, |
| 98 | dns_client_callback_); |
| 99 | dns_clients_.push_back(dns_client); |
| 100 | if (!dns_clients_[i]->Start(url.host(), &error)) { |
| 101 | SLOG(Connection, 2) << __func__ << ": Failed to start DNS client " |
| 102 | << "(query #" << i << "): " |
| 103 | << error.message(); |
| 104 | } |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 105 | } |
| 106 | } |
| 107 | |
| 108 | void ConnectionHealthChecker::Start() { |
| 109 | if (health_check_in_progress_) { |
| 110 | SLOG(Connection, 2) << __func__ << ": Health Check already in progress."; |
| 111 | return; |
| 112 | } |
| 113 | if (!connection_.get()) { |
| 114 | SLOG(Connection, 2) << __func__ << ": Connection not ready yet."; |
| 115 | result_callback_.Run(kResultUnknown); |
| 116 | return; |
| 117 | } |
| 118 | |
| 119 | health_check_in_progress_ = true; |
| 120 | num_connection_attempts_ = 0; |
| 121 | |
| 122 | // Initiate the first attempt. |
| 123 | if (remote_ips_.empty()) { |
| 124 | // Nothing to try. |
| 125 | Stop(); |
| 126 | SLOG(Connection, 2) << __func__ << ": Not enough IPs."; |
| 127 | result_callback_.Run(kResultUnknown); |
| 128 | return; |
| 129 | } |
| 130 | SetupTcpConnection(); |
| 131 | } |
| 132 | |
| 133 | void ConnectionHealthChecker::Stop() { |
| 134 | if (tcp_connection_ != NULL) |
| 135 | tcp_connection_->Stop(); |
| 136 | health_check_in_progress_ = false; |
| 137 | } |
| 138 | |
Prathmesh Prabhu | 5489b7a | 2013-04-10 13:33:59 -0700 | [diff] [blame] | 139 | const char *ConnectionHealthChecker::ResultToString( |
| 140 | ConnectionHealthChecker::Result result) { |
| 141 | switch(result) { |
| 142 | case kResultUnknown: |
| 143 | return "Unknown"; |
| 144 | case kResultInProgress: |
| 145 | return "InProgress"; |
| 146 | case kResultConnectionFailure: |
| 147 | return "ConnectionFailure"; |
| 148 | case kResultElongatedTimeWait: |
| 149 | return "ElongatedTimeWait"; |
| 150 | case kResultCongestedTxQueue: |
| 151 | return "CongestedTxQueue"; |
| 152 | case kResultSuccess: |
| 153 | return "Success"; |
| 154 | default: |
| 155 | return "Invalid"; |
| 156 | } |
| 157 | } |
| 158 | |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 159 | void ConnectionHealthChecker::SetupTcpConnection() { |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 160 | // Pick a random IP from the set of IPs. |
| 161 | // This guards against |
| 162 | // (1) Repeated failed attempts for the same IP at start-up everytime. |
| 163 | // (2) All users attempting to connect to the same IP. |
| 164 | int next_ip_index = rand() % remote_ips_.size(); |
| 165 | const IPAddress &ip = remote_ips_[next_ip_index]; |
Arman Uguray | f84a424 | 2013-04-09 20:01:07 -0700 | [diff] [blame] | 166 | SLOG(Connection, 3) << __func__ << ": Starting connection at " |
| 167 | << ip.ToString(); |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 168 | if (tcp_connection_->Start(ip, kRemotePort)) { |
| 169 | // TCP connection successful, no need to try more. |
| 170 | return; |
| 171 | } |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 172 | SLOG(Connection, 2) << __func__ << ": Connection attempt failed."; |
| 173 | TryNextIP(); |
| 174 | } |
| 175 | |
| 176 | void ConnectionHealthChecker::OnConnectionComplete(bool success, int sock_fd) { |
| 177 | if (!success) { |
| 178 | SLOG(Connection, 2) << __func__ |
| 179 | << ": AsyncConnection connection attempt failed."; |
| 180 | TryNextIP(); // Make sure TryNextIP() is the last statement. |
| 181 | return; |
| 182 | } |
| 183 | // Transferred owndership of valid sock_fd. |
| 184 | |
| 185 | // Check if the established connection is healthy. |
| 186 | Result result = run_data_test_ ? SendData(sock_fd) : ShutDown(sock_fd); |
| 187 | |
| 188 | // The health check routine(s) may further indicate a problem requiring a |
| 189 | // reattempt. |
| 190 | if (result == kResultConnectionFailure || result == kResultUnknown) { |
| 191 | socket_->Close(sock_fd); |
| 192 | TryNextIP(); // Make sure TryNextIP() is the last statement. |
| 193 | } else { |
| 194 | socket_->Close(sock_fd); |
| 195 | Stop(); |
| 196 | result_callback_.Run(result); // Make sure this is the last statement. |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | void ConnectionHealthChecker::GetDNSResult(const Error &error, |
| 201 | const IPAddress& ip) { |
| 202 | if (!error.IsSuccess()) { |
| 203 | SLOG(Connection, 2) << __func__ << "DNSClient returned failure: " |
| 204 | << error.message(); |
| 205 | return; |
| 206 | } |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 207 | // Insert ip into the list of cached IP addresses, if not already present. |
| 208 | for (IPAddresses::size_type i = 0; i < remote_ips_.size(); ++i) |
| 209 | if (remote_ips_[i].Equals(ip)) |
| 210 | return; |
| 211 | remote_ips_.push_back(ip); |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 212 | } |
| 213 | |
| 214 | void ConnectionHealthChecker::TryNextIP() { |
| 215 | ++num_connection_attempts_; |
| 216 | // Check if enough attempts have been made already. |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 217 | if (num_connection_attempts_ >= kMaxConnectionAttempts) { |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 218 | LOG(INFO) << __func__ |
| 219 | << ": multiple failed attempts to established a TCP connection."; |
| 220 | // Give up. Clean up and notify client. |
| 221 | Stop(); |
| 222 | result_callback_.Run(kResultConnectionFailure); |
| 223 | return; |
| 224 | } |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 225 | SetupTcpConnection(); |
| 226 | } |
| 227 | |
| 228 | // Send data on the connection and observe the TxCount. |
| 229 | ConnectionHealthChecker::Result ConnectionHealthChecker::SendData(int sock_fd) { |
| 230 | SocketInfo sock_info; |
| 231 | uint64 old_transmit_queue_value; |
| 232 | if (!GetSocketInfo(sock_fd, &sock_info) || |
| 233 | sock_info.connection_state() != |
| 234 | SocketInfo::kConnectionStateEstablished) { |
| 235 | SLOG(Connection, 2) << __func__ |
| 236 | << ": Connection originally not in established state.."; |
| 237 | // Count this as a failed connection attempt. |
| 238 | return kResultUnknown; |
| 239 | } |
| 240 | old_transmit_queue_value = sock_info.transmit_queue_value(); |
| 241 | |
| 242 | char buf; |
| 243 | if (socket_->Send(sock_fd, &buf, sizeof(buf), 0) == -1) { |
| 244 | SLOG(Connection, 2) << __func__ << ": " << socket_->ErrorString(); |
| 245 | // Count this as a failed connection attempt. |
| 246 | return kResultConnectionFailure; |
| 247 | } |
| 248 | |
| 249 | // Wait to give enough time for the TxCount to be updated. |
| 250 | // TODO(pprabhu) Check that this is reliable wrt timing effects. |
| 251 | if (!GetSocketInfo(sock_fd, &sock_info) || |
| 252 | sock_info.connection_state() != |
| 253 | SocketInfo::kConnectionStateEstablished) { |
| 254 | SLOG(Connection, 2) << __func__ |
| 255 | << ": Connection not in established state after send."; |
| 256 | // Count this as a failed connection attempt. |
| 257 | return kResultUnknown; |
| 258 | } |
| 259 | |
| 260 | if (sock_info.transmit_queue_value() > old_transmit_queue_value) { |
| 261 | return kResultCongestedTxQueue; |
| 262 | } |
| 263 | |
| 264 | return kResultSuccess; |
| 265 | } |
| 266 | |
| 267 | // Attempt to shutdown the connection and check if the connection is stuck in |
| 268 | // the TIME_WAIT tcp state. |
| 269 | ConnectionHealthChecker::Result ConnectionHealthChecker::ShutDown(int sock_fd) { |
| 270 | if (socket_->ShutDown(sock_fd, SHUT_RDWR) == -1) { |
| 271 | SLOG(Connection, 2) << __func__ |
| 272 | << ": Failed to cleanly shut down the connection."; |
| 273 | // Count this as a failed connection attempt. |
| 274 | return kResultUnknown; |
| 275 | } |
| 276 | // Wait to give enough time for a normal TCP shutdown? |
| 277 | // TODO(pprabhu) Check that this is reliable wrt timing effects. |
| 278 | |
| 279 | SocketInfo sock_info; |
| 280 | if (!GetSocketInfo(sock_fd, &sock_info)) { |
| 281 | // The TCP socket for the connection has been cleaned. |
| 282 | // This means ShutDown was successful. |
| 283 | return kResultSuccess; |
| 284 | } |
| 285 | if (sock_info.connection_state() == SocketInfo::kConnectionStateFinWait1 || |
| 286 | sock_info.connection_state() == SocketInfo::kConnectionStateFinWait2 || |
| 287 | sock_info.connection_state() == SocketInfo::kConnectionStateTimeWait) |
| 288 | return kResultElongatedTimeWait; |
| 289 | |
| 290 | return kResultUnknown; |
| 291 | } |
| 292 | |
| 293 | //TODO(pprabhu): Scrub IP address logging. |
| 294 | bool ConnectionHealthChecker::GetSocketInfo(int sock_fd, |
| 295 | SocketInfo *sock_info) { |
| 296 | struct sockaddr_storage addr; |
| 297 | socklen_t addrlen = sizeof(addr); |
| 298 | memset(&addr, 0, sizeof(addr)); |
| 299 | if (socket_->GetSockName(sock_fd, |
| 300 | reinterpret_cast<struct sockaddr *>(&addr), |
| 301 | &addrlen) != 0) { |
| 302 | SLOG(Connection, 2) << __func__ |
| 303 | << ": Failed to get address of created socket."; |
| 304 | return false; |
| 305 | } |
| 306 | if (addr.ss_family != AF_INET) { |
| 307 | SLOG(Connection, 2) << __func__ << ": IPv6 socket address found."; |
| 308 | return false; |
| 309 | } |
| 310 | |
| 311 | CHECK_EQ(sizeof(struct sockaddr_in), addrlen); |
| 312 | struct sockaddr_in *addr_in = reinterpret_cast<sockaddr_in *>(&addr); |
| 313 | uint16 local_port = ntohs(addr_in->sin_port); |
| 314 | char ipstr[INET_ADDRSTRLEN]; |
| 315 | const char *res = inet_ntop(AF_INET, &addr_in->sin_addr, |
| 316 | ipstr, sizeof(ipstr)); |
| 317 | if (res == NULL) { |
| 318 | SLOG(Connection, 2) << __func__ |
| 319 | << ": Could not convert IP address to string."; |
| 320 | return false; |
| 321 | } |
| 322 | |
| 323 | IPAddress local_ip_address(IPAddress::kFamilyIPv4); |
| 324 | CHECK(local_ip_address.SetAddressFromString(ipstr)); |
| 325 | SLOG(Connection, 3) << "Local IP = " << local_ip_address.ToString() |
| 326 | << ":" << local_port; |
| 327 | |
| 328 | vector<SocketInfo> info_list; |
| 329 | if (!socket_info_reader_->LoadTcpSocketInfo(&info_list)) { |
| 330 | SLOG(Connection, 2) << __func__ << ": Failed to load TCP socket info."; |
| 331 | return false; |
| 332 | } |
| 333 | |
| 334 | for (vector<SocketInfo>::const_iterator info_list_it = info_list.begin(); |
| 335 | info_list_it != info_list.end(); |
| 336 | ++info_list_it) { |
| 337 | const SocketInfo &cur_sock_info = *info_list_it; |
| 338 | |
| 339 | SLOG(Connection, 3) |
| 340 | << "Testing against IP = " |
| 341 | << cur_sock_info.local_ip_address().ToString() |
| 342 | << ":" << cur_sock_info.local_port() |
| 343 | << " (addresses equal:" |
| 344 | << cur_sock_info.local_ip_address().Equals(local_ip_address) |
| 345 | << ", ports equal:" << (cur_sock_info.local_port() == local_port) |
| 346 | << ")"; |
| 347 | |
| 348 | if (cur_sock_info.local_ip_address().Equals(local_ip_address) && |
| 349 | cur_sock_info.local_port() == local_port) { |
| 350 | // Copy SocketInfo. |
| 351 | *sock_info = cur_sock_info; |
| 352 | return true; |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | SLOG(Connection, 2) << __func__ << ": No matching TCP socket info."; |
| 357 | return false; |
| 358 | } |
| 359 | |
Prathmesh Prabhu | 3e452f8 | 2013-04-10 16:31:44 -0700 | [diff] [blame^] | 360 | void ConnectionHealthChecker::GarbageCollectDNSClients() { |
| 361 | ScopedVector<DNSClient> keep; |
| 362 | ScopedVector<DNSClient> discard; |
| 363 | for (size_t i = 0; i < dns_clients_.size(); ++i) { |
| 364 | if (dns_clients_[i]->IsActive()) |
| 365 | keep.push_back(dns_clients_[i]); |
| 366 | else |
| 367 | discard.push_back(dns_clients_[i]); |
| 368 | } |
| 369 | dns_clients_.weak_clear(); |
| 370 | dns_clients_ = keep.Pass(); // Passes ownership of contents. |
| 371 | discard.clear(); |
| 372 | } |
| 373 | |
Prathmesh Prabhu | 40daa01 | 2013-04-03 10:35:03 -0700 | [diff] [blame] | 374 | } // namespace shill |