blob: ee635ee83bb8f27d0d4c9a482a6776a98c6cf6f4 [file] [log] [blame]
// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "shill/connection_health_checker.h"
6
7#include <arpa/inet.h>
8#include <netinet/in.h>
9#include <sys/socket.h>
10#include <sys/types.h>
11#include <vector>
12
13#include <base/bind.h>
14
15#include "shill/async_connection.h"
16#include "shill/connection.h"
17#include "shill/dns_client.h"
18#include "shill/error.h"
19#include "shill/http_url.h"
20#include "shill/ip_address.h"
21#include "shill/logging.h"
22#include "shill/sockets.h"
23#include "shill/socket_info.h"
24#include "shill/socket_info_reader.h"
25
26using base::Bind;
27using std::string;
28using std::vector;
29
30namespace shill {
31
32const int ConnectionHealthChecker::kDNSTimeoutSeconds = 5;
33const int ConnectionHealthChecker::kMaxConnectionAttempts = 3;
34const uint16 ConnectionHealthChecker::kRemotePort = 80;
35
36ConnectionHealthChecker::ConnectionHealthChecker(
37 ConnectionRefPtr connection,
38 EventDispatcher *dispatcher,
39 const base::Callback<void(Result)> &result_callback)
40 : connection_(connection),
41 dispatcher_(dispatcher),
42 result_callback_(result_callback),
43 socket_info_reader_(new SocketInfoReader()),
44 socket_(new Sockets()),
45 weak_ptr_factory_(this),
46 connection_complete_callback_(
47 Bind(&ConnectionHealthChecker::OnConnectionComplete,
48 weak_ptr_factory_.GetWeakPtr())),
49 dns_client_callback_(Bind(&ConnectionHealthChecker::GetDNSResult,
50 weak_ptr_factory_.GetWeakPtr())),
51 tcp_connection_(new AsyncConnection(connection_->interface_name(),
52 dispatcher_,
53 socket_.get(),
54 connection_complete_callback_)),
55 dns_client_(new DNSClient(IPAddress::kFamilyIPv4,
56 connection->interface_name(),
57 connection->dns_servers(),
58 kDNSTimeoutSeconds * 1000,
59 dispatcher,
60 dns_client_callback_)),
61 run_data_test_(true),
62 health_check_in_progress_(false),
63 num_connection_attempts_(0) {}
64
65ConnectionHealthChecker::~ConnectionHealthChecker() {
66 Stop();
67}
68
69void ConnectionHealthChecker::AddRemoteIP(IPAddress ip) {
70 remote_ips_.push(ip);
71}
72
73void ConnectionHealthChecker::AddRemoteURL(const string &url_string) {
74 HTTPURL url;
75 if (!url.ParseFromString(url_string)) {
76 SLOG(Connection, 2) << __func__ << ": Malformed url: " << url_string << ".";
77 return;
78 }
79 if (url.port() != kRemotePort) {
80 SLOG(Connection, 2) << __func__ << ": Remote connections only supported "
81 << " to port 80, requested " << url.port() << ".";
82 return;
83 }
84 Error error;
85 if (!dns_client_->Start(url.host(), &error)) {
86 SLOG(Connection, 2) << __func__ << ": Failed to start DNS client: "
87 << error.message();
88 }
89}
90
91void ConnectionHealthChecker::Start() {
92 if (health_check_in_progress_) {
93 SLOG(Connection, 2) << __func__ << ": Health Check already in progress.";
94 return;
95 }
96 if (!connection_.get()) {
97 SLOG(Connection, 2) << __func__ << ": Connection not ready yet.";
98 result_callback_.Run(kResultUnknown);
99 return;
100 }
101
102 health_check_in_progress_ = true;
103 num_connection_attempts_ = 0;
104
105 // Initiate the first attempt.
106 if (remote_ips_.empty()) {
107 // Nothing to try.
108 Stop();
109 SLOG(Connection, 2) << __func__ << ": Not enough IPs.";
110 result_callback_.Run(kResultUnknown);
111 return;
112 }
113 SetupTcpConnection();
114}
115
116void ConnectionHealthChecker::Stop() {
117 if (tcp_connection_ != NULL)
118 tcp_connection_->Stop();
119 health_check_in_progress_ = false;
120}
121
Prathmesh Prabhu5489b7a2013-04-10 13:33:59 -0700122const char *ConnectionHealthChecker::ResultToString(
123 ConnectionHealthChecker::Result result) {
124 switch(result) {
125 case kResultUnknown:
126 return "Unknown";
127 case kResultInProgress:
128 return "InProgress";
129 case kResultConnectionFailure:
130 return "ConnectionFailure";
131 case kResultElongatedTimeWait:
132 return "ElongatedTimeWait";
133 case kResultCongestedTxQueue:
134 return "CongestedTxQueue";
135 case kResultSuccess:
136 return "Success";
137 default:
138 return "Invalid";
139 }
140}
141
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700142void ConnectionHealthChecker::SetupTcpConnection() {
143 IPAddress ip = remote_ips_.front();
Arman Ugurayf84a4242013-04-09 20:01:07 -0700144 SLOG(Connection, 3) << __func__ << ": Starting connection at "
145 << ip.ToString();
Prathmesh Prabhu40daa012013-04-03 10:35:03 -0700146 if (tcp_connection_->Start(ip, kRemotePort)) {
147 // TCP connection successful, no need to try more.
148 return;
149 }
150
151 SLOG(Connection, 2) << __func__ << ": Connection attempt failed.";
152 TryNextIP();
153}
154
155void ConnectionHealthChecker::OnConnectionComplete(bool success, int sock_fd) {
156 if (!success) {
157 SLOG(Connection, 2) << __func__
158 << ": AsyncConnection connection attempt failed.";
159 TryNextIP(); // Make sure TryNextIP() is the last statement.
160 return;
161 }
162 // Transferred owndership of valid sock_fd.
163
164 // Check if the established connection is healthy.
165 Result result = run_data_test_ ? SendData(sock_fd) : ShutDown(sock_fd);
166
167 // The health check routine(s) may further indicate a problem requiring a
168 // reattempt.
169 if (result == kResultConnectionFailure || result == kResultUnknown) {
170 socket_->Close(sock_fd);
171 TryNextIP(); // Make sure TryNextIP() is the last statement.
172 } else {
173 socket_->Close(sock_fd);
174 Stop();
175 result_callback_.Run(result); // Make sure this is the last statement.
176 }
177}
178
179void ConnectionHealthChecker::GetDNSResult(const Error &error,
180 const IPAddress& ip) {
181 if (!error.IsSuccess()) {
182 SLOG(Connection, 2) << __func__ << "DNSClient returned failure: "
183 << error.message();
184 return;
185 }
186 remote_ips_.push(ip);
187}
188
189void ConnectionHealthChecker::TryNextIP() {
190 ++num_connection_attempts_;
191 // Check if enough attempts have been made already.
192 if (num_connection_attempts_ >= kMaxConnectionAttempts ||
193 static_cast<IPAddressQueue::size_type>(num_connection_attempts_)
194 >= remote_ips_.size()) {
195 LOG(INFO) << __func__
196 << ": multiple failed attempts to established a TCP connection.";
197 // Give up. Clean up and notify client.
198 Stop();
199 result_callback_.Run(kResultConnectionFailure);
200 return;
201 }
202 IPAddress recycle_addr = remote_ips_.front();
203 remote_ips_.pop();
204 remote_ips_.push(recycle_addr);
205
206 SetupTcpConnection();
207}
208
209// Send data on the connection and observe the TxCount.
210ConnectionHealthChecker::Result ConnectionHealthChecker::SendData(int sock_fd) {
211 SocketInfo sock_info;
212 uint64 old_transmit_queue_value;
213 if (!GetSocketInfo(sock_fd, &sock_info) ||
214 sock_info.connection_state() !=
215 SocketInfo::kConnectionStateEstablished) {
216 SLOG(Connection, 2) << __func__
217 << ": Connection originally not in established state..";
218 // Count this as a failed connection attempt.
219 return kResultUnknown;
220 }
221 old_transmit_queue_value = sock_info.transmit_queue_value();
222
223 char buf;
224 if (socket_->Send(sock_fd, &buf, sizeof(buf), 0) == -1) {
225 SLOG(Connection, 2) << __func__ << ": " << socket_->ErrorString();
226 // Count this as a failed connection attempt.
227 return kResultConnectionFailure;
228 }
229
230 // Wait to give enough time for the TxCount to be updated.
231 // TODO(pprabhu) Check that this is reliable wrt timing effects.
232 if (!GetSocketInfo(sock_fd, &sock_info) ||
233 sock_info.connection_state() !=
234 SocketInfo::kConnectionStateEstablished) {
235 SLOG(Connection, 2) << __func__
236 << ": Connection not in established state after send.";
237 // Count this as a failed connection attempt.
238 return kResultUnknown;
239 }
240
241 if (sock_info.transmit_queue_value() > old_transmit_queue_value) {
242 return kResultCongestedTxQueue;
243 }
244
245 return kResultSuccess;
246}
247
248// Attempt to shutdown the connection and check if the connection is stuck in
249// the TIME_WAIT tcp state.
250ConnectionHealthChecker::Result ConnectionHealthChecker::ShutDown(int sock_fd) {
251 if (socket_->ShutDown(sock_fd, SHUT_RDWR) == -1) {
252 SLOG(Connection, 2) << __func__
253 << ": Failed to cleanly shut down the connection.";
254 // Count this as a failed connection attempt.
255 return kResultUnknown;
256 }
257 // Wait to give enough time for a normal TCP shutdown?
258 // TODO(pprabhu) Check that this is reliable wrt timing effects.
259
260 SocketInfo sock_info;
261 if (!GetSocketInfo(sock_fd, &sock_info)) {
262 // The TCP socket for the connection has been cleaned.
263 // This means ShutDown was successful.
264 return kResultSuccess;
265 }
266 if (sock_info.connection_state() == SocketInfo::kConnectionStateFinWait1 ||
267 sock_info.connection_state() == SocketInfo::kConnectionStateFinWait2 ||
268 sock_info.connection_state() == SocketInfo::kConnectionStateTimeWait)
269 return kResultElongatedTimeWait;
270
271 return kResultUnknown;
272}
273
274//TODO(pprabhu): Scrub IP address logging.
275bool ConnectionHealthChecker::GetSocketInfo(int sock_fd,
276 SocketInfo *sock_info) {
277 struct sockaddr_storage addr;
278 socklen_t addrlen = sizeof(addr);
279 memset(&addr, 0, sizeof(addr));
280 if (socket_->GetSockName(sock_fd,
281 reinterpret_cast<struct sockaddr *>(&addr),
282 &addrlen) != 0) {
283 SLOG(Connection, 2) << __func__
284 << ": Failed to get address of created socket.";
285 return false;
286 }
287 if (addr.ss_family != AF_INET) {
288 SLOG(Connection, 2) << __func__ << ": IPv6 socket address found.";
289 return false;
290 }
291
292 CHECK_EQ(sizeof(struct sockaddr_in), addrlen);
293 struct sockaddr_in *addr_in = reinterpret_cast<sockaddr_in *>(&addr);
294 uint16 local_port = ntohs(addr_in->sin_port);
295 char ipstr[INET_ADDRSTRLEN];
296 const char *res = inet_ntop(AF_INET, &addr_in->sin_addr,
297 ipstr, sizeof(ipstr));
298 if (res == NULL) {
299 SLOG(Connection, 2) << __func__
300 << ": Could not convert IP address to string.";
301 return false;
302 }
303
304 IPAddress local_ip_address(IPAddress::kFamilyIPv4);
305 CHECK(local_ip_address.SetAddressFromString(ipstr));
306 SLOG(Connection, 3) << "Local IP = " << local_ip_address.ToString()
307 << ":" << local_port;
308
309 vector<SocketInfo> info_list;
310 if (!socket_info_reader_->LoadTcpSocketInfo(&info_list)) {
311 SLOG(Connection, 2) << __func__ << ": Failed to load TCP socket info.";
312 return false;
313 }
314
315 for (vector<SocketInfo>::const_iterator info_list_it = info_list.begin();
316 info_list_it != info_list.end();
317 ++info_list_it) {
318 const SocketInfo &cur_sock_info = *info_list_it;
319
320 SLOG(Connection, 3)
321 << "Testing against IP = "
322 << cur_sock_info.local_ip_address().ToString()
323 << ":" << cur_sock_info.local_port()
324 << " (addresses equal:"
325 << cur_sock_info.local_ip_address().Equals(local_ip_address)
326 << ", ports equal:" << (cur_sock_info.local_port() == local_port)
327 << ")";
328
329 if (cur_sock_info.local_ip_address().Equals(local_ip_address) &&
330 cur_sock_info.local_port() == local_port) {
331 // Copy SocketInfo.
332 *sock_info = cur_sock_info;
333 return true;
334 }
335 }
336
337 SLOG(Connection, 2) << __func__ << ": No matching TCP socket info.";
338 return false;
339}
340
341} // namespace shill