blob: 58737534ad1ec9f345a6a178c47c11dff3f43b2d [file] [log] [blame]
Ben Chanb061f892013-02-27 17:46:55 -08001// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "shill/traffic_monitor.h"
6
7#include <base/bind.h>
Thieu Le03026662013-04-04 10:45:11 -07008#include <base/stringprintf.h>
Thieu Lefa7960e2013-04-15 13:14:55 -07009#include <netinet/in.h>
Ben Chanb061f892013-02-27 17:46:55 -080010
11#include "shill/device.h"
12#include "shill/device_info.h"
13#include "shill/event_dispatcher.h"
Thieu Le03026662013-04-04 10:45:11 -070014#include "shill/logging.h"
15#include "shill/socket_info_reader.h"
16
17using base::StringPrintf;
18using std::string;
19using std::vector;
Ben Chanb061f892013-02-27 17:46:55 -080020
21namespace shill {
22
Thieu Le03026662013-04-04 10:45:11 -070023// static
Thieu Lefa7960e2013-04-15 13:14:55 -070024const uint16 TrafficMonitor::kDnsPort = 53;
25const int64 TrafficMonitor::kDnsTimedOutThresholdSeconds = 15;
Thieu Le03026662013-04-04 10:45:11 -070026const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2;
27const int64 TrafficMonitor::kSamplingIntervalMilliseconds = 5000;
Ben Chanb061f892013-02-27 17:46:55 -080028
29TrafficMonitor::TrafficMonitor(const DeviceRefPtr &device,
30 EventDispatcher *dispatcher)
31 : device_(device),
32 dispatcher_(dispatcher),
Thieu Le03026662013-04-04 10:45:11 -070033 socket_info_reader_(new SocketInfoReader),
Thieu Lefa7960e2013-04-15 13:14:55 -070034 accummulated_congested_tx_queues_samples_(0),
35 connection_info_reader_(new ConnectionInfoReader),
36 accummulated_dns_failures_samples_(0) {
Ben Chanb061f892013-02-27 17:46:55 -080037}
38
39TrafficMonitor::~TrafficMonitor() {
40 Stop();
41}
42
43void TrafficMonitor::Start() {
Thieu Le03026662013-04-04 10:45:11 -070044 SLOG(Link, 2) << __func__;
Ben Chanb061f892013-02-27 17:46:55 -080045 Stop();
46
Ben Chanb061f892013-02-27 17:46:55 -080047 sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic,
48 base::Unretained(this)));
49 dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
50 kSamplingIntervalMilliseconds);
51}
52
53void TrafficMonitor::Stop() {
Thieu Le03026662013-04-04 10:45:11 -070054 SLOG(Link, 2) << __func__;
Ben Chanb061f892013-02-27 17:46:55 -080055 sample_traffic_callback_.Cancel();
Thieu Lefa7960e2013-04-15 13:14:55 -070056 ResetCongestedTxQueuesStats();
57 ResetDnsFailingStats();
58}
59
60void TrafficMonitor::ResetCongestedTxQueuesStats() {
61 accummulated_congested_tx_queues_samples_ = 0;
62}
63
64void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() {
65 SLOG(Link, 2) << __func__ << ": Tx-queues decongested";
66 ResetCongestedTxQueuesStats();
Thieu Le03026662013-04-04 10:45:11 -070067}
68
69void TrafficMonitor::BuildIPPortToTxQueueLength(
70 const vector<SocketInfo> &socket_infos,
71 IPPortToTxQueueLengthMap *tx_queue_lengths) {
Arman Ugurayd42d8ec2013-04-08 19:28:21 -070072 SLOG(Link, 3) << __func__;
Thieu Le03026662013-04-04 10:45:11 -070073 string device_ip_address = device_->ipconfig()->properties().address;
74 vector<SocketInfo>::const_iterator it;
75 for (it = socket_infos.begin(); it != socket_infos.end(); ++it) {
Arman Ugurayd42d8ec2013-04-08 19:28:21 -070076 SLOG(Link, 4) << "SocketInfo(IP=" << it->local_ip_address().ToString()
77 << ", TX=" << it->transmit_queue_value()
78 << ", State=" << it->connection_state()
79 << ", TimerState=" << it->timer_state();
Thieu Le03026662013-04-04 10:45:11 -070080 if (it->local_ip_address().ToString() != device_ip_address ||
81 it->transmit_queue_value() == 0 ||
82 it->connection_state() != SocketInfo::kConnectionStateEstablished ||
Arman Ugurayd42d8ec2013-04-08 19:28:21 -070083 (it->timer_state() != SocketInfo::kTimerStateRetransmitTimerPending &&
84 it->timer_state() !=
85 SocketInfo::kTimerStateZeroWindowProbeTimerPending)) {
86 SLOG(Link, 4) << "Connection Filtered.";
Thieu Le03026662013-04-04 10:45:11 -070087 continue;
Arman Ugurayd42d8ec2013-04-08 19:28:21 -070088 }
89 SLOG(Link, 3) << "Monitoring connection: TX=" << it->transmit_queue_value()
90 << " TimerState=" << it->timer_state();
Thieu Le03026662013-04-04 10:45:11 -070091
92 string local_ip_port =
93 StringPrintf("%s:%d",
94 it->local_ip_address().ToString().c_str(),
95 it->local_port());
96 (*tx_queue_lengths)[local_ip_port] = it->transmit_queue_value();
97 }
Ben Chanb061f892013-02-27 17:46:55 -080098}
99
Thieu Lefa7960e2013-04-15 13:14:55 -0700100bool TrafficMonitor::IsCongestedTxQueues() {
101 SLOG(Link, 4) << __func__;
Thieu Le03026662013-04-04 10:45:11 -0700102 vector<SocketInfo> socket_infos;
103 if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) ||
104 socket_infos.empty()) {
Thieu Lefa7960e2013-04-15 13:14:55 -0700105 SLOG(Link, 3) << __func__ << ": Empty socket info";
106 ResetCongestedTxQueuesStatsWithLogging();
107 return false;
Ben Chanb061f892013-02-27 17:46:55 -0800108 }
Thieu Lefa7960e2013-04-15 13:14:55 -0700109 bool congested_tx_queues = true;
Thieu Le03026662013-04-04 10:45:11 -0700110 IPPortToTxQueueLengthMap curr_tx_queue_lengths;
111 BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths);
112 if (curr_tx_queue_lengths.empty()) {
Thieu Lefa7960e2013-04-15 13:14:55 -0700113 SLOG(Link, 3) << __func__ << ": No interesting socket info";
114 ResetCongestedTxQueuesStatsWithLogging();
Thieu Le03026662013-04-04 10:45:11 -0700115 } else {
Thieu Le03026662013-04-04 10:45:11 -0700116 IPPortToTxQueueLengthMap::iterator old_tx_queue_it;
117 for (old_tx_queue_it = old_tx_queue_lengths_.begin();
118 old_tx_queue_it != old_tx_queue_lengths_.end();
119 ++old_tx_queue_it) {
120 IPPortToTxQueueLengthMap::iterator curr_tx_queue_it =
121 curr_tx_queue_lengths.find(old_tx_queue_it->first);
122 if (curr_tx_queue_it == curr_tx_queue_lengths.end() ||
123 curr_tx_queue_it->second < old_tx_queue_it->second) {
124 congested_tx_queues = false;
Arman Ugurayf84a4242013-04-09 20:01:07 -0700125 // TODO(armansito): If we had a false positive earlier, we may
126 // want to correct it here by invoking a "connection back to normal
127 // callback", so that the OutOfCredits property can be set to
128 // false.
Thieu Le03026662013-04-04 10:45:11 -0700129 break;
130 }
131 }
Thieu Lefa7960e2013-04-15 13:14:55 -0700132 if (congested_tx_queues) {
133 ++accummulated_congested_tx_queues_samples_;
134 SLOG(Link, 2) << __func__
135 << ": Congested tx-queues detected ("
136 << accummulated_congested_tx_queues_samples_ << ")";
Ben Chanb061f892013-02-27 17:46:55 -0800137 }
138 }
Thieu Le03026662013-04-04 10:45:11 -0700139 old_tx_queue_lengths_ = curr_tx_queue_lengths;
Ben Chanb061f892013-02-27 17:46:55 -0800140
Thieu Lefa7960e2013-04-15 13:14:55 -0700141 return congested_tx_queues;
142}
143
144void TrafficMonitor::ResetDnsFailingStats() {
145 accummulated_dns_failures_samples_ = 0;
146}
147
148void TrafficMonitor::ResetDnsFailingStatsWithLogging() {
149 SLOG(Link, 2) << __func__ << ": DNS queries restored";
150 ResetDnsFailingStats();
151}
152
153bool TrafficMonitor::IsDnsFailing() {
154 SLOG(Link, 4) << __func__;
155 vector<ConnectionInfo> connection_infos;
156 if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) ||
157 connection_infos.empty()) {
158 SLOG(Link, 3) << __func__ << ": Empty connection info";
159 } else {
160 // The time-to-expire counter is used to determine when a DNS request
161 // has timed out. This counter is the number of seconds remaining until
162 // the entry is removed from the system IP connection tracker. The
163 // default time is 30 seconds. This is too long of a wait. Instead, we
164 // want to time out at |kDnsTimedOutThresholdSeconds|. Unfortunately,
165 // we cannot simply look for entries less than
166 // |kDnsTimedOutThresholdSeconds| because we will count the entry
167 // multiple times once its time-to-expire is less than
168 // |kDnsTimedOutThresholdSeconds|. To ensure that we only count an
169 // entry once, we look for entries in this time window between
170 // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|.
171 const int64 kDnsTimedOutLowerThresholdSeconds =
172 kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000;
173 string device_ip_address = device_->ipconfig()->properties().address;
174 vector<ConnectionInfo>::const_iterator it;
175 for (it = connection_infos.begin(); it != connection_infos.end(); ++it) {
176 if (it->protocol() != IPPROTO_UDP ||
177 it->time_to_expire_seconds() > kDnsTimedOutThresholdSeconds ||
178 it->time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds ||
179 !it->is_unreplied() ||
180 it->original_source_ip_address().ToString() != device_ip_address ||
181 it->original_destination_port() != kDnsPort)
182 continue;
183
184 ++accummulated_dns_failures_samples_;
185 SLOG(Link, 2) << __func__
186 << ": DNS failures detected ("
187 << accummulated_dns_failures_samples_ << ")";
188 return true;
189 }
190 }
191 ResetDnsFailingStatsWithLogging();
192 return false;
193}
194
195void TrafficMonitor::SampleTraffic() {
196 SLOG(Link, 3) << __func__;
197
198 if (IsCongestedTxQueues() &&
199 accummulated_congested_tx_queues_samples_ ==
200 kMinimumFailedSamplesToTrigger) {
201 LOG(WARNING) << "Congested tx queues detected, out-of-credits?";
202 outgoing_tcp_packets_not_routed_callback_.Run();
203 } else if (IsDnsFailing() &&
204 accummulated_dns_failures_samples_ ==
205 kMinimumFailedSamplesToTrigger) {
206 LOG(WARNING) << "DNS queries failing, out-of-credits?";
207 outgoing_tcp_packets_not_routed_callback_.Run();
208 }
209
Ben Chanb061f892013-02-27 17:46:55 -0800210 dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
211 kSamplingIntervalMilliseconds);
212}
213
214} // namespace shill