| // Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "shill/traffic_monitor.h" |
| |
| #include <base/bind.h> |
| #include <base/strings/stringprintf.h> |
| #include <netinet/in.h> |
| |
| #include "shill/device.h" |
| #include "shill/device_info.h" |
| #include "shill/event_dispatcher.h" |
| #include "shill/logging.h" |
| #include "shill/socket_info_reader.h" |
| |
| using base::StringPrintf; |
| using std::string; |
| using std::vector; |
| |
| namespace shill { |
| |
| // static |
| const uint16 TrafficMonitor::kDnsPort = 53; |
| const int64 TrafficMonitor::kDnsTimedOutThresholdSeconds = 15; |
| const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2; |
| const int64 TrafficMonitor::kSamplingIntervalMilliseconds = 5000; |
| |
| TrafficMonitor::TrafficMonitor(const DeviceRefPtr &device, |
| EventDispatcher *dispatcher) |
| : device_(device), |
| dispatcher_(dispatcher), |
| socket_info_reader_(new SocketInfoReader), |
| accummulated_congested_tx_queues_samples_(0), |
| connection_info_reader_(new ConnectionInfoReader), |
| accummulated_dns_failures_samples_(0) { |
| } |
| |
| TrafficMonitor::~TrafficMonitor() { |
| Stop(); |
| } |
| |
| void TrafficMonitor::Start() { |
| SLOG(Link, 2) << __func__; |
| Stop(); |
| |
| sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic, |
| base::Unretained(this))); |
| dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), |
| kSamplingIntervalMilliseconds); |
| } |
| |
| void TrafficMonitor::Stop() { |
| SLOG(Link, 2) << __func__; |
| sample_traffic_callback_.Cancel(); |
| ResetCongestedTxQueuesStats(); |
| ResetDnsFailingStats(); |
| } |
| |
| void TrafficMonitor::ResetCongestedTxQueuesStats() { |
| accummulated_congested_tx_queues_samples_ = 0; |
| } |
| |
| void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() { |
| SLOG(Link, 2) << __func__ << ": Tx-queues decongested"; |
| ResetCongestedTxQueuesStats(); |
| } |
| |
| void TrafficMonitor::BuildIPPortToTxQueueLength( |
| const vector<SocketInfo> &socket_infos, |
| IPPortToTxQueueLengthMap *tx_queue_lengths) { |
| SLOG(Link, 3) << __func__; |
| string device_ip_address = device_->ipconfig()->properties().address; |
| for (const auto &info : socket_infos) { |
| SLOG(Link, 4) << "SocketInfo(IP=" << info.local_ip_address().ToString() |
| << ", TX=" << info.transmit_queue_value() |
| << ", State=" << info.connection_state() |
| << ", TimerState=" << info.timer_state(); |
| if (info.local_ip_address().ToString() != device_ip_address || |
| info.transmit_queue_value() == 0 || |
| info.connection_state() != SocketInfo::kConnectionStateEstablished || |
| (info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending && |
| info.timer_state() != |
| SocketInfo::kTimerStateZeroWindowProbeTimerPending)) { |
| SLOG(Link, 4) << "Connection Filtered."; |
| continue; |
| } |
| SLOG(Link, 3) << "Monitoring connection: TX=" << info.transmit_queue_value() |
| << " TimerState=" << info.timer_state(); |
| |
| string local_ip_port = |
| StringPrintf("%s:%d", |
| info.local_ip_address().ToString().c_str(), |
| info.local_port()); |
| (*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value(); |
| } |
| } |
| |
| bool TrafficMonitor::IsCongestedTxQueues() { |
| SLOG(Link, 4) << __func__; |
| vector<SocketInfo> socket_infos; |
| if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) || |
| socket_infos.empty()) { |
| SLOG(Link, 3) << __func__ << ": Empty socket info"; |
| ResetCongestedTxQueuesStatsWithLogging(); |
| return false; |
| } |
| bool congested_tx_queues = true; |
| IPPortToTxQueueLengthMap curr_tx_queue_lengths; |
| BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths); |
| if (curr_tx_queue_lengths.empty()) { |
| SLOG(Link, 3) << __func__ << ": No interesting socket info"; |
| ResetCongestedTxQueuesStatsWithLogging(); |
| } else { |
| for (const auto &length_entry : old_tx_queue_lengths_) { |
| IPPortToTxQueueLengthMap::iterator curr_tx_queue_it = |
| curr_tx_queue_lengths.find(length_entry.first); |
| if (curr_tx_queue_it == curr_tx_queue_lengths.end() || |
| curr_tx_queue_it->second < length_entry.second) { |
| congested_tx_queues = false; |
| // TODO(armansito): If we had a false positive earlier, we may |
| // want to correct it here by invoking a "connection back to normal |
| // callback", so that the OutOfCredits property can be set to |
| // false. |
| break; |
| } |
| } |
| if (congested_tx_queues) { |
| ++accummulated_congested_tx_queues_samples_; |
| SLOG(Link, 2) << __func__ |
| << ": Congested tx-queues detected (" |
| << accummulated_congested_tx_queues_samples_ << ")"; |
| } |
| } |
| old_tx_queue_lengths_ = curr_tx_queue_lengths; |
| |
| return congested_tx_queues; |
| } |
| |
| void TrafficMonitor::ResetDnsFailingStats() { |
| accummulated_dns_failures_samples_ = 0; |
| } |
| |
| void TrafficMonitor::ResetDnsFailingStatsWithLogging() { |
| SLOG(Link, 2) << __func__ << ": DNS queries restored"; |
| ResetDnsFailingStats(); |
| } |
| |
| bool TrafficMonitor::IsDnsFailing() { |
| SLOG(Link, 4) << __func__; |
| vector<ConnectionInfo> connection_infos; |
| if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) || |
| connection_infos.empty()) { |
| SLOG(Link, 3) << __func__ << ": Empty connection info"; |
| } else { |
| // The time-to-expire counter is used to determine when a DNS request |
| // has timed out. This counter is the number of seconds remaining until |
| // the entry is removed from the system IP connection tracker. The |
| // default time is 30 seconds. This is too long of a wait. Instead, we |
| // want to time out at |kDnsTimedOutThresholdSeconds|. Unfortunately, |
| // we cannot simply look for entries less than |
| // |kDnsTimedOutThresholdSeconds| because we will count the entry |
| // multiple times once its time-to-expire is less than |
| // |kDnsTimedOutThresholdSeconds|. To ensure that we only count an |
| // entry once, we look for entries in this time window between |
| // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|. |
| const int64 kDnsTimedOutLowerThresholdSeconds = |
| kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000; |
| string device_ip_address = device_->ipconfig()->properties().address; |
| for (const auto &info : connection_infos) { |
| if (info.protocol() != IPPROTO_UDP || |
| info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds || |
| info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds || |
| !info.is_unreplied() || |
| info.original_source_ip_address().ToString() != device_ip_address || |
| info.original_destination_port() != kDnsPort) |
| continue; |
| |
| ++accummulated_dns_failures_samples_; |
| SLOG(Link, 2) << __func__ |
| << ": DNS failures detected (" |
| << accummulated_dns_failures_samples_ << ")"; |
| return true; |
| } |
| } |
| ResetDnsFailingStatsWithLogging(); |
| return false; |
| } |
| |
| void TrafficMonitor::SampleTraffic() { |
| SLOG(Link, 3) << __func__; |
| |
| // Schedule the sample callback first, so it is possible for the network |
| // problem callback to stop the traffic monitor. |
| dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), |
| kSamplingIntervalMilliseconds); |
| |
| if (IsCongestedTxQueues() && |
| accummulated_congested_tx_queues_samples_ == |
| kMinimumFailedSamplesToTrigger) { |
| LOG(WARNING) << "Congested tx queues detected, out-of-credits?"; |
| network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue); |
| } else if (IsDnsFailing() && |
| accummulated_dns_failures_samples_ == |
| kMinimumFailedSamplesToTrigger) { |
| LOG(WARNING) << "DNS queries failing, out-of-credits?"; |
| network_problem_detected_callback_.Run(kNetworkProblemDNSFailure); |
| } |
| } |
| |
| } // namespace shill |