mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 1 | // Copyright (c) 2014 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "shill/mac80211_monitor.h" |
| 6 | |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 7 | #include <algorithm> |
| 8 | |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 9 | #include <base/file_util.h> |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 10 | #include <base/strings/string_number_conversions.h> |
| 11 | #include <base/strings/string_split.h> |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 12 | #include <base/strings/stringprintf.h> |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 13 | |
| 14 | #include "shill/logging.h" |
mukesh agrawal | f734873 | 2014-08-06 18:08:56 -0700 | [diff] [blame] | 15 | #include "shill/metrics.h" |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 16 | #include "shill/shill_time.h" |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 17 | |
| 18 | namespace shill { |
| 19 | |
| 20 | using std::string; |
| 21 | using std::vector; |
| 22 | |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 23 | // statics |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 24 | // At 17-25 bytes per queue, this accommodates 80 queues. |
| 25 | // ath9k has 4 queues, and WP2 has 16 queues. |
| 26 | const size_t Mac80211Monitor::kMaxQueueStateSizeBytes = 2048; |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 27 | const char Mac80211Monitor::kQueueStatusPathFormat[] = |
| 28 | "/sys/kernel/debug/ieee80211/%s/queues"; |
| 29 | const char Mac80211Monitor::kWakeQueuesPathFormat[] = |
| 30 | "/sys/kernel/debug/ieee80211/%s/wake_queues"; |
| 31 | const time_t Mac80211Monitor::kQueueStatePollIntervalSeconds = 30; |
| 32 | const time_t Mac80211Monitor::kMinimumTimeBetweenWakesSeconds = 60; |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 33 | |
| 34 | Mac80211Monitor::Mac80211Monitor( |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 35 | EventDispatcher *dispatcher, |
| 36 | const string &link_name, |
| 37 | size_t queue_length_limit, |
| 38 | const base::Closure &on_repair_callback, |
| 39 | Metrics *metrics) |
| 40 | : time_(Time::GetInstance()), |
| 41 | dispatcher_(dispatcher), |
| 42 | link_name_(link_name), |
mukesh agrawal | f734873 | 2014-08-06 18:08:56 -0700 | [diff] [blame] | 43 | queue_length_limit_(queue_length_limit), |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 44 | on_repair_callback_(on_repair_callback), |
| 45 | metrics_(metrics), |
| 46 | phy_name_("phy-unknown"), |
| 47 | last_woke_queues_monotonic_seconds_(0), |
| 48 | is_running_(false), |
| 49 | is_device_connected_(false), |
| 50 | weak_ptr_factory_(this) { |
| 51 | CHECK(time_); |
| 52 | CHECK(dispatcher_); |
mukesh agrawal | f734873 | 2014-08-06 18:08:56 -0700 | [diff] [blame] | 53 | CHECK(metrics_); |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | Mac80211Monitor::~Mac80211Monitor() { |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 57 | Stop(); |
| 58 | } |
| 59 | |
| 60 | void Mac80211Monitor::Start(const string &phy_name) { |
| 61 | SLOG(WiFi, 2) << __func__ << " on " << link_name_ << " (" << phy_name << ")"; |
| 62 | CHECK(!is_running_); |
| 63 | phy_name_ = phy_name; |
| 64 | queue_state_file_path_ = base::FilePath( |
| 65 | base::StringPrintf(kQueueStatusPathFormat, phy_name.c_str())); |
| 66 | wake_queues_file_path_ = base::FilePath( |
| 67 | base::StringPrintf(kWakeQueuesPathFormat, phy_name.c_str())); |
| 68 | last_woke_queues_monotonic_seconds_ = 0; |
| 69 | StartTimer(); |
| 70 | is_running_ = true; |
| 71 | } |
| 72 | |
| 73 | void Mac80211Monitor::Stop() { |
| 74 | SLOG(WiFi, 2) << __func__ << " on " << link_name_ << " (" << phy_name_ << ")"; |
| 75 | StopTimer(); |
| 76 | is_running_ = false; |
| 77 | } |
| 78 | |
| 79 | void Mac80211Monitor::UpdateConnectedState(bool new_state) { |
| 80 | SLOG(WiFi, 2) << __func__ << " (new_state=" << new_state << ")"; |
| 81 | is_device_connected_ = new_state; |
| 82 | } |
| 83 | |
| 84 | void Mac80211Monitor::StartTimer() { |
| 85 | SLOG(WiFi, 2) << __func__; |
| 86 | if (check_queues_callback_.IsCancelled()) { |
| 87 | check_queues_callback_.Reset( |
| 88 | Bind(&Mac80211Monitor::WakeQueuesIfNeeded, |
| 89 | weak_ptr_factory_.GetWeakPtr())); |
| 90 | } |
| 91 | dispatcher_->PostDelayedTask(check_queues_callback_.callback(), |
| 92 | kQueueStatePollIntervalSeconds * 1000); |
| 93 | } |
| 94 | |
| 95 | void Mac80211Monitor::StopTimer() { |
| 96 | SLOG(WiFi, 2) << __func__; |
| 97 | check_queues_callback_.Cancel(); |
| 98 | } |
| 99 | |
| 100 | void Mac80211Monitor::WakeQueuesIfNeeded() { |
| 101 | SLOG(WiFi, 2) << __func__ << " on " << link_name_ << " (" << phy_name_ << ")"; |
| 102 | CHECK(is_running_); |
| 103 | StartTimer(); // Always re-arm timer. |
| 104 | |
| 105 | if (is_device_connected_) { |
| 106 | SLOG(WiFi, 5) << "Skipping queue check: device is connected."; |
| 107 | return; |
| 108 | } |
| 109 | |
| 110 | string queue_state_string; |
| 111 | if (!base::ReadFileToString(queue_state_file_path_, &queue_state_string, |
| 112 | kMaxQueueStateSizeBytes)) { |
| 113 | LOG(WARNING) << __func__ << ": incomplete read on " |
| 114 | << queue_state_file_path_.value(); |
| 115 | } |
| 116 | |
| 117 | uint32_t stuck_flags = |
| 118 | CheckAreQueuesStuck(ParseQueueState(queue_state_string)); |
| 119 | SLOG(WiFi, 2) << __func__ << " stuck_flags=" << stuck_flags; |
| 120 | if (!(stuck_flags & kStopFlagPowerSave)) { |
| 121 | if (stuck_flags) { |
| 122 | LOG(INFO) << "Skipping wake: stuck_flags is " |
| 123 | << std::showbase << std::hex << stuck_flags |
| 124 | << " (require " << kStopFlagPowerSave << " to wake)." |
| 125 | << std::dec << std::noshowbase; |
| 126 | } |
| 127 | return; |
| 128 | } |
| 129 | |
| 130 | time_t now_monotonic_seconds = 0; |
| 131 | if (!time_->GetSecondsMonotonic(&now_monotonic_seconds)) { |
| 132 | LOG(WARNING) << "Skipping reset: failed to get monotonic time"; |
| 133 | return; |
| 134 | } |
| 135 | |
| 136 | time_t elapsed = now_monotonic_seconds - last_woke_queues_monotonic_seconds_; |
| 137 | if (elapsed < kMinimumTimeBetweenWakesSeconds) { |
| 138 | LOG(WARNING) << "Skipping reset " |
| 139 | << "(min interval=" << kMinimumTimeBetweenWakesSeconds |
| 140 | << ", elapsed=" << elapsed << ")"; |
| 141 | return; |
| 142 | } |
| 143 | |
| 144 | LOG(WARNING) << "Queues appear stuck; waking."; |
| 145 | if (base::WriteFile(wake_queues_file_path_, "", sizeof("")) < 0) { |
| 146 | LOG(ERROR) << "Failed to write to " << wake_queues_file_path_.value() |
| 147 | << ": " << strerror(errno); |
| 148 | return; |
| 149 | } |
| 150 | |
| 151 | if (!on_repair_callback_.is_null()) { |
| 152 | on_repair_callback_.Run(); |
| 153 | } |
| 154 | |
| 155 | last_woke_queues_monotonic_seconds_ = now_monotonic_seconds; |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 156 | } |
| 157 | |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 158 | uint32_t Mac80211Monitor::CheckAreQueuesStuck( |
| 159 | const vector<QueueState> &queue_states) { |
| 160 | size_t max_stuck_queue_len = 0; |
| 161 | uint32_t stuck_flags = 0; |
| 162 | for (const auto &queue_state : queue_states) { |
| 163 | if (queue_state.queue_length < queue_length_limit_) { |
| 164 | SLOG(WiFi, 5) << __func__ |
| 165 | << " skipping queue of length " << queue_state.queue_length |
| 166 | << " (threshold is " << queue_length_limit_ << ")"; |
| 167 | continue; |
| 168 | } |
| 169 | if (!queue_state.stop_flags) { |
| 170 | SLOG(WiFi, 5) << __func__ |
| 171 | << " skipping queue of length " << queue_state.queue_length |
| 172 | << " (not stopped)"; |
| 173 | continue; |
| 174 | } |
| 175 | stuck_flags = stuck_flags | queue_state.stop_flags; |
| 176 | max_stuck_queue_len = |
| 177 | std::max(max_stuck_queue_len, queue_state.queue_length); |
| 178 | } |
| 179 | |
| 180 | if (max_stuck_queue_len >= queue_length_limit_) { |
| 181 | LOG(WARNING) << "max queue length is " << max_stuck_queue_len; |
| 182 | } |
| 183 | |
| 184 | if (stuck_flags) { |
mukesh agrawal | 241727d | 2014-08-08 13:55:11 -0700 | [diff] [blame] | 185 | for (unsigned int i = 0; i < kStopReasonMax; ++i) { |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 186 | auto stop_reason = static_cast<QueueStopReason>(i); |
| 187 | if (stuck_flags & GetFlagForReason(stop_reason)) { |
mukesh agrawal | f734873 | 2014-08-06 18:08:56 -0700 | [diff] [blame] | 188 | metrics_->SendEnumToUMA(Metrics::kMetricWifiStoppedTxQueueReason, |
| 189 | stop_reason, |
| 190 | kStopReasonMax); |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 191 | } |
| 192 | } |
mukesh agrawal | f734873 | 2014-08-06 18:08:56 -0700 | [diff] [blame] | 193 | |
| 194 | metrics_->SendToUMA(Metrics::kMetricWifiStoppedTxQueueLength, |
| 195 | max_stuck_queue_len, |
| 196 | Metrics::kMetricWifiStoppedTxQueueLengthMin, |
| 197 | Metrics::kMetricWifiStoppedTxQueueLengthMax, |
| 198 | Metrics::kMetricWifiStoppedTxQueueLengthNumBuckets); |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 199 | } |
| 200 | |
| 201 | return stuck_flags; |
| 202 | } |
| 203 | |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 204 | // example input: |
| 205 | // 01: 0x00000000/0 |
| 206 | // ... |
| 207 | // 04: 0x00000000/0 |
| 208 | // |
| 209 | // static |
| 210 | vector<Mac80211Monitor::QueueState> |
| 211 | Mac80211Monitor::ParseQueueState(const string &state_string) { |
| 212 | vector<string> queue_state_strings; |
| 213 | vector <QueueState> queue_states; |
| 214 | base::SplitString(state_string, '\n', &queue_state_strings); |
| 215 | |
| 216 | if (queue_state_strings.empty()) { |
| 217 | return queue_states; |
| 218 | } |
| 219 | |
| 220 | // Trailing newline generates empty string as last element. |
| 221 | // Discard that empty string if present. |
| 222 | if (queue_state_strings.back().empty()) { |
| 223 | queue_state_strings.pop_back(); |
| 224 | } |
| 225 | |
| 226 | for (const auto &queue_state : queue_state_strings) { |
| 227 | // Example |queue_state|: "00: 0x00000000/10". |
| 228 | vector<string> queuenum_and_state; |
| 229 | base::SplitString(queue_state, ':', &queuenum_and_state); |
| 230 | if (queuenum_and_state.size() != 2) { |
| 231 | LOG(WARNING) << __func__ << ": parse error on " << queue_state; |
| 232 | continue; |
| 233 | } |
| 234 | |
| 235 | // Example |queuenum_and_state|: {"00", "0x00000000/10"}. |
| 236 | vector<string> stopflags_and_length; |
| 237 | base::SplitString(queuenum_and_state[1], '/', &stopflags_and_length); |
| 238 | if (stopflags_and_length.size() != 2) { |
| 239 | LOG(WARNING) << __func__ << ": parse error on " << queue_state; |
| 240 | continue; |
| 241 | } |
| 242 | |
| 243 | // Example |stopflags_and_length|: {"0x00000000", "10"}. |
| 244 | size_t queue_number; |
| 245 | uint32_t stop_flags; |
| 246 | size_t queue_length; |
| 247 | if (!base::StringToSizeT(queuenum_and_state[0], &queue_number)) { |
| 248 | LOG(WARNING) << __func__ << ": parse error on " << queue_state; |
| 249 | continue; |
| 250 | } |
| 251 | if (!base::HexStringToUInt(stopflags_and_length[0], &stop_flags)) { |
| 252 | LOG(WARNING) << __func__ << ": parse error on " << queue_state; |
| 253 | continue; |
| 254 | } |
| 255 | if (!base::StringToSizeT(stopflags_and_length[1], &queue_length)) { |
| 256 | LOG(WARNING) << __func__ << ": parse error on " << queue_state; |
| 257 | continue; |
| 258 | } |
| 259 | queue_states.emplace_back(queue_number, stop_flags, queue_length); |
| 260 | } |
| 261 | |
| 262 | return queue_states; |
| 263 | } |
| 264 | |
mukesh agrawal | 3f5cd3f | 2014-08-08 13:54:45 -0700 | [diff] [blame] | 265 | // static |
| 266 | Mac80211Monitor::QueueStopFlag Mac80211Monitor::GetFlagForReason( |
| 267 | QueueStopReason reason) { |
| 268 | switch (reason) { |
| 269 | case kStopReasonDriver: |
| 270 | return kStopFlagDriver; |
| 271 | case kStopReasonPowerSave: |
| 272 | return kStopFlagPowerSave; |
| 273 | case kStopReasonChannelSwitch: |
| 274 | return kStopFlagChannelSwitch; |
| 275 | case kStopReasonAggregation: |
| 276 | return kStopFlagAggregation; |
| 277 | case kStopReasonSuspend: |
| 278 | return kStopFlagSuspend; |
| 279 | case kStopReasonBufferAdd: |
| 280 | return kStopFlagBufferAdd; |
| 281 | case kStopReasonChannelTypeChange: |
| 282 | return kStopFlagChannelTypeChange; |
| 283 | } |
| 284 | |
| 285 | // The switch statement above is exhaustive (-Wswitch will complain |
| 286 | // if it is not). But |reason| might be outside the defined range for |
| 287 | // the enum, so we need this to keep the compiler happy. |
| 288 | return kStopFlagInvalid; |
| 289 | } |
| 290 | |
mukesh agrawal | 3d487e6 | 2014-08-08 11:34:56 -0700 | [diff] [blame] | 291 | } // namespace shill |