Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 1 | /** |
| 2 | * Copyright (c) 2020, The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #define LOG_TAG "carwatchdogd" |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 18 | |
| 19 | #include "IoPerfCollection.h" |
| 20 | |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 21 | #include <WatchdogProperties.sysprop.h> |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 22 | #include <android-base/file.h> |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 23 | #include <android-base/parseint.h> |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 24 | #include <android-base/stringprintf.h> |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 25 | #include <android-base/strings.h> |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 26 | #include <binder/IServiceManager.h> |
| 27 | #include <cutils/android_filesystem_config.h> |
| 28 | #include <inttypes.h> |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 29 | #include <log/log.h> |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 30 | #include <processgroup/sched_policy.h> |
Lakshman Annadorai | 30b87e5 | 2020-04-14 15:34:52 -0700 | [diff] [blame] | 31 | #include <pthread.h> |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 32 | #include <pwd.h> |
| 33 | |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 34 | #include <algorithm> |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 35 | #include <iomanip> |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 36 | #include <iterator> |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 37 | #include <limits> |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 38 | #include <string> |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 39 | #include <thread> |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 40 | #include <unordered_map> |
| 41 | #include <unordered_set> |
| 42 | #include <vector> |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 43 | |
| 44 | namespace android { |
| 45 | namespace automotive { |
| 46 | namespace watchdog { |
| 47 | |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 48 | using android::defaultServiceManager; |
| 49 | using android::IBinder; |
| 50 | using android::IServiceManager; |
| 51 | using android::sp; |
| 52 | using android::String16; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 53 | using android::base::Error; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 54 | using android::base::ParseUint; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 55 | using android::base::Result; |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 56 | using android::base::Split; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 57 | using android::base::StringAppendF; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 58 | using android::base::WriteStringToFd; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 59 | using android::content::pm::IPackageManagerNative; |
| 60 | |
| 61 | namespace { |
| 62 | |
// Fallback for mTopNStatsPerCategory when the topNStatsPerCategory sysprop has no value (see
// IoPerfCollection::start()).
const int32_t kDefaultTopNStatsPerCategory = 5;
// Fallback for mTopNStatsPerSubcategory when the topNStatsPerSubcategory sysprop has no value (see
// IoPerfCollection::start()).
const int32_t kDefaultTopNStatsPerSubcategory = 3;
// Fallback intervals for the boot-time and periodic collections when the corresponding sysprops
// have no value (see IoPerfCollection::start()).
const std::chrono::seconds kDefaultBoottimeCollectionInterval = 1s;
const std::chrono::seconds kDefaultPeriodicCollectionInterval = 10s;
// Number of periodic collection perf data snapshots to cache in memory.
const int32_t kDefaultPeriodicCollectionBufferSize = 180;

// Minimum collection interval between subsequent collections.
const std::chrono::nanoseconds kMinCollectionInterval = 1s;

// Default values for the custom collection interval and max_duration. IoPerfCollection::dump()
// uses them when the matching flags are not passed on the command line.
const std::chrono::nanoseconds kCustomCollectionInterval = 10s;
const std::chrono::nanoseconds kCustomCollectionDuration = 30min;

// Line of 100 dashes followed by a newline; written between the major sections of the dump.
const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";
| 78 | |
// Returns |numer| expressed as a percentage of |denom|. A zero |denom| yields 0.0 instead of
// dividing by zero.
double percentage(uint64_t numer, uint64_t denom) {
    if (denom == 0) {
        return 0.0;
    }
    const double ratio = static_cast<double>(numer) / static_cast<double>(denom);
    return ratio * 100.0;
}
| 82 | |
// Process stats aggregated per UID by getUidProcessStats(). Member order matters: the struct is
// populated with designated initializers.
struct UidProcessStats {
    // Command name of a process together with the counter it is ranked by.
    struct ProcessInfo {
        std::string comm;
        uint64_t count{0};
    };
    uint64_t uid{0};
    // Number of the UID's threads found in the "D" state.
    uint32_t ioBlockedTasksCnt{0};
    // Number of threads owned by the UID.
    uint32_t totalTasksCnt{0};
    // Sum of the major page faults of the UID's processes.
    uint64_t majorFaults{0};
    // Fixed-size lists ordered by descending count.
    std::vector<ProcessInfo> topNIoBlockedProcesses{};
    std::vector<ProcessInfo> topNMajorFaultProcesses{};
};
| 95 | |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 96 | std::unique_ptr<std::unordered_map<uint32_t, UidProcessStats>> getUidProcessStats( |
| 97 | const std::vector<ProcessStats>& processStats, int topNStatsPerSubCategory) { |
| 98 | std::unique_ptr<std::unordered_map<uint32_t, UidProcessStats>> uidProcessStats( |
| 99 | new std::unordered_map<uint32_t, UidProcessStats>()); |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 100 | for (const auto& stats : processStats) { |
| 101 | if (stats.uid < 0) { |
| 102 | continue; |
| 103 | } |
| 104 | uint32_t uid = static_cast<uint32_t>(stats.uid); |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 105 | if (uidProcessStats->find(uid) == uidProcessStats->end()) { |
| 106 | (*uidProcessStats)[uid] = UidProcessStats{ |
| 107 | .uid = uid, |
| 108 | .topNIoBlockedProcesses = std::vector< |
| 109 | UidProcessStats::ProcessInfo>(topNStatsPerSubCategory, |
| 110 | UidProcessStats::ProcessInfo{}), |
| 111 | .topNMajorFaultProcesses = std::vector< |
| 112 | UidProcessStats::ProcessInfo>(topNStatsPerSubCategory, |
| 113 | UidProcessStats::ProcessInfo{}), |
| 114 | }; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 115 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 116 | auto& curUidProcessStats = (*uidProcessStats)[uid]; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 117 | // Top-level process stats has the aggregated major page faults count and this should be |
| 118 | // persistent across thread creation/termination. Thus use the value from this field. |
| 119 | curUidProcessStats.majorFaults += stats.process.majorFaults; |
| 120 | curUidProcessStats.totalTasksCnt += stats.threads.size(); |
| 121 | // The process state is the same as the main thread state. Thus to avoid double counting |
| 122 | // ignore the process state. |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 123 | uint32_t ioBlockedTasksCnt = 0; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 124 | for (const auto& threadStat : stats.threads) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 125 | ioBlockedTasksCnt += threadStat.second.state == "D" ? 1 : 0; |
| 126 | } |
| 127 | curUidProcessStats.ioBlockedTasksCnt += ioBlockedTasksCnt; |
| 128 | for (auto it = curUidProcessStats.topNIoBlockedProcesses.begin(); |
| 129 | it != curUidProcessStats.topNIoBlockedProcesses.end(); ++it) { |
| 130 | if (it->count < ioBlockedTasksCnt) { |
| 131 | curUidProcessStats.topNIoBlockedProcesses |
| 132 | .emplace(it, |
| 133 | UidProcessStats::ProcessInfo{ |
| 134 | .comm = stats.process.comm, |
| 135 | .count = ioBlockedTasksCnt, |
| 136 | }); |
| 137 | curUidProcessStats.topNIoBlockedProcesses.pop_back(); |
| 138 | break; |
| 139 | } |
| 140 | } |
| 141 | for (auto it = curUidProcessStats.topNMajorFaultProcesses.begin(); |
| 142 | it != curUidProcessStats.topNMajorFaultProcesses.end(); ++it) { |
| 143 | if (it->count < stats.process.majorFaults) { |
| 144 | curUidProcessStats.topNMajorFaultProcesses |
| 145 | .emplace(it, |
| 146 | UidProcessStats::ProcessInfo{ |
| 147 | .comm = stats.process.comm, |
| 148 | .count = stats.process.majorFaults, |
| 149 | }); |
| 150 | curUidProcessStats.topNMajorFaultProcesses.pop_back(); |
| 151 | break; |
| 152 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 153 | } |
| 154 | } |
| 155 | return uidProcessStats; |
| 156 | } |
| 157 | |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 158 | Result<std::chrono::seconds> parseSecondsFlag(Vector<String16> args, size_t pos) { |
| 159 | if (args.size() < pos) { |
| 160 | return Error() << "Value not provided"; |
| 161 | } |
| 162 | |
| 163 | uint64_t value; |
| 164 | std::string strValue = std::string(String8(args[pos]).string()); |
| 165 | if (!ParseUint(strValue, &value)) { |
| 166 | return Error() << "Invalid value " << args[pos].string() << ", must be an integer"; |
| 167 | } |
| 168 | return std::chrono::seconds(value); |
| 169 | } |
| 170 | |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 171 | } // namespace |
| 172 | |
| 173 | std::string toString(const UidIoPerfData& data) { |
| 174 | std::string buffer; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 175 | if (data.topNReads.size() > 0) { |
| 176 | StringAppendF(&buffer, "\nTop N Reads:\n%s\n", std::string(12, '-').c_str()); |
| 177 | StringAppendF(&buffer, |
| 178 | "Android User ID, Package Name, Foreground Bytes, Foreground Bytes %%, " |
| 179 | "Foreground Fsync, Foreground Fsync %%, Background Bytes, " |
| 180 | "Background Bytes %%, Background Fsync, Background Fsync %%\n"); |
| 181 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 182 | for (const auto& stat : data.topNReads) { |
| 183 | StringAppendF(&buffer, "%" PRIu32 ", %s", stat.userId, stat.packageName.c_str()); |
| 184 | for (int i = 0; i < UID_STATES; ++i) { |
| 185 | StringAppendF(&buffer, ", %" PRIu64 ", %.2f%%, %" PRIu64 ", %.2f%%", stat.bytes[i], |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 186 | percentage(stat.bytes[i], data.total[READ_BYTES][i]), stat.fsync[i], |
| 187 | percentage(stat.fsync[i], data.total[FSYNC_COUNT][i])); |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 188 | } |
| 189 | StringAppendF(&buffer, "\n"); |
| 190 | } |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 191 | if (data.topNWrites.size() > 0) { |
| 192 | StringAppendF(&buffer, "\nTop N Writes:\n%s\n", std::string(13, '-').c_str()); |
| 193 | StringAppendF(&buffer, |
| 194 | "Android User ID, Package Name, Foreground Bytes, Foreground Bytes %%, " |
| 195 | "Foreground Fsync, Foreground Fsync %%, Background Bytes, " |
| 196 | "Background Bytes %%, Background Fsync, Background Fsync %%\n"); |
| 197 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 198 | for (const auto& stat : data.topNWrites) { |
| 199 | StringAppendF(&buffer, "%" PRIu32 ", %s", stat.userId, stat.packageName.c_str()); |
| 200 | for (int i = 0; i < UID_STATES; ++i) { |
| 201 | StringAppendF(&buffer, ", %" PRIu64 ", %.2f%%, %" PRIu64 ", %.2f%%", stat.bytes[i], |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 202 | percentage(stat.bytes[i], data.total[WRITE_BYTES][i]), stat.fsync[i], |
| 203 | percentage(stat.fsync[i], data.total[FSYNC_COUNT][i])); |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 204 | } |
| 205 | StringAppendF(&buffer, "\n"); |
| 206 | } |
| 207 | return buffer; |
| 208 | } |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 209 | |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 210 | std::string toString(const SystemIoPerfData& data) { |
| 211 | std::string buffer; |
| 212 | StringAppendF(&buffer, "CPU I/O wait time/percent: %" PRIu64 " / %.2f%%\n", data.cpuIoWaitTime, |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 213 | percentage(data.cpuIoWaitTime, data.totalCpuTime)); |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 214 | StringAppendF(&buffer, "Number of I/O blocked processes/percent: %" PRIu32 " / %.2f%%\n", |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 215 | data.ioBlockedProcessesCnt, |
| 216 | percentage(data.ioBlockedProcessesCnt, data.totalProcessesCnt)); |
| 217 | return buffer; |
| 218 | } |
| 219 | |
| 220 | std::string toString(const ProcessIoPerfData& data) { |
| 221 | std::string buffer; |
| 222 | StringAppendF(&buffer, "Number of major page faults since last collection: %" PRIu64 "\n", |
| 223 | data.totalMajorFaults); |
| 224 | StringAppendF(&buffer, |
| 225 | "Percentage of change in major page faults since last collection: %.2f%%\n", |
| 226 | data.majorFaultsPercentChange); |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 227 | if (data.topNMajorFaultUids.size() > 0) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 228 | StringAppendF(&buffer, "\nTop N major page faults:\n%s\n", std::string(24, '-').c_str()); |
| 229 | StringAppendF(&buffer, |
| 230 | "Android User ID, Package Name, Number of major page faults, " |
| 231 | "Percentage of total major page faults\n"); |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 232 | StringAppendF(&buffer, |
| 233 | "\tCommand, Number of major page faults, Percentage of UID's major page " |
| 234 | "faults\n"); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 235 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 236 | for (const auto& uidStats : data.topNMajorFaultUids) { |
| 237 | StringAppendF(&buffer, "%" PRIu32 ", %s, %" PRIu64 ", %.2f%%\n", uidStats.userId, |
| 238 | uidStats.packageName.c_str(), uidStats.count, |
| 239 | percentage(uidStats.count, data.totalMajorFaults)); |
| 240 | for (const auto& procStats : uidStats.topNProcesses) { |
| 241 | StringAppendF(&buffer, "\t%s, %" PRIu64 ", %.2f%%\n", procStats.comm.c_str(), |
| 242 | procStats.count, percentage(procStats.count, uidStats.count)); |
| 243 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 244 | } |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 245 | if (data.topNIoBlockedUids.size() > 0) { |
| 246 | StringAppendF(&buffer, "\nTop N I/O waiting UIDs:\n%s\n", std::string(23, '-').c_str()); |
| 247 | StringAppendF(&buffer, |
| 248 | "Android User ID, Package Name, Number of owned tasks waiting for I/O, " |
| 249 | "Percentage of owned tasks waiting for I/O\n"); |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 250 | StringAppendF(&buffer, |
| 251 | "\tCommand, Number of I/O waiting tasks, Percentage of UID's tasks waiting " |
| 252 | "for I/O\n"); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 253 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 254 | for (size_t i = 0; i < data.topNIoBlockedUids.size(); ++i) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 255 | const auto& uidStats = data.topNIoBlockedUids[i]; |
| 256 | StringAppendF(&buffer, "%" PRIu32 ", %s, %" PRIu64 ", %.2f%%\n", uidStats.userId, |
| 257 | uidStats.packageName.c_str(), uidStats.count, |
| 258 | percentage(uidStats.count, data.topNIoBlockedUidsTotalTaskCnt[i])); |
| 259 | for (const auto& procStats : uidStats.topNProcesses) { |
| 260 | StringAppendF(&buffer, "\t%s, %" PRIu64 ", %.2f%%\n", procStats.comm.c_str(), |
| 261 | procStats.count, percentage(procStats.count, uidStats.count)); |
| 262 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 263 | } |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 264 | return buffer; |
| 265 | } |
| 266 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 267 | std::string toString(const IoPerfRecord& record) { |
| 268 | std::string buffer; |
| 269 | StringAppendF(&buffer, "%s%s%s", toString(record.systemIoPerfData).c_str(), |
| 270 | toString(record.processIoPerfData).c_str(), |
| 271 | toString(record.uidIoPerfData).c_str()); |
| 272 | return buffer; |
| 273 | } |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 274 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 275 | std::string toString(const CollectionInfo& collectionInfo) { |
| 276 | std::string buffer; |
| 277 | StringAppendF(&buffer, "Number of collections: %zu\n", collectionInfo.records.size()); |
| 278 | auto interval = |
| 279 | std::chrono::duration_cast<std::chrono::seconds>(collectionInfo.interval).count(); |
| 280 | StringAppendF(&buffer, "Collection interval: %lld second%s\n", interval, |
| 281 | ((interval > 1) ? "s" : "")); |
| 282 | for (size_t i = 0; i < collectionInfo.records.size(); ++i) { |
| 283 | const auto& record = collectionInfo.records[i]; |
| 284 | std::stringstream timestamp; |
| 285 | timestamp << std::put_time(std::localtime(&record.time), "%c %Z"); |
| 286 | StringAppendF(&buffer, "Collection %zu: <%s>\n%s\n%s\n", i, timestamp.str().c_str(), |
| 287 | std::string(45, '=').c_str(), toString(record).c_str()); |
| 288 | } |
| 289 | return buffer; |
| 290 | } |
| 291 | |
| 292 | Result<void> IoPerfCollection::start() { |
| 293 | { |
| 294 | Mutex::Autolock lock(mMutex); |
| 295 | if (mCurrCollectionEvent != CollectionEvent::INIT || mCollectionThread.joinable()) { |
Lakshman Annadorai | 2c0b0d1 | 2020-03-04 11:14:59 -0800 | [diff] [blame] | 296 | return Error(INVALID_OPERATION) |
| 297 | << "Cannot start I/O performance collection more than once"; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 298 | } |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 299 | mTopNStatsPerCategory = static_cast<int>( |
| 300 | sysprop::topNStatsPerCategory().value_or(kDefaultTopNStatsPerCategory)); |
Lakshman Annadorai | efc2339 | 2020-04-02 14:18:39 -0700 | [diff] [blame] | 301 | mTopNStatsPerSubcategory = static_cast<int>( |
| 302 | sysprop::topNStatsPerSubcategory().value_or(kDefaultTopNStatsPerSubcategory)); |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 303 | std::chrono::nanoseconds boottimeCollectionInterval = |
| 304 | std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 305 | std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or( |
| 306 | kDefaultBoottimeCollectionInterval.count()))); |
| 307 | std::chrono::nanoseconds periodicCollectionInterval = |
| 308 | std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 309 | std::chrono::seconds(sysprop::periodicCollectionInterval().value_or( |
| 310 | kDefaultPeriodicCollectionInterval.count()))); |
| 311 | size_t periodicCollectionBufferSize = |
| 312 | static_cast<size_t>(sysprop::periodicCollectionBufferSize().value_or( |
| 313 | kDefaultPeriodicCollectionBufferSize)); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 314 | mBoottimeCollection = { |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 315 | .interval = boottimeCollectionInterval, |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 316 | .maxCacheSize = std::numeric_limits<std::size_t>::max(), |
| 317 | .lastCollectionUptime = 0, |
| 318 | .records = {}, |
| 319 | }; |
| 320 | mPeriodicCollection = { |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 321 | .interval = periodicCollectionInterval, |
| 322 | .maxCacheSize = periodicCollectionBufferSize, |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 323 | .lastCollectionUptime = 0, |
| 324 | .records = {}, |
| 325 | }; |
| 326 | } |
| 327 | |
| 328 | mCollectionThread = std::thread([&]() { |
| 329 | { |
| 330 | Mutex::Autolock lock(mMutex); |
| 331 | if (mCurrCollectionEvent != CollectionEvent::INIT) { |
| 332 | ALOGE("Skipping I/O performance data collection as the current collection event " |
| 333 | "%s != %s", |
| 334 | toString(mCurrCollectionEvent).c_str(), |
| 335 | toString(CollectionEvent::INIT).c_str()); |
| 336 | return; |
| 337 | } |
| 338 | mCurrCollectionEvent = CollectionEvent::BOOT_TIME; |
| 339 | mBoottimeCollection.lastCollectionUptime = mHandlerLooper->now(); |
| 340 | mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0)); |
| 341 | mHandlerLooper->sendMessage(this, CollectionEvent::BOOT_TIME); |
| 342 | } |
| 343 | if (set_sched_policy(0, SP_BACKGROUND) != 0) { |
| 344 | ALOGW("Failed to set background scheduling priority to I/O performance data collection " |
| 345 | "thread"); |
| 346 | } |
Lakshman Annadorai | 30b87e5 | 2020-04-14 15:34:52 -0700 | [diff] [blame] | 347 | int ret = pthread_setname_np(pthread_self(), "IoPerfCollect"); |
| 348 | if (ret != 0) { |
| 349 | ALOGE("Failed to set I/O perf collection thread name: %d", ret); |
| 350 | } |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 351 | bool isCollectionActive = true; |
| 352 | // Loop until the collection is not active -- I/O perf collection runs on this thread in a |
| 353 | // handler. |
| 354 | while (isCollectionActive) { |
| 355 | mHandlerLooper->pollAll(/*timeoutMillis=*/-1); |
| 356 | Mutex::Autolock lock(mMutex); |
| 357 | isCollectionActive = mCurrCollectionEvent != CollectionEvent::TERMINATED; |
| 358 | } |
| 359 | }); |
| 360 | return {}; |
| 361 | } |
| 362 | |
| 363 | void IoPerfCollection::terminate() { |
| 364 | { |
| 365 | Mutex::Autolock lock(mMutex); |
| 366 | if (mCurrCollectionEvent == CollectionEvent::TERMINATED) { |
| 367 | ALOGE("I/O performance data collection was terminated already"); |
| 368 | return; |
| 369 | } |
| 370 | ALOGE("Terminating I/O performance data collection"); |
| 371 | mCurrCollectionEvent = CollectionEvent::TERMINATED; |
| 372 | } |
| 373 | if (mCollectionThread.joinable()) { |
| 374 | mHandlerLooper->removeMessages(this); |
| 375 | mHandlerLooper->wake(); |
| 376 | mCollectionThread.join(); |
| 377 | } |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 378 | } |
| 379 | |
| 380 | Result<void> IoPerfCollection::onBootFinished() { |
| 381 | Mutex::Autolock lock(mMutex); |
| 382 | if (mCurrCollectionEvent != CollectionEvent::BOOT_TIME) { |
Lakshman Annadorai | dd9cb77 | 2020-04-02 16:23:31 -0700 | [diff] [blame] | 383 | // This case happens when either the I/O perf collection has prematurely terminated before |
| 384 | // boot complete notification is received or multiple boot complete notifications are |
| 385 | // received. In either case don't return error as this will lead to runtime exception and |
| 386 | // cause system to boot loop. |
| 387 | ALOGE("Current I/O performance data collection event %s != %s", |
| 388 | toString(mCurrCollectionEvent).c_str(), |
| 389 | toString(CollectionEvent::BOOT_TIME).c_str()); |
| 390 | return {}; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 391 | } |
Lakshman Annadorai | e877d87 | 2020-04-15 13:45:11 -0700 | [diff] [blame] | 392 | mBoottimeCollection.lastCollectionUptime = mHandlerLooper->now(); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 393 | mHandlerLooper->removeMessages(this); |
Lakshman Annadorai | e877d87 | 2020-04-15 13:45:11 -0700 | [diff] [blame] | 394 | mHandlerLooper->sendMessage(this, SwitchEvent::END_BOOTTIME_COLLECTION); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 395 | return {}; |
| 396 | } |
| 397 | |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 398 | Result<void> IoPerfCollection::dump(int fd, const Vector<String16>& args) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 399 | if (args.empty()) { |
| 400 | const auto& ret = dumpCollection(fd); |
| 401 | if (!ret) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 402 | return ret; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 403 | } |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 404 | return {}; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 405 | } |
| 406 | |
| 407 | if (args[0] == String16(kStartCustomCollectionFlag)) { |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 408 | if (args.size() > 7) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 409 | return Error(INVALID_OPERATION) << "Number of arguments to start custom " |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 410 | << "I/O performance data collection cannot exceed 7"; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 411 | } |
| 412 | std::chrono::nanoseconds interval = kCustomCollectionInterval; |
| 413 | std::chrono::nanoseconds maxDuration = kCustomCollectionDuration; |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 414 | std::unordered_set<std::string> filterPackages; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 415 | for (size_t i = 1; i < args.size(); ++i) { |
| 416 | if (args[i] == String16(kIntervalFlag)) { |
| 417 | const auto& ret = parseSecondsFlag(args, i + 1); |
| 418 | if (!ret) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 419 | return Error(FAILED_TRANSACTION) |
| 420 | << "Failed to parse " << kIntervalFlag << ": " << ret.error(); |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 421 | } |
| 422 | interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*ret); |
| 423 | ++i; |
| 424 | continue; |
| 425 | } |
| 426 | if (args[i] == String16(kMaxDurationFlag)) { |
| 427 | const auto& ret = parseSecondsFlag(args, i + 1); |
| 428 | if (!ret) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 429 | return Error(FAILED_TRANSACTION) |
| 430 | << "Failed to parse " << kMaxDurationFlag << ": " << ret.error(); |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 431 | } |
| 432 | maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*ret); |
| 433 | ++i; |
| 434 | continue; |
| 435 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 436 | if (args[i] == String16(kFilterPackagesFlag)) { |
| 437 | if (args.size() < i + 1) { |
| 438 | return Error(FAILED_TRANSACTION) |
| 439 | << "Must provide value for '" << kFilterPackagesFlag << "' flag"; |
| 440 | } |
| 441 | std::vector<std::string> packages = |
| 442 | Split(std::string(String8(args[i + 1]).string()), ","); |
| 443 | std::copy(packages.begin(), packages.end(), |
| 444 | std::inserter(filterPackages, filterPackages.end())); |
| 445 | ++i; |
| 446 | continue; |
| 447 | } |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 448 | ALOGW("Unknown flag %s provided to start custom I/O performance data collection", |
| 449 | String8(args[i]).string()); |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 450 | return Error(INVALID_OPERATION) << "Unknown flag " << String8(args[i]).string() |
| 451 | << " provided to start custom I/O performance data " |
| 452 | << "collection"; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 453 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 454 | const auto& ret = startCustomCollection(interval, maxDuration, filterPackages); |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 455 | if (!ret) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 456 | return ret; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 457 | } |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 458 | return {}; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 459 | } |
| 460 | |
| 461 | if (args[0] == String16(kEndCustomCollectionFlag)) { |
| 462 | if (args.size() != 1) { |
| 463 | ALOGW("Number of arguments to end custom I/O performance data collection cannot " |
| 464 | "exceed 1"); |
| 465 | } |
| 466 | const auto& ret = endCustomCollection(fd); |
| 467 | if (!ret) { |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 468 | return ret; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 469 | } |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 470 | return {}; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 471 | } |
| 472 | |
Lakshman Annadorai | 0dfeeeb | 2020-03-13 16:57:12 -0700 | [diff] [blame] | 473 | return Error(INVALID_OPERATION) |
| 474 | << "Dump arguments start neither with " << kStartCustomCollectionFlag << " nor with " |
| 475 | << kEndCustomCollectionFlag << " flags"; |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 476 | } |
| 477 | |
| 478 | Result<void> IoPerfCollection::dumpCollection(int fd) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 479 | Mutex::Autolock lock(mMutex); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 480 | if (mCurrCollectionEvent == CollectionEvent::TERMINATED) { |
| 481 | ALOGW("I/O performance data collection not active. Dumping cached data"); |
| 482 | if (!WriteStringToFd("I/O performance data collection not active. Dumping cached data.", |
| 483 | fd)) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 484 | return Error(FAILED_TRANSACTION) << "Failed to write I/O performance collection status"; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 485 | } |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 486 | } |
| 487 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 488 | const auto& ret = dumpCollectorsStatusLocked(fd); |
| 489 | if (!ret) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 490 | return Error(FAILED_TRANSACTION) << ret.error(); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 491 | } |
| 492 | |
| 493 | if (!WriteStringToFd(StringPrintf("%sI/O performance data reports:\n%sBoot-time collection " |
| 494 | "report:\n%s\n", |
| 495 | kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str(), |
| 496 | std::string(28, '=').c_str()), |
| 497 | fd) || |
| 498 | !WriteStringToFd(toString(mBoottimeCollection), fd) || |
| 499 | !WriteStringToFd(StringPrintf("%s\nPeriodic collection report:\n%s\n", |
| 500 | std::string(75, '-').c_str(), std::string(27, '=').c_str()), |
| 501 | fd) || |
| 502 | !WriteStringToFd(toString(mPeriodicCollection), fd) || |
| 503 | !WriteStringToFd(kDumpMajorDelimiter, fd)) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 504 | return Error(FAILED_TRANSACTION) |
| 505 | << "Failed to dump the boot-time and periodic collection reports."; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 506 | } |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 507 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 508 | } |
| 509 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 510 | Result<void> IoPerfCollection::dumpCollectorsStatusLocked(int fd) { |
| 511 | if (!mUidIoStats->enabled() && |
| 512 | !WriteStringToFd(StringPrintf("UidIoStats collector failed to access the file %s", |
| 513 | mUidIoStats->filePath().c_str()), |
| 514 | fd)) { |
| 515 | return Error() << "Failed to write UidIoStats collector status"; |
| 516 | } |
| 517 | if (!mProcStat->enabled() && |
| 518 | !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s", |
| 519 | mProcStat->filePath().c_str()), |
| 520 | fd)) { |
| 521 | return Error() << "Failed to write ProcStat collector status"; |
| 522 | } |
| 523 | if (!mProcPidStat->enabled() && |
| 524 | !WriteStringToFd(StringPrintf("ProcPidStat collector failed to access the directory %s", |
| 525 | mProcPidStat->dirPath().c_str()), |
| 526 | fd)) { |
| 527 | return Error() << "Failed to write ProcPidStat collector status"; |
| 528 | } |
| 529 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 530 | } |
| 531 | |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 532 | Result<void> IoPerfCollection::startCustomCollection( |
| 533 | std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration, |
| 534 | const std::unordered_set<std::string>& filterPackages) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 535 | if (interval < kMinCollectionInterval || maxDuration < kMinCollectionInterval) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 536 | return Error(INVALID_OPERATION) |
| 537 | << "Collection interval and maximum duration must be >= " |
| 538 | << std::chrono::duration_cast<std::chrono::milliseconds>(kMinCollectionInterval) |
| 539 | .count() |
| 540 | << " milliseconds."; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 541 | } |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 542 | Mutex::Autolock lock(mMutex); |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 543 | if (mCurrCollectionEvent != CollectionEvent::PERIODIC) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 544 | return Error(INVALID_OPERATION) |
| 545 | << "Cannot start a custom collection when " |
| 546 | << "the current collection event " << toString(mCurrCollectionEvent) |
| 547 | << " != " << toString(CollectionEvent::PERIODIC) << " collection event"; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 548 | } |
| 549 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 550 | mCustomCollection = { |
| 551 | .interval = interval, |
| 552 | .maxCacheSize = std::numeric_limits<std::size_t>::max(), |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 553 | .filterPackages = filterPackages, |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 554 | .lastCollectionUptime = mHandlerLooper->now(), |
| 555 | .records = {}, |
| 556 | }; |
| 557 | |
| 558 | mHandlerLooper->removeMessages(this); |
| 559 | nsecs_t uptime = mHandlerLooper->now() + maxDuration.count(); |
| 560 | mHandlerLooper->sendMessageAtTime(uptime, this, SwitchEvent::END_CUSTOM_COLLECTION); |
| 561 | mCurrCollectionEvent = CollectionEvent::CUSTOM; |
| 562 | mHandlerLooper->sendMessage(this, CollectionEvent::CUSTOM); |
| 563 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 564 | } |
| 565 | |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 566 | Result<void> IoPerfCollection::endCustomCollection(int fd) { |
| 567 | Mutex::Autolock lock(mMutex); |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 568 | if (mCurrCollectionEvent != CollectionEvent::CUSTOM) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 569 | return Error(INVALID_OPERATION) << "No custom collection is running"; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 570 | } |
| 571 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 572 | mHandlerLooper->removeMessages(this); |
| 573 | mHandlerLooper->sendMessage(this, SwitchEvent::END_CUSTOM_COLLECTION); |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 574 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 575 | const auto& ret = dumpCollectorsStatusLocked(fd); |
| 576 | if (!ret) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 577 | return Error(FAILED_TRANSACTION) << ret.error(); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 578 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 579 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 580 | if (!WriteStringToFd(StringPrintf("%sI/O performance data report for custom collection:\n%s", |
| 581 | kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()), |
| 582 | fd) || |
| 583 | !WriteStringToFd(toString(mCustomCollection), fd) || |
| 584 | !WriteStringToFd(kDumpMajorDelimiter, fd)) { |
Lakshman Annadorai | 19bf275 | 2020-03-05 17:45:43 -0800 | [diff] [blame] | 585 | return Error(FAILED_TRANSACTION) << "Failed to write custom collection report."; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 586 | } |
| 587 | |
| 588 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 589 | } |
| 590 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 591 | void IoPerfCollection::handleMessage(const Message& message) { |
| 592 | Result<void> result; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 593 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 594 | switch (message.what) { |
| 595 | case static_cast<int>(CollectionEvent::BOOT_TIME): |
| 596 | result = processCollectionEvent(CollectionEvent::BOOT_TIME, &mBoottimeCollection); |
| 597 | break; |
Lakshman Annadorai | e877d87 | 2020-04-15 13:45:11 -0700 | [diff] [blame] | 598 | case static_cast<int>(SwitchEvent::END_BOOTTIME_COLLECTION): |
| 599 | result = processCollectionEvent(CollectionEvent::BOOT_TIME, &mBoottimeCollection); |
| 600 | if (result.ok()) { |
| 601 | mHandlerLooper->removeMessages(this); |
| 602 | mCurrCollectionEvent = CollectionEvent::PERIODIC; |
| 603 | mPeriodicCollection.lastCollectionUptime = |
| 604 | mHandlerLooper->now() + mPeriodicCollection.interval.count(); |
| 605 | mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastCollectionUptime, this, |
| 606 | CollectionEvent::PERIODIC); |
| 607 | } |
| 608 | break; |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 609 | case static_cast<int>(CollectionEvent::PERIODIC): |
| 610 | result = processCollectionEvent(CollectionEvent::PERIODIC, &mPeriodicCollection); |
| 611 | break; |
| 612 | case static_cast<int>(CollectionEvent::CUSTOM): |
| 613 | result = processCollectionEvent(CollectionEvent::CUSTOM, &mCustomCollection); |
| 614 | break; |
| 615 | case static_cast<int>(SwitchEvent::END_CUSTOM_COLLECTION): { |
| 616 | Mutex::Autolock lock(mMutex); |
| 617 | if (mCurrCollectionEvent != CollectionEvent::CUSTOM) { |
| 618 | ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s", |
| 619 | toString(mCurrCollectionEvent).c_str(), |
| 620 | toString(CollectionEvent::CUSTOM).c_str()); |
| 621 | return; |
| 622 | } |
| 623 | mCustomCollection = {}; |
| 624 | mHandlerLooper->removeMessages(this); |
| 625 | mCurrCollectionEvent = CollectionEvent::PERIODIC; |
| 626 | mPeriodicCollection.lastCollectionUptime = mHandlerLooper->now(); |
| 627 | mHandlerLooper->sendMessage(this, CollectionEvent::PERIODIC); |
| 628 | return; |
| 629 | } |
| 630 | default: |
| 631 | result = Error() << "Unknown message: " << message.what; |
| 632 | } |
| 633 | |
Lakshman Annadorai | e877d87 | 2020-04-15 13:45:11 -0700 | [diff] [blame] | 634 | if (!result.ok()) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 635 | Mutex::Autolock lock(mMutex); |
| 636 | ALOGE("Terminating I/O performance data collection: %s", result.error().message().c_str()); |
| 637 | // DO NOT CALL terminate() as it tries to join the collection thread but this code is |
| 638 | // executed on the collection thread. Thus it will result in a deadlock. |
| 639 | mCurrCollectionEvent = CollectionEvent::TERMINATED; |
| 640 | mHandlerLooper->removeMessages(this); |
| 641 | mHandlerLooper->wake(); |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | Result<void> IoPerfCollection::processCollectionEvent(CollectionEvent event, CollectionInfo* info) { |
| 646 | Mutex::Autolock lock(mMutex); |
| 647 | // Messages sent to the looper are intrinsically racy such that a message from the previous |
| 648 | // collection event may land in the looper after the current collection has already begun. Thus |
| 649 | // verify the current collection event before starting the collection. |
| 650 | if (mCurrCollectionEvent != event) { |
| 651 | ALOGW("Skipping %s collection message on collection event %s", toString(event).c_str(), |
| 652 | toString(mCurrCollectionEvent).c_str()); |
| 653 | return {}; |
| 654 | } |
| 655 | if (info->maxCacheSize == 0) { |
| 656 | return Error() << "Maximum cache size for " << toString(event) << " collection cannot be 0"; |
| 657 | } |
| 658 | if (info->interval < kMinCollectionInterval) { |
| 659 | return Error() |
| 660 | << "Collection interval of " |
| 661 | << std::chrono::duration_cast<std::chrono::seconds>(info->interval).count() |
| 662 | << " seconds for " << toString(event) << " collection cannot be less than " |
| 663 | << std::chrono::duration_cast<std::chrono::seconds>(kMinCollectionInterval).count() |
| 664 | << " seconds"; |
| 665 | } |
| 666 | auto ret = collectLocked(info); |
| 667 | if (!ret) { |
| 668 | return Error() << toString(event) << " collection failed: " << ret.error(); |
| 669 | } |
| 670 | info->lastCollectionUptime += info->interval.count(); |
| 671 | mHandlerLooper->sendMessageAtTime(info->lastCollectionUptime, this, event); |
| 672 | return {}; |
| 673 | } |
| 674 | |
| 675 | Result<void> IoPerfCollection::collectLocked(CollectionInfo* collectionInfo) { |
| 676 | if (!mUidIoStats->enabled() && !mProcStat->enabled() && !mProcPidStat->enabled()) { |
| 677 | return Error() << "No collectors enabled"; |
| 678 | } |
| 679 | IoPerfRecord record{ |
| 680 | .time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), |
| 681 | }; |
| 682 | auto ret = collectSystemIoPerfDataLocked(&record.systemIoPerfData); |
| 683 | if (!ret) { |
| 684 | return ret; |
| 685 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 686 | ret = collectProcessIoPerfDataLocked(*collectionInfo, &record.processIoPerfData); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 687 | if (!ret) { |
| 688 | return ret; |
| 689 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 690 | ret = collectUidIoPerfDataLocked(*collectionInfo, &record.uidIoPerfData); |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 691 | if (!ret) { |
| 692 | return ret; |
| 693 | } |
| 694 | if (collectionInfo->records.size() > collectionInfo->maxCacheSize) { |
| 695 | collectionInfo->records.erase(collectionInfo->records.begin()); // Erase the oldest record. |
| 696 | } |
| 697 | collectionInfo->records.emplace_back(record); |
| 698 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 699 | } |
| 700 | |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 701 | Result<void> IoPerfCollection::collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo, |
| 702 | UidIoPerfData* uidIoPerfData) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 703 | if (!mUidIoStats->enabled()) { |
| 704 | // Don't return an error to avoid pre-mature termination. Instead, fetch data from other |
| 705 | // collectors. |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 706 | return {}; |
| 707 | } |
| 708 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 709 | const Result<std::unordered_map<uint32_t, UidIoUsage>>& usage = mUidIoStats->collect(); |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 710 | if (!usage) { |
| 711 | return Error() << "Failed to collect uid I/O usage: " << usage.error(); |
| 712 | } |
| 713 | |
| 714 | // Fetch only the top N reads and writes from the usage records. |
| 715 | UidIoUsage tempUsage = {}; |
| 716 | std::vector<const UidIoUsage*> topNReads(mTopNStatsPerCategory, &tempUsage); |
| 717 | std::vector<const UidIoUsage*> topNWrites(mTopNStatsPerCategory, &tempUsage); |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 718 | std::unordered_set<uint32_t> unmappedUids; |
| 719 | |
| 720 | for (const auto& uIt : *usage) { |
| 721 | const UidIoUsage& curUsage = uIt.second; |
| 722 | if (curUsage.ios.isZero()) { |
| 723 | continue; |
| 724 | } |
| 725 | if (mUidToPackageNameMapping.find(curUsage.uid) == mUidToPackageNameMapping.end()) { |
| 726 | unmappedUids.insert(curUsage.uid); |
| 727 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 728 | uidIoPerfData->total[READ_BYTES][FOREGROUND] += |
| 729 | curUsage.ios.metrics[READ_BYTES][FOREGROUND]; |
| 730 | uidIoPerfData->total[READ_BYTES][BACKGROUND] += |
| 731 | curUsage.ios.metrics[READ_BYTES][BACKGROUND]; |
| 732 | uidIoPerfData->total[WRITE_BYTES][FOREGROUND] += |
| 733 | curUsage.ios.metrics[WRITE_BYTES][FOREGROUND]; |
| 734 | uidIoPerfData->total[WRITE_BYTES][BACKGROUND] += |
| 735 | curUsage.ios.metrics[WRITE_BYTES][BACKGROUND]; |
| 736 | uidIoPerfData->total[FSYNC_COUNT][FOREGROUND] += |
| 737 | curUsage.ios.metrics[FSYNC_COUNT][FOREGROUND]; |
| 738 | uidIoPerfData->total[FSYNC_COUNT][BACKGROUND] += |
| 739 | curUsage.ios.metrics[FSYNC_COUNT][BACKGROUND]; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 740 | |
| 741 | for (auto it = topNReads.begin(); it != topNReads.end(); ++it) { |
| 742 | const UidIoUsage* curRead = *it; |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 743 | if (curRead->ios.sumReadBytes() < curUsage.ios.sumReadBytes()) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 744 | topNReads.emplace(it, &curUsage); |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 745 | if (collectionInfo.filterPackages.empty()) { |
| 746 | topNReads.pop_back(); |
| 747 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 748 | break; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 749 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 750 | } |
| 751 | for (auto it = topNWrites.begin(); it != topNWrites.end(); ++it) { |
| 752 | const UidIoUsage* curWrite = *it; |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 753 | if (curWrite->ios.sumWriteBytes() < curUsage.ios.sumWriteBytes()) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 754 | topNWrites.emplace(it, &curUsage); |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 755 | if (collectionInfo.filterPackages.empty()) { |
| 756 | topNWrites.pop_back(); |
| 757 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 758 | break; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 759 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 760 | } |
| 761 | } |
| 762 | |
| 763 | const auto& ret = updateUidToPackageNameMapping(unmappedUids); |
| 764 | if (!ret) { |
| 765 | ALOGW("%s", ret.error().message().c_str()); |
| 766 | } |
| 767 | |
| 768 | // Convert the top N I/O usage to UidIoPerfData. |
| 769 | for (const auto& usage : topNReads) { |
| 770 | if (usage->ios.isZero()) { |
| 771 | // End of non-zero usage records. This case occurs when the number of UIDs with active |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 772 | // I/O operations is < |ro.carwatchdog.top_n_stats_per_category|. |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 773 | break; |
| 774 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 775 | UidIoPerfData::Stats stats = { |
| 776 | .userId = multiuser_get_user_id(usage->uid), |
| 777 | .packageName = std::to_string(usage->uid), |
| 778 | .bytes = {usage->ios.metrics[READ_BYTES][FOREGROUND], |
| 779 | usage->ios.metrics[READ_BYTES][BACKGROUND]}, |
| 780 | .fsync = {usage->ios.metrics[FSYNC_COUNT][FOREGROUND], |
| 781 | usage->ios.metrics[FSYNC_COUNT][BACKGROUND]}, |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 782 | }; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 783 | if (mUidToPackageNameMapping.find(usage->uid) != mUidToPackageNameMapping.end()) { |
| 784 | stats.packageName = mUidToPackageNameMapping[usage->uid]; |
| 785 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 786 | if (!collectionInfo.filterPackages.empty() && |
| 787 | collectionInfo.filterPackages.find(stats.packageName) == |
| 788 | collectionInfo.filterPackages.end()) { |
| 789 | continue; |
| 790 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 791 | uidIoPerfData->topNReads.emplace_back(stats); |
| 792 | } |
| 793 | |
| 794 | for (const auto& usage : topNWrites) { |
| 795 | if (usage->ios.isZero()) { |
| 796 | // End of non-zero usage records. This case occurs when the number of UIDs with active |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 797 | // I/O operations is < |ro.carwatchdog.top_n_stats_per_category|. |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 798 | break; |
| 799 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 800 | UidIoPerfData::Stats stats = { |
| 801 | .userId = multiuser_get_user_id(usage->uid), |
| 802 | .packageName = std::to_string(usage->uid), |
| 803 | .bytes = {usage->ios.metrics[WRITE_BYTES][FOREGROUND], |
| 804 | usage->ios.metrics[WRITE_BYTES][BACKGROUND]}, |
| 805 | .fsync = {usage->ios.metrics[FSYNC_COUNT][FOREGROUND], |
| 806 | usage->ios.metrics[FSYNC_COUNT][BACKGROUND]}, |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 807 | }; |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 808 | if (mUidToPackageNameMapping.find(usage->uid) != mUidToPackageNameMapping.end()) { |
| 809 | stats.packageName = mUidToPackageNameMapping[usage->uid]; |
| 810 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 811 | if (!collectionInfo.filterPackages.empty() && |
| 812 | collectionInfo.filterPackages.find(stats.packageName) == |
| 813 | collectionInfo.filterPackages.end()) { |
| 814 | continue; |
| 815 | } |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 816 | uidIoPerfData->topNWrites.emplace_back(stats); |
| 817 | } |
| 818 | return {}; |
| 819 | } |
| 820 | |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 821 | Result<void> IoPerfCollection::collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 822 | if (!mProcStat->enabled()) { |
| 823 | // Don't return an error to avoid pre-mature termination. Instead, fetch data from other |
| 824 | // collectors. |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 825 | return {}; |
| 826 | } |
| 827 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 828 | const Result<ProcStatInfo>& procStatInfo = mProcStat->collect(); |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 829 | if (!procStatInfo) { |
| 830 | return Error() << "Failed to collect proc stats: " << procStatInfo.error(); |
| 831 | } |
| 832 | |
| 833 | systemIoPerfData->cpuIoWaitTime = procStatInfo->cpuStats.ioWaitTime; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 834 | systemIoPerfData->totalCpuTime = procStatInfo->totalCpuTime(); |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 835 | systemIoPerfData->ioBlockedProcessesCnt = procStatInfo->ioBlockedProcessesCnt; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 836 | systemIoPerfData->totalProcessesCnt = procStatInfo->totalProcessesCnt(); |
Lakshman Annadorai | f9b47c2 | 2020-02-10 16:45:18 -0800 | [diff] [blame] | 837 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 838 | } |
| 839 | |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 840 | Result<void> IoPerfCollection::collectProcessIoPerfDataLocked( |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 841 | const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData) { |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 842 | if (!mProcPidStat->enabled()) { |
| 843 | // Don't return an error to avoid pre-mature termination. Instead, fetch data from other |
| 844 | // collectors. |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 845 | return {}; |
| 846 | } |
| 847 | |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 848 | const Result<std::vector<ProcessStats>>& processStats = mProcPidStat->collect(); |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 849 | if (!processStats) { |
| 850 | return Error() << "Failed to collect process stats: " << processStats.error(); |
| 851 | } |
| 852 | |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 853 | const auto& uidProcessStats = getUidProcessStats(*processStats, mTopNStatsPerSubcategory); |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 854 | std::unordered_set<uint32_t> unmappedUids; |
| 855 | // Fetch only the top N I/O blocked UIDs and UIDs with most major page faults. |
| 856 | UidProcessStats temp = {}; |
| 857 | std::vector<const UidProcessStats*> topNIoBlockedUids(mTopNStatsPerCategory, &temp); |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 858 | std::vector<const UidProcessStats*> topNMajorFaultUids(mTopNStatsPerCategory, &temp); |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 859 | processIoPerfData->totalMajorFaults = 0; |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 860 | for (const auto& it : *uidProcessStats) { |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 861 | const UidProcessStats& curStats = it.second; |
| 862 | if (mUidToPackageNameMapping.find(curStats.uid) == mUidToPackageNameMapping.end()) { |
| 863 | unmappedUids.insert(curStats.uid); |
| 864 | } |
| 865 | processIoPerfData->totalMajorFaults += curStats.majorFaults; |
| 866 | for (auto it = topNIoBlockedUids.begin(); it != topNIoBlockedUids.end(); ++it) { |
| 867 | const UidProcessStats* topStats = *it; |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 868 | if (topStats->ioBlockedTasksCnt < curStats.ioBlockedTasksCnt) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 869 | topNIoBlockedUids.emplace(it, &curStats); |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 870 | if (collectionInfo.filterPackages.empty()) { |
| 871 | topNIoBlockedUids.pop_back(); |
| 872 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 873 | break; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 874 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 875 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 876 | for (auto it = topNMajorFaultUids.begin(); it != topNMajorFaultUids.end(); ++it) { |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 877 | const UidProcessStats* topStats = *it; |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 878 | if (topStats->majorFaults < curStats.majorFaults) { |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 879 | topNMajorFaultUids.emplace(it, &curStats); |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 880 | if (collectionInfo.filterPackages.empty()) { |
| 881 | topNMajorFaultUids.pop_back(); |
| 882 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 883 | break; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 884 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 885 | } |
| 886 | } |
| 887 | |
| 888 | const auto& ret = updateUidToPackageNameMapping(unmappedUids); |
| 889 | if (!ret) { |
| 890 | ALOGW("%s", ret.error().message().c_str()); |
| 891 | } |
| 892 | |
| 893 | // Convert the top N uid process stats to ProcessIoPerfData. |
| 894 | for (const auto& it : topNIoBlockedUids) { |
| 895 | if (it->ioBlockedTasksCnt == 0) { |
| 896 | // End of non-zero elements. This case occurs when the number of UIDs with I/O blocked |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 897 | // processes is < |ro.carwatchdog.top_n_stats_per_category|. |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 898 | break; |
| 899 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 900 | ProcessIoPerfData::UidStats stats = { |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 901 | .userId = multiuser_get_user_id(it->uid), |
| 902 | .packageName = std::to_string(it->uid), |
| 903 | .count = it->ioBlockedTasksCnt, |
| 904 | }; |
| 905 | if (mUidToPackageNameMapping.find(it->uid) != mUidToPackageNameMapping.end()) { |
| 906 | stats.packageName = mUidToPackageNameMapping[it->uid]; |
| 907 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 908 | if (!collectionInfo.filterPackages.empty() && |
| 909 | collectionInfo.filterPackages.find(stats.packageName) == |
| 910 | collectionInfo.filterPackages.end()) { |
| 911 | continue; |
| 912 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 913 | for (const auto& pIt : it->topNIoBlockedProcesses) { |
| 914 | if (pIt.count == 0) { |
| 915 | break; |
| 916 | } |
| 917 | stats.topNProcesses.emplace_back( |
| 918 | ProcessIoPerfData::UidStats::ProcessStats{pIt.comm, pIt.count}); |
| 919 | } |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 920 | processIoPerfData->topNIoBlockedUids.emplace_back(stats); |
| 921 | processIoPerfData->topNIoBlockedUidsTotalTaskCnt.emplace_back(it->totalTasksCnt); |
| 922 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 923 | for (const auto& it : topNMajorFaultUids) { |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 924 | if (it->majorFaults == 0) { |
| 925 | // End of non-zero elements. This case occurs when the number of UIDs with major faults |
Lakshman Annadorai | 5287770 | 2020-03-25 14:29:52 -0700 | [diff] [blame] | 926 | // is < |ro.carwatchdog.top_n_stats_per_category|. |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 927 | break; |
| 928 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 929 | ProcessIoPerfData::UidStats stats = { |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 930 | .userId = multiuser_get_user_id(it->uid), |
| 931 | .packageName = std::to_string(it->uid), |
| 932 | .count = it->majorFaults, |
| 933 | }; |
| 934 | if (mUidToPackageNameMapping.find(it->uid) != mUidToPackageNameMapping.end()) { |
| 935 | stats.packageName = mUidToPackageNameMapping[it->uid]; |
| 936 | } |
Lakshman Annadorai | 73ef67f | 2020-05-01 16:28:00 -0700 | [diff] [blame^] | 937 | if (!collectionInfo.filterPackages.empty() && |
| 938 | collectionInfo.filterPackages.find(stats.packageName) == |
| 939 | collectionInfo.filterPackages.end()) { |
| 940 | continue; |
| 941 | } |
Lakshman Annadorai | ff35a99 | 2020-04-10 18:03:05 -0700 | [diff] [blame] | 942 | for (const auto& pIt : it->topNMajorFaultProcesses) { |
| 943 | if (pIt.count == 0) { |
| 944 | break; |
| 945 | } |
| 946 | stats.topNProcesses.emplace_back( |
| 947 | ProcessIoPerfData::UidStats::ProcessStats{pIt.comm, pIt.count}); |
| 948 | } |
| 949 | processIoPerfData->topNMajorFaultUids.emplace_back(stats); |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 950 | } |
| 951 | if (mLastMajorFaults == 0) { |
| 952 | processIoPerfData->majorFaultsPercentChange = 0; |
| 953 | } else { |
| 954 | int64_t increase = processIoPerfData->totalMajorFaults - mLastMajorFaults; |
| 955 | processIoPerfData->majorFaultsPercentChange = |
Lakshman Annadorai | f2855b2 | 2020-03-03 14:13:10 -0800 | [diff] [blame] | 956 | (static_cast<double>(increase) / static_cast<double>(mLastMajorFaults)) * 100.0; |
Lakshman Annadorai | 325e965 | 2020-02-20 17:27:11 -0800 | [diff] [blame] | 957 | } |
| 958 | mLastMajorFaults = processIoPerfData->totalMajorFaults; |
| 959 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 960 | } |
| 961 | |
Lakshman Annadorai | ab4d3fd | 2020-02-06 11:24:56 -0800 | [diff] [blame] | 962 | Result<void> IoPerfCollection::updateUidToPackageNameMapping( |
| 963 | const std::unordered_set<uint32_t>& uids) { |
| 964 | std::vector<int32_t> appUids; |
| 965 | |
| 966 | for (const auto& uid : uids) { |
| 967 | if (uid >= AID_APP_START) { |
| 968 | appUids.emplace_back(static_cast<int32_t>(uid)); |
| 969 | continue; |
| 970 | } |
| 971 | // System/native UIDs. |
| 972 | passwd* usrpwd = getpwuid(uid); |
| 973 | if (!usrpwd) { |
| 974 | continue; |
| 975 | } |
| 976 | mUidToPackageNameMapping[uid] = std::string(usrpwd->pw_name); |
| 977 | } |
| 978 | |
| 979 | if (appUids.empty()) { |
| 980 | return {}; |
| 981 | } |
| 982 | |
| 983 | if (mPackageManager == nullptr) { |
| 984 | auto ret = retrievePackageManager(); |
| 985 | if (!ret) { |
| 986 | return Error() << "Failed to retrieve package manager: " << ret.error(); |
| 987 | } |
| 988 | } |
| 989 | |
| 990 | std::vector<std::string> packageNames; |
| 991 | const binder::Status& status = mPackageManager->getNamesForUids(appUids, &packageNames); |
| 992 | if (!status.isOk()) { |
| 993 | return Error() << "package_native::getNamesForUids failed: " << status.exceptionMessage(); |
| 994 | } |
| 995 | |
| 996 | for (uint32_t i = 0; i < appUids.size(); i++) { |
| 997 | if (!packageNames[i].empty()) { |
| 998 | mUidToPackageNameMapping[appUids[i]] = packageNames[i]; |
| 999 | } |
| 1000 | } |
| 1001 | |
| 1002 | return {}; |
| 1003 | } |
| 1004 | |
| 1005 | Result<void> IoPerfCollection::retrievePackageManager() { |
| 1006 | const sp<IServiceManager> sm = defaultServiceManager(); |
| 1007 | if (sm == nullptr) { |
| 1008 | return Error() << "Failed to retrieve defaultServiceManager"; |
| 1009 | } |
| 1010 | |
| 1011 | sp<IBinder> binder = sm->getService(String16("package_native")); |
| 1012 | if (binder == nullptr) { |
| 1013 | return Error() << "Failed to get service package_native"; |
| 1014 | } |
| 1015 | mPackageManager = interface_cast<IPackageManagerNative>(binder); |
| 1016 | return {}; |
Lakshman Annadorai | 6094e9a | 2020-01-31 10:03:33 -0800 | [diff] [blame] | 1017 | } |
| 1018 | |
| 1019 | } // namespace watchdog |
| 1020 | } // namespace automotive |
| 1021 | } // namespace android |