blob: a97858b845b43802485acc199e8ad76965ff0927 [file] [log] [blame]
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -08001/**
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "carwatchdogd"
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -080018
19#include "IoPerfCollection.h"
20
Lakshman Annadorai52877702020-03-25 14:29:52 -070021#include <WatchdogProperties.sysprop.h>
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080022#include <android-base/file.h>
Lakshman Annadorai19bf2752020-03-05 17:45:43 -080023#include <android-base/parseint.h>
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080024#include <android-base/stringprintf.h>
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -070025#include <android-base/strings.h>
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080026#include <binder/IServiceManager.h>
27#include <cutils/android_filesystem_config.h>
28#include <inttypes.h>
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -080029#include <log/log.h>
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080030#include <processgroup/sched_policy.h>
Lakshman Annadorai30b87e52020-04-14 15:34:52 -070031#include <pthread.h>
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080032#include <pwd.h>
33
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -070034#include <algorithm>
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080035#include <iomanip>
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -070036#include <iterator>
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080037#include <limits>
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080038#include <string>
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080039#include <thread>
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080040#include <unordered_map>
41#include <unordered_set>
42#include <vector>
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -080043
44namespace android {
45namespace automotive {
46namespace watchdog {
47
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080048using android::defaultServiceManager;
49using android::IBinder;
50using android::IServiceManager;
51using android::sp;
52using android::String16;
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -080053using android::base::Error;
Lakshman Annadorai19bf2752020-03-05 17:45:43 -080054using android::base::ParseUint;
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -080055using android::base::Result;
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -070056using android::base::Split;
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080057using android::base::StringAppendF;
Lakshman Annadoraif2855b22020-03-03 14:13:10 -080058using android::base::WriteStringToFd;
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -080059using android::content::pm::IPackageManagerNative;
60
61namespace {
62
// Default number of entries reported per category and per subcategory when the corresponding
// system properties are not set.
constexpr int32_t kDefaultTopNStatsPerCategory = 10;
constexpr int32_t kDefaultTopNStatsPerSubcategory = 5;

// Default intervals for the boot-time and periodic collections.
constexpr std::chrono::seconds kDefaultBoottimeCollectionInterval{1};
constexpr std::chrono::seconds kDefaultPeriodicCollectionInterval{10};

// Number of periodic collection perf data snapshots to cache in memory.
constexpr int32_t kDefaultPeriodicCollectionBufferSize = 180;

// Minimum collection interval between subsequent collections.
constexpr std::chrono::nanoseconds kMinCollectionInterval = std::chrono::seconds(1);

// Default values for the custom collection interval and max_duration.
constexpr std::chrono::nanoseconds kCustomCollectionInterval = std::chrono::seconds(10);
constexpr std::chrono::nanoseconds kCustomCollectionDuration = std::chrono::minutes(30);

// Line of 100 dashes (plus a newline) separating the major sections of a dump.
const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";
78
// Returns |numer| as a percentage of |denom|. A zero denominator yields 0.0 instead of dividing
// by zero.
double percentage(uint64_t numer, uint64_t denom) {
    if (denom == 0) {
        return 0.0;
    }
    const double ratio = static_cast<double>(numer) / static_cast<double>(denom);
    return ratio * 100.0;
}
82
// Process statistics aggregated per UID.
struct UidProcessStats {
    // A process command name paired with the count it contributed to a top-N list.
    struct ProcessInfo {
        std::string comm;
        uint64_t count{0};
    };
    uint64_t uid{0};
    // Number of the UID's tasks whose state is "D".
    uint32_t ioBlockedTasksCnt{0};
    // Total number of tasks (threads) owned by the UID.
    uint32_t totalTasksCnt{0};
    // Sum of the major page faults reported by the UID's processes.
    uint64_t majorFaults{0};
    // Processes with the largest I/O blocked task counts, in descending order.
    std::vector<ProcessInfo> topNIoBlockedProcesses;
    // Processes with the largest major page fault counts, in descending order.
    std::vector<ProcessInfo> topNMajorFaultProcesses;
};
95
Lakshman Annadoraiff35a992020-04-10 18:03:05 -070096std::unique_ptr<std::unordered_map<uint32_t, UidProcessStats>> getUidProcessStats(
97 const std::vector<ProcessStats>& processStats, int topNStatsPerSubCategory) {
98 std::unique_ptr<std::unordered_map<uint32_t, UidProcessStats>> uidProcessStats(
99 new std::unordered_map<uint32_t, UidProcessStats>());
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800100 for (const auto& stats : processStats) {
101 if (stats.uid < 0) {
102 continue;
103 }
104 uint32_t uid = static_cast<uint32_t>(stats.uid);
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700105 if (uidProcessStats->find(uid) == uidProcessStats->end()) {
106 (*uidProcessStats)[uid] = UidProcessStats{
107 .uid = uid,
108 .topNIoBlockedProcesses = std::vector<
109 UidProcessStats::ProcessInfo>(topNStatsPerSubCategory,
110 UidProcessStats::ProcessInfo{}),
111 .topNMajorFaultProcesses = std::vector<
112 UidProcessStats::ProcessInfo>(topNStatsPerSubCategory,
113 UidProcessStats::ProcessInfo{}),
114 };
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800115 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700116 auto& curUidProcessStats = (*uidProcessStats)[uid];
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800117 // Top-level process stats has the aggregated major page faults count and this should be
118 // persistent across thread creation/termination. Thus use the value from this field.
119 curUidProcessStats.majorFaults += stats.process.majorFaults;
120 curUidProcessStats.totalTasksCnt += stats.threads.size();
121 // The process state is the same as the main thread state. Thus to avoid double counting
122 // ignore the process state.
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700123 uint32_t ioBlockedTasksCnt = 0;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800124 for (const auto& threadStat : stats.threads) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700125 ioBlockedTasksCnt += threadStat.second.state == "D" ? 1 : 0;
126 }
127 curUidProcessStats.ioBlockedTasksCnt += ioBlockedTasksCnt;
128 for (auto it = curUidProcessStats.topNIoBlockedProcesses.begin();
129 it != curUidProcessStats.topNIoBlockedProcesses.end(); ++it) {
130 if (it->count < ioBlockedTasksCnt) {
131 curUidProcessStats.topNIoBlockedProcesses
132 .emplace(it,
133 UidProcessStats::ProcessInfo{
134 .comm = stats.process.comm,
135 .count = ioBlockedTasksCnt,
136 });
137 curUidProcessStats.topNIoBlockedProcesses.pop_back();
138 break;
139 }
140 }
141 for (auto it = curUidProcessStats.topNMajorFaultProcesses.begin();
142 it != curUidProcessStats.topNMajorFaultProcesses.end(); ++it) {
143 if (it->count < stats.process.majorFaults) {
144 curUidProcessStats.topNMajorFaultProcesses
145 .emplace(it,
146 UidProcessStats::ProcessInfo{
147 .comm = stats.process.comm,
148 .count = stats.process.majorFaults,
149 });
150 curUidProcessStats.topNMajorFaultProcesses.pop_back();
151 break;
152 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800153 }
154 }
155 return uidProcessStats;
156}
157
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800158Result<std::chrono::seconds> parseSecondsFlag(Vector<String16> args, size_t pos) {
159 if (args.size() < pos) {
160 return Error() << "Value not provided";
161 }
162
163 uint64_t value;
164 std::string strValue = std::string(String8(args[pos]).string());
165 if (!ParseUint(strValue, &value)) {
166 return Error() << "Invalid value " << args[pos].string() << ", must be an integer";
167 }
168 return std::chrono::seconds(value);
169}
170
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800171} // namespace
172
173std::string toString(const UidIoPerfData& data) {
174 std::string buffer;
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800175 if (data.topNReads.size() > 0) {
176 StringAppendF(&buffer, "\nTop N Reads:\n%s\n", std::string(12, '-').c_str());
177 StringAppendF(&buffer,
178 "Android User ID, Package Name, Foreground Bytes, Foreground Bytes %%, "
179 "Foreground Fsync, Foreground Fsync %%, Background Bytes, "
180 "Background Bytes %%, Background Fsync, Background Fsync %%\n");
181 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800182 for (const auto& stat : data.topNReads) {
183 StringAppendF(&buffer, "%" PRIu32 ", %s", stat.userId, stat.packageName.c_str());
184 for (int i = 0; i < UID_STATES; ++i) {
185 StringAppendF(&buffer, ", %" PRIu64 ", %.2f%%, %" PRIu64 ", %.2f%%", stat.bytes[i],
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800186 percentage(stat.bytes[i], data.total[READ_BYTES][i]), stat.fsync[i],
187 percentage(stat.fsync[i], data.total[FSYNC_COUNT][i]));
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800188 }
189 StringAppendF(&buffer, "\n");
190 }
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800191 if (data.topNWrites.size() > 0) {
192 StringAppendF(&buffer, "\nTop N Writes:\n%s\n", std::string(13, '-').c_str());
193 StringAppendF(&buffer,
194 "Android User ID, Package Name, Foreground Bytes, Foreground Bytes %%, "
195 "Foreground Fsync, Foreground Fsync %%, Background Bytes, "
196 "Background Bytes %%, Background Fsync, Background Fsync %%\n");
197 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800198 for (const auto& stat : data.topNWrites) {
199 StringAppendF(&buffer, "%" PRIu32 ", %s", stat.userId, stat.packageName.c_str());
200 for (int i = 0; i < UID_STATES; ++i) {
201 StringAppendF(&buffer, ", %" PRIu64 ", %.2f%%, %" PRIu64 ", %.2f%%", stat.bytes[i],
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800202 percentage(stat.bytes[i], data.total[WRITE_BYTES][i]), stat.fsync[i],
203 percentage(stat.fsync[i], data.total[FSYNC_COUNT][i]));
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800204 }
205 StringAppendF(&buffer, "\n");
206 }
207 return buffer;
208}
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800209
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800210std::string toString(const SystemIoPerfData& data) {
211 std::string buffer;
212 StringAppendF(&buffer, "CPU I/O wait time/percent: %" PRIu64 " / %.2f%%\n", data.cpuIoWaitTime,
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800213 percentage(data.cpuIoWaitTime, data.totalCpuTime));
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800214 StringAppendF(&buffer, "Number of I/O blocked processes/percent: %" PRIu32 " / %.2f%%\n",
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800215 data.ioBlockedProcessesCnt,
216 percentage(data.ioBlockedProcessesCnt, data.totalProcessesCnt));
217 return buffer;
218}
219
220std::string toString(const ProcessIoPerfData& data) {
221 std::string buffer;
222 StringAppendF(&buffer, "Number of major page faults since last collection: %" PRIu64 "\n",
223 data.totalMajorFaults);
224 StringAppendF(&buffer,
225 "Percentage of change in major page faults since last collection: %.2f%%\n",
226 data.majorFaultsPercentChange);
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700227 if (data.topNMajorFaultUids.size() > 0) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800228 StringAppendF(&buffer, "\nTop N major page faults:\n%s\n", std::string(24, '-').c_str());
229 StringAppendF(&buffer,
230 "Android User ID, Package Name, Number of major page faults, "
231 "Percentage of total major page faults\n");
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700232 StringAppendF(&buffer,
233 "\tCommand, Number of major page faults, Percentage of UID's major page "
234 "faults\n");
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800235 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700236 for (const auto& uidStats : data.topNMajorFaultUids) {
237 StringAppendF(&buffer, "%" PRIu32 ", %s, %" PRIu64 ", %.2f%%\n", uidStats.userId,
238 uidStats.packageName.c_str(), uidStats.count,
239 percentage(uidStats.count, data.totalMajorFaults));
240 for (const auto& procStats : uidStats.topNProcesses) {
241 StringAppendF(&buffer, "\t%s, %" PRIu64 ", %.2f%%\n", procStats.comm.c_str(),
242 procStats.count, percentage(procStats.count, uidStats.count));
243 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800244 }
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800245 if (data.topNIoBlockedUids.size() > 0) {
246 StringAppendF(&buffer, "\nTop N I/O waiting UIDs:\n%s\n", std::string(23, '-').c_str());
247 StringAppendF(&buffer,
248 "Android User ID, Package Name, Number of owned tasks waiting for I/O, "
249 "Percentage of owned tasks waiting for I/O\n");
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700250 StringAppendF(&buffer,
251 "\tCommand, Number of I/O waiting tasks, Percentage of UID's tasks waiting "
252 "for I/O\n");
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800253 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800254 for (size_t i = 0; i < data.topNIoBlockedUids.size(); ++i) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700255 const auto& uidStats = data.topNIoBlockedUids[i];
256 StringAppendF(&buffer, "%" PRIu32 ", %s, %" PRIu64 ", %.2f%%\n", uidStats.userId,
257 uidStats.packageName.c_str(), uidStats.count,
258 percentage(uidStats.count, data.topNIoBlockedUidsTotalTaskCnt[i]));
259 for (const auto& procStats : uidStats.topNProcesses) {
260 StringAppendF(&buffer, "\t%s, %" PRIu64 ", %.2f%%\n", procStats.comm.c_str(),
261 procStats.count, percentage(procStats.count, uidStats.count));
262 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800263 }
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800264 return buffer;
265}
266
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800267std::string toString(const IoPerfRecord& record) {
268 std::string buffer;
269 StringAppendF(&buffer, "%s%s%s", toString(record.systemIoPerfData).c_str(),
270 toString(record.processIoPerfData).c_str(),
271 toString(record.uidIoPerfData).c_str());
272 return buffer;
273}
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800274
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800275std::string toString(const CollectionInfo& collectionInfo) {
276 std::string buffer;
277 StringAppendF(&buffer, "Number of collections: %zu\n", collectionInfo.records.size());
278 auto interval =
279 std::chrono::duration_cast<std::chrono::seconds>(collectionInfo.interval).count();
280 StringAppendF(&buffer, "Collection interval: %lld second%s\n", interval,
281 ((interval > 1) ? "s" : ""));
282 for (size_t i = 0; i < collectionInfo.records.size(); ++i) {
283 const auto& record = collectionInfo.records[i];
284 std::stringstream timestamp;
285 timestamp << std::put_time(std::localtime(&record.time), "%c %Z");
286 StringAppendF(&buffer, "Collection %zu: <%s>\n%s\n%s\n", i, timestamp.str().c_str(),
287 std::string(45, '=').c_str(), toString(record).c_str());
288 }
289 return buffer;
290}
291
292Result<void> IoPerfCollection::start() {
293 {
294 Mutex::Autolock lock(mMutex);
295 if (mCurrCollectionEvent != CollectionEvent::INIT || mCollectionThread.joinable()) {
Lakshman Annadorai2c0b0d12020-03-04 11:14:59 -0800296 return Error(INVALID_OPERATION)
297 << "Cannot start I/O performance collection more than once";
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800298 }
Lakshman Annadorai52877702020-03-25 14:29:52 -0700299 mTopNStatsPerCategory = static_cast<int>(
300 sysprop::topNStatsPerCategory().value_or(kDefaultTopNStatsPerCategory));
Lakshman Annadoraiefc23392020-04-02 14:18:39 -0700301 mTopNStatsPerSubcategory = static_cast<int>(
302 sysprop::topNStatsPerSubcategory().value_or(kDefaultTopNStatsPerSubcategory));
Lakshman Annadorai52877702020-03-25 14:29:52 -0700303 std::chrono::nanoseconds boottimeCollectionInterval =
304 std::chrono::duration_cast<std::chrono::nanoseconds>(
305 std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or(
306 kDefaultBoottimeCollectionInterval.count())));
307 std::chrono::nanoseconds periodicCollectionInterval =
308 std::chrono::duration_cast<std::chrono::nanoseconds>(
309 std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
310 kDefaultPeriodicCollectionInterval.count())));
311 size_t periodicCollectionBufferSize =
312 static_cast<size_t>(sysprop::periodicCollectionBufferSize().value_or(
313 kDefaultPeriodicCollectionBufferSize));
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800314 mBoottimeCollection = {
Lakshman Annadorai52877702020-03-25 14:29:52 -0700315 .interval = boottimeCollectionInterval,
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800316 .maxCacheSize = std::numeric_limits<std::size_t>::max(),
317 .lastCollectionUptime = 0,
318 .records = {},
319 };
320 mPeriodicCollection = {
Lakshman Annadorai52877702020-03-25 14:29:52 -0700321 .interval = periodicCollectionInterval,
322 .maxCacheSize = periodicCollectionBufferSize,
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800323 .lastCollectionUptime = 0,
324 .records = {},
325 };
326 }
327
328 mCollectionThread = std::thread([&]() {
329 {
330 Mutex::Autolock lock(mMutex);
331 if (mCurrCollectionEvent != CollectionEvent::INIT) {
332 ALOGE("Skipping I/O performance data collection as the current collection event "
333 "%s != %s",
334 toString(mCurrCollectionEvent).c_str(),
335 toString(CollectionEvent::INIT).c_str());
336 return;
337 }
338 mCurrCollectionEvent = CollectionEvent::BOOT_TIME;
339 mBoottimeCollection.lastCollectionUptime = mHandlerLooper->now();
340 mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
341 mHandlerLooper->sendMessage(this, CollectionEvent::BOOT_TIME);
342 }
343 if (set_sched_policy(0, SP_BACKGROUND) != 0) {
344 ALOGW("Failed to set background scheduling priority to I/O performance data collection "
345 "thread");
346 }
Lakshman Annadorai30b87e52020-04-14 15:34:52 -0700347 int ret = pthread_setname_np(pthread_self(), "IoPerfCollect");
348 if (ret != 0) {
349 ALOGE("Failed to set I/O perf collection thread name: %d", ret);
350 }
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800351 bool isCollectionActive = true;
352 // Loop until the collection is not active -- I/O perf collection runs on this thread in a
353 // handler.
354 while (isCollectionActive) {
355 mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
356 Mutex::Autolock lock(mMutex);
357 isCollectionActive = mCurrCollectionEvent != CollectionEvent::TERMINATED;
358 }
359 });
360 return {};
361}
362
363void IoPerfCollection::terminate() {
364 {
365 Mutex::Autolock lock(mMutex);
366 if (mCurrCollectionEvent == CollectionEvent::TERMINATED) {
367 ALOGE("I/O performance data collection was terminated already");
368 return;
369 }
370 ALOGE("Terminating I/O performance data collection");
371 mCurrCollectionEvent = CollectionEvent::TERMINATED;
372 }
373 if (mCollectionThread.joinable()) {
374 mHandlerLooper->removeMessages(this);
375 mHandlerLooper->wake();
376 mCollectionThread.join();
377 }
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800378}
379
380Result<void> IoPerfCollection::onBootFinished() {
381 Mutex::Autolock lock(mMutex);
382 if (mCurrCollectionEvent != CollectionEvent::BOOT_TIME) {
Lakshman Annadoraidd9cb772020-04-02 16:23:31 -0700383 // This case happens when either the I/O perf collection has prematurely terminated before
384 // boot complete notification is received or multiple boot complete notifications are
385 // received. In either case don't return error as this will lead to runtime exception and
386 // cause system to boot loop.
387 ALOGE("Current I/O performance data collection event %s != %s",
388 toString(mCurrCollectionEvent).c_str(),
389 toString(CollectionEvent::BOOT_TIME).c_str());
390 return {};
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800391 }
Lakshman Annadoraie877d872020-04-15 13:45:11 -0700392 mBoottimeCollection.lastCollectionUptime = mHandlerLooper->now();
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800393 mHandlerLooper->removeMessages(this);
Lakshman Annadoraie877d872020-04-15 13:45:11 -0700394 mHandlerLooper->sendMessage(this, SwitchEvent::END_BOOTTIME_COLLECTION);
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800395 return {};
396}
397
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700398Result<void> IoPerfCollection::dump(int fd, const Vector<String16>& args) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800399 if (args.empty()) {
400 const auto& ret = dumpCollection(fd);
401 if (!ret) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700402 return ret;
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800403 }
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700404 return {};
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800405 }
406
407 if (args[0] == String16(kStartCustomCollectionFlag)) {
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700408 if (args.size() > 7) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700409 return Error(INVALID_OPERATION) << "Number of arguments to start custom "
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700410 << "I/O performance data collection cannot exceed 7";
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800411 }
412 std::chrono::nanoseconds interval = kCustomCollectionInterval;
413 std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700414 std::unordered_set<std::string> filterPackages;
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800415 for (size_t i = 1; i < args.size(); ++i) {
416 if (args[i] == String16(kIntervalFlag)) {
417 const auto& ret = parseSecondsFlag(args, i + 1);
418 if (!ret) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700419 return Error(FAILED_TRANSACTION)
420 << "Failed to parse " << kIntervalFlag << ": " << ret.error();
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800421 }
422 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*ret);
423 ++i;
424 continue;
425 }
426 if (args[i] == String16(kMaxDurationFlag)) {
427 const auto& ret = parseSecondsFlag(args, i + 1);
428 if (!ret) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700429 return Error(FAILED_TRANSACTION)
430 << "Failed to parse " << kMaxDurationFlag << ": " << ret.error();
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800431 }
432 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*ret);
433 ++i;
434 continue;
435 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700436 if (args[i] == String16(kFilterPackagesFlag)) {
437 if (args.size() < i + 1) {
438 return Error(FAILED_TRANSACTION)
439 << "Must provide value for '" << kFilterPackagesFlag << "' flag";
440 }
441 std::vector<std::string> packages =
442 Split(std::string(String8(args[i + 1]).string()), ",");
443 std::copy(packages.begin(), packages.end(),
444 std::inserter(filterPackages, filterPackages.end()));
445 ++i;
446 continue;
447 }
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800448 ALOGW("Unknown flag %s provided to start custom I/O performance data collection",
449 String8(args[i]).string());
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700450 return Error(INVALID_OPERATION) << "Unknown flag " << String8(args[i]).string()
451 << " provided to start custom I/O performance data "
452 << "collection";
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800453 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700454 const auto& ret = startCustomCollection(interval, maxDuration, filterPackages);
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800455 if (!ret) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700456 return ret;
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800457 }
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700458 return {};
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800459 }
460
461 if (args[0] == String16(kEndCustomCollectionFlag)) {
462 if (args.size() != 1) {
463 ALOGW("Number of arguments to end custom I/O performance data collection cannot "
464 "exceed 1");
465 }
466 const auto& ret = endCustomCollection(fd);
467 if (!ret) {
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700468 return ret;
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800469 }
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700470 return {};
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800471 }
472
Lakshman Annadorai0dfeeeb2020-03-13 16:57:12 -0700473 return Error(INVALID_OPERATION)
474 << "Dump arguments start neither with " << kStartCustomCollectionFlag << " nor with "
475 << kEndCustomCollectionFlag << " flags";
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800476}
477
478Result<void> IoPerfCollection::dumpCollection(int fd) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800479 Mutex::Autolock lock(mMutex);
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800480 if (mCurrCollectionEvent == CollectionEvent::TERMINATED) {
481 ALOGW("I/O performance data collection not active. Dumping cached data");
482 if (!WriteStringToFd("I/O performance data collection not active. Dumping cached data.",
483 fd)) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800484 return Error(FAILED_TRANSACTION) << "Failed to write I/O performance collection status";
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800485 }
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800486 }
487
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800488 const auto& ret = dumpCollectorsStatusLocked(fd);
489 if (!ret) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800490 return Error(FAILED_TRANSACTION) << ret.error();
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800491 }
492
493 if (!WriteStringToFd(StringPrintf("%sI/O performance data reports:\n%sBoot-time collection "
494 "report:\n%s\n",
495 kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str(),
496 std::string(28, '=').c_str()),
497 fd) ||
498 !WriteStringToFd(toString(mBoottimeCollection), fd) ||
499 !WriteStringToFd(StringPrintf("%s\nPeriodic collection report:\n%s\n",
500 std::string(75, '-').c_str(), std::string(27, '=').c_str()),
501 fd) ||
502 !WriteStringToFd(toString(mPeriodicCollection), fd) ||
503 !WriteStringToFd(kDumpMajorDelimiter, fd)) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800504 return Error(FAILED_TRANSACTION)
505 << "Failed to dump the boot-time and periodic collection reports.";
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800506 }
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800507 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800508}
509
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800510Result<void> IoPerfCollection::dumpCollectorsStatusLocked(int fd) {
511 if (!mUidIoStats->enabled() &&
512 !WriteStringToFd(StringPrintf("UidIoStats collector failed to access the file %s",
513 mUidIoStats->filePath().c_str()),
514 fd)) {
515 return Error() << "Failed to write UidIoStats collector status";
516 }
517 if (!mProcStat->enabled() &&
518 !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
519 mProcStat->filePath().c_str()),
520 fd)) {
521 return Error() << "Failed to write ProcStat collector status";
522 }
523 if (!mProcPidStat->enabled() &&
524 !WriteStringToFd(StringPrintf("ProcPidStat collector failed to access the directory %s",
525 mProcPidStat->dirPath().c_str()),
526 fd)) {
527 return Error() << "Failed to write ProcPidStat collector status";
528 }
529 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800530}
531
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700532Result<void> IoPerfCollection::startCustomCollection(
533 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
534 const std::unordered_set<std::string>& filterPackages) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800535 if (interval < kMinCollectionInterval || maxDuration < kMinCollectionInterval) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800536 return Error(INVALID_OPERATION)
537 << "Collection interval and maximum duration must be >= "
538 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinCollectionInterval)
539 .count()
540 << " milliseconds.";
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800541 }
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800542 Mutex::Autolock lock(mMutex);
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800543 if (mCurrCollectionEvent != CollectionEvent::PERIODIC) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800544 return Error(INVALID_OPERATION)
545 << "Cannot start a custom collection when "
546 << "the current collection event " << toString(mCurrCollectionEvent)
547 << " != " << toString(CollectionEvent::PERIODIC) << " collection event";
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800548 }
549
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800550 mCustomCollection = {
551 .interval = interval,
552 .maxCacheSize = std::numeric_limits<std::size_t>::max(),
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700553 .filterPackages = filterPackages,
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800554 .lastCollectionUptime = mHandlerLooper->now(),
555 .records = {},
556 };
557
558 mHandlerLooper->removeMessages(this);
559 nsecs_t uptime = mHandlerLooper->now() + maxDuration.count();
560 mHandlerLooper->sendMessageAtTime(uptime, this, SwitchEvent::END_CUSTOM_COLLECTION);
561 mCurrCollectionEvent = CollectionEvent::CUSTOM;
562 mHandlerLooper->sendMessage(this, CollectionEvent::CUSTOM);
563 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800564}
565
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800566Result<void> IoPerfCollection::endCustomCollection(int fd) {
567 Mutex::Autolock lock(mMutex);
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800568 if (mCurrCollectionEvent != CollectionEvent::CUSTOM) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800569 return Error(INVALID_OPERATION) << "No custom collection is running";
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800570 }
571
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800572 mHandlerLooper->removeMessages(this);
573 mHandlerLooper->sendMessage(this, SwitchEvent::END_CUSTOM_COLLECTION);
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800574
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800575 const auto& ret = dumpCollectorsStatusLocked(fd);
576 if (!ret) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800577 return Error(FAILED_TRANSACTION) << ret.error();
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800578 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800579
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800580 if (!WriteStringToFd(StringPrintf("%sI/O performance data report for custom collection:\n%s",
581 kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
582 fd) ||
583 !WriteStringToFd(toString(mCustomCollection), fd) ||
584 !WriteStringToFd(kDumpMajorDelimiter, fd)) {
Lakshman Annadorai19bf2752020-03-05 17:45:43 -0800585 return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800586 }
587
588 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800589}
590
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800591void IoPerfCollection::handleMessage(const Message& message) {
592 Result<void> result;
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800593
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800594 switch (message.what) {
595 case static_cast<int>(CollectionEvent::BOOT_TIME):
596 result = processCollectionEvent(CollectionEvent::BOOT_TIME, &mBoottimeCollection);
597 break;
Lakshman Annadoraie877d872020-04-15 13:45:11 -0700598 case static_cast<int>(SwitchEvent::END_BOOTTIME_COLLECTION):
599 result = processCollectionEvent(CollectionEvent::BOOT_TIME, &mBoottimeCollection);
600 if (result.ok()) {
601 mHandlerLooper->removeMessages(this);
602 mCurrCollectionEvent = CollectionEvent::PERIODIC;
603 mPeriodicCollection.lastCollectionUptime =
604 mHandlerLooper->now() + mPeriodicCollection.interval.count();
605 mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastCollectionUptime, this,
606 CollectionEvent::PERIODIC);
607 }
608 break;
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800609 case static_cast<int>(CollectionEvent::PERIODIC):
610 result = processCollectionEvent(CollectionEvent::PERIODIC, &mPeriodicCollection);
611 break;
612 case static_cast<int>(CollectionEvent::CUSTOM):
613 result = processCollectionEvent(CollectionEvent::CUSTOM, &mCustomCollection);
614 break;
615 case static_cast<int>(SwitchEvent::END_CUSTOM_COLLECTION): {
616 Mutex::Autolock lock(mMutex);
617 if (mCurrCollectionEvent != CollectionEvent::CUSTOM) {
618 ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
619 toString(mCurrCollectionEvent).c_str(),
620 toString(CollectionEvent::CUSTOM).c_str());
621 return;
622 }
623 mCustomCollection = {};
624 mHandlerLooper->removeMessages(this);
625 mCurrCollectionEvent = CollectionEvent::PERIODIC;
626 mPeriodicCollection.lastCollectionUptime = mHandlerLooper->now();
627 mHandlerLooper->sendMessage(this, CollectionEvent::PERIODIC);
628 return;
629 }
630 default:
631 result = Error() << "Unknown message: " << message.what;
632 }
633
Lakshman Annadoraie877d872020-04-15 13:45:11 -0700634 if (!result.ok()) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800635 Mutex::Autolock lock(mMutex);
636 ALOGE("Terminating I/O performance data collection: %s", result.error().message().c_str());
637 // DO NOT CALL terminate() as it tries to join the collection thread but this code is
638 // executed on the collection thread. Thus it will result in a deadlock.
639 mCurrCollectionEvent = CollectionEvent::TERMINATED;
640 mHandlerLooper->removeMessages(this);
641 mHandlerLooper->wake();
642 }
643}
644
645Result<void> IoPerfCollection::processCollectionEvent(CollectionEvent event, CollectionInfo* info) {
646 Mutex::Autolock lock(mMutex);
647 // Messages sent to the looper are intrinsically racy such that a message from the previous
648 // collection event may land in the looper after the current collection has already begun. Thus
649 // verify the current collection event before starting the collection.
650 if (mCurrCollectionEvent != event) {
651 ALOGW("Skipping %s collection message on collection event %s", toString(event).c_str(),
652 toString(mCurrCollectionEvent).c_str());
653 return {};
654 }
655 if (info->maxCacheSize == 0) {
656 return Error() << "Maximum cache size for " << toString(event) << " collection cannot be 0";
657 }
658 if (info->interval < kMinCollectionInterval) {
659 return Error()
660 << "Collection interval of "
661 << std::chrono::duration_cast<std::chrono::seconds>(info->interval).count()
662 << " seconds for " << toString(event) << " collection cannot be less than "
663 << std::chrono::duration_cast<std::chrono::seconds>(kMinCollectionInterval).count()
664 << " seconds";
665 }
666 auto ret = collectLocked(info);
667 if (!ret) {
668 return Error() << toString(event) << " collection failed: " << ret.error();
669 }
670 info->lastCollectionUptime += info->interval.count();
671 mHandlerLooper->sendMessageAtTime(info->lastCollectionUptime, this, event);
672 return {};
673}
674
675Result<void> IoPerfCollection::collectLocked(CollectionInfo* collectionInfo) {
676 if (!mUidIoStats->enabled() && !mProcStat->enabled() && !mProcPidStat->enabled()) {
677 return Error() << "No collectors enabled";
678 }
679 IoPerfRecord record{
680 .time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
681 };
682 auto ret = collectSystemIoPerfDataLocked(&record.systemIoPerfData);
683 if (!ret) {
684 return ret;
685 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700686 ret = collectProcessIoPerfDataLocked(*collectionInfo, &record.processIoPerfData);
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800687 if (!ret) {
688 return ret;
689 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700690 ret = collectUidIoPerfDataLocked(*collectionInfo, &record.uidIoPerfData);
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800691 if (!ret) {
692 return ret;
693 }
694 if (collectionInfo->records.size() > collectionInfo->maxCacheSize) {
695 collectionInfo->records.erase(collectionInfo->records.begin()); // Erase the oldest record.
696 }
697 collectionInfo->records.emplace_back(record);
698 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800699}
700
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700701Result<void> IoPerfCollection::collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo,
702 UidIoPerfData* uidIoPerfData) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800703 if (!mUidIoStats->enabled()) {
704 // Don't return an error to avoid pre-mature termination. Instead, fetch data from other
705 // collectors.
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800706 return {};
707 }
708
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800709 const Result<std::unordered_map<uint32_t, UidIoUsage>>& usage = mUidIoStats->collect();
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800710 if (!usage) {
711 return Error() << "Failed to collect uid I/O usage: " << usage.error();
712 }
713
714 // Fetch only the top N reads and writes from the usage records.
715 UidIoUsage tempUsage = {};
716 std::vector<const UidIoUsage*> topNReads(mTopNStatsPerCategory, &tempUsage);
717 std::vector<const UidIoUsage*> topNWrites(mTopNStatsPerCategory, &tempUsage);
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800718 std::unordered_set<uint32_t> unmappedUids;
719
720 for (const auto& uIt : *usage) {
721 const UidIoUsage& curUsage = uIt.second;
722 if (curUsage.ios.isZero()) {
723 continue;
724 }
725 if (mUidToPackageNameMapping.find(curUsage.uid) == mUidToPackageNameMapping.end()) {
726 unmappedUids.insert(curUsage.uid);
727 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800728 uidIoPerfData->total[READ_BYTES][FOREGROUND] +=
729 curUsage.ios.metrics[READ_BYTES][FOREGROUND];
730 uidIoPerfData->total[READ_BYTES][BACKGROUND] +=
731 curUsage.ios.metrics[READ_BYTES][BACKGROUND];
732 uidIoPerfData->total[WRITE_BYTES][FOREGROUND] +=
733 curUsage.ios.metrics[WRITE_BYTES][FOREGROUND];
734 uidIoPerfData->total[WRITE_BYTES][BACKGROUND] +=
735 curUsage.ios.metrics[WRITE_BYTES][BACKGROUND];
736 uidIoPerfData->total[FSYNC_COUNT][FOREGROUND] +=
737 curUsage.ios.metrics[FSYNC_COUNT][FOREGROUND];
738 uidIoPerfData->total[FSYNC_COUNT][BACKGROUND] +=
739 curUsage.ios.metrics[FSYNC_COUNT][BACKGROUND];
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800740
741 for (auto it = topNReads.begin(); it != topNReads.end(); ++it) {
742 const UidIoUsage* curRead = *it;
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700743 if (curRead->ios.sumReadBytes() < curUsage.ios.sumReadBytes()) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700744 topNReads.emplace(it, &curUsage);
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700745 if (collectionInfo.filterPackages.empty()) {
746 topNReads.pop_back();
747 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700748 break;
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800749 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800750 }
751 for (auto it = topNWrites.begin(); it != topNWrites.end(); ++it) {
752 const UidIoUsage* curWrite = *it;
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700753 if (curWrite->ios.sumWriteBytes() < curUsage.ios.sumWriteBytes()) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700754 topNWrites.emplace(it, &curUsage);
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700755 if (collectionInfo.filterPackages.empty()) {
756 topNWrites.pop_back();
757 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700758 break;
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800759 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800760 }
761 }
762
763 const auto& ret = updateUidToPackageNameMapping(unmappedUids);
764 if (!ret) {
765 ALOGW("%s", ret.error().message().c_str());
766 }
767
768 // Convert the top N I/O usage to UidIoPerfData.
769 for (const auto& usage : topNReads) {
770 if (usage->ios.isZero()) {
771 // End of non-zero usage records. This case occurs when the number of UIDs with active
Lakshman Annadorai52877702020-03-25 14:29:52 -0700772 // I/O operations is < |ro.carwatchdog.top_n_stats_per_category|.
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800773 break;
774 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800775 UidIoPerfData::Stats stats = {
776 .userId = multiuser_get_user_id(usage->uid),
777 .packageName = std::to_string(usage->uid),
778 .bytes = {usage->ios.metrics[READ_BYTES][FOREGROUND],
779 usage->ios.metrics[READ_BYTES][BACKGROUND]},
780 .fsync = {usage->ios.metrics[FSYNC_COUNT][FOREGROUND],
781 usage->ios.metrics[FSYNC_COUNT][BACKGROUND]},
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800782 };
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800783 if (mUidToPackageNameMapping.find(usage->uid) != mUidToPackageNameMapping.end()) {
784 stats.packageName = mUidToPackageNameMapping[usage->uid];
785 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700786 if (!collectionInfo.filterPackages.empty() &&
787 collectionInfo.filterPackages.find(stats.packageName) ==
788 collectionInfo.filterPackages.end()) {
789 continue;
790 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800791 uidIoPerfData->topNReads.emplace_back(stats);
792 }
793
794 for (const auto& usage : topNWrites) {
795 if (usage->ios.isZero()) {
796 // End of non-zero usage records. This case occurs when the number of UIDs with active
Lakshman Annadorai52877702020-03-25 14:29:52 -0700797 // I/O operations is < |ro.carwatchdog.top_n_stats_per_category|.
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800798 break;
799 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800800 UidIoPerfData::Stats stats = {
801 .userId = multiuser_get_user_id(usage->uid),
802 .packageName = std::to_string(usage->uid),
803 .bytes = {usage->ios.metrics[WRITE_BYTES][FOREGROUND],
804 usage->ios.metrics[WRITE_BYTES][BACKGROUND]},
805 .fsync = {usage->ios.metrics[FSYNC_COUNT][FOREGROUND],
806 usage->ios.metrics[FSYNC_COUNT][BACKGROUND]},
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800807 };
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800808 if (mUidToPackageNameMapping.find(usage->uid) != mUidToPackageNameMapping.end()) {
809 stats.packageName = mUidToPackageNameMapping[usage->uid];
810 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700811 if (!collectionInfo.filterPackages.empty() &&
812 collectionInfo.filterPackages.find(stats.packageName) ==
813 collectionInfo.filterPackages.end()) {
814 continue;
815 }
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800816 uidIoPerfData->topNWrites.emplace_back(stats);
817 }
818 return {};
819}
820
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800821Result<void> IoPerfCollection::collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800822 if (!mProcStat->enabled()) {
823 // Don't return an error to avoid pre-mature termination. Instead, fetch data from other
824 // collectors.
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800825 return {};
826 }
827
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800828 const Result<ProcStatInfo>& procStatInfo = mProcStat->collect();
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800829 if (!procStatInfo) {
830 return Error() << "Failed to collect proc stats: " << procStatInfo.error();
831 }
832
833 systemIoPerfData->cpuIoWaitTime = procStatInfo->cpuStats.ioWaitTime;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800834 systemIoPerfData->totalCpuTime = procStatInfo->totalCpuTime();
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800835 systemIoPerfData->ioBlockedProcessesCnt = procStatInfo->ioBlockedProcessesCnt;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800836 systemIoPerfData->totalProcessesCnt = procStatInfo->totalProcessesCnt();
Lakshman Annadoraif9b47c22020-02-10 16:45:18 -0800837 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800838}
839
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800840Result<void> IoPerfCollection::collectProcessIoPerfDataLocked(
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700841 const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData) {
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800842 if (!mProcPidStat->enabled()) {
843 // Don't return an error to avoid pre-mature termination. Instead, fetch data from other
844 // collectors.
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800845 return {};
846 }
847
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800848 const Result<std::vector<ProcessStats>>& processStats = mProcPidStat->collect();
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800849 if (!processStats) {
850 return Error() << "Failed to collect process stats: " << processStats.error();
851 }
852
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700853 const auto& uidProcessStats = getUidProcessStats(*processStats, mTopNStatsPerSubcategory);
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800854 std::unordered_set<uint32_t> unmappedUids;
855 // Fetch only the top N I/O blocked UIDs and UIDs with most major page faults.
856 UidProcessStats temp = {};
857 std::vector<const UidProcessStats*> topNIoBlockedUids(mTopNStatsPerCategory, &temp);
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700858 std::vector<const UidProcessStats*> topNMajorFaultUids(mTopNStatsPerCategory, &temp);
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800859 processIoPerfData->totalMajorFaults = 0;
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700860 for (const auto& it : *uidProcessStats) {
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800861 const UidProcessStats& curStats = it.second;
862 if (mUidToPackageNameMapping.find(curStats.uid) == mUidToPackageNameMapping.end()) {
863 unmappedUids.insert(curStats.uid);
864 }
865 processIoPerfData->totalMajorFaults += curStats.majorFaults;
866 for (auto it = topNIoBlockedUids.begin(); it != topNIoBlockedUids.end(); ++it) {
867 const UidProcessStats* topStats = *it;
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700868 if (topStats->ioBlockedTasksCnt < curStats.ioBlockedTasksCnt) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700869 topNIoBlockedUids.emplace(it, &curStats);
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700870 if (collectionInfo.filterPackages.empty()) {
871 topNIoBlockedUids.pop_back();
872 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700873 break;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800874 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800875 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700876 for (auto it = topNMajorFaultUids.begin(); it != topNMajorFaultUids.end(); ++it) {
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800877 const UidProcessStats* topStats = *it;
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700878 if (topStats->majorFaults < curStats.majorFaults) {
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700879 topNMajorFaultUids.emplace(it, &curStats);
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700880 if (collectionInfo.filterPackages.empty()) {
881 topNMajorFaultUids.pop_back();
882 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700883 break;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800884 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800885 }
886 }
887
888 const auto& ret = updateUidToPackageNameMapping(unmappedUids);
889 if (!ret) {
890 ALOGW("%s", ret.error().message().c_str());
891 }
892
893 // Convert the top N uid process stats to ProcessIoPerfData.
894 for (const auto& it : topNIoBlockedUids) {
895 if (it->ioBlockedTasksCnt == 0) {
896 // End of non-zero elements. This case occurs when the number of UIDs with I/O blocked
Lakshman Annadorai52877702020-03-25 14:29:52 -0700897 // processes is < |ro.carwatchdog.top_n_stats_per_category|.
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800898 break;
899 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700900 ProcessIoPerfData::UidStats stats = {
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800901 .userId = multiuser_get_user_id(it->uid),
902 .packageName = std::to_string(it->uid),
903 .count = it->ioBlockedTasksCnt,
904 };
905 if (mUidToPackageNameMapping.find(it->uid) != mUidToPackageNameMapping.end()) {
906 stats.packageName = mUidToPackageNameMapping[it->uid];
907 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700908 if (!collectionInfo.filterPackages.empty() &&
909 collectionInfo.filterPackages.find(stats.packageName) ==
910 collectionInfo.filterPackages.end()) {
911 continue;
912 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700913 for (const auto& pIt : it->topNIoBlockedProcesses) {
914 if (pIt.count == 0) {
915 break;
916 }
917 stats.topNProcesses.emplace_back(
918 ProcessIoPerfData::UidStats::ProcessStats{pIt.comm, pIt.count});
919 }
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800920 processIoPerfData->topNIoBlockedUids.emplace_back(stats);
921 processIoPerfData->topNIoBlockedUidsTotalTaskCnt.emplace_back(it->totalTasksCnt);
922 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700923 for (const auto& it : topNMajorFaultUids) {
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800924 if (it->majorFaults == 0) {
925 // End of non-zero elements. This case occurs when the number of UIDs with major faults
Lakshman Annadorai52877702020-03-25 14:29:52 -0700926 // is < |ro.carwatchdog.top_n_stats_per_category|.
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800927 break;
928 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700929 ProcessIoPerfData::UidStats stats = {
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800930 .userId = multiuser_get_user_id(it->uid),
931 .packageName = std::to_string(it->uid),
932 .count = it->majorFaults,
933 };
934 if (mUidToPackageNameMapping.find(it->uid) != mUidToPackageNameMapping.end()) {
935 stats.packageName = mUidToPackageNameMapping[it->uid];
936 }
Lakshman Annadorai73ef67f2020-05-01 16:28:00 -0700937 if (!collectionInfo.filterPackages.empty() &&
938 collectionInfo.filterPackages.find(stats.packageName) ==
939 collectionInfo.filterPackages.end()) {
940 continue;
941 }
Lakshman Annadoraiff35a992020-04-10 18:03:05 -0700942 for (const auto& pIt : it->topNMajorFaultProcesses) {
943 if (pIt.count == 0) {
944 break;
945 }
946 stats.topNProcesses.emplace_back(
947 ProcessIoPerfData::UidStats::ProcessStats{pIt.comm, pIt.count});
948 }
949 processIoPerfData->topNMajorFaultUids.emplace_back(stats);
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800950 }
951 if (mLastMajorFaults == 0) {
952 processIoPerfData->majorFaultsPercentChange = 0;
953 } else {
954 int64_t increase = processIoPerfData->totalMajorFaults - mLastMajorFaults;
955 processIoPerfData->majorFaultsPercentChange =
Lakshman Annadoraif2855b22020-03-03 14:13:10 -0800956 (static_cast<double>(increase) / static_cast<double>(mLastMajorFaults)) * 100.0;
Lakshman Annadorai325e9652020-02-20 17:27:11 -0800957 }
958 mLastMajorFaults = processIoPerfData->totalMajorFaults;
959 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -0800960}
961
Lakshman Annadoraiab4d3fd2020-02-06 11:24:56 -0800962Result<void> IoPerfCollection::updateUidToPackageNameMapping(
963 const std::unordered_set<uint32_t>& uids) {
964 std::vector<int32_t> appUids;
965
966 for (const auto& uid : uids) {
967 if (uid >= AID_APP_START) {
968 appUids.emplace_back(static_cast<int32_t>(uid));
969 continue;
970 }
971 // System/native UIDs.
972 passwd* usrpwd = getpwuid(uid);
973 if (!usrpwd) {
974 continue;
975 }
976 mUidToPackageNameMapping[uid] = std::string(usrpwd->pw_name);
977 }
978
979 if (appUids.empty()) {
980 return {};
981 }
982
983 if (mPackageManager == nullptr) {
984 auto ret = retrievePackageManager();
985 if (!ret) {
986 return Error() << "Failed to retrieve package manager: " << ret.error();
987 }
988 }
989
990 std::vector<std::string> packageNames;
991 const binder::Status& status = mPackageManager->getNamesForUids(appUids, &packageNames);
992 if (!status.isOk()) {
993 return Error() << "package_native::getNamesForUids failed: " << status.exceptionMessage();
994 }
995
996 for (uint32_t i = 0; i < appUids.size(); i++) {
997 if (!packageNames[i].empty()) {
998 mUidToPackageNameMapping[appUids[i]] = packageNames[i];
999 }
1000 }
1001
1002 return {};
1003}
1004
1005Result<void> IoPerfCollection::retrievePackageManager() {
1006 const sp<IServiceManager> sm = defaultServiceManager();
1007 if (sm == nullptr) {
1008 return Error() << "Failed to retrieve defaultServiceManager";
1009 }
1010
1011 sp<IBinder> binder = sm->getService(String16("package_native"));
1012 if (binder == nullptr) {
1013 return Error() << "Failed to get service package_native";
1014 }
1015 mPackageManager = interface_cast<IPackageManagerNative>(binder);
1016 return {};
Lakshman Annadorai6094e9a2020-01-31 10:03:33 -08001017}
1018
1019} // namespace watchdog
1020} // namespace automotive
1021} // namespace android