Reset statsd and correctly record the dump reason when system
server restarts/crashes.
Test: statsd test
BUG: b/79161505
Change-Id: I0646c764964f6eafde91f9ae0179a1c837af320d
diff --git a/cmds/statsd/src/StatsLogProcessor.cpp b/cmds/statsd/src/StatsLogProcessor.cpp
index 986f2ef..d557913 100644
--- a/cmds/statsd/src/StatsLogProcessor.cpp
+++ b/cmds/statsd/src/StatsLogProcessor.cpp
@@ -162,6 +162,19 @@
OnLogEvent(event, false);
}
+// Resets every config this processor currently tracks, stamping the reset with
+// the current elapsed realtime. Acquires mMetricsMutex itself, so it must not
+// be called while that (non-recursive) mutex is already held by the caller.
+void StatsLogProcessor::resetConfigs() {
+    std::lock_guard<std::mutex> guard(mMetricsMutex);
+    // The clock is sampled only after the lock has been taken.
+    const int64_t nowNs = getElapsedRealtimeNs();
+    resetConfigsLocked(nowNs);
+}
+
+// Resets all configs: gathers the key of every entry in mMetricsManagers and
+// passes the list to the per-key overload together with |timestampNs|.
+// The callers visible in this file take mMetricsMutex before calling.
+void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) {
+    std::vector<ConfigKey> allKeys;
+    for (const auto& entry : mMetricsManagers) {
+        allKeys.push_back(entry.first);
+    }
+    resetConfigsLocked(timestampNs, allKeys);
+}
+
void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
const int64_t currentTimestampNs = event->GetElapsedTimestampNs();
@@ -188,11 +201,7 @@
WriteDataToDiskLocked(CONFIG_RESET);
// We see fresher event before we see the checkpoint. We might have lost data.
// The best we can do is to reset.
- std::vector<ConfigKey> configKeys;
- for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
- configKeys.push_back(it->first);
- }
- resetConfigsLocked(currentTimestampNs, configKeys);
+ resetConfigsLocked(currentTimestampNs);
} else {
// Still in search of the CP. Keep going.
return;
@@ -242,6 +251,7 @@
void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key,
const StatsdConfig& config) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
+ WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED);
OnConfigUpdatedLocked(timestampNs, key, config);
}
@@ -251,10 +261,6 @@
sp<MetricsManager> newMetricsManager =
new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap,
mAnomalyAlarmMonitor, mPeriodicAlarmMonitor);
- auto it = mMetricsManagers.find(key);
- if (it != mMetricsManagers.end()) {
- WriteDataToDiskLocked(it->first, CONFIG_UPDATED);
- }
if (newMetricsManager->isConfigValid()) {
mUidMap->OnConfigUpdated(key);
if (newMetricsManager->shouldAddUidMapListener()) {
@@ -419,6 +425,7 @@
}
}
if (configKeysTtlExpired.size() > 0) {
+ WriteDataToDiskLocked(CONFIG_RESET);
resetConfigsLocked(timestampNs, configKeysTtlExpired);
}
}
@@ -427,7 +434,7 @@
std::lock_guard<std::mutex> lock(mMetricsMutex);
auto it = mMetricsManagers.find(key);
if (it != mMetricsManagers.end()) {
- WriteDataToDiskLocked(key, CONFIG_REMOVED);
+ WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED);
mMetricsManagers.erase(it);
mUidMap->OnConfigRemoved(key);
}
@@ -474,9 +481,13 @@
}
void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key,
+ const int64_t timestampNs,
const DumpReportReason dumpReportReason) {
+ if (mMetricsManagers.find(key) == mMetricsManagers.end()) {
+ return;
+ }
ProtoOutputStream proto;
- onConfigMetricsReportLocked(key, getElapsedRealtimeNs(),
+ onConfigMetricsReportLocked(key, timestampNs,
true /* include_current_partial_bucket*/,
false /* include strings */, dumpReportReason, &proto);
string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR,
@@ -491,14 +502,15 @@
}
void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) {
+ const int64_t timeNs = getElapsedRealtimeNs();
for (auto& pair : mMetricsManagers) {
- WriteDataToDiskLocked(pair.first, dumpReportReason);
+ WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason);
}
}
-void StatsLogProcessor::WriteDataToDisk(bool isShutdown) {
+void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
- WriteDataToDiskLocked(DEVICE_SHUTDOWN);
+ WriteDataToDiskLocked(dumpReportReason);
}
void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) {
diff --git a/cmds/statsd/src/StatsLogProcessor.h b/cmds/statsd/src/StatsLogProcessor.h
index c3c4663..a6c4d86 100644
--- a/cmds/statsd/src/StatsLogProcessor.h
+++ b/cmds/statsd/src/StatsLogProcessor.h
@@ -77,7 +77,10 @@
unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet);
/* Flushes data to disk. Data on memory will be gone after written to disk. */
- void WriteDataToDisk(bool shutdown);
+ void WriteDataToDisk(const DumpReportReason dumpReportReason);
+
+ // Reset all configs.
+ void resetConfigs();
inline sp<UidMap> getUidMap() {
return mUidMap;
@@ -121,8 +124,9 @@
void OnConfigUpdatedLocked(
const int64_t currentTimestampNs, const ConfigKey& key, const StatsdConfig& config);
- void WriteDataToDiskLocked(DumpReportReason dumpReportReason);
- void WriteDataToDiskLocked(const ConfigKey& key, DumpReportReason dumpReportReason);
+ void WriteDataToDiskLocked(const DumpReportReason dumpReportReason);
+ void WriteDataToDiskLocked(const ConfigKey& key, const int64_t timestampNs,
+ const DumpReportReason dumpReportReason);
void onConfigMetricsReportLocked(const ConfigKey& key, const int64_t dumpTimeStampNs,
const bool include_current_partial_bucket,
@@ -141,6 +145,9 @@
// Handler over the isolated uid change event.
void onIsolatedUidChangedEventLocked(const LogEvent& event);
+ // Reset all configs.
+ void resetConfigsLocked(const int64_t timestampNs);
+ // Reset the specified configs.
void resetConfigsLocked(const int64_t timestampNs, const std::vector<ConfigKey>& configs);
// Function used to send a broadcast so that receiver for the config key can call getData
diff --git a/cmds/statsd/src/StatsService.cpp b/cmds/statsd/src/StatsService.cpp
index 0e7b4f9..811edb5 100644
--- a/cmds/statsd/src/StatsService.cpp
+++ b/cmds/statsd/src/StatsService.cpp
@@ -659,7 +659,7 @@
status_t StatsService::cmd_write_data_to_disk(FILE* out) {
fprintf(out, "Writing data to disk\n");
- mProcessor->WriteDataToDisk(false);
+ mProcessor->WriteDataToDisk(ADB_DUMP);
return NO_ERROR;
}
@@ -816,10 +816,10 @@
return Status::ok();
}
-Status StatsService::informDeviceShutdown(bool isShutdown) {
+Status StatsService::informDeviceShutdown() {
ENFORCE_UID(AID_SYSTEM);
VLOG("StatsService::informDeviceShutdown");
- mProcessor->WriteDataToDisk(isShutdown);
+ mProcessor->WriteDataToDisk(DEVICE_SHUTDOWN);
return Status::ok();
}
@@ -967,7 +967,12 @@
void StatsService::binderDied(const wp <IBinder>& who) {
ALOGW("statscompanion service died");
- mProcessor->WriteDataToDisk(STATSCOMPANION_DIED);
+ StatsdStats::getInstance().noteSystemServerRestart(getWallClockSec());
+ if (mProcessor != nullptr) {
+ ALOGW("Reset statsd upon system server restars.");
+ mProcessor->WriteDataToDisk(STATSCOMPANION_DIED);
+ mProcessor->resetConfigs();
+ }
mAnomalyAlarmMonitor->setStatsCompanionService(nullptr);
mPeriodicAlarmMonitor->setStatsCompanionService(nullptr);
SubscriberReporter::getInstance().setStatsCompanionService(nullptr);
diff --git a/cmds/statsd/src/StatsService.h b/cmds/statsd/src/StatsService.h
index e409a71..af4cbff 100644
--- a/cmds/statsd/src/StatsService.h
+++ b/cmds/statsd/src/StatsService.h
@@ -66,7 +66,7 @@
const vector<String16>& app);
virtual Status informOnePackage(const String16& app, int32_t uid, int64_t version);
virtual Status informOnePackageRemoved(const String16& app, int32_t uid);
- virtual Status informDeviceShutdown(bool isShutdown);
+ virtual Status informDeviceShutdown();
/**
* Called right before we start processing events.
diff --git a/cmds/statsd/src/guardrail/StatsdStats.cpp b/cmds/statsd/src/guardrail/StatsdStats.cpp
index ee3ed23..764366f 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.cpp
+++ b/cmds/statsd/src/guardrail/StatsdStats.cpp
@@ -51,6 +51,7 @@
const int FIELD_ID_LOGGER_ERROR_STATS = 11;
const int FIELD_ID_PERIODIC_ALARM_STATS = 12;
const int FIELD_ID_LOG_LOSS_STATS = 14;
+const int FIELD_ID_SYSTEM_SERVER_RESTART = 15;
const int FIELD_ID_ATOM_STATS_TAG = 1;
const int FIELD_ID_ATOM_STATS_COUNT = 2;
@@ -355,6 +356,15 @@
mPushedAtomStats[atomId]++;
}
+// Records the time (in seconds, as supplied by the caller) of a system server
+// restart. The list holds at most kMaxSystemServerRestarts entries: when it is
+// full, the front entry is dropped before the new one is appended.
+void StatsdStats::noteSystemServerRestart(int32_t timeSec) {
+    lock_guard<std::mutex> guard(mLock);
+
+    const bool atCapacity = mSystemServerRestartSec.size() == kMaxSystemServerRestarts;
+    if (atCapacity) {
+        // New entries go on the back, so the front is the oldest one.
+        mSystemServerRestartSec.pop_front();
+    }
+    mSystemServerRestartSec.push_back(timeSec);
+}
+
void StatsdStats::noteLoggerError(int error) {
lock_guard<std::mutex> lock(mLock);
// grows strictly one at a time. so it won't > kMaxLoggerErrors
@@ -377,6 +387,7 @@
mAnomalyAlarmRegisteredStats = 0;
mPeriodicAlarmRegisteredStats = 0;
mLoggerErrors.clear();
+ mSystemServerRestartSec.clear();
mLogLossTimestampNs.clear();
for (auto& config : mConfigStats) {
config.second->broadcast_sent_time_sec.clear();
@@ -395,7 +406,7 @@
time_t t = timeSec;
struct tm* tm = localtime(&t);
char timeBuffer[80];
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %I:%M%p\n", tm);
+ strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %I:%M%p", tm);
return string(timeBuffer);
}
@@ -511,6 +522,12 @@
strftime(buffer, sizeof(buffer), "%Y-%m-%d %I:%M%p\n", error_tm);
fprintf(out, "Logger error %d at %s\n", error.second, buffer);
}
+
+ for (const auto& restart : mSystemServerRestartSec) {
+ fprintf(out, "System server restarts at %s(%lld)\n",
+ buildTimeString(restart).c_str(), (long long)restart);
+ }
+
for (const auto& loss : mLogLossTimestampNs) {
fprintf(out, "Log loss detected at %lld (elapsedRealtimeNs)\n", (long long)loss);
}
@@ -673,6 +690,11 @@
(long long)loss);
}
+ for (const auto& restart : mSystemServerRestartSec) {
+ proto.write(FIELD_TYPE_INT32 | FIELD_ID_SYSTEM_SERVER_RESTART | FIELD_COUNT_REPEATED,
+ restart);
+ }
+
output->clear();
size_t bufferSize = proto.size();
output->resize(bufferSize);
diff --git a/cmds/statsd/src/guardrail/StatsdStats.h b/cmds/statsd/src/guardrail/StatsdStats.h
index 65ba4f7..74541d3 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.h
+++ b/cmds/statsd/src/guardrail/StatsdStats.h
@@ -104,6 +104,8 @@
const static int kMaxLoggerErrors = 20;
+ const static int kMaxSystemServerRestarts = 20;
+
const static int kMaxTimestampCount = 20;
const static int kMaxLogSourceCount = 50;
@@ -275,6 +277,11 @@
*/
void noteLoggerError(int error);
+ /*
+ * Records when system server restarts.
+ */
+ void noteSystemServerRestart(int32_t timeSec);
+
/**
* Records statsd skipped an event.
*/
@@ -338,6 +345,8 @@
// Timestamps when we detect log loss after logd reconnect.
std::list<int64_t> mLogLossTimestampNs;
+ std::list<int32_t> mSystemServerRestartSec;
+
// Stores the number of times statsd modified the anomaly alarm registered with
// StatsCompanionService.
int mAnomalyAlarmRegisteredStats = 0;
@@ -366,6 +375,7 @@
FRIEND_TEST(StatsdStatsTest, TestAtomLog);
FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
+ FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
};
} // namespace statsd
diff --git a/cmds/statsd/src/stats_log.proto b/cmds/statsd/src/stats_log.proto
index 9236864..2fe17da 100644
--- a/cmds/statsd/src/stats_log.proto
+++ b/cmds/statsd/src/stats_log.proto
@@ -383,4 +383,6 @@
repeated SkippedLogEventStats skipped_log_event_stats = 13;
repeated int64 log_loss_stats = 14;
+
+ repeated int32 system_restart_sec = 15;
}
diff --git a/cmds/statsd/tests/guardrail/StatsdStats_test.cpp b/cmds/statsd/tests/guardrail/StatsdStats_test.cpp
index e99e402..967ef3c 100644
--- a/cmds/statsd/tests/guardrail/StatsdStats_test.cpp
+++ b/cmds/statsd/tests/guardrail/StatsdStats_test.cpp
@@ -298,6 +298,28 @@
EXPECT_EQ(newTimestamp, configStats->dump_report_stats.back().first);
}
+// Verifies the system-server-restart bookkeeping in StatsdStats:
+//  - up to kMaxSystemServerRestarts timestamps are kept and reported in the
+//    order they were noted;
+//  - noting one more restart keeps the count at the limit, drops the oldest
+//    timestamp and reports the newest one last.
+TEST(StatsdStatsTest, TestSystemServerCrash) {
+    StatsdStats stats;
+    // Local copy of the limit, used for the loops and size checks below.
+    const int maxCount = StatsdStats::kMaxSystemServerRestarts;
+
+    vector<int32_t> timestamps;
+    for (int i = 0; i < maxCount; i++) {
+        timestamps.push_back(i);
+        stats.noteSystemServerRestart(timestamps[i]);
+    }
+    vector<uint8_t> output;
+    stats.dumpStats(&output, false);
+    StatsdStatsReport report;
+    // data() is well defined even if the dump produced no bytes.
+    EXPECT_TRUE(report.ParseFromArray(output.data(), output.size()));
+    // ASSERT (not EXPECT): the element accesses below are only valid when the
+    // repeated field really has maxCount entries.
+    ASSERT_EQ(maxCount, static_cast<int>(report.system_restart_sec_size()));
+    for (int i = 0; i < maxCount; i++) {
+        EXPECT_EQ(timestamps[i], report.system_restart_sec(i));
+    }
+
+    // One restart beyond the limit.
+    const int32_t newest = maxCount + 1;
+    stats.noteSystemServerRestart(newest);
+    output.clear();
+    stats.dumpStats(&output, false);
+    EXPECT_TRUE(report.ParseFromArray(output.data(), output.size()));
+    ASSERT_EQ(maxCount, static_cast<int>(report.system_restart_sec_size()));
+    // The first timestamp noted has been evicted; the second is now in front.
+    EXPECT_EQ(timestamps[1], report.system_restart_sec(0));
+    EXPECT_EQ(newest, report.system_restart_sec(maxCount - 1));
+}
+
} // namespace statsd
} // namespace os
} // namespace android