blob: 891c6e24b96eba984a79bb05bc324a7924ce172c [file] [log] [blame]
/**
* Copyright (c) 2020, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "carwatchdogd"
#define DEBUG false // STOPSHIP if true.
#include "WatchdogProcessService.h"
#include "WatchdogServiceHelper.h"
#include <android-base/file.h>
#include <android-base/macros.h>
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android/automotive/watchdog/BnCarWatchdogClient.h>
#include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
#include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
#include <android/hardware/automotive/vehicle/2.0/types.h>
#include <android/hidl/manager/1.0/IServiceManager.h>
#include <binder/IPCThreadState.h>
#include <hidl/HidlTransportSupport.h>
#include <utils/SystemClock.h>
#include <utility>
namespace android {
namespace automotive {
namespace watchdog {
namespace aawi = ::android::automotive::watchdog::internal;
using aawi::BnCarWatchdogServiceForSystem;
using aawi::ICarWatchdogServiceForSystem;
using ::android::IBinder;
using ::android::sp;
using ::android::String16;
using ::android::base::Error;
using ::android::base::GetIntProperty;
using ::android::base::GetProperty;
using ::android::base::ReadFileToString;
using ::android::base::Result;
using ::android::base::StringAppendF;
using ::android::base::StringPrintf;
using ::android::base::Trim;
using ::android::base::WriteStringToFd;
using ::android::binder::Status;
using ::android::hardware::hidl_vec;
using ::android::hardware::interfacesEqual;
using ::android::hardware::Return;
using ::android::hardware::automotive::vehicle::V2_0::IVehicle;
using ::android::hardware::automotive::vehicle::V2_0::ProcessTerminationReason;
using ::android::hardware::automotive::vehicle::V2_0::StatusCode;
using ::android::hardware::automotive::vehicle::V2_0::SubscribeFlags;
using ::android::hardware::automotive::vehicle::V2_0::SubscribeOptions;
using ::android::hardware::automotive::vehicle::V2_0::VehiclePropConfig;
using ::android::hardware::automotive::vehicle::V2_0::VehicleProperty;
using ::android::hardware::automotive::vehicle::V2_0::VehiclePropertyStatus;
using ::android::hardware::automotive::vehicle::V2_0::VehiclePropValue;
using ::android::hidl::base::V1_0::IBase;
namespace {
const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
TimeoutLength::TIMEOUT_MODERATE,
TimeoutLength::TIMEOUT_NORMAL};
// TimeoutLength is also used as a message ID. Other message IDs should start next to
// TimeoutLength::TIMEOUT_NORMAL.
const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
// VHAL is supposed to send heart beat every 3s. Car watchdog checks if there is the latest heart
// beat from VHAL within 3s, allowing 1s marginal time.
// If {@code ro.carwatchdog.vhal_healthcheck.interval} is set, car watchdog checks VHAL health at
// the given interval. The lower bound of the interval is 3s.
constexpr int32_t kDefaultVhalCheckIntervalSec = 3;
constexpr std::chrono::milliseconds kHealthCheckDelayMs = 1s;
constexpr const char kPropertyVhalCheckInterval[] = "ro.carwatchdog.vhal_healthcheck.interval";
constexpr const char kServiceName[] = "WatchdogProcessService";
constexpr const char kVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
std::chrono::nanoseconds timeoutToDurationNs(const TimeoutLength& timeout) {
switch (timeout) {
case TimeoutLength::TIMEOUT_CRITICAL:
return 3s; // 3s and no buffer time.
case TimeoutLength::TIMEOUT_MODERATE:
return 6s; // 5s + 1s as buffer time.
case TimeoutLength::TIMEOUT_NORMAL:
return 12s; // 10s + 2s as buffer time.
}
}
std::string pidArrayToString(const std::vector<int32_t>& pids) {
size_t size = pids.size();
if (size == 0) {
return "";
}
std::string buffer;
StringAppendF(&buffer, "%d", pids[0]);
for (int i = 1; i < size; i++) {
int pid = pids[i];
StringAppendF(&buffer, ", %d", pid);
}
return buffer;
}
bool isSystemShuttingDown() {
std::string sysPowerCtl;
std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
std::getline(tokenStream, sysPowerCtl, ',');
return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
}
} // namespace
WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
mHandlerLooper(handlerLooper),
mIsEnabled(true),
mLastSessionId(0),
mServiceStarted(false),
mVhalService(nullptr) {
mMessageHandler = sp<MessageHandlerImpl>::make(this);
mBinderDeathRecipient = sp<BinderDeathRecipient>::make(this);
mHidlDeathRecipient = sp<HidlDeathRecipient>::make(this);
mPropertyChangeListener = sp<PropertyChangeListener>::make(this);
for (const auto& timeout : kTimeouts) {
mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
}
int32_t vhalHealthCheckIntervalSec =
GetIntProperty(kPropertyVhalCheckInterval, kDefaultVhalCheckIntervalSec);
vhalHealthCheckIntervalSec = std::max(vhalHealthCheckIntervalSec, kDefaultVhalCheckIntervalSec);
mVhalHealthCheckWindowMs = std::chrono::seconds(vhalHealthCheckIntervalSec);
}
Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
const sp<IWatchdogServiceHelper>& helper) {
if (helper == nullptr) {
return Error() << "Must provide a non-null watchdog service helper instance";
}
Mutex::Autolock lock(mMutex);
mWatchdogServiceHelper = helper;
return {};
}
Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
TimeoutLength timeout) {
pid_t callingPid = IPCThreadState::self()->getCallingPid();
uid_t callingUid = IPCThreadState::self()->getCallingUid();
ClientInfo clientInfo(client, callingPid, callingUid);
Mutex::Autolock lock(mMutex);
return registerClientLocked(clientInfo, timeout);
}
Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
Mutex::Autolock lock(mMutex);
sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
// kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
// MODERATE, NORMAL).
return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
}
Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
pid_t callingPid = IPCThreadState::self()->getCallingPid();
uid_t callingUid = IPCThreadState::self()->getCallingUid();
Mutex::Autolock lock(mMutex);
if (mWatchdogServiceHelper == nullptr) {
return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
"Watchdog service helper instance is null");
}
ClientInfo clientInfo(mWatchdogServiceHelper, binder, callingPid, callingUid);
return registerClientLocked(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
}
void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
Mutex::Autolock lock(mMutex);
std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
unregisterClientLocked(timeouts, binder, ClientType::Service);
}
Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
Mutex::Autolock lock(mMutex);
sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
return Status::ok();
}
status_t ret = binder->linkToDeath(mBinderDeathRecipient);
if (ret != OK) {
ALOGW("Failed to register the monitor as it is dead.");
return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
}
mMonitor = monitor;
if (DEBUG) {
ALOGD("Car watchdog monitor is registered");
}
return Status::ok();
}
Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
Mutex::Autolock lock(mMutex);
sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
if (curBinder != newBinder) {
ALOGW("Failed to unregister the monitor as it has not been registered.");
return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
"The monitor has not been registered.");
}
curBinder->unlinkToDeath(mBinderDeathRecipient);
mMonitor = nullptr;
if (DEBUG) {
ALOGD("Car watchdog monitor is unregistered");
}
return Status::ok();
}
Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
int32_t sessionId) {
Mutex::Autolock lock(mMutex);
return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
}
Status WatchdogProcessService::tellCarWatchdogServiceAlive(
const sp<ICarWatchdogServiceForSystem>& service,
const std::vector<int32_t>& clientsNotResponding, int32_t sessionId) {
Status status;
{
Mutex::Autolock lock(mMutex);
if (DEBUG) {
std::string buffer;
int size = clientsNotResponding.size();
if (size != 0) {
StringAppendF(&buffer, "%d", clientsNotResponding[0]);
for (int i = 1; i < clientsNotResponding.size(); i++) {
StringAppendF(&buffer, ", %d", clientsNotResponding[i]);
}
ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
sessionId, buffer.c_str());
}
}
status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
}
if (status.isOk()) {
dumpAndKillAllProcesses(clientsNotResponding, true);
}
return status;
}
Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
int32_t pid) {
Mutex::Autolock lock(mMutex);
if (mMonitor == nullptr || monitor == nullptr ||
aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
return Status::
fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
"The monitor is not registered or an invalid monitor is given");
}
ALOGI("Process(pid: %d) has been dumped and killed", pid);
return Status::ok();
}
void WatchdogProcessService::setEnabled(bool isEnabled) {
Mutex::Autolock lock(mMutex);
if (mIsEnabled != isEnabled) {
ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
}
mIsEnabled = isEnabled;
if (mIsEnabled) {
for (const auto& timeout : kTimeouts) {
startHealthCheckingLocked(timeout);
}
}
}
Status WatchdogProcessService::notifyUserStateChange(userid_t userId, aawi::UserState state) {
std::string buffer;
Mutex::Autolock lock(mMutex);
switch (state) {
case aawi::UserState::USER_STATE_STARTED:
mStoppedUserIds.erase(userId);
buffer = StringPrintf("user(%d) is started", userId);
break;
case aawi::UserState::USER_STATE_STOPPED:
mStoppedUserIds.insert(userId);
buffer = StringPrintf("user(%d) is stopped", userId);
break;
default:
ALOGW("Unsupported user state: %d", state);
return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "Unsupported user state");
}
ALOGI("Received user state change: %s", buffer.c_str());
return Status::ok();
}
Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
Mutex::Autolock lock(mMutex);
const char* indent = " ";
const char* doubleIndent = " ";
std::string buffer;
WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
mIsEnabled ? "true" : "false"),
fd);
WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
int count = 1;
for (const auto& timeout : kTimeouts) {
std::vector<ClientInfo>& clients = mClients[timeout];
for (auto it = clients.begin(); it != clients.end(); it++, count++) {
WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
it->toString().c_str()),
fd);
}
}
WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
mMonitor == nullptr ? "false" : "true"),
fd);
WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
isSystemShuttingDown() ? "true" : "false"),
fd);
buffer = "none";
bool first = true;
for (const auto& userId : mStoppedUserIds) {
if (first) {
buffer = StringPrintf("%d", userId);
first = false;
} else {
StringAppendF(&buffer, ", %d", userId);
}
}
WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
WriteStringToFd(StringPrintf("%sVHAL health check interval: %lldms\n", indent,
mVhalHealthCheckWindowMs.count()),
fd);
return {};
}
void WatchdogProcessService::doHealthCheck(int what) {
mHandlerLooper->removeMessages(mMessageHandler, what);
if (Mutex::Autolock lock(mMutex); !mIsEnabled) {
return;
}
const TimeoutLength timeout = static_cast<TimeoutLength>(what);
dumpAndKillClientsIfNotResponding(timeout);
/* Generates a temporary/local vector containing clients.
* Using a local copy may send unnecessary ping messages to clients after they are unregistered.
* Clients should be able to handle them.
*/
std::vector<ClientInfo> clientsToCheck;
PingedClientMap& pingedClients = mPingedClients[timeout];
{
Mutex::Autolock lock(mMutex);
pingedClients.clear();
clientsToCheck = mClients[timeout];
for (auto& clientInfo : clientsToCheck) {
if (mStoppedUserIds.count(clientInfo.userId) > 0) {
continue;
}
int sessionId = getNewSessionId();
clientInfo.sessionId = sessionId;
pingedClients.insert(std::make_pair(sessionId, clientInfo));
}
}
for (const auto& clientInfo : clientsToCheck) {
Status status = clientInfo.checkIfAlive(timeout);
if (!status.isOk()) {
ALOGW("Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid,
status.exceptionMessage().c_str());
{
Mutex::Autolock lock(mMutex);
pingedClients.erase(clientInfo.sessionId);
}
}
}
// Though the size of pingedClients is a more specific measure, clientsToCheck is used as a
// conservative approach.
if (clientsToCheck.size() > 0) {
auto durationNs = timeoutToDurationNs(timeout);
mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
}
}
Result<void> WatchdogProcessService::start() {
if (mServiceStarted) {
return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
}
mServiceStarted = true;
reportWatchdogAliveToVhal();
return {};
}
void WatchdogProcessService::terminate() {
Mutex::Autolock lock(mMutex);
for (const auto& timeout : kTimeouts) {
std::vector<ClientInfo>& clients = mClients[timeout];
for (auto it = clients.begin(); it != clients.end();) {
it->unlinkToDeath(mBinderDeathRecipient);
it = clients.erase(it);
}
}
mWatchdogServiceHelper.clear();
if (mMonitor != nullptr) {
sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
binder->unlinkToDeath(mBinderDeathRecipient);
}
if (mVhalService != nullptr) {
mVhalService->unlinkToDeath(mHidlDeathRecipient);
}
mServiceStarted = false;
}
Status WatchdogProcessService::registerClientLocked(const ClientInfo& clientInfo,
TimeoutLength timeout) {
if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
ALOGW("Failed to register (%s) as it is already registered.",
clientInfo.toString().c_str());
return Status::ok();
}
status_t status = clientInfo.linkToDeath(mBinderDeathRecipient);
if (status != OK) {
ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
}
std::vector<ClientInfo>& clients = mClients[timeout];
clients.emplace_back(clientInfo);
// If the client array becomes non-empty, start health checking.
if (clients.size() == 1) {
startHealthCheckingLocked(timeout);
}
if (DEBUG) {
ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
timeout);
}
return Status::ok();
}
Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
sp<IBinder> binder, ClientType clientType) {
const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
bool result = findClientAndProcessLocked(timeouts, binder,
[&](std::vector<ClientInfo>& clients,
std::vector<ClientInfo>::const_iterator it) {
it->unlinkToDeath(mBinderDeathRecipient);
clients.erase(it);
});
if (!result) {
std::string errorStr = StringPrintf("The %s has not been registered", clientName);
const char* errorCause = errorStr.c_str();
ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
}
if (DEBUG) {
ALOGD("Car watchdog %s is unregistered", clientName);
}
return Status::ok();
}
Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
for (const auto& timeout : kTimeouts) {
PingedClientMap& clients = mPingedClients[timeout];
PingedClientMap::const_iterator it = clients.find(sessionId);
if (it == clients.cend() || !it->second.matchesBinder(binder)) {
continue;
}
clients.erase(it);
return Status::ok();
}
return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
"The client is not registered or the session ID is not found");
}
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
const ClientInfo& clientInfo,
const Processor& processor) {
for (const auto& timeout : timeouts) {
std::vector<ClientInfo>& clients = mClients[timeout];
for (auto it = clients.begin(); it != clients.end(); it++) {
if (std::as_const(*it) != clientInfo) {
continue;
}
if (processor != nullptr) {
processor(clients, it);
}
return true;
}
}
return false;
}
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
const sp<IBinder> binder,
const Processor& processor) {
for (const auto& timeout : timeouts) {
std::vector<ClientInfo>& clients = mClients[timeout];
for (auto it = clients.begin(); it != clients.end(); it++) {
if (!it->matchesBinder(binder)) {
continue;
}
if (processor != nullptr) {
processor(clients, it);
}
return true;
}
}
return false;
}
Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
PingedClientMap& clients = mPingedClients[timeout];
clients.clear();
int what = static_cast<int>(timeout);
auto durationNs = timeoutToDurationNs(timeout);
mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
return {};
}
Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
std::vector<int32_t> processIds;
std::vector<const ClientInfo*> clientsToNotify;
{
Mutex::Autolock lock(mMutex);
PingedClientMap& clients = mPingedClients[timeout];
for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
pid_t pid = -1;
userid_t userId = -1;
std::vector<TimeoutLength> timeouts = {timeout};
findClientAndProcessLocked(timeouts, it->second,
[&](std::vector<ClientInfo>& cachedClients,
std::vector<ClientInfo>::const_iterator
cachedClientsIt) {
pid = cachedClientsIt->pid;
userId = cachedClientsIt->userId;
cachedClients.erase(cachedClientsIt);
});
if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
clientsToNotify.emplace_back(&it->second);
processIds.push_back(pid);
}
}
}
for (const ClientInfo*& clientInfo : clientsToNotify) {
clientInfo->prepareProcessTermination();
}
return dumpAndKillAllProcesses(processIds, true);
}
Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
const std::vector<int32_t>& processesNotResponding, bool reportToVhal) {
size_t size = processesNotResponding.size();
if (size == 0) {
return {};
}
std::string pidString = pidArrayToString(processesNotResponding);
sp<aawi::ICarWatchdogMonitor> monitor;
{
Mutex::Autolock lock(mMutex);
if (mMonitor == nullptr) {
std::string errorMsg =
StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
pidString.c_str());
ALOGW("%s", errorMsg.c_str());
return Error() << errorMsg;
}
monitor = mMonitor;
}
if (isSystemShuttingDown()) {
ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
pidString.c_str());
return {};
}
if (reportToVhal) {
reportTerminatedProcessToVhal(processesNotResponding);
}
monitor->onClientsNotResponding(processesNotResponding);
if (DEBUG) {
ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
}
return {};
}
// Handle when car watchdog clients die.
void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
Mutex::Autolock lock(mMutex);
IBinder* binder = who.unsafe_get();
// Check if dead binder is monitor.
sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
if (monitor == binder) {
mMonitor = nullptr;
ALOGW("The monitor has died.");
return;
}
findClientAndProcessLocked(kTimeouts, binder,
[&](std::vector<ClientInfo>& clients,
std::vector<ClientInfo>::const_iterator it) {
ALOGW("Client(pid: %d) died", it->pid);
clients.erase(it);
});
}
// Handle when VHAL dies.
void WatchdogProcessService::handleHidlDeath(const wp<IBase>& who) {
Mutex::Autolock lock(mMutex);
if (!interfacesEqual(mVhalService, who.promote())) {
return;
}
ALOGW("VHAL has died.");
mVhalService->unlinkToDeath(mHidlDeathRecipient);
mVhalService = nullptr;
}
void WatchdogProcessService::reportWatchdogAliveToVhal() {
if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
return;
}
int64_t systemUptime = uptimeMillis();
VehiclePropValue propValue{
.prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
.status = VehiclePropertyStatus::AVAILABLE,
.value = {.int64Values = {systemUptime}},
};
const auto& ret = updateVhal(propValue);
if (!ret.ok()) {
ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s");
}
// Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
auto durationNs = timeoutToDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
Message(MSG_VHAL_WATCHDOG_ALIVE));
}
void WatchdogProcessService::reportTerminatedProcessToVhal(
const std::vector<int32_t>& processesNotResponding) {
if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
"reported to VHAL.");
return;
}
for (auto&& pid : processesNotResponding) {
const auto& retCmdLine = readProcCmdLine(pid);
if (!retCmdLine.ok()) {
ALOGW("Failed to get process command line for pid(%d): %s", pid,
retCmdLine.error().message().c_str());
continue;
}
std::string procCmdLine = retCmdLine.value();
VehiclePropValue propValue{
.prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
.status = VehiclePropertyStatus::AVAILABLE,
.value = {
.int32Values = {static_cast<int32_t>(
ProcessTerminationReason::NOT_RESPONDING)},
.stringValue = procCmdLine,
},
};
const auto& retUpdate = updateVhal(propValue);
if (!retUpdate.ok()) {
ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
procCmdLine.c_str());
}
}
}
Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
Mutex::Autolock lock(mMutex);
const auto& connectRet = connectToVhalLocked();
if (!connectRet.ok()) {
std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
ALOGW("%s", errorMsg.c_str());
return Error() << errorMsg;
}
if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(value.prop)) > 0) {
std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", value.prop);
ALOGW("%s", errorMsg.c_str());
return Error() << errorMsg;
}
const auto& updateRet = mVhalService->set(value);
if (updateRet.isOk() && updateRet == StatusCode::OK) {
return {};
}
return Error() << "Failed to set propValue(" << value.prop << ") to VHAL";
}
Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
std::string procCmdLine;
if (ReadFileToString(cmdLinePath, &procCmdLine)) {
std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
procCmdLine = Trim(procCmdLine);
return procCmdLine;
}
return Error() << "Failed to read " << cmdLinePath;
}
Result<void> WatchdogProcessService::connectToVhalLocked() {
if (mVhalService.get() != nullptr) {
return {};
}
mVhalService = IVehicle::tryGetService();
if (mVhalService.get() == nullptr) {
return Error() << "Failed to connect to VHAL.";
}
mVhalService->linkToDeath(mHidlDeathRecipient, /*cookie=*/0);
queryVhalPropertiesLocked();
subscribeToVhalHeartBeatLocked();
ALOGI("Successfully connected to VHAL.");
return {};
}
void WatchdogProcessService::queryVhalPropertiesLocked() {
mNotSupportedVhalProperties.clear();
std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
VehicleProperty::VHAL_HEARTBEAT};
for (const auto& propId : propIds) {
if (!isVhalPropertySupportedLocked(propId)) {
mNotSupportedVhalProperties.insert(propId);
}
}
}
bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
StatusCode status;
hidl_vec<int32_t> props = {static_cast<int32_t>(propId)};
mVhalService->getPropConfigs(props,
[&status](StatusCode s,
hidl_vec<VehiclePropConfig> /*propConfigs*/) {
status = s;
});
return status == StatusCode::OK;
}
void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
return;
}
mVhalHeartBeat = {
.eventTime = 0,
.value = 0,
};
SubscribeOptions reqVhalProperties[] = {
{.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT),
.flags = SubscribeFlags::EVENTS_FROM_CAR},
};
hidl_vec<SubscribeOptions> options;
options.setToExternal(reqVhalProperties, arraysize(reqVhalProperties));
StatusCode status = mVhalService->subscribe(mPropertyChangeListener, options);
if (status != StatusCode::OK) {
ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled.");
return;
}
std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
Message(MSG_VHAL_HEALTH_CHECK));
}
int32_t WatchdogProcessService::getNewSessionId() {
// Make sure that session id is always positive number.
if (++mLastSessionId <= 0) {
mLastSessionId = 1;
}
return mLastSessionId;
}
void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
bool wrongHeartBeat;
{
Mutex::Autolock lock(mMutex);
wrongHeartBeat = value <= mVhalHeartBeat.value;
mVhalHeartBeat.eventTime = uptimeMillis();
mVhalHeartBeat.value = value;
}
if (wrongHeartBeat) {
ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
terminateVhal();
return;
}
std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
Message(MSG_VHAL_HEALTH_CHECK));
}
void WatchdogProcessService::checkVhalHealth() {
int64_t lastEventTime;
int64_t currentUptime = uptimeMillis();
{
Mutex::Autolock lock(mMutex);
lastEventTime = mVhalHeartBeat.eventTime;
}
if (currentUptime > lastEventTime + mVhalHealthCheckWindowMs.count()) {
ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
terminateVhal();
}
}
void WatchdogProcessService::terminateVhal() {
using ::android::hidl::manager::V1_0::IServiceManager;
std::vector<int32_t> processIds;
sp<IServiceManager> manager = IServiceManager::getService();
Return<void> ret = manager->debugDump([&](auto& hals) {
for (const auto& info : hals) {
if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
continue;
}
if (info.interfaceName == kVhalInterfaceName) {
processIds.push_back(info.pid);
break;
}
}
});
if (!ret.isOk()) {
ALOGE("Failed to terminate VHAL: could not get VHAL process id");
return;
} else if (processIds.empty()) {
ALOGE("Failed to terminate VHAL: VHAL is not running");
return;
}
dumpAndKillAllProcesses(processIds, false);
}
std::string WatchdogProcessService::ClientInfo::toString() const {
std::string buffer;
StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
type == ClientType::Regular ? "regular" : "watchdog service");
return buffer;
}
sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
if (type == ClientType::Regular) {
return BnCarWatchdogClient::asBinder(client);
}
return watchdogServiceBinder;
}
status_t WatchdogProcessService::ClientInfo::linkToDeath(
const sp<IBinder::DeathRecipient>& recipient) const {
if (type == ClientType::Regular) {
return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
}
// WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
// skip this step.
return OK;
}
status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
const wp<IBinder::DeathRecipient>& recipient) const {
if (type == ClientType::Regular) {
return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
}
// WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
// skip this step.
return OK;
}
Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
if (type == ClientType::Regular) {
return client->checkIfAlive(sessionId, timeout);
}
return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
}
Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
if (type == ClientType::Regular) {
return client->prepareProcessTermination();
}
return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
}
WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
const sp<WatchdogProcessService>& service) :
mService(service) {}
void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
mService->handleBinderDeath(who);
}
WatchdogProcessService::HidlDeathRecipient::HidlDeathRecipient(
const sp<WatchdogProcessService>& service) :
mService(service) {}
void WatchdogProcessService::HidlDeathRecipient::serviceDied(uint64_t /*cookie*/,
const wp<IBase>& who) {
mService->handleHidlDeath(who);
}
WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
const sp<WatchdogProcessService>& service) :
mService(service) {}
Return<void> WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
const hidl_vec<VehiclePropValue>& propValues) {
for (const auto& value : propValues) {
if (value.prop == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
mService->updateVhalHeartBeat(value.value.int64Values[0]);
break;
}
}
return Return<void>();
}
Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySet(
const VehiclePropValue& /*propValue*/) {
return Return<void>();
}
Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySetError(
StatusCode /*status*/, int32_t /*propId*/, int32_t /*areaId*/) {
return Return<void>();
}
WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
const sp<WatchdogProcessService>& service) :
mService(service) {}
void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
switch (message.what) {
case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
mService->doHealthCheck(message.what);
break;
case MSG_VHAL_WATCHDOG_ALIVE:
mService->reportWatchdogAliveToVhal();
break;
case MSG_VHAL_HEALTH_CHECK:
mService->checkVhalHealth();
break;
default:
ALOGW("Unknown message: %d", message.what);
}
}
} // namespace watchdog
} // namespace automotive
} // namespace android