More robust handling of iptables-restore process termination
Bug: 32323979
Test: unit tests pass
Test: bullhead builds and boots
Change-Id: Ib3ea4221b1b2025a0a236f2607db29e1cd30ffa9
diff --git a/server/IptablesRestoreController.cpp b/server/IptablesRestoreController.cpp
index bedb0a4..45f9ab6 100644
--- a/server/IptablesRestoreController.cpp
+++ b/server/IptablesRestoreController.cpp
@@ -21,6 +21,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#define LOG_TAG "IptablesRestoreController"
#include <android-base/logging.h>
#include <android-base/file.h>
@@ -55,13 +56,55 @@
close(pollFds[STDERR_IDX].fd);
}
+ bool outputReady() {
+ struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
+ int ret = poll(&pollfd, 1, 0);
+ if (ret == -1) {
+ ALOGE("outputReady poll failed: %s", strerror(errno));
+ return false;
+ }
+ return (ret == 1) && !(pollfd.revents & POLLERR);
+ }
+
+ void stop() {
+ if (processTerminated) return;
+
+ // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
+ // process was killed by something else on the system). In both cases, it's safe to send the
+ // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
+ // waitpid on it, so there's no risk that the PID is reused.
+ int err = kill(pid, SIGTERM);
+ if (err) {
+ err = errno;
+ }
+
+ if (err == ESRCH) {
+ // This means that someone else inside netd but outside this class called waitpid(),
+ // which is a programming error. There's no point in calling waitpid() here since we
+ // know that the process is gone.
+ ALOGE("iptables child process %d unexpectedly disappeared", pid);
+ processTerminated = true;
+ return;
+ }
+
+ if (err) {
+ ALOGE("Error killing iptables child process %d: %s", pid, strerror(err));
+ }
+
+ if (waitpid(pid, nullptr, 0) == -1) {
+ ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno));
+ }
+
+ processTerminated = true;
+ }
+
const pid_t pid;
const int stdIn;
struct pollfd pollFds[2];
std::string errBuf;
- bool processTerminated;
+ std::atomic_bool processTerminated;
static constexpr size_t STDOUT_IDX = 0;
static constexpr size_t STDERR_IDX = 1;
@@ -152,76 +195,6 @@
return new IptablesProcess(child_pid, stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
}
-void sigchldHandler(int /* signal_number */, siginfo_t *siginfo, void* /* context */) {
- // Save and restore errno to prevent threads from spuriously seeing
- // incorrect errors due to errors from this signal handler.
- int saved_errno = errno;
-
- // Notify the IptablesRestoreController so that it can try to recover. Log
- // relevant information if it's one of the process we care about. netd
- // forks other processes as well, so there's no need to spam the logs
- // every time one of those dies.
- const pid_t child_pid = siginfo->si_pid;
- const IptablesRestoreController::IptablesProcessType process =
- sInstance->notifyChildTermination(child_pid);
-
- if (process != IptablesRestoreController::INVALID_PROCESS) {
- // This should return immediately because we've been informed that
- // |child_pid| just exited.
- pid_t wait_result = waitpid(child_pid, nullptr, WNOHANG);
- if (wait_result < 0) {
- PLOG(WARNING) << "waitpid for child " << child_pid << " unexpectedly failed";
- }
-
- if (siginfo->si_code == CLD_EXITED) {
- LOG(WARNING) << "iptables[6]-restore process exited (pid=" << child_pid
- << ") exit_status=" << siginfo->si_status
- << " type=" << process;
- } else {
- LOG(WARNING) << "iptables[6]-restore process was signalled (pid=" << child_pid
- << ") signal=" << siginfo->si_status
- << " type=" << process;
- }
- }
-
- errno = saved_errno;
-}
-
-/* static */
-void IptablesRestoreController::installSignalHandler(IptablesRestoreController *singleton) {
- if (singleton == nullptr) {
- LOG(ERROR) << "installSignalHandler: singleton == nullptr";
- }
-
- sInstance = singleton;
-
- struct sigaction sa = {};
- sa.sa_flags = SA_SIGINFO;
- sa.sa_sigaction = sigchldHandler;
- const int err = sigaction(SIGCHLD, &sa, nullptr);
- if (err < 0) {
- PLOG(ERROR) << "Unable to set SIGCHLD handler.";
- }
-}
-
-IptablesRestoreController::IptablesProcessType
-IptablesRestoreController::notifyChildTermination(pid_t pid) {
- // We minimize the amount of work that we do from the signal handler, given
- // that this can be called at any arbitrary point of time.
-
- if (mIpRestore != nullptr && mIpRestore->pid == pid) {
- mIpRestore->processTerminated = true;
- return IPTABLES_PROCESS;
- }
-
- if (mIp6Restore != nullptr && mIp6Restore->pid == pid) {
- mIp6Restore->processTerminated = true;
- return IP6TABLES_PROCESS;
- }
-
- return INVALID_PROCESS;
-}
-
// TODO: Return -errno on failure instead of -1.
// TODO: Maybe we should keep a rotating buffer of the last N commands
// so that they can be dumped on dumpsys.
@@ -230,6 +203,7 @@
std::unique_ptr<IptablesProcess> *process =
(type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
+
// We might need to fork a new process if we haven't forked one yet, or
// if the forked process terminated.
//
@@ -237,7 +211,13 @@
// recover from a child death. If the child dies at some later point during
// the execution of this method, we will receive an EPIPE and return an
// error. The command will then need to be retried at a higher level.
- if (process->get() == nullptr || (*process)->processTerminated) {
+ IptablesProcess *existingProcess = process->get();
+ if (existingProcess != nullptr && !existingProcess->outputReady()) {
+ existingProcess->stop();
+ existingProcess = nullptr;
+ }
+
+ if (existingProcess == nullptr) {
// Fork a new iptables[6]-restore process.
IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
if (newProcess == nullptr) {
@@ -259,6 +239,7 @@
fixedCommand.data(),
fixedCommand.length())) {
PLOG(ERROR) << "Unable to send command";
+ return -1;
}
if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
@@ -267,7 +248,7 @@
}
if (!drainAndWaitForAck(*process)) {
- LOG(ERROR) << "Timed out waiting for response from iptables process: " << (*process)->pid;
+ // drainAndWaitForAck has already logged an error.
return -1;
}
@@ -324,7 +305,8 @@
}
// We've timed out, which means something has gone wrong - we know that stdout should have
- // become available to read with the ACK message.
+ // become available to read with the ACK message, or that stderr should have been available
+ // to read with an error message.
if (numEvents == 0) {
continue;
}
@@ -361,9 +343,22 @@
IptablesRestoreController::maybeLogStderr(process, buffer, size);
}
}
+ if (pollfd.revents & POLLHUP) {
+ // The pipe was closed. This likely means the subprocess is exiting, since
+ // iptables-restore only closes stdin on error.
+ process->stop();
+ break;
+ }
}
}
+ if (!receivedAck) {
+ if (process->processTerminated)
+ ALOGE("iptables-restore process %d terminated", process->pid);
+ else
+ ALOGE("Timed out waiting for response from iptables process %d", process->pid);
+ }
+
return receivedAck;
}
@@ -379,3 +374,7 @@
}
return res;
}
+
+int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
+ return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
+}