dumpsys: use a socket for dumping, add timeout support.

Passing the stdout file descriptor directly to a service to dump with
leads to bad things happening if the service hangs, or dumpsys is
terminated prematurely. For example, `dumpsys foo | cat` will not
terminate, even if the dumpsys process is killed, since the write end of
the pipe is still alive in the hung service.

Pass an intermediate socketpair when dumping services to avoid this.

Bug: http://b/26849443
Change-Id: Ide18741080355b3c680275a59172c61734eca92d
diff --git a/cmds/dumpsys/Android.mk b/cmds/dumpsys/Android.mk
index 9be0901..8335c14 100644
--- a/cmds/dumpsys/Android.mk
+++ b/cmds/dumpsys/Android.mk
@@ -5,10 +5,11 @@
 	dumpsys.cpp
 
 LOCAL_SHARED_LIBRARIES := \
+	libbase \
 	libutils \
 	liblog \
 	libbinder
-	
+
 
 ifeq ($(TARGET_OS),linux)
 	LOCAL_CFLAGS += -DXP_UNIX
diff --git a/cmds/dumpsys/dumpsys.cpp b/cmds/dumpsys/dumpsys.cpp
index ef009da..46fb81a 100644
--- a/cmds/dumpsys/dumpsys.cpp
+++ b/cmds/dumpsys/dumpsys.cpp
@@ -5,21 +5,33 @@
 
 #define LOG_TAG "dumpsys"
 
-#include <utils/Log.h>
+#include <algorithm>
+#include <chrono>
+#include <thread>
+
+#include <android-base/file.h>
+#include <android-base/unique_fd.h>
+#include <binder/IServiceManager.h>
 #include <binder/Parcel.h>
 #include <binder/ProcessState.h>
-#include <binder/IServiceManager.h>
 #include <binder/TextOutput.h>
+#include <utils/Log.h>
 #include <utils/Vector.h>
 
+#include <fcntl.h>
 #include <getopt.h>
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
 #include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
 
 using namespace android;
+using android::base::unique_fd;
+using android::base::WriteFully;
 
 static int sort_func(const String16* lhs, const String16* rhs)
 {
@@ -121,23 +133,108 @@
         return 0;
     }
 
-    for (size_t i=0; i<N; i++) {
-        if (IsSkipped(skippedServices, services[i])) continue;
+    for (size_t i = 0; i < N; i++) {
+        String16 service_name = std::move(services[i]);
+        if (IsSkipped(skippedServices, service_name)) continue;
 
-        sp<IBinder> service = sm->checkService(services[i]);
+        sp<IBinder> service = sm->checkService(service_name);
         if (service != NULL) {
+            int sfd[2];
+
+            // Use a socketpair instead of a pipe to avoid sending SIGPIPE to services that timeout.
+            if (socketpair(AF_UNIX, SOCK_STREAM, 0, sfd) != 0) {
+                aerr << "Failed to create socketpair to dump service info for " << service_name
+                     << ": " << strerror(errno) << endl;
+                continue;
+            }
+
+            unique_fd local_end(sfd[0]);
+            unique_fd remote_end(sfd[1]);
+            sfd[0] = sfd[1] = -1;
+
             if (N > 1) {
                 aout << "------------------------------------------------------------"
                         "-------------------" << endl;
-                aout << "DUMP OF SERVICE " << services[i] << ":" << endl;
+                aout << "DUMP OF SERVICE " << service_name << ":" << endl;
             }
-            int err = service->dump(STDOUT_FILENO, args);
-            if (err != 0) {
-                aerr << "Error dumping service info: (" << strerror(err)
-                        << ") " << services[i] << endl;
+
+            // dump blocks until completion, so spawn a thread..
+            std::thread dump_thread([=, remote_end { std::move(remote_end) }]() mutable {
+                int err = service->dump(remote_end.get(), args);
+
+                // It'd be nice to be able to close the remote end of the socketpair before the dump
+                // call returns, to terminate our reads if the other end closes their copy of the
+                // file descriptor, but then hangs for some reason. There doesn't seem to be a good
+                // way to do this, though.
+                remote_end.clear();
+
+                if (err != 0) {
+                    aerr << "Error dumping service info: (" << strerror(err) << ") " << service_name
+                         << endl;
+                }
+            });
+
+            // TODO: Make this configurable at runtime.
+            constexpr auto timeout = std::chrono::seconds(10);
+            auto end = std::chrono::steady_clock::now() + timeout;
+
+            struct pollfd pfd = {
+                .fd = local_end.get(),
+                .events = POLLIN
+            };
+
+            bool timed_out = false;
+            bool error = false;
+            while (true) {
+                // Wrap this in a lambda so that TEMP_FAILURE_RETRY recalculates the timeout.
+                auto time_left_ms = [end]() {
+                    auto now = std::chrono::steady_clock::now();
+                    auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - now);
+                    return std::max(diff.count(), 0ll);
+                };
+
+                int rc = TEMP_FAILURE_RETRY(poll(&pfd, 1, time_left_ms()));
+                if (rc < 0) {
+                    aerr << "Error in poll while dumping service " << service_name << " : "
+                         << strerror(errno) << endl;
+                    error = true;
+                    break;
+                } else if (rc == 0) {
+                    timed_out = true;
+                    break;
+                }
+
+                char buf[4096];
+                rc = TEMP_FAILURE_RETRY(read(local_end.get(), buf, sizeof(buf)));
+                if (rc < 0) {
+                    aerr << "Failed to read while dumping service " << service_name << ": "
+                         << strerror(errno) << endl;
+                    error = true;
+                    break;
+                } else if (rc == 0) {
+                    // EOF.
+                    break;
+                }
+
+                if (!WriteFully(STDOUT_FILENO, buf, rc)) {
+                    aerr << "Failed to write while dumping service " << service_name << ": "
+                         << strerror(errno) << endl;
+                    error = true;
+                    break;
+                }
+            }
+
+            if (timed_out) {
+                aout << endl << "*** SERVICE DUMP TIMEOUT EXPIRED ***" << endl << endl;
+            }
+
+            if (timed_out || error) {
+                dump_thread.detach();
+            } else {
+                dump_thread.join();
             }
         } else {
-            aerr << "Can't find service: " << services[i] << endl;
+            aerr << "Can't find service: " << service_name << endl;
         }
     }