ueventd: parallelize uevent handling

fork() subprocesses to handle uevents in parallel.

This reduces coldboot time on bullhead from ~446ms to ~230ms.
This reduces coldboot time on sailfish from ~690ms to ~360ms.
This reduces coldboot time on ryu from ~187ms to ~122ms.

Bug: 33785894

Test: boot bullhead x40, observe no major differences in /dev and /sys
Test: boot sailfish x40, observe no major differences in /dev and /sys
Test: boot ryu x40, observe no major differences in /dev and /sys
Test: boottime tests on bullhead and sailfish
Test: init unit tests

Change-Id: Ie2f63e000b8af78d187477d31fe109f20304d749
diff --git a/init/devices.cpp b/init/devices.cpp
index 40f9740..c52d8f8 100644
--- a/init/devices.cpp
+++ b/init/devices.cpp
@@ -175,7 +175,7 @@
         if (s.MatchWithSubsystem(path, subsystem)) s.SetPermissions(path);
     }
 
-    if (access(path.c_str(), F_OK) == 0) {
+    if (!skip_restorecon_ && access(path.c_str(), F_OK) == 0) {
         LOG(VERBOSE) << "restorecon_recursive: " << path;
         if (selinux_android_restorecon(path.c_str(), SELINUX_ANDROID_RESTORECON_RECURSE) != 0) {
             PLOG(ERROR) << "selinux_android_restorecon(" << path << ") failed";
@@ -467,12 +467,13 @@
 
 DeviceHandler::DeviceHandler(std::vector<Permissions> dev_permissions,
                              std::vector<SysfsPermissions> sysfs_permissions,
-                             std::vector<Subsystem> subsystems)
+                             std::vector<Subsystem> subsystems, bool skip_restorecon)
     : dev_permissions_(std::move(dev_permissions)),
       sysfs_permissions_(std::move(sysfs_permissions)),
       subsystems_(std::move(subsystems)),
-      sehandle_(selinux_android_file_context_handle()) {}
+      sehandle_(selinux_android_file_context_handle()),
+      skip_restorecon_(skip_restorecon) {}
 
 DeviceHandler::DeviceHandler()
     : DeviceHandler(std::vector<Permissions>{}, std::vector<SysfsPermissions>{},
-                    std::vector<Subsystem>{}) {}
+                    std::vector<Subsystem>{}, false) {}
diff --git a/init/devices.h b/init/devices.h
index 50f49fc..09a0ce3 100644
--- a/init/devices.h
+++ b/init/devices.h
@@ -114,14 +114,21 @@
     DeviceHandler();
     DeviceHandler(std::vector<Permissions> dev_permissions,
                   std::vector<SysfsPermissions> sysfs_permissions,
-                  std::vector<Subsystem> subsystems);
+                  std::vector<Subsystem> subsystems, bool skip_restorecon);
     ~DeviceHandler(){};
 
     void HandleDeviceEvent(const Uevent& uevent);
+
+    void FixupSysPermissions(const std::string& upath, const std::string& subsystem) const;
+
+    void HandlePlatformDeviceEvent(const Uevent& uevent);
+    void HandleBlockDeviceEvent(const Uevent& uevent) const;
+    void HandleGenericDeviceEvent(const Uevent& uevent) const;
+
     std::vector<std::string> GetBlockDeviceSymlinks(const Uevent& uevent) const;
+    void set_skip_restorecon(bool value) { skip_restorecon_ = value; }
 
   private:
-    void FixupSysPermissions(const std::string& upath, const std::string& subsystem) const;
     std::tuple<mode_t, uid_t, gid_t> GetDevicePermissions(
         const std::string& path, const std::vector<std::string>& links) const;
     void MakeDevice(const std::string& path, int block, int major, int minor,
@@ -129,15 +136,13 @@
     std::vector<std::string> GetCharacterDeviceSymlinks(const Uevent& uevent) const;
     void HandleDevice(const std::string& action, const std::string& devpath, int block, int major,
                       int minor, const std::vector<std::string>& links) const;
-    void HandlePlatformDeviceEvent(const Uevent& uevent);
-    void HandleBlockDeviceEvent(const Uevent& uevent) const;
-    void HandleGenericDeviceEvent(const Uevent& uevent) const;
 
     std::vector<Permissions> dev_permissions_;
     std::vector<SysfsPermissions> sysfs_permissions_;
     std::vector<Subsystem> subsystems_;
     PlatformDeviceList platform_devices_;
     selabel_handle* sehandle_;
+    bool skip_restorecon_;
 };
 
 // Exposed for testing
diff --git a/init/firmware_handler.cpp b/init/firmware_handler.cpp
index 1471aeb..844c605 100644
--- a/init/firmware_handler.cpp
+++ b/init/firmware_handler.cpp
@@ -18,6 +18,7 @@
 
 #include <fcntl.h>
 #include <sys/sendfile.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #include <string>
@@ -103,14 +104,29 @@
     if (uevent.subsystem != "firmware" || uevent.action != "add") return;
 
     // Loading the firmware in a child means we can do that in parallel...
-    // (We ignore SIGCHLD rather than wait for our children.)
+    // We double fork instead of waiting for these processes.
     pid_t pid = fork();
-    if (pid == 0) {
-        Timer t;
-        ProcessFirmwareEvent(uevent);
-        LOG(INFO) << "loading " << uevent.path << " took " << t;
-        _exit(EXIT_SUCCESS);
-    } else if (pid == -1) {
+    if (pid == -1) {
         PLOG(ERROR) << "could not fork to process firmware event for " << uevent.firmware;
+        return;
     }
+
+    if (pid == 0) {  // First child: only forks the real worker, then exits.
+        pid = fork();
+        if (pid == -1) {
+            PLOG(ERROR) << "could not fork a second time to process firmware event for "
+                        << uevent.firmware;
+            _exit(EXIT_FAILURE);
+        }
+        if (pid == 0) {  // Second child: performs the actual firmware load.
+            Timer t;
+            ProcessFirmwareEvent(uevent);
+            LOG(INFO) << "loading " << uevent.path << " took " << t;
+            _exit(EXIT_SUCCESS);
+        }
+
+        _exit(EXIT_SUCCESS);  // Exit right away so the caller's waitpid() on this pid returns.
+    }
+
+    waitpid(pid, nullptr, 0);
 }
diff --git a/init/uevent_listener.cpp b/init/uevent_listener.cpp
index 27c5d23..01b8250 100644
--- a/init/uevent_listener.cpp
+++ b/init/uevent_listener.cpp
@@ -165,7 +165,7 @@
     return RegenerateUeventsForDir(d.get(), callback);
 }
 
-static const char* kRegenerationPaths[] = {"/sys/class", "/sys/block", "/sys/devices"};
+const char* kRegenerationPaths[] = {"/sys/class", "/sys/block", "/sys/devices"};
 
 void UeventListener::RegenerateUevents(RegenerateCallback callback) const {
     for (const auto path : kRegenerationPaths) {
diff --git a/init/uevent_listener.h b/init/uevent_listener.h
index ba31aaa..8e6f3b4 100644
--- a/init/uevent_listener.h
+++ b/init/uevent_listener.h
@@ -35,6 +35,8 @@
 using RegenerateCallback = std::function<RegenerationAction(const Uevent&)>;
 using PollCallback = std::function<void(const Uevent&)>;
 
+extern const char* kRegenerationPaths[3];
+
 class UeventListener {
   public:
     UeventListener();
diff --git a/init/ueventd.cpp b/init/ueventd.cpp
index bd21a3e..31e4106 100644
--- a/init/ueventd.cpp
+++ b/init/ueventd.cpp
@@ -22,10 +22,15 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/wait.h>
+
+#include <set>
+#include <thread>
 
 #include <android-base/logging.h>
 #include <android-base/properties.h>
 #include <android-base/stringprintf.h>
+#include <selinux/android.h>
 #include <selinux/selinux.h>
 
 #include "devices.h"
@@ -35,6 +40,194 @@
 #include "ueventd_parser.h"
 #include "util.h"
 
+// At a high level, ueventd listens for uevent messages generated by the kernel through a netlink
+// socket.  When ueventd receives such a message it handles it by taking appropriate actions,
+// which can typically be creating a device node in /dev, setting file permissions, setting selinux
+// labels, etc.
+// Ueventd also handles loading of firmware that the kernel requests, and creates symlinks for block
+// and character devices.
+
+// When ueventd starts, it regenerates uevents for all currently registered devices by traversing
+// /sys and writing 'add' to each 'uevent' file that it finds.  This causes the kernel to generate
+// and resend uevent messages for all of the currently registered devices.  This is done, because
+// ueventd would not have been running when these devices were registered and therefore was unable
+// to receive their uevent messages and handle them appropriately.  This process is known as
+// 'cold boot'.
+
+// 'init' currently waits synchronously on the cold boot process of ueventd before it continues
+// its boot process.  For this reason, cold boot should be as quick as possible.  One way to achieve
+// a speed up here is to parallelize the handling of uevent messages, which consumes the bulk of the
+// time during cold boot.
+
+// Handling of uevent messages has two unique properties:
+// 1) It can be done in isolation; it doesn't need to read or write any status once it is started.
+// 2) It uses setegid() and setfscreatecon() so either care (aka locking) must be taken to ensure
+//    that no file system operations are done while the uevent process has an abnormal egid or
+//    fscreatecon or this handling must happen in a separate process.
+// Given the above two properties, it is best to fork() subprocesses to handle the uevents.  This
+// reduces the overhead and complexity that would be required in a solution with threads and locks.
+// In testing, a racy multithreaded solution has the same performance as the fork() solution, so
+// there is no reason to deal with the complexity of the former.
+
+// One other important caveat during the boot process is the handling of SELinux restorecon.
+// Since many devices have child devices, calling selinux_android_restorecon() recursively for each
+// device when its uevent is handled, results in multiple restorecon operations being done on a
+// given file.  It is more efficient to simply do restorecon recursively on /sys during cold boot,
+// than to do restorecon on each device as its uevent is handled.  This only applies to cold boot;
+// once that has completed, restorecon is done for each device as its uevent is handled.
+
+// With all of the above considered, the cold boot process has the below steps:
+// 1) ueventd regenerates uevents by doing the /sys traversal and listens to the netlink socket for
+//    the generated uevents.  It writes these uevents into a queue represented by a vector.
+//
+// 2) ueventd forks 'n' separate uevent handler subprocesses and has each of them handle the
+//    uevents in the queue based on a starting offset (their process number) and a stride (the total
+//    number of processes).  Note that no IPC happens at this point and only const functions from
+//    DeviceHandler should be called from this context.
+//
+// 3) In parallel to the subprocesses handling the uevents, the main thread of ueventd calls
+//    selinux_android_restorecon() recursively on /sys/class, /sys/block, and /sys/devices.
+//
+// 4) Once the restorecon operation finishes, the main thread calls waitpid() to wait for all
+//    subprocess handlers to complete and exit.  Once this happens, it marks coldboot as having
+//    completed.
+//
+// At this point, ueventd is single threaded; it poll()s for and then handles any future uevents.
+
+// Lastly, it should be noted that uevents that occur during the coldboot process are handled
+// without issue after the coldboot process completes.  This is because the uevent listener is
+// paused while the uevent handler and restorecon actions take place.  Once coldboot completes,
+// the uevent listener resumes in polling mode and will handle the uevents that occurred during
+// coldboot.
+
+class ColdBoot {
+  public:
+    ColdBoot(UeventListener& uevent_listener, DeviceHandler& device_handler)
+        : uevent_listener_(uevent_listener),
+          device_handler_(device_handler),
+          num_handler_subprocesses_(std::thread::hardware_concurrency() ?: 4) {}
+
+    void Run();  // Performs the whole cold boot sequence and creates COLDBOOT_DONE.
+
+  private:
+    void UeventHandlerMain(unsigned int process_num, unsigned int total_processes);
+    void RegenerateUevents();    // Fills uevent_queue_; called before any fork().
+    void ForkSubProcesses();     // Starts num_handler_subprocesses_ handler children.
+    void DoRestoreCon();         // Recursive restorecon over kRegenerationPaths.
+    void WaitForSubProcesses();  // Reaps the children recorded in subprocess_pids_.
+
+    UeventListener& uevent_listener_;  // Reference to the caller's listener; not owned.
+    DeviceHandler& device_handler_;    // Reference to the caller's handler; not owned.
+
+    unsigned int num_handler_subprocesses_;  // hardware_concurrency(), or 4 when that is 0.
+    std::vector<Uevent> uevent_queue_;       // Uevents captured by RegenerateUevents().
+
+    std::set<pid_t> subprocess_pids_;  // Handler children that have not been reaped yet.
+};
+
+void ColdBoot::UeventHandlerMain(unsigned int process_num, unsigned int total_processes) {
+    for (unsigned int i = process_num; i < uevent_queue_.size(); i += total_processes) {
+        auto& uevent = uevent_queue_[i];  // Entries process_num, process_num + total_processes, ...
+        if (uevent.action == "add" || uevent.action == "change" || uevent.action == "online") {
+            device_handler_.FixupSysPermissions(uevent.path, uevent.subsystem);
+        }
+        // Dispatch on subsystem: block devices have their own handler, the rest is generic.
+        if (uevent.subsystem == "block") {
+            device_handler_.HandleBlockDeviceEvent(uevent);
+        } else {
+            device_handler_.HandleGenericDeviceEvent(uevent);
+        }
+    }
+    _exit(EXIT_SUCCESS);  // Called in a forked child: terminate here instead of returning.
+}
+
+void ColdBoot::RegenerateUevents() {
+    uevent_listener_.RegenerateUevents([this](const Uevent& uevent) {
+        HandleFirmwareEvent(uevent);
+
+        // This is the one mutable part of DeviceHandler, in which platform devices are
+        // added to a vector for later reference.  Since there is no communication after
+        // fork()'ing subprocess handlers, all platform devices must be in the vector before
+        // we fork, and therefore they must be handled in this loop.
+        if (uevent.subsystem == "platform") {
+            device_handler_.HandlePlatformDeviceEvent(uevent);
+        }
+
+        uevent_queue_.emplace_back(uevent);  // Copies: |uevent| is const, so it cannot be moved.
+        return RegenerationAction::kContinue;
+    });
+}
+
+void ColdBoot::ForkSubProcesses() {
+    for (unsigned int i = 0; i < num_handler_subprocesses_; ++i) {
+        auto pid = fork();
+        if (pid < 0) {
+            PLOG(FATAL) << "fork() failed!";
+        }
+        // Child: handle its share of the queue; UeventHandlerMain() ends in _exit().
+        if (pid == 0) {
+            UeventHandlerMain(i, num_handler_subprocesses_);
+        }
+        // Parent: remember the child so that WaitForSubProcesses() can account for it.
+        subprocess_pids_.emplace(pid);
+    }
+}
+
+void ColdBoot::DoRestoreCon() {
+    for (const char* path : kRegenerationPaths) {
+        selinux_android_restorecon(path, SELINUX_ANDROID_RESTORECON_RECURSE);  // result ignored
+    }
+    device_handler_.set_skip_restorecon(false);  // Bulk pass done; clear the skip flag.
+}
+
+void ColdBoot::WaitForSubProcesses() {
+    // Treat subprocesses that crash or get stuck the same as if ueventd itself has crashed or gets
+    // stuck.
+    //
+    // When a subprocess crashes, we fatally abort from ueventd.  init will restart ueventd when
+    // init reaps it, and the cold boot process will start again.  If this continues to fail, then
+    // since ueventd is marked as a critical service, init will reboot to recovery.
+    //
+    // When a subprocess gets stuck, keep ueventd spinning waiting for it.  init has a timeout for
+    // cold boot and will reboot to the bootloader if ueventd does not complete in time.
+    while (!subprocess_pids_.empty()) {
+        int status;
+        pid_t pid = TEMP_FAILURE_RETRY(waitpid(-1, &status, 0));
+        if (pid == -1) {
+            PLOG(ERROR) << "waitpid() failed";
+            continue;  // NOTE(review): a persistent error (e.g. ECHILD) makes this loop spin.
+        }
+
+        auto it = subprocess_pids_.find(pid);  // Set lookup; avoids a linear std::find scan.
+        if (it == subprocess_pids_.end()) continue;  // Not one of the handler children.
+
+        if (WIFEXITED(status)) {
+            if (WEXITSTATUS(status) == EXIT_SUCCESS) {
+                subprocess_pids_.erase(it);
+            } else {
+                LOG(FATAL) << "subprocess exited with status " << WEXITSTATUS(status);
+            }
+        } else if (WIFSIGNALED(status)) {
+            LOG(FATAL) << "subprocess killed by signal " << WTERMSIG(status);
+        }
+    }
+}
+
+void ColdBoot::Run() {
+    Timer cold_boot_timer;
+    // 1) Collect every regenerated uevent into uevent_queue_ before forking.
+    RegenerateUevents();
+    // 2) Fork the handler subprocesses; each works through its share of the queue.
+    ForkSubProcesses();
+    // 3) Meanwhile, run restorecon over the regeneration paths from this process.
+    DoRestoreCon();
+    // 4) Block until every handler subprocess has exited successfully.
+    WaitForSubProcesses();
+    // Create the COLDBOOT_DONE marker file; ueventd_main skips cold boot when it exists.
+    close(open(COLDBOOT_DONE, O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
+    LOG(INFO) << "Coldboot took " << cold_boot_timer;
+}
+
 DeviceHandler CreateDeviceHandler() {
     Parser parser;
 
@@ -64,11 +257,10 @@
     parser.ParseConfig("/ueventd." + hardware + ".rc");
 
     return DeviceHandler(std::move(dev_permissions), std::move(sysfs_permissions),
-                         std::move(subsystems));
+                         std::move(subsystems), true);
 }
 
-int ueventd_main(int argc, char **argv)
-{
+int ueventd_main(int argc, char** argv) {
     /*
      * init sets the umask to 077 for forked processes. We need to
      * create files with exact permissions, without modification by
@@ -76,13 +268,6 @@
      */
     umask(000);
 
-    /* Prevent fire-and-forget children from becoming zombies.
-     * If we should need to wait() for some children in the future
-     * (as opposed to none right now), double-forking here instead
-     * of ignoring SIGCHLD may be the better solution.
-     */
-    signal(SIGCHLD, SIG_IGN);
-
     InitKernelLogging(argv);
 
     LOG(INFO) << "ueventd started!";
@@ -95,16 +280,8 @@
     UeventListener uevent_listener;
 
     if (access(COLDBOOT_DONE, F_OK) != 0) {
-        Timer t;
-
-        uevent_listener.RegenerateUevents([&device_handler](const Uevent& uevent) {
-            HandleFirmwareEvent(uevent);
-            device_handler.HandleDeviceEvent(uevent);
-            return RegenerationAction::kContinue;
-        });
-
-        close(open(COLDBOOT_DONE, O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
-        LOG(INFO) << "Coldboot took " << t;
+        ColdBoot cold_boot(uevent_listener, device_handler);
+        cold_boot.Run();
     }
 
     uevent_listener.DoPolling([&device_handler](const Uevent& uevent) {