base: Port subprocess to Windows

Introduces a functionally equivalent implementation of
base::Subprocess for Windows. Unlike the POSIX one, the Windows
implementation uses blocking I/O and spawns two threads
(one for draining stdout/err, one for pumping stdin).
Using OVERLAPPED I/O on Windows is too complex (and internally
it also seems to use a thread pool anyway) and isn't
worth the risk, given that Subprocess is used only by tools/.
The only changes introduced on Linux/Android/Mac are:
- Changing the MovableState to be a unique_ptr. This makes it safe to use
  from other threads across std::move.
- Removing the kKilledBySignal state and folding it, together with
  kExited, into the new kTerminated state.
- Adding an explicit bool timed_out() accessor.

Test: perfetto_unittests.exe --gtest_filter=Subprocess*
Bug: 174454879
Change-Id: Ifa6ace5c58cf043861f474fff98e798f7764fb69
diff --git a/Android.bp b/Android.bp
index d089dff..2265a60 100644
--- a/Android.bp
+++ b/Android.bp
@@ -6412,7 +6412,9 @@
     "src/base/string_splitter.cc",
     "src/base/string_utils.cc",
     "src/base/string_view.cc",
+    "src/base/subprocess.cc",
     "src/base/subprocess_posix.cc",
+    "src/base/subprocess_windows.cc",
     "src/base/temp_file.cc",
     "src/base/thread_checker.cc",
     "src/base/thread_task_runner.cc",
diff --git a/BUILD b/BUILD
index 752d469..8d3c5ea 100644
--- a/BUILD
+++ b/BUILD
@@ -584,7 +584,9 @@
         "src/base/string_splitter.cc",
         "src/base/string_utils.cc",
         "src/base/string_view.cc",
+        "src/base/subprocess.cc",
         "src/base/subprocess_posix.cc",
+        "src/base/subprocess_windows.cc",
         "src/base/temp_file.cc",
         "src/base/thread_checker.cc",
         "src/base/thread_task_runner.cc",
diff --git a/include/perfetto/ext/base/subprocess.h b/include/perfetto/ext/base/subprocess.h
index a57ada9..77b2dfd 100644
--- a/include/perfetto/ext/base/subprocess.h
+++ b/include/perfetto/ext/base/subprocess.h
@@ -17,27 +17,19 @@
 #ifndef INCLUDE_PERFETTO_EXT_BASE_SUBPROCESS_H_
 #define INCLUDE_PERFETTO_EXT_BASE_SUBPROCESS_H_
 
-#include "perfetto/base/build_config.h"
-
-// This is a #if as opposite to a GN condition, because GN conditions aren't propagated when
-// translating to Bazel or other build systems, as they get resolved at translation time. Without
-// this, the Bazel build breaks on Windows.
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
-    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
-    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
-#define PERFETTO_HAS_SUBPROCESS() 1
-#else
-#define PERFETTO_HAS_SUBPROCESS() 0
-#endif
-
+#include <condition_variable>
 #include <functional>
 #include <initializer_list>
+#include <mutex>
 #include <string>
 #include <thread>
 #include <vector>
 
+#include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/base/platform_handle.h"
 #include "perfetto/base/proc_utils.h"
+#include "perfetto/ext/base/event_fd.h"
 #include "perfetto/ext/base/pipe.h"
 #include "perfetto/ext/base/scoped_file.h"
 
@@ -51,7 +43,7 @@
 //    This happens when |args.exec_cmd| is not empty.
 //    This is safe to use even in a multi-threaded environment.
 // 2) fork(): for spawning a process and running a function.
-//    This happens when |args.entrypoint_for_testing| is not empty.
+//    This happens when |args.posix_entrypoint_for_testing| is not empty.
 //    This is intended only for tests as it is extremely subtle.
 //    This mode must be used with extreme care. Before the entrypoint is
 //    invoked all file descriptors other than stdin/out/err and the ones
@@ -102,15 +94,15 @@
 //     p.Start();
 //     p.Wait();
 // )
-// EXPECT_EQ(p.status(), base::Subprocess::kExited);
+// EXPECT_EQ(p.status(), base::Subprocess::kTerminated);
 // EXPECT_EQ(p.returncode(), 0);
 class Subprocess {
  public:
   enum Status {
     kNotStarted = 0,  // Before calling Start() or Call().
     kRunning,         // After calling Start(), before Wait().
-    kExited,          // The subprocess exited (either succesully or not).
-    kKilledBySignal,  // The subprocess has been killed by a signal.
+    kTerminated,      // The subprocess terminated, either successfully or not.
+                      // This includes crashes or other signals on UNIX.
   };
 
   enum OutputMode {
@@ -128,19 +120,20 @@
     // If non-empty this will cause an exec() when Start()/Call() are called.
     std::vector<std::string> exec_cmd;
 
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
     // If non-empty, it changes the argv[0] argument passed to exec. If
     // unset, argv[0] == exec_cmd[0]. This is to handle cases like:
     // exec_cmd = {"/proc/self/exec"}, argv0: "my_custom_test_override".
-    std::string argv0_override;
+    std::string posix_argv0_override_for_testing;
 
     // If non-empty this will be invoked on the fork()-ed child process, after
     // stdin/out/err has been redirected and all other file descriptor are
-    // closed.
-    // It is valid to specify both |exec_cmd| AND |entrypoint_for_testing|.
-    // In this case |entrypoint_for_testing| will be invoked just before the
-    // exec() call, but after having closed all fds % stdin/out/err.
+    // closed. It is valid to specify both |exec_cmd| AND
+    // |posix_entrypoint_for_testing|. In this case the latter will be invoked
+    // just before the exec() call, but after having closed all fds % stdin/o/e.
     // This is for synchronization barriers in tests.
-    std::function<void()> entrypoint_for_testing;
+    std::function<void()> posix_entrypoint_for_testing;
+#endif
 
     // If non-empty, replaces the environment passed to exec().
     std::vector<std::string> env;
@@ -154,7 +147,7 @@
     OutputMode stdout_mode = kInherit;
     OutputMode stderr_mode = kInherit;
 
-    base::ScopedFile out_fd;
+    base::ScopedPlatformHandle out_fd;
 
     // Returns " ".join(exec_cmd), quoting arguments.
     std::string GetCmdString() const;
@@ -201,47 +194,71 @@
   // Sends a signal (SIGKILL if not specified) and wait for process termination.
   void KillAndWaitForTermination(int sig_num = 0);
 
-  PlatformProcessId pid() const { return s_.pid; }
+  PlatformProcessId pid() const { return s_->pid; }
 
   // The accessors below are updated only after a call to Poll(), Wait() or
   // KillAndWaitForTermination().
   // In most cases you want to call Poll() rather than these accessors.
 
-  Status status() const { return s_.status; }
-  int returncode() const { return s_.returncode; }
+  Status status() const { return s_->status; }
+  int returncode() const { return s_->returncode; }
+  bool timed_out() const { return s_->timed_out; }
 
   // This contains both stdout and stderr (if the corresponding _mode ==
   // kBuffer). It's non-const so the caller can std::move() it.
-  std::string& output() { return s_.output; }
-  const ResourceUsage& rusage() const { return *s_.rusage; }
+  std::string& output() { return s_->output; }
+  const std::string& output() const { return s_->output; }
+
+  const ResourceUsage& posix_rusage() const { return *s_->rusage; }
 
   Args args;
 
  private:
+  // The signal/exit code used when killing the process in case of a timeout.
+  static const int kTimeoutSignal;
+
   Subprocess(const Subprocess&) = delete;
   Subprocess& operator=(const Subprocess&) = delete;
-  void TryPushStdin();
-  void TryReadStdoutAndErr();
-  void TryReadExitStatus();
-  void KillAtMostOnce();
-  bool PollInternal(int poll_timeout_ms);
 
   // This is to deal robustly with the move operators, without having to
   // manually maintain member-wise move instructions.
   struct MovableState {
     base::Pipe stdin_pipe;
     base::Pipe stdouterr_pipe;
-    base::Pipe exit_status_pipe;
     PlatformProcessId pid;
-    size_t input_written = 0;
     Status status = kNotStarted;
     int returncode = -1;
     std::string output;  // Stdin+stderr. Only when kBuffer.
+    std::unique_ptr<ResourceUsage> rusage{new ResourceUsage()};
+    bool timed_out = false;
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    std::thread stdouterr_thread;
+    std::thread stdin_thread;
+    ScopedPlatformHandle win_proc_handle;
+    ScopedPlatformHandle win_thread_handle;
+
+    base::EventFd stdouterr_done_event;
+    std::mutex mutex;  // Protects locked_outerr_buf and the two pipes.
+    std::string locked_outerr_buf;
+#else
+    base::Pipe exit_status_pipe;
+    size_t input_written = 0;
     std::thread waitpid_thread;
-    std::unique_ptr<ResourceUsage> rusage;
+#endif
   };
 
-  MovableState s_;
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  static void StdinThread(MovableState*, std::string input);
+  static void StdoutErrThread(MovableState*);
+#else
+  void TryPushStdin();
+  void TryReadStdoutAndErr();
+  void TryReadExitStatus();
+  void KillAtMostOnce();
+  bool PollInternal(int poll_timeout_ms);
+#endif
+
+  std::unique_ptr<MovableState> s_;
 };
 
 }  // namespace base
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index d638f09..b67ed31 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -36,7 +36,9 @@
     "string_splitter.cc",
     "string_utils.cc",
     "string_view.cc",
+    "subprocess.cc",
     "subprocess_posix.cc",
+    "subprocess_windows.cc",
     "temp_file.cc",
     "thread_checker.cc",
     "time.cc",
@@ -148,7 +150,6 @@
     "string_utils_unittest.cc",
     "string_view_unittest.cc",
     "string_writer_unittest.cc",
-    "subprocess_unittest.cc",
     "task_runner_unittest.cc",
     "temp_file_unittest.cc",
     "thread_checker_unittest.cc",
@@ -157,6 +158,11 @@
     "uuid_unittest.cc",
     "weak_ptr_unittest.cc",
   ]
+  if (is_linux || is_android || is_mac || is_win) {
+    # Don't run on Fuchsia, NaCL. They pretend to be POSIX but then give up on
+    # execve(2).
+    sources += [ "subprocess_unittest.cc" ]
+  }
 
   # TODO: Enable these for Windows when possible.
   if (!is_win) {
diff --git a/src/base/subprocess.cc b/src/base/subprocess.cc
new file mode 100644
index 0000000..3a18239
--- /dev/null
+++ b/src/base/subprocess.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/ext/base/subprocess.h"
+
+#include <tuple>
+
+// This file contains only the common bits (ctors / dtors / move operators).
+// The rest lives in subprocess_posix.cc and subprocess_windows.cc.
+
+namespace perfetto {
+namespace base {
+
+Subprocess::Args::Args(Args&&) noexcept = default;
+Subprocess::Args& Subprocess::Args::operator=(Args&&) = default;
+
+Subprocess::Subprocess(std::initializer_list<std::string> a)
+    : args(a), s_(new MovableState()) {}
+
+Subprocess::Subprocess(Subprocess&& other) noexcept {
+  static_assert(sizeof(Subprocess) ==
+                    sizeof(std::tuple<std::unique_ptr<MovableState>, Args>),
+                "base::Subprocess' move ctor needs updating");
+  s_ = std::move(other.s_);
+  args = std::move(other.args);
+
+  // Reset the state of the moved-from object.
+  other.s_.reset(new MovableState());
+  other.~Subprocess();
+  new (&other) Subprocess();
+}
+
+Subprocess& Subprocess::operator=(Subprocess&& other) {
+  this->~Subprocess();
+  new (this) Subprocess(std::move(other));
+  return *this;
+}
+
+Subprocess::~Subprocess() {
+  if (s_->status == kRunning)
+    KillAndWaitForTermination();
+}
+
+bool Subprocess::Call(int timeout_ms) {
+  PERFETTO_CHECK(s_->status == kNotStarted);
+  Start();
+
+  if (!Wait(timeout_ms)) {
+    s_->timed_out = true;
+    KillAndWaitForTermination(kTimeoutSignal);
+  }
+  PERFETTO_DCHECK(s_->status != kRunning);
+  return s_->status == kTerminated && s_->returncode == 0;
+}
+
+std::string Subprocess::Args::GetCmdString() const {
+  std::string str;
+  for (size_t i = 0; i < exec_cmd.size(); i++) {
+    str += i > 0 ? " \"" : "";
+    str += exec_cmd[i];
+    str += i > 0 ? "\"" : "";
+  }
+  return str;
+}
+
+}  // namespace base
+}  // namespace perfetto
diff --git a/src/base/subprocess_posix.cc b/src/base/subprocess_posix.cc
index ab9be44..c5321f6 100644
--- a/src/base/subprocess_posix.cc
+++ b/src/base/subprocess_posix.cc
@@ -16,7 +16,11 @@
 
 #include "perfetto/ext/base/subprocess.h"
 
-#if PERFETTO_HAS_SUBPROCESS()
+#include "perfetto/base/build_config.h"
+
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
 
 #include <fcntl.h>
 #include <poll.h>
@@ -31,16 +35,15 @@
 #include <thread>
 #include <tuple>
 
-#include "perfetto/base/build_config.h"
-#include "perfetto/base/logging.h"
-#include "perfetto/base/time.h"
-#include "perfetto/ext/base/utils.h"
-
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
 #include <sys/prctl.h>
 #endif
 
+#include "perfetto/base/logging.h"
+#include "perfetto/base/time.h"
+#include "perfetto/ext/base/utils.h"
+
 // In MacOS this is not defined in any header.
 extern "C" char** environ;
 
@@ -154,8 +157,8 @@
   set_fd_close_on_exec(STDERR_FILENO, false);
 
   // If the caller specified a std::function entrypoint, run that first.
-  if (args->create_args->entrypoint_for_testing)
-    args->create_args->entrypoint_for_testing();
+  if (args->create_args->posix_entrypoint_for_testing)
+    args->create_args->posix_entrypoint_for_testing();
 
   // If the caller specified only an entrypoint, without any args, exit now.
   // Otherwise proceed with the exec() below.
@@ -180,36 +183,8 @@
 
 }  // namespace
 
-Subprocess::Args::Args(Args&&) noexcept = default;
-Subprocess::Args& Subprocess::Args::operator=(Args&&) = default;
-
-Subprocess::Subprocess(std::initializer_list<std::string> a) : args(a) {
-  s_.rusage.reset(new ResourceUsage());
-}
-
-Subprocess::Subprocess(Subprocess&& other) noexcept {
-  static_assert(sizeof(Subprocess) == sizeof(std::tuple<MovableState, Args>),
-                "base::Subprocess' move ctor needs updating");
-  s_ = std::move(other.s_);
-  args = std::move(other.args);
-
-  // Reset the state of the moved-from object.
-  other.s_.status = kNotStarted;  // So the dtor doesn't try to kill().
-  other.~Subprocess();
-  new (&other) Subprocess();
-}
-
-Subprocess& Subprocess::operator=(Subprocess&& other) {
-  this->~Subprocess();
-  new (this) Subprocess(std::move(other));
-  return *this;
-}
-
-Subprocess::~Subprocess() {
-  if (s_.status == kRunning)
-    KillAndWaitForTermination();
-  PERFETTO_CHECK(!s_.waitpid_thread.joinable());
-}
+// static
+const int Subprocess::kTimeoutSignal = SIGKILL;
 
 void Subprocess::Start() {
   ChildProcessArgs proc_args;
@@ -222,8 +197,10 @@
       proc_args.argv.push_back(const_cast<char*>(arg.c_str()));
     proc_args.argv.push_back(nullptr);
 
-    if (!args.argv0_override.empty())
-      proc_args.argv[0] = const_cast<char*>(args.argv0_override.c_str());
+    if (!args.posix_argv0_override_for_testing.empty()) {
+      proc_args.argv[0] =
+          const_cast<char*>(args.posix_argv0_override_for_testing.c_str());
+    }
   }
 
   // Setup env.
@@ -234,30 +211,30 @@
   }
 
   // Setup the pipes for stdin/err redirection.
-  s_.stdin_pipe = base::Pipe::Create(base::Pipe::kWrNonBlock);
-  proc_args.stdin_pipe_rd = *s_.stdin_pipe.rd;
-  s_.stdouterr_pipe = base::Pipe::Create(base::Pipe::kRdNonBlock);
-  proc_args.stdouterr_pipe_wr = *s_.stdouterr_pipe.wr;
+  s_->stdin_pipe = base::Pipe::Create(base::Pipe::kWrNonBlock);
+  proc_args.stdin_pipe_rd = *s_->stdin_pipe.rd;
+  s_->stdouterr_pipe = base::Pipe::Create(base::Pipe::kRdNonBlock);
+  proc_args.stdouterr_pipe_wr = *s_->stdouterr_pipe.wr;
 
   // Spawn the child process that will exec().
-  s_.pid = fork();
-  PERFETTO_CHECK(s_.pid >= 0);
-  if (s_.pid == 0) {
+  s_->pid = fork();
+  PERFETTO_CHECK(s_->pid >= 0);
+  if (s_->pid == 0) {
     // Close the parent-ends of the pipes.
-    s_.stdin_pipe.wr.reset();
-    s_.stdouterr_pipe.rd.reset();
+    s_->stdin_pipe.wr.reset();
+    s_->stdouterr_pipe.rd.reset();
     ChildProcess(&proc_args);
     // ChildProcess() doesn't return, not even in case of failures.
     PERFETTO_FATAL("not reached");
   }
 
-  s_.status = kRunning;
+  s_->status = kRunning;
 
   // Close the child-end of the pipes.
-  // Deliberately NOT closing the s_.stdin_pipe.rd. This is to avoid crashing
+  // Deliberately NOT closing the s_->stdin_pipe.rd. This is to avoid crashing
   // with a SIGPIPE if the process exits without consuming its stdin, while
   // the parent tries to write() on the other end of the stdin pipe.
-  s_.stdouterr_pipe.wr.reset();
+  s_->stdouterr_pipe.wr.reset();
   proc_args.create_args->out_fd.reset();
 
   // Spawn a thread that is blocked on waitpid() and writes the termination
@@ -265,13 +242,13 @@
   // timeout option and can't be passed to poll(). The alternative would be
   // using a SIGCHLD handler, but anecdotally signal handlers introduce more
   // problems than what they solve.
-  s_.exit_status_pipe = base::Pipe::Create(base::Pipe::kRdNonBlock);
+  s_->exit_status_pipe = base::Pipe::Create(base::Pipe::kRdNonBlock);
 
   // Both ends of the pipe are closed after the thread.join().
-  int pid = s_.pid;
-  int exit_status_pipe_wr = s_.exit_status_pipe.wr.release();
-  auto* rusage = s_.rusage.get();
-  s_.waitpid_thread = std::thread([pid, exit_status_pipe_wr, rusage] {
+  int pid = s_->pid;
+  int exit_status_pipe_wr = s_->exit_status_pipe.wr.release();
+  auto* rusage = s_->rusage.get();
+  s_->waitpid_thread = std::thread([pid, exit_status_pipe_wr, rusage] {
     int pid_stat = -1;
     struct rusage usg {};
     int wait_res = PERFETTO_EINTR(wait4(pid, &pid_stat, 0, &usg));
@@ -295,11 +272,11 @@
 }
 
 Subprocess::Status Subprocess::Poll() {
-  if (s_.status != kRunning)
-    return s_.status;  // Nothing to poll.
+  if (s_->status != kRunning)
+    return s_->status;  // Nothing to poll.
   while (PollInternal(0 /* don't block*/)) {
   }
-  return s_.status;
+  return s_->status;
 }
 
 // |timeout_ms| semantic:
@@ -312,18 +289,18 @@
 bool Subprocess::PollInternal(int poll_timeout_ms) {
   struct pollfd fds[3]{};
   size_t num_fds = 0;
-  if (s_.exit_status_pipe.rd) {
-    fds[num_fds].fd = *s_.exit_status_pipe.rd;
+  if (s_->exit_status_pipe.rd) {
+    fds[num_fds].fd = *s_->exit_status_pipe.rd;
     fds[num_fds].events = POLLIN;
     num_fds++;
   }
-  if (s_.stdouterr_pipe.rd) {
-    fds[num_fds].fd = *s_.stdouterr_pipe.rd;
+  if (s_->stdouterr_pipe.rd) {
+    fds[num_fds].fd = *s_->stdouterr_pipe.rd;
     fds[num_fds].events = POLLIN;
     num_fds++;
   }
-  if (s_.stdin_pipe.wr) {
-    fds[num_fds].fd = *s_.stdin_pipe.wr;
+  if (s_->stdin_pipe.wr) {
+    fds[num_fds].fd = *s_->stdin_pipe.wr;
     fds[num_fds].events = POLLOUT;
     num_fds++;
   }
@@ -343,7 +320,7 @@
 }
 
 bool Subprocess::Wait(int timeout_ms) {
-  PERFETTO_CHECK(s_.status != kNotStarted);
+  PERFETTO_CHECK(s_->status != kNotStarted);
 
   // Break out of the loop only after both conditions are satisfied:
   // - All stdout/stderr data has been read (if kBuffer).
@@ -357,7 +334,7 @@
   // state where the write(stdin_pipe_.wr) will never unblock.
 
   const int64_t t_start = base::GetWallTimeMs().count();
-  while (s_.exit_status_pipe.rd || s_.stdouterr_pipe.rd) {
+  while (s_->exit_status_pipe.rd || s_->stdouterr_pipe.rd) {
     int poll_timeout_ms = -1;  // Block until a FD is ready.
     if (timeout_ms > 0) {
       const int64_t now = GetWallTimeMs().count();
@@ -370,43 +347,29 @@
   return true;
 }
 
-bool Subprocess::Call(int timeout_ms) {
-  PERFETTO_CHECK(s_.status == kNotStarted);
-  Start();
-
-  if (!Wait(timeout_ms)) {
-    KillAndWaitForTermination();
-    // TryReadExitStatus must have joined the thread.
-    PERFETTO_DCHECK(!s_.waitpid_thread.joinable());
-  }
-  PERFETTO_DCHECK(s_.status != kRunning);
-  return s_.status == kExited && s_.returncode == 0;
-}
-
 void Subprocess::TryReadExitStatus() {
-  if (!s_.exit_status_pipe.rd)
+  if (!s_->exit_status_pipe.rd)
     return;
 
   int pid_stat = -1;
   int64_t rsize = PERFETTO_EINTR(
-      read(*s_.exit_status_pipe.rd, &pid_stat, sizeof(pid_stat)));
+      read(*s_->exit_status_pipe.rd, &pid_stat, sizeof(pid_stat)));
   if (rsize < 0 && errno == EAGAIN)
     return;
 
   if (rsize > 0) {
     PERFETTO_CHECK(rsize == sizeof(pid_stat));
   } else if (rsize < 0) {
-    PERFETTO_PLOG("Subprocess read(s_.exit_status_pipe) failed");
+    PERFETTO_PLOG("Subprocess read(s_->exit_status_pipe) failed");
   }
-  s_.waitpid_thread.join();
-  s_.exit_status_pipe.rd.reset();
+  s_->waitpid_thread.join();
+  s_->exit_status_pipe.rd.reset();
 
+  s_->status = kTerminated;
   if (WIFEXITED(pid_stat)) {
-    s_.returncode = WEXITSTATUS(pid_stat);
-    s_.status = kExited;
+    s_->returncode = WEXITSTATUS(pid_stat);
   } else if (WIFSIGNALED(pid_stat)) {
-    s_.returncode = 128 + WTERMSIG(pid_stat);  // Follow bash convention.
-    s_.status = kKilledBySignal;
+    s_->returncode = 128 + WTERMSIG(pid_stat);  // Follow bash convention.
   } else {
     PERFETTO_FATAL("waitpid() returned an unexpected value (0x%x)", pid_stat);
   }
@@ -414,65 +377,58 @@
 
 // If the stidn pipe is still open, push input data and close it at the end.
 void Subprocess::TryPushStdin() {
-  if (!s_.stdin_pipe.wr)
+  if (!s_->stdin_pipe.wr)
     return;
 
-  PERFETTO_DCHECK(args.input.empty() || s_.input_written < args.input.size());
+  PERFETTO_DCHECK(args.input.empty() || s_->input_written < args.input.size());
   if (args.input.size()) {
     int64_t wsize =
-        PERFETTO_EINTR(write(*s_.stdin_pipe.wr, &args.input[s_.input_written],
-                             args.input.size() - s_.input_written));
+        PERFETTO_EINTR(write(*s_->stdin_pipe.wr, &args.input[s_->input_written],
+                             args.input.size() - s_->input_written));
     if (wsize < 0 && errno == EAGAIN)
       return;
 
     if (wsize >= 0) {
       // Whether write() can return 0 is one of the greatest mysteries of UNIX.
       // Just ignore it.
-      s_.input_written += static_cast<size_t>(wsize);
+      s_->input_written += static_cast<size_t>(wsize);
     } else {
       PERFETTO_PLOG("Subprocess write(stdin) failed");
-      s_.stdin_pipe.wr.reset();
+      s_->stdin_pipe.wr.reset();
     }
   }
-  PERFETTO_DCHECK(s_.input_written <= args.input.size());
-  if (s_.input_written == args.input.size())
-    s_.stdin_pipe.wr.reset();  // Close stdin.
+  PERFETTO_DCHECK(s_->input_written <= args.input.size());
+  if (s_->input_written == args.input.size())
+    s_->stdin_pipe.wr.reset();  // Close stdin.
 }
 
 void Subprocess::TryReadStdoutAndErr() {
-  if (!s_.stdouterr_pipe.rd)
+  if (!s_->stdouterr_pipe.rd)
     return;
   char buf[4096];
-  int64_t rsize = PERFETTO_EINTR(read(*s_.stdouterr_pipe.rd, buf, sizeof(buf)));
+  int64_t rsize =
+      PERFETTO_EINTR(read(*s_->stdouterr_pipe.rd, buf, sizeof(buf)));
   if (rsize < 0 && errno == EAGAIN)
     return;
 
   if (rsize > 0) {
-    s_.output.append(buf, static_cast<size_t>(rsize));
+    s_->output.append(buf, static_cast<size_t>(rsize));
   } else if (rsize == 0 /* EOF */) {
-    s_.stdouterr_pipe.rd.reset();
+    s_->stdouterr_pipe.rd.reset();
   } else {
     PERFETTO_PLOG("Subprocess read(stdout/err) failed");
-    s_.stdouterr_pipe.rd.reset();
+    s_->stdouterr_pipe.rd.reset();
   }
 }
 
 void Subprocess::KillAndWaitForTermination(int sig_num) {
-  kill(s_.pid, sig_num ? sig_num : SIGKILL);
+  kill(s_->pid, sig_num ? sig_num : SIGKILL);
   Wait();
-}
-
-std::string Subprocess::Args::GetCmdString() const {
-  std::string str;
-  for (size_t i = 0; i < exec_cmd.size(); i++) {
-    str += i > 0 ? " \"" : "";
-    str += exec_cmd[i];
-    str += i > 0 ? "\"" : "";
-  }
-  return str;
+  // TryReadExitStatus must have joined the thread.
+  PERFETTO_DCHECK(!s_->waitpid_thread.joinable());
 }
 
 }  // namespace base
 }  // namespace perfetto
 
-#endif  // PERFETTO_HAS_SUBPROCESS()
+#endif  // PERFETTO_OS_LINUX || PERFETTO_OS_ANDROID || PERFETTO_OS_APPLE
diff --git a/src/base/subprocess_unittest.cc b/src/base/subprocess_unittest.cc
index 5f8c5d6..b52c804 100644
--- a/src/base/subprocess_unittest.cc
+++ b/src/base/subprocess_unittest.cc
@@ -16,12 +16,15 @@
 
 #include "perfetto/ext/base/subprocess.h"
 
-#if PERFETTO_HAS_SUBPROCESS()
 #include <thread>
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+#include <Windows.h>
+#else
 #include <signal.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#endif
 
 #include "perfetto/base/time.h"
 #include "perfetto/ext/base/file_utils.h"
@@ -33,6 +36,16 @@
 namespace base {
 namespace {
 
+std::string GetOutput(const Subprocess& p) {
+  std::string output = p.output();
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  size_t pos = 0;
+  while ((pos = output.find("\r\n", pos)) != std::string::npos)
+    output.erase(pos, 1);
+#endif
+  return output;
+}
+
 std::string GenLargeString() {
   std::string contents;
   for (int i = 0; i < 4096; i++) {
@@ -46,68 +59,101 @@
 TEST(SubprocessTest, InvalidPath) {
   Subprocess p({"/usr/bin/invalid_1337"});
   EXPECT_FALSE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  EXPECT_EQ(p.returncode(), ERROR_FILE_NOT_FOUND);
+#else
   EXPECT_EQ(p.returncode(), 128);
-  EXPECT_EQ(p.output(), "execve() failed\n");
+  EXPECT_EQ(GetOutput(p), "execve() failed\n");
+#endif
 }
 
 TEST(SubprocessTest, StdoutOnly) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "(echo skip_err 1>&2) && echo out_only"});
+#else
   Subprocess p({"sh", "-c", "(echo skip_err >&2); echo out_only"});
+#endif
   p.args.stdout_mode = Subprocess::kBuffer;
   p.args.stderr_mode = Subprocess::kDevNull;
+
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
-  EXPECT_EQ(p.output(), "out_only\n");
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_EQ(GetOutput(p), "out_only\n");
 }
 
 TEST(SubprocessTest, StderrOnly) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "(echo err_only>&2) && echo skip_out"});
+#else
   Subprocess p({"sh", "-c", "(echo err_only >&2); echo skip_out"});
+#endif
   p.args.stdout_mode = Subprocess::kDevNull;
   p.args.stderr_mode = Subprocess::kBuffer;
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.output(), "err_only\n");
+  EXPECT_EQ(GetOutput(p), "err_only\n");
 }
 
 TEST(SubprocessTest, BothStdoutAndStderr) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "echo out&&(echo err>&2)&&echo out2"});
+#else
   Subprocess p({"sh", "-c", "echo out; (echo err >&2); echo out2"});
+#endif
   p.args.stdout_mode = Subprocess::kBuffer;
   p.args.stderr_mode = Subprocess::kBuffer;
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.output(), "out\nerr\nout2\n");
+  EXPECT_EQ(GetOutput(p), "out\nerr\nout2\n");
 }
 
 TEST(SubprocessTest, BinTrue) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "(exit 0)"});
+#else
   Subprocess p({"true"});
+#endif
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 0);
 }
 
 TEST(SubprocessTest, BinFalse) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "(exit 1)"});
+#else
   Subprocess p({"false"});
+#endif
   EXPECT_FALSE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 1);
 }
 
 TEST(SubprocessTest, Echo) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "echo|set /p ignored_var=foobar"});
+#else
   Subprocess p({"echo", "-n", "foobar"});
+#endif
   p.args.stdout_mode = Subprocess::kBuffer;
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 0);
-  EXPECT_EQ(p.output(), "foobar");
+  EXPECT_EQ(GetOutput(p), "foobar");
 }
 
 TEST(SubprocessTest, FeedbackLongInput) {
   std::string contents = GenLargeString();
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/C", "findstr ."});
+#else
   Subprocess p({"cat", "-"});
+#endif
   p.args.stdout_mode = Subprocess::kBuffer;
   p.args.input = contents;
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 0);
-  EXPECT_EQ(p.output(), contents);
+  EXPECT_EQ(GetOutput(p), contents);
 }
 
 TEST(SubprocessTest, CatLargeFile) {
@@ -115,98 +161,253 @@
   TempFile tf = TempFile::Create();
   WriteAll(tf.fd(), contents.data(), contents.size());
   FlushFile(tf.fd());
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", ("type \"" + tf.path() + "\"").c_str()});
+#else
   Subprocess p({"cat", tf.path().c_str()});
+#endif
   p.args.stdout_mode = Subprocess::kBuffer;
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.output(), contents);
+  EXPECT_EQ(GetOutput(p), contents);
 }
 
 TEST(SubprocessTest, Timeout) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"ping", "127.0.0.1", "-n", "60"});  // Stand-in for `sleep`.
+  p.args.stdout_mode = Subprocess::kDevNull;
+#else
   Subprocess p({"sleep", "60"});
+#endif
+  // The 1 ms timeout is expected to expire while the child is still running.
   EXPECT_FALSE(p.Call(/*timeout_ms=*/1));
-  EXPECT_EQ(p.status(), Subprocess::kKilledBySignal);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_TRUE(p.timed_out());
 }
 
 TEST(SubprocessTest, TimeoutNotHit) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"ping", "127.0.0.1", "-n", "1"});  // Short-lived stand-in.
+  p.args.stdout_mode = Subprocess::kDevNull;
+#else
   Subprocess p({"sleep", "0.01"});
+#endif
   EXPECT_TRUE(p.Call(/*timeout_ms=*/100000));
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
 }
 
 TEST(SubprocessTest, TimeoutStopOutput) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "FOR /L %N IN () DO @echo stuff>NUL"});
+#else  // Both commands loop forever, so only the timeout can end the child.
   Subprocess p({"sh", "-c", "while true; do echo stuff; done"});
+#endif
   p.args.stdout_mode = Subprocess::kDevNull;
   EXPECT_FALSE(p.Call(/*timeout_ms=*/10));
-  EXPECT_EQ(p.status(), Subprocess::kKilledBySignal);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_TRUE(p.timed_out());
 }
 
 TEST(SubprocessTest, ExitBeforeReadingStdin) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"ping", "127.0.0.1", "-n", "1"});  // Short-lived stand-in.
+#else
   // 'sh -c' is to avoid closing stdin (sleep closes it before sleeping).
   Subprocess p({"sh", "-c", "sleep 0.01"});
+#endif
   p.args.stdout_mode = Subprocess::kDevNull;
   p.args.stderr_mode = Subprocess::kDevNull;
   p.args.input = GenLargeString();
   EXPECT_TRUE(p.Call());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 0);
 }
 
 TEST(SubprocessTest, StdinWriteStall) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"ping", "127.0.0.1", "-n", "10"});  // Stand-in for `sleep`.
+#else
   // 'sh -c' is to avoid closing stdin (sleep closes it before sleeping).
   // This causes a situation where the write on the stdin will stall because
   // nobody reads it and the pipe buffer fills up. In this situation we should
   // still handle the timeout properly.
   Subprocess p({"sh", "-c", "sleep 10"});
+#endif
   p.args.stdout_mode = Subprocess::kDevNull;
   p.args.stderr_mode = Subprocess::kDevNull;
   p.args.input = GenLargeString();
   EXPECT_FALSE(p.Call(/*timeout_ms=*/10));
-  EXPECT_EQ(p.status(), Subprocess::kKilledBySignal);
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_TRUE(p.timed_out());
 }
 
 TEST(SubprocessTest, StartAndWait) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"ping", "127.0.0.1", "-n", "1000"});
+#else
   Subprocess p({"sleep", "1000"});
+#endif
+  p.args.stdout_mode = Subprocess::kDevNull;
   p.Start();
   EXPECT_EQ(p.Poll(), Subprocess::kRunning);
   p.KillAndWaitForTermination();
-  EXPECT_EQ(p.status(), Subprocess::kKilledBySignal);
-  EXPECT_EQ(p.Poll(), Subprocess::kKilledBySignal);
-  EXPECT_EQ(p.returncode(), 128 + SIGKILL);
+  // With no argument, the child is killed with the platform's default code.
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_EQ(p.Poll(), Subprocess::kTerminated);
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  EXPECT_EQ(p.returncode(), static_cast<int>(STATUS_CONTROL_C_EXIT));
+#else
+  EXPECT_EQ(p.returncode(), static_cast<int>(128 + SIGKILL));
+#endif
 }
 
 TEST(SubprocessTest, PollBehavesProperly) {
   Pipe pipe = Pipe::Create();
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "(exit 0)"});  // Windows stand-in for `true`.
+#else
   Subprocess p({"true"});
+#endif
   p.args.stdout_mode = Subprocess::kFd;
   p.args.out_fd = std::move(pipe.wr);
   p.Start();
 
   // Wait for EOF (which really means the child process has terminated).
-  char buf;
-  while (PERFETTO_EINTR(read(*pipe.rd, &buf, 1)) != 0) {
-    usleep(1000);
-  }
+  std::string ignored;  // Only the EOF matters, not the data.
+  ReadPlatformHandle(*pipe.rd, &ignored);
 
   // The kernel takes some time to detect the termination of the process. The
   // best thing we can do here is check that we detect the termination within
   // some reasonable time.
   auto start_ms = GetWallTimeMs();
-  while (p.Poll() != Subprocess::kExited) {
+  while (p.Poll() != Subprocess::kTerminated) {
     auto elapsed_ms = GetWallTimeMs() - start_ms;
     ASSERT_LT(elapsed_ms, TimeMillis(10000));
-    usleep(1000);
+    std::this_thread::sleep_for(TimeMillis(5));  // Portable usleep().
   }
 
   // At this point Poll() must detect the termination.
-  EXPECT_EQ(p.Poll(), Subprocess::kExited);
+  EXPECT_EQ(p.Poll(), Subprocess::kTerminated);
   EXPECT_EQ(p.returncode(), 0);
 }
 
+TEST(SubprocessTest, Wait) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  Subprocess p({"cmd", "/c", "echo exec_done && FOR /L %N IN () DO @echo>NUL"});
+#else
+  Subprocess p({"sh", "-c", "echo exec_done; while true; do true; done"});
+#endif
+  p.args.stdout_mode = Subprocess::kBuffer;
+  p.Start();
+
+  // Wait until the child is running, i.e. until it has printed "exec_done".
+  while (p.output().find("exec_done") == std::string::npos) {
+    EXPECT_FALSE(p.Wait(1 /*ms*/));
+    EXPECT_EQ(p.status(), Subprocess::kRunning);
+  }
+  // Terminate the child behind Subprocess' back, with a distinctive code.
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  ScopedPlatformHandle proc_handle(::OpenProcess(
+      PROCESS_TERMINATE, /*inherit=*/false, static_cast<DWORD>(p.pid())));
+  ASSERT_TRUE(proc_handle);
+  ASSERT_TRUE(::TerminateProcess(*proc_handle, DBG_CONTROL_BREAK));
+#else
+  kill(p.pid(), SIGBUS);
+#endif
+  EXPECT_TRUE(p.Wait(30000 /*ms*/));  // We shouldn't hit this.
+  EXPECT_TRUE(p.Wait());              // Should be a no-op.
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+  EXPECT_EQ(p.returncode(), static_cast<int>(DBG_CONTROL_BREAK));
+#else
+  EXPECT_EQ(p.returncode(), 128 + SIGBUS);
+#endif
+}
+
+// Destroying a Subprocess that is still running must kill the child process.
+TEST(SubprocessTest, KillOnDtor) {
+  auto is_process_alive = [](PlatformProcessId pid) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    DWORD ignored = 0;
+    return ProcessIdToSessionId(static_cast<DWORD>(pid), &ignored);
+#else
+    // We use kill(SIGWINCH) as a way to tell if the process is still alive by
+    // looking at the kill(2) return value. SIGWINCH is one of the few signals
+    // that has default ignore disposition.
+    return kill(pid, SIGWINCH) == 0;
+#endif
+  };
+  PlatformProcessId pid;
+  {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    Subprocess p({"ping", "127.0.0.1", "-n", "1000"});
+#else
+    Subprocess p({"sleep", "1000"});
+#endif
+    p.Start();
+    pid = p.pid();
+    EXPECT_TRUE(is_process_alive(pid));
+  }
+
+  // Both on Windows and Linux, kill can take some time to free up the pid.
+  bool alive = true;
+  for (int attempt = 0; attempt < 1000 && alive; attempt++) {
+    alive = is_process_alive(pid);
+    std::this_thread::sleep_for(TimeMillis(5));
+  }
+  EXPECT_FALSE(alive);
+}
+
+// Regression test for b/162505491.
+TEST(SubprocessTest, MoveOperators) {
+  {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    Subprocess initial({"ping", "127.0.0.1", "-n", "100"});
+#else
+    Subprocess initial = Subprocess({"sleep", "10000"});
+#endif
+    initial.args.stdout_mode = Subprocess::kDevNull;
+    initial.Start();
+    Subprocess moved(std::move(initial));
+    EXPECT_EQ(moved.Poll(), Subprocess::kRunning);
+    EXPECT_EQ(initial.Poll(), Subprocess::kNotStarted);
+
+    // Check that the moved-from object can be reused via move-assignment.
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    initial = Subprocess({"cmd", "/c", "echo|set /p ignored_var=hello"});
+#else
+    initial = Subprocess({"echo", "-n", "hello"});
+#endif
+    initial.args.stdout_mode = Subprocess::OutputMode::kBuffer;
+    initial.Start();
+    initial.Wait(/*timeout=*/5000);
+    EXPECT_EQ(initial.status(), Subprocess::kTerminated);
+    EXPECT_EQ(initial.returncode(), 0);
+    EXPECT_EQ(initial.output(), "hello");
+  }
+  // Started children get relocated as the vector grows; all must survive.
+  std::vector<Subprocess> v;
+  for (int i = 0; i < 10; i++) {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+    v.emplace_back(Subprocess({"ping", "127.0.0.1", "-n", "10"}));
+#else
+    v.emplace_back(Subprocess({"sleep", "10"}));
+#endif
+    v.back().args.stdout_mode = Subprocess::OutputMode::kDevNull;
+    v.back().Start();
+  }
+  for (auto& p : v)
+    EXPECT_EQ(p.Poll(), Subprocess::kRunning);
+}
+
+// posix_entrypoint_for_testing is not supported on Windows.
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
 // Test the case of passing a lambda in |entrypoint| but no cmd.c
 TEST(SubprocessTest, Entrypoint) {
   Subprocess p;
   p.args.input = "ping\n";
   p.args.stdout_mode = Subprocess::kBuffer;
-  p.args.entrypoint_for_testing = [] {
+  p.args.posix_entrypoint_for_testing = [] {
     char buf[32]{};
     PERFETTO_CHECK(fgets(buf, sizeof(buf), stdin));
     PERFETTO_CHECK(strcmp(buf, "ping\n") == 0);
@@ -216,7 +417,7 @@
   };
   EXPECT_FALSE(p.Call());
   EXPECT_EQ(p.returncode(), 42);
-  EXPECT_EQ(p.output(), "pong\n");
+  EXPECT_EQ(GetOutput(p), "pong\n");
 }
 
 // Test the case of passing both a lambda entrypoint and a process to exec.
@@ -229,7 +430,7 @@
   Subprocess p({"echo", "123"});
   p.args.stdout_mode = Subprocess::kBuffer;
   p.args.preserve_fds.push_back(pipe2_wr);
-  p.args.entrypoint_for_testing = [pipe1_wr, pipe2_wr] {
+  p.args.posix_entrypoint_for_testing = [pipe1_wr, pipe2_wr] {
     base::ignore_result(write(pipe1_wr, "fail", 4));
     base::ignore_result(write(pipe2_wr, "pass", 4));
   };
@@ -244,71 +445,12 @@
   buf[4] = '\0';
   EXPECT_STREQ(buf, "pass");
   EXPECT_TRUE(p.Wait());
-  EXPECT_EQ(p.status(), Subprocess::kExited);
-  EXPECT_EQ(p.output(), "123\n");
+  EXPECT_EQ(p.status(), Subprocess::kTerminated);
+  EXPECT_EQ(GetOutput(p), "123\n");
 }
 
-TEST(SubprocessTest, Wait) {
-  Subprocess p({"sh", "-c", "echo exec_done; while true; do true; done"});
-  p.args.stdout_mode = Subprocess::kBuffer;
-  p.Start();
-
-  // Wait for the fork()+exec() to complete.
-  while (p.output().find("exec_done") == std::string::npos) {
-    EXPECT_FALSE(p.Wait(1 /*ms*/));
-    EXPECT_EQ(p.status(), Subprocess::kRunning);
-  }
-
-  kill(p.pid(), SIGBUS);
-  EXPECT_TRUE(p.Wait(30000 /*ms*/));  // We shouldn't hit this.
-  EXPECT_TRUE(p.Wait());  // Should be a no-op.
-  EXPECT_EQ(p.status(), Subprocess::kKilledBySignal);
-  EXPECT_EQ(p.returncode(), 128 + SIGBUS);
-}
-
-TEST(SubprocessTest, KillOnDtor) {
-  // Here we use kill(SIGWINCH) as a way to tell if the process is still alive.
-  // SIGWINCH is one of the few signals that has default ignore disposition.
-  int pid;
-  {
-    Subprocess p({"sleep", "10000"});
-    p.Start();
-    pid = p.pid();
-    EXPECT_EQ(kill(pid, SIGWINCH), 0);
-  }
-  EXPECT_EQ(kill(pid, SIGWINCH), -1);
-}
-
-// Regression test for b/162505491.
-TEST(SubprocessTest, MoveOperators) {
-  {
-    Subprocess initial = Subprocess({"sleep", "10000"});
-    initial.Start();
-    Subprocess moved(std::move(initial));
-    EXPECT_EQ(moved.Poll(), Subprocess::kRunning);
-    EXPECT_EQ(initial.Poll(), Subprocess::kNotStarted);
-
-    // Check that reuse works
-    initial = Subprocess({"echo", "-n", "hello"});
-    initial.args.stdout_mode = Subprocess::OutputMode::kBuffer;
-    initial.Start();
-    initial.Wait(/*timeout=*/5000);
-    EXPECT_EQ(initial.status(), Subprocess::kExited);
-    EXPECT_EQ(initial.returncode(), 0);
-    EXPECT_EQ(initial.output(), "hello");
-  }
-
-  std::vector<Subprocess> v;
-  for (int i = 0; i < 10; i++) {
-    v.emplace_back(Subprocess({"sleep", "10"}));
-    v.back().Start();
-  }
-  for (auto& p : v)
-    EXPECT_EQ(p.Poll(), Subprocess::kRunning);
-}
+#endif
 
 }  // namespace
 }  // namespace base
 }  // namespace perfetto
-
-#endif  // PERFETTO_HAS_SUBPROCESS()
diff --git a/src/base/subprocess_windows.cc b/src/base/subprocess_windows.cc
new file mode 100644
index 0000000..d58d7bd
--- /dev/null
+++ b/src/base/subprocess_windows.cc
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/ext/base/subprocess.h"
+
+#include "perfetto/base/build_config.h"
+
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <mutex>
+#include <tuple>
+
+#include <Windows.h>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/base/time.h"
+#include "perfetto/ext/base/pipe.h"
+#include "perfetto/ext/base/utils.h"
+
+namespace perfetto {
+namespace base {
+
+// static. On Windows this is the STATUS_TIMEOUT NTSTATUS code, as an int.
+const int Subprocess::kTimeoutSignal = static_cast<int>(STATUS_TIMEOUT);
+
+// Spawns the child with CreateProcessA(), wiring its stdin/stdout/stderr as
+// requested in |args|, then starts the threads that pump stdin and drain
+// stdout/stderr. If the spawn fails, the status becomes kTerminated.
+void Subprocess::Start() {
+  if (args.exec_cmd.empty()) {
+    PERFETTO_ELOG("Subprocess.exec_cmd cannot be empty on Windows");
+    return;
+  }
+
+  // Quote arguments but only when ambiguous. When quoting, CreateProcess()
+  // assumes that the command is an absolute path and does not search in the
+  // %PATH%. If non quoted, instead, CreateProcess() tries both. This is to
+  // allow Subprocess("cmd.exe", "/c", "shell command").
+  std::string cmd;
+  for (const auto& part : args.exec_cmd) {
+    const bool needs_quotes = part.find(' ') != std::string::npos;
+    cmd += needs_quotes ? "\"" + part + "\" " : part + " ";
+  }
+  if (!cmd.empty())
+    cmd.resize(cmd.size() - 1);  // Remove trailing space.
+
+  s_->stdin_pipe = Pipe::Create();
+  // Allow the child process to inherit the other end of the pipe.
+  PERFETTO_CHECK(
+      ::SetHandleInformation(*s_->stdin_pipe.rd, HANDLE_FLAG_INHERIT, 1));
+
+  if (args.stderr_mode == kBuffer || args.stdout_mode == kBuffer) {
+    s_->stdouterr_pipe = Pipe::Create();
+    PERFETTO_CHECK(
+        ::SetHandleInformation(*s_->stdouterr_pipe.wr, HANDLE_FLAG_INHERIT, 1));
+  }
+
+  ScopedPlatformHandle nul_handle;
+  if (args.stderr_mode == kDevNull || args.stdout_mode == kDevNull) {
+    nul_handle.reset(::CreateFileA("NUL", GENERIC_WRITE, FILE_SHARE_WRITE,
+                                   nullptr, OPEN_EXISTING,
+                                   FILE_ATTRIBUTE_NORMAL, nullptr));
+    PERFETTO_CHECK(::SetHandleInformation(*nul_handle, HANDLE_FLAG_INHERIT, 1));
+  }
+
+  PROCESS_INFORMATION proc_info{};
+  STARTUPINFOA start_info{};
+  start_info.cb = sizeof(STARTUPINFOA);
+
+  if (args.stderr_mode == kInherit) {
+    start_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
+  } else if (args.stderr_mode == kBuffer) {
+    start_info.hStdError = *s_->stdouterr_pipe.wr;
+  } else if (args.stderr_mode == kDevNull) {
+    start_info.hStdError = *nul_handle;
+  } else if (args.stderr_mode == kFd) {
+    PERFETTO_CHECK(
+        ::SetHandleInformation(*args.out_fd, HANDLE_FLAG_INHERIT, 1));
+    start_info.hStdError = *args.out_fd;
+  } else {
+    PERFETTO_CHECK(false);
+  }
+
+  if (args.stdout_mode == kInherit) {
+    start_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
+  } else if (args.stdout_mode == kBuffer) {
+    start_info.hStdOutput = *s_->stdouterr_pipe.wr;
+  } else if (args.stdout_mode == kDevNull) {
+    start_info.hStdOutput = *nul_handle;
+  } else if (args.stdout_mode == kFd) {
+    PERFETTO_CHECK(
+        ::SetHandleInformation(*args.out_fd, HANDLE_FLAG_INHERIT, 1));
+    start_info.hStdOutput = *args.out_fd;
+  } else {
+    PERFETTO_CHECK(false);
+  }
+
+  start_info.hStdInput = *s_->stdin_pipe.rd;
+  start_info.dwFlags |= STARTF_USESTDHANDLES;
+
+  // Create the child process.
+  bool success =
+      ::CreateProcessA(nullptr,      // App name. Needs to be null to use PATH.
+                       &cmd[0],      // Command line.
+                       nullptr,      // Process security attributes.
+                       nullptr,      // Primary thread security attributes.
+                       true,         // Handles are inherited.
+                       0,            // Flags.
+                       nullptr,      // Use parent's environment.
+                       nullptr,      // Use parent's current directory.
+                       &start_info,  // STARTUPINFO pointer.
+                       &proc_info);  // Receives PROCESS_INFORMATION.
+  // Save the error now: the resets below can clobber the last-error value.
+  const DWORD create_err = success ? ERROR_SUCCESS : ::GetLastError();
+
+  // Close on our side the pipe ends that we passed to the child process.
+  s_->stdin_pipe.rd.reset();
+  s_->stdouterr_pipe.wr.reset();
+  args.out_fd.reset();
+
+  if (!success) {
+    s_->returncode = ERROR_FILE_NOT_FOUND;
+    s_->status = kTerminated;
+    s_->stdin_pipe.wr.reset();
+    s_->stdouterr_pipe.rd.reset();
+    PERFETTO_ELOG("CreateProcess failed: %lx, cmd: %s", create_err, &cmd[0]);
+    return;
+  }
+
+  s_->pid = proc_info.dwProcessId;
+  s_->win_proc_handle = ScopedPlatformHandle(proc_info.hProcess);
+  s_->win_thread_handle = ScopedPlatformHandle(proc_info.hThread);
+  s_->status = kRunning;
+
+  MovableState* s = s_.get();
+  s_->stdin_thread = std::thread(&Subprocess::StdinThread, s, args.input);
+
+  if (args.stderr_mode == kBuffer || args.stdout_mode == kBuffer) {
+    PERFETTO_DCHECK(s_->stdouterr_pipe.rd);
+    s_->stdouterr_thread = std::thread(&Subprocess::StdoutErrThread, s);
+  }
+}
+
+// static. Runs on its own thread: writes |input| to the child's stdin pipe
+// with blocking WriteFile() calls, then closes the write end of the pipe.
+void Subprocess::StdinThread(MovableState* s, std::string input) {
+  for (size_t pos = 0; pos < input.size();) {
+    const auto chunk_size = static_cast<DWORD>(input.size() - pos);
+    DWORD written = 0;
+    if (!::WriteFile(*s->stdin_pipe.wr, input.data() + pos, chunk_size,
+                     &written, nullptr)) {
+      // ERROR_BROKEN_PIPE is WAI when the child just closes stdin and stops
+      // accepting input, so only other errors are logged.
+      const auto err = ::GetLastError();
+      if (err != ERROR_BROKEN_PIPE)
+        PERFETTO_PLOG("Subprocess WriteFile(stdin) failed %lx", err);
+      break;
+    }
+    pos += written;
+  }
+  // Close the write end while holding the state mutex.
+  std::unique_lock<std::mutex> lock(s->mutex);
+  s->stdin_pipe.wr.reset();
+}
+
+// static. Runs on its own thread: drains the child's stdout/stderr pipe with
+// blocking ReadFile() calls until EOF or error, appending the data to
+// |locked_outerr_buf|, then signals |stdouterr_done_event|.
+void Subprocess::StdoutErrThread(MovableState* s) {
+  char buf[4096];
+  for (;;) {
+    DWORD rsize = 0;
+    const bool read_ok =
+        ::ReadFile(*s->stdouterr_pipe.rd, buf, sizeof(buf), &rsize, nullptr);
+    if (!read_ok) {
+      // ERROR_BROKEN_PIPE is treated as a plain EOF; log anything else.
+      const DWORD err = ::GetLastError();
+      if (err != ERROR_BROKEN_PIPE)
+        PERFETTO_PLOG("Subprocess ReadFile(stdouterr) failed %lx", err);
+    }
+    if (rsize == 0)
+      break;  // EOF or some error.
+    std::unique_lock<std::mutex> lock(s->mutex);
+    s->locked_outerr_buf.append(buf, static_cast<size_t>(rsize));
+  }
+
+  // Close the read end under the mutex: Wait() treats a closed
+  // |stdouterr_pipe.rd| as "the stdout/err thread has completed".
+  {
+    std::unique_lock<std::mutex> lock(s->mutex);
+    s->stdouterr_pipe.rd.reset();
+  }
+  s->stdouterr_done_event.Notify();
+}
+
+// Returns the current status, giving a running child up to 1 ms to finish.
+Subprocess::Status Subprocess::Poll() {
+  if (s_->status == kRunning)
+    Wait(1 /*ms*/);  // Refreshes the status if the process has terminated.
+  return s_->status;
+}
+
+// Blocks until the child has exited and all buffered stdout/stderr has been
+// collected into |output|. |timeout_ms| == 0 means wait forever. Returns true
+// on completion, false if |timeout_ms| elapsed first.
+bool Subprocess::Wait(int timeout_ms) {
+  PERFETTO_CHECK(s_->status != kNotStarted);
+  const bool wait_forever = timeout_ms == 0;
+  const int64_t wait_start_ms = base::GetWallTimeMs().count();
+
+  // Break out of the loop only after both conditions are satisfied:
+  // - All stdout/stderr data has been read (if kBuffer).
+  // - The process exited.
+  // The two events can happen in arbitrary order: after the process exits
+  // there might still be data in the pipe buffer, which we want to read fully.
+  // stdout/err might also be "complete" before starting, if neither operates
+  // in kBuffer mode; in that case we just wait for the process termination.
+  // Conversely, don't wait for stdin to be fully written: the child might
+  // exit prematurely (or crash), and in that case the write(stdin_pipe_.wr)
+  // might never unblock.
+  bool stdouterr_complete = false;
+  for (;;) {
+    HANDLE wait_handles[2]{};
+    DWORD num_handles = 0;
+
+    // Check if the process exited.
+    bool process_exited = !s_->win_proc_handle;
+    if (!process_exited) {
+      DWORD exit_code = STILL_ACTIVE;
+      PERFETTO_CHECK(::GetExitCodeProcess(*s_->win_proc_handle, &exit_code));
+      if (exit_code != STILL_ACTIVE) {
+        s_->returncode = static_cast<int>(exit_code);
+        s_->status = kTerminated;
+        s_->win_proc_handle.reset();
+        s_->win_thread_handle.reset();
+        process_exited = true;
+      }
+    } else {
+      PERFETTO_DCHECK(s_->status != kRunning);
+    }
+    if (!process_exited) {
+      wait_handles[num_handles++] = *s_->win_proc_handle;
+    }
+
+    // Check if there is more output and if the stdout/err pipe has been closed.
+    {
+      std::unique_lock<std::mutex> lock(s_->mutex);
+      // Move the output from the internal buffer shared with the
+      // stdouterr_thread to the final buffer exposed to the client.
+      if (!s_->locked_outerr_buf.empty()) {
+        s_->output.append(std::move(s_->locked_outerr_buf));
+        s_->locked_outerr_buf.clear();
+      }
+      stdouterr_complete = !s_->stdouterr_pipe.rd;
+      if (!stdouterr_complete) {
+        wait_handles[num_handles++] = s_->stdouterr_done_event.fd();
+      }
+    }  // lock(s_->mutex)
+
+    if (num_handles == 0) {
+      PERFETTO_DCHECK(process_exited && stdouterr_complete);
+      break;
+    }
+
+    DWORD wait_ms;  // Note: DWORD is unsigned.
+    if (wait_forever) {
+      wait_ms = INFINITE;
+    } else {
+      const int64_t now = GetWallTimeMs().count();
+      const int64_t wait_left_ms = timeout_ms - (now - wait_start_ms);
+      if (wait_left_ms <= 0)
+        return false;  // Timed out
+      wait_ms = static_cast<DWORD>(wait_left_ms);
+    }
+
+    auto wait_res =
+        ::WaitForMultipleObjects(num_handles, wait_handles, false, wait_ms);
+    PERFETTO_CHECK(wait_res != WAIT_FAILED);
+  }
+
+  PERFETTO_DCHECK(!s_->win_proc_handle);
+  PERFETTO_DCHECK(!s_->win_thread_handle);
+
+  if (s_->stdin_thread.joinable())  // Might not exist if CreateProcess failed.
+    s_->stdin_thread.join();
+  if (s_->stdouterr_thread.joinable())
+    s_->stdouterr_thread.join();
+
+  // The stdin pipe is closed by the dedicated stdin thread. However if that is
+  // not started (e.g. because of no redirection) force close it now. Needs to
+  // happen after the join() to be thread safe.
+  s_->stdin_pipe.wr.reset();
+  s_->stdouterr_pipe.rd.reset();
+
+  return true;
+}
+
+// Kills the child (exit code defaults to STATUS_CONTROL_C_EXIT) and reaps it.
+void Subprocess::KillAndWaitForTermination(int exit_code) {
+  auto code = exit_code ? static_cast<DWORD>(exit_code) : STATUS_CONTROL_C_EXIT;
+  ::TerminateProcess(*s_->win_proc_handle, code);
+  Wait();  // Must also have joined both helper threads, as checked below.
+  PERFETTO_DCHECK(!s_->stdin_thread.joinable());
+  PERFETTO_DCHECK(!s_->stdouterr_thread.joinable());
+}
+
+}  // namespace base
+}  // namespace perfetto
+
+#endif  // PERFETTO_OS_WIN
diff --git a/src/profiling/memory/heapprofd_end_to_end_test.cc b/src/profiling/memory/heapprofd_end_to_end_test.cc
index 1f7fef0..15fae25 100644
--- a/src/profiling/memory/heapprofd_end_to_end_test.cc
+++ b/src/profiling/memory/heapprofd_end_to_end_test.cc
@@ -247,7 +247,7 @@
                                      size_t secondary_bytes = 0,
                                      ssize_t max_iter = -1) {
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -733,7 +733,7 @@
     GTEST_SKIP();
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_ACCURATE_MALLOC=1");
@@ -773,7 +773,7 @@
     GTEST_SKIP();
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_ACCURATE_MALLOC=1");
@@ -887,7 +887,7 @@
   sleep(1);
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -959,7 +959,7 @@
   sleep(1);
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -1010,7 +1010,7 @@
   auto helper = GetHelper(&task_runner);
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -1074,7 +1074,7 @@
 
   // Make sure the forked process does not get reparented to init.
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -1155,7 +1155,7 @@
   int ack_pipe_wr = ack_pipe[1];
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.preserve_fds.push_back(signal_pipe_rd);
   child.args.preserve_fds.push_back(ack_pipe_wr);
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_REINIT_ARG0=" +
@@ -1240,7 +1240,7 @@
   int ack_pipe_wr = ack_pipe[1];
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.preserve_fds.push_back(signal_pipe_rd);
   child.args.preserve_fds.push_back(ack_pipe_wr);
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_REINIT_ARG0=" +
@@ -1355,7 +1355,7 @@
   int start_pipe_wr = *start_pipe.wr;
 
   base::Subprocess child({"/proc/self/exe"});
-  child.args.argv0_override = "heapprofd_continuous_malloc";
+  child.args.posix_argv0_override_for_testing = "heapprofd_continuous_malloc";
   child.args.stdout_mode = base::Subprocess::kDevNull;
   child.args.stderr_mode = base::Subprocess::kDevNull;
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG0=" +
@@ -1367,7 +1367,7 @@
   child.args.env.push_back("HEAPPROFD_TESTING_RUN_MALLOC_ARG3=" +
                            std::to_string(200));
   child.args.preserve_fds.push_back(start_pipe_wr);
-  child.args.entrypoint_for_testing = [start_pipe_wr] {
+  child.args.posix_entrypoint_for_testing = [start_pipe_wr] {
     PERFETTO_CHECK(PERFETTO_EINTR(write(start_pipe_wr, "1", 1)) == 1);
     PERFETTO_CHECK(close(start_pipe_wr) == 0 || errno == EINTR);
   };
@@ -1397,7 +1397,7 @@
 
   // Wait for the child and assert that it exited successfully.
   EXPECT_TRUE(child.Wait(30000));
-  EXPECT_EQ(child.status(), base::Subprocess::kExited);
+  EXPECT_EQ(child.status(), base::Subprocess::kTerminated);
   EXPECT_EQ(child.returncode(), 0);
 
   // Assert that we did profile the process.
diff --git a/test/end_to_end_integrationtest.cc b/test/end_to_end_integrationtest.cc
index a8d29e6..46ff80e 100644
--- a/test/end_to_end_integrationtest.cc
+++ b/test/end_to_end_integrationtest.cc
@@ -168,7 +168,7 @@
     // This lambda will be called on the forked child process after having
     // setup pipe redirection and closed all FDs, right before the exec().
     // The Subprocesss harness will take care of closing also |sync_pipe_.wr|.
-    subprocess_.args.entrypoint_for_testing = [sync_pipe_rd] {
+    subprocess_.args.posix_entrypoint_for_testing = [sync_pipe_rd] {
       // Don't add any logging here, all file descriptors are closed and trying
       // to log will likely cause undefined behaviors.
       char ignored = 0;
diff --git a/test/stress_test/stress_test.cc b/test/stress_test/stress_test.cc
index 6215e00..ac53bd3 100644
--- a/test/stress_test/stress_test.cc
+++ b/test/stress_test/stress_test.cc
@@ -215,12 +215,12 @@
 
   for (auto& producer : producers) {
     producer.KillAndWaitForTermination();
-    test_result.prod_rusage = producer.rusage();  // Only keep last one
+    test_result.prod_rusage = producer.posix_rusage();  // Only keep last one
   }
   producers.clear();
   traced.KillAndWaitForTermination();
 
-  test_result.svc_rusage = traced.rusage();
+  test_result.svc_rusage = traced.posix_rusage();
   test_result.run_time_ms = static_cast<uint32_t>((t_end - t_start) / 1000000);
 
   // Verify