Fixed some issues with signal handling and cleanup.

Fixed several issues related to signal handling and cleanup:
- Got rid of the extra process wrapper around crosperf. We now do an
"exec" to get rid of this extra process. This facilitated signal
handling.
- Found a better fix for the problem we had with ignoring
ctrl-c. Instead of creating a pipe for child process stdin, we now
create the child with setsid which has the side effect of creating the
child without a terminal associated to it (see man setsid).
- By creating the child with setsid, we also allow for easier killing of
the child and its descendants since it allows us to use killpg (see man
killpg).
- Handle SIGTERM by "converting" it into an exit call. This will make
sure the atexit routines are called and, furthermore, we can terminate
child process when you catch the "SystemExit" exception.
- With these changes, crosperf will send a SIGTERM to any cros_sdk,
test_that processes that it has started. Before, we would have left over
test_that processes that could interfere with a new test_that
invocation.

BUG=chromium:492826
BUG=chromium:470658
TEST=verified by hand signals are working. Ran toolchain suite.

Change-Id: Ibd09ff428a7b402ea2296ecf9b9a34aa5756c93f
Reviewed-on: https://chrome-internal-review.googlesource.com/232616
Commit-Ready: Luis Lozano <llozano@chromium.org>
Tested-by: Luis Lozano <llozano@chromium.org>
Reviewed-by: Yunlian Jiang <yunlian@google.com>
diff --git a/crosperf/crosperf b/crosperf/crosperf
index 904a172..a29dcbf 100755
--- a/crosperf/crosperf
+++ b/crosperf/crosperf
@@ -1,2 +1,2 @@
 #!/bin/bash
-PYTHONPATH=$(dirname $0)/..:$PYTHONPATH python $(dirname $0)/crosperf.py "$@"
+PYTHONPATH=$(dirname $0)/..:$PYTHONPATH exec python $(dirname $0)/crosperf.py "$@"
diff --git a/crosperf/crosperf.py b/crosperf/crosperf.py
index fc0098d..e107ea4 100755
--- a/crosperf/crosperf.py
+++ b/crosperf/crosperf.py
@@ -7,6 +7,7 @@
 import atexit
 import optparse
 import os
+import signal
 import sys
 from experiment_runner import ExperimentRunner
 from experiment_runner import MockExperimentRunner
@@ -58,6 +59,16 @@
   experiment.Cleanup()
 
 
+def CallExitHandler(signum, _):
+  """Signal handler that transforms a signal into a call to exit.
+
+  This is useful because functionality registered by "atexit" will
+  be called. It also means you can "catch" the signal by catching
+  the SystemExit exception.
+  """
+  sys.exit(128 + signum)
+
+
 def Main(argv):
   parser = optparse.OptionParser(usage=Help().GetUsage(),
                                  description=Help().GetHelp(),
@@ -104,6 +115,7 @@
 
   json_report = experiment_file.GetGlobalSettings().GetField("json_report")
 
+  signal.signal(signal.SIGTERM, CallExitHandler)
   atexit.register(Cleanup, experiment)
 
   if options.dry_run:
diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py
index d7749b5..01324b0 100644
--- a/crosperf/experiment_runner.py
+++ b/crosperf/experiment_runner.py
@@ -180,6 +180,12 @@
         self._terminated = True
         self.l.LogError("Ctrl-c pressed. Cleaning up...")
         experiment.Terminate()
+        raise
+      except SystemExit:
+        self._terminate = True
+        self.l.LogError("Unexpected exit. Cleaning up...")
+        experiment.Terminate()
+        raise
     finally:
       if not experiment.locks_dir:
         self._UnlockAllMachines(experiment)
@@ -252,11 +258,14 @@
         benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp)
 
   def Run(self):
-    self._Run(self._experiment)
-    self._PrintTable(self._experiment)
-    if not self._terminated:
-      self._StoreResults(self._experiment)
-      self._Email(self._experiment)
+    try:
+      self._Run(self._experiment)
+    finally:
+      # Always print the report at the end of the run.
+      self._PrintTable(self._experiment)
+      if not self._terminated:
+        self._StoreResults(self._experiment)
+        self._Email(self._experiment)
 
 
 class MockExperimentRunner(ExperimentRunner):
diff --git a/crosperf/suite_runner.py b/crosperf/suite_runner.py
index f9a2ba3..6531045 100644
--- a/crosperf/suite_runner.py
+++ b/crosperf/suite_runner.py
@@ -154,10 +154,14 @@
     if self.log_level != "verbose":
       self._logger.LogOutput("Running test.")
       self._logger.LogOutput("CMD: %s" % command)
+    # Use --no-ns-pid so that cros_sdk does not create a different
+    # process namespace and we can kill process created easily by
+    # their process group.
     return self._ce.ChrootRunCommand(label.chromeos_root,
                                      command,
                                      True,
-                                     self._ct)
+                                     self._ct,
+                                     cros_sdk_options="--no-ns-pid")
 
   def RemoveTelemetryTempFile (self, machine, chromeos_root):
     filename = "telemetry@%s" % machine
@@ -208,20 +212,23 @@
                                               profiler_args,
                                               machine))
 
-    chrome_root_options = ""
-    chrome_root_options = (" --chrome_root={} --chrome_root_mount={} "
-                           " FEATURES=\"-usersandbox\" "
+    # Use --no-ns-pid so that cros_sdk does not create a different
+    # process namespace and we can kill process created easily by their
+    # process group.
+    chrome_root_options = ("--no-ns-pid "
+                           "--chrome_root={} --chrome_root_mount={} "
+                           "FEATURES=\"-usersandbox\" "
                            "CHROME_ROOT={}".format(label.chrome_src,
                                                     CHROME_MOUNT_DIR,
                                                     CHROME_MOUNT_DIR))
     if self.log_level != "verbose":
       self._logger.LogOutput("Running test.")
       self._logger.LogOutput("CMD: %s" % cmd)
-    return self._ce.ChrootRunCommand (label.chromeos_root,
-                                      cmd,
-                                      return_output=True,
-                                      command_terminator=self._ct,
-                                      cros_sdk_options=chrome_root_options)
+    return self._ce.ChrootRunCommand(label.chromeos_root,
+                                     cmd,
+                                     return_output=True,
+                                     command_terminator=self._ct,
+                                     cros_sdk_options=chrome_root_options)
 
 
   def Telemetry_Run(self, machine, label, benchmark, profiler_args):
diff --git a/utils/command_executer.py b/utils/command_executer.py
index 08a5dc7..851791f 100644
--- a/utils/command_executer.py
+++ b/utils/command_executer.py
@@ -8,6 +8,7 @@
 import os
 import re
 import select
+import signal
 import subprocess
 import tempfile
 import time
@@ -55,11 +56,7 @@
                  command_timeout=None,
                  terminated_timeout=10,
                  print_to_console=True):
-    """Run a command.
-
-    Note: As this is written, the stdin for the process executed is
-    not associated with the stdin of the caller of this routine.
-    """
+    """Run a command."""
 
     cmd = str(cmd)
 
@@ -84,13 +81,13 @@
         user = username + "@"
       cmd = "ssh -t -t %s%s -- '%s'" % (user, machine, cmd)
 
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
-                         stderr=subprocess.PIPE, shell=True)
-
-    # We explicitly disconect the client stdin from the command to
-    # execute by explicitly requesting a new pipe. Now, let's close it
-    # so that the executed process does not try to read from it.
-    p.stdin.close()
+    # We use setsid so that the child will have a different session id
+    # and we can easily kill the process group. This is also important
+    # because the child will be disassociated from the parent terminal.
+    # In this way the child cannot mess the parent's terminal.
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=True,
+                         preexec_fn=os.setsid)
 
     full_stdout = ""
     full_stderr = ""
@@ -108,15 +105,12 @@
 
     while len(pipes):
       if command_terminator and command_terminator.IsTerminated():
-        self.RunCommand("sudo kill -9 " + str(p.pid),
-                        print_to_console=print_to_console)
-        wait = p.wait()
+        os.killpg(os.getpgid(p.pid), signal.SIGTERM)
         if self.logger:
-          self.logger.LogError("Command was terminated!", print_to_console)
-        if return_output:
-          return (p.wait, full_stdout, full_stderr)
-        else:
-          return wait
+          self.logger.LogError("Command received termination request. "
+                               "Killed child process group.",
+                               print_to_console)
+        break
 
       l=my_poll.poll(100)
       for (fd, evt) in l:
@@ -144,21 +138,19 @@
           terminated_time = time.time()
         elif (terminated_timeout is not None and
               time.time() - terminated_time > terminated_timeout):
-          m = ("Timeout of %s seconds reached since process termination."
-               % terminated_timeout)
           if self.logger:
-            self.logger.LogWarning(m, print_to_console)
+            self.logger.LogWarning("Timeout of %s seconds reached since "
+                                   "process termination."
+                                   % terminated_timeout, print_to_console)
           break
 
       if (command_timeout is not None and
           time.time() - started_time > command_timeout):
-        m = ("Timeout of %s seconds reached since process started."
-             % command_timeout)
+        os.killpg(os.getpgid(p.pid), signal.SIGTERM)
         if self.logger:
-          self.logger.LogWarning(m, print_to_console)
-        self.RunCommand("kill %d || sudo kill %d || sudo kill -9 %d" %
-                        (p.pid, p.pid, p.pid),
-                        print_to_console=print_to_console)
+          self.logger.LogWarning("Timeout of %s seconds reached since process"
+                                 "started. Killed child process group."
+                                 % command_timeout, print_to_console)
         break
 
       if out == err == "":
@@ -448,15 +440,16 @@
       self.logger.LogCmd(cmd)
     elif self.logger:
       self.logger.LogCmdToFileOnly(cmd)
+
+    # We use setsid so that the child will have a different session id
+    # and we can easily kill the process group. This is also important
+    # because the child will be disassociated from the parent terminal.
+    # In this way the child cannot mess the parent's terminal.
     pobject = subprocess.Popen(
         cmd, cwd=cwd, bufsize=1024, env=env, shell=shell,
-        universal_newlines=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE)
-
-    # We explicitly disconect the client stdin from the command to
-    # execute by explicitly requesting a new pipe. Now, let's close it
-    # so that the executed process does not try to read from it.
-    pobject.stdin.close()
+        universal_newlines=True, stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE,
+        preexec_fn=os.setsid)
 
     # We provide a default line_consumer
     if line_consumer is None:
@@ -484,7 +477,7 @@
                 del handlermap[fd]
 
         if timeout is not None and (time.time() - start_time > timeout):
-            pobject.kill()
+            os.killpg(os.getpgid(pobject.pid), signal.SIGTERM)
 
     return pobject.wait()