[autotest] Scheduler, drone_manager, drone_utility stats.

Adds some useful stats to drone manager, handle agents and
drone utility. These stats should help us track processes,
figure out where the drone_manager latency is coming from
and draw correlations between the number of agents scheduled
and drone refresh time.

This CL also moves site_drone_utility's kill_process method
into drone_utility, and modifies the nuke_pids function to only
wait on and kill processes that haven't already died.

TEST=Ran suites.
BUG=chromium:400486
DEPLOY=scheduler

Change-Id: I56e6ee05fa2ae1935435dbc2055d7f99a9a89e5e
Reviewed-on: https://chromium-review.googlesource.com/211769
Reviewed-by: Prashanth B <beeps@chromium.org>
Commit-Queue: Prashanth B <beeps@chromium.org>
Tested-by: Prashanth B <beeps@chromium.org>
diff --git a/client/common_lib/site_utils.py b/client/common_lib/site_utils.py
index d5fe764..8f44a9e 100644
--- a/client/common_lib/site_utils.py
+++ b/client/common_lib/site_utils.py
@@ -222,9 +222,17 @@
 
     @param pid_list: List of PID's to kill.
     @param signal_queue: Queue of signals to send the PID's to terminate them.
+
+    @return: A mapping of the signal name to the number of processes it
+        was sent to.
     """
+    sig_count = {}
+    # Though this is slightly hacky it beats hardcoding names any day.
+    sig_names = dict((k, v) for v, k in signal.__dict__.iteritems()
+                     if v.startswith('SIG'))
     for sig in signal_queue:
         logging.debug('Sending signal %s to the following pids:', sig)
+        sig_count[sig_names.get(sig, 'unknown_signal')] = len(pid_list)
         for pid in pid_list:
             logging.debug('Pid %d', pid)
             try:
@@ -233,10 +241,13 @@
                 # The process may have died from a previous signal before we
                 # could kill it.
                 pass
+        pid_list = [pid for pid in pid_list if base_utils.pid_is_alive(pid)]
+        if not pid_list:
+            break
         time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)
     failed_list = []
     if signal.SIGKILL in signal_queue:
-        return
+        return sig_count
     for pid in pid_list:
         if base_utils.pid_is_alive(pid):
             failed_list.append('Could not kill %d for process name: %s.' % pid,
@@ -244,6 +255,7 @@
     if failed_list:
         raise error.AutoservRunError('Following errors occured: %s' %
                                      failed_list, None)
+    return sig_count
 
 
 def externalize_host(host):
@@ -410,4 +422,4 @@
     try:
         return int(base_utils.system_output('pgrep -o ^X$')) > 0
     except Exception:
-        return False
\ No newline at end of file
+        return False