autotest: move stop/start servod logical into servo_host and always stop servod after a task.

BUG=chromium:1028665
TEST=run test locally

Change-Id: I40918f7f0306e7f6fd1eed1dbea054dd0e87b2bc
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/autotest/+/1972693
Reviewed-by: Xixuan Wu <xixuan@chromium.org>
Reviewed-by: Gregory Nisbet <gregorynisbet@google.com>
Commit-Queue: Garry Wang <xianuowang@chromium.org>
Tested-by: Garry Wang <xianuowang@chromium.org>
diff --git a/server/hosts/servo_host.py b/server/hosts/servo_host.py
index 860f69f..28ba1fd 100644
--- a/server/hosts/servo_host.py
+++ b/server/hosts/servo_host.py
@@ -11,6 +11,7 @@
 
 import logging
 import os
+import time
 import traceback
 import xmlrpclib
 
@@ -40,6 +41,11 @@
         SERVO_SERIAL_ATTR,
 )
 
+# Timeout value for stop/start servod process.
+SERVOD_TEARDOWN_TIMEOUT = 3
+SERVOD_QUICK_STARTUP_TIMEOUT = 20
+SERVOD_STARTUP_TIMEOUT = 60
+
 _CONFIG = global_config.global_config
 ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
         'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
@@ -148,6 +154,7 @@
             self.rpc_server_tracker.disconnect(self.servo_port)
             self._servo = None
 
+
     def _create_servod_server_proxy(self):
         """Create a proxy that can be used to communicate with servod server.
 
@@ -189,6 +196,7 @@
             self._repair_strategy.verify(self, silent)
         except:
             self.disconnect_servo()
+            self.stop_servod()
             raise
 
 
@@ -208,6 +216,7 @@
                 self.withdraw_reboot_request()
         except:
             self.disconnect_servo()
+            self.stop_servod()
             raise
 
 
@@ -238,6 +247,55 @@
         self.run('rm -f %s' % self._reboot_file, ignore_status=True)
 
 
+    def start_servod(self, quick_startup=False):
+        """Start the servod process on servohost.
+        """
+        if self.servo_board:
+            cmd = 'start servod BOARD=%s' % self.servo_board
+            if self.servo_model:
+                cmd += ' MODEL=%s' % self.servo_model
+            cmd += ' PORT=%d' % self.servo_port
+            if self.servo_serial:
+                cmd += ' SERIAL=%s' % self.servo_serial
+            self.run(cmd)
+        else:
+            raise hosts.AutoservVerifyError('Servo board is not configured!')
+
+        # There's a lag between when `start servod` completes and when
+        # the _ServodConnectionVerifier trigger can actually succeed.
+        # The call to time.sleep() below gives time to make sure that
+        # the trigger won't fail after we return.
+
+        # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
+        # But in the rare case all servo on a labstation are in heavy use they
+        # may take ~30 seconds. So the timeout value will double these value,
+        # and we'll try quick start up when first time initialize servohost,
+        # and use standard start up timeout in repair.
+        if quick_startup:
+            timeout = SERVOD_QUICK_STARTUP_TIMEOUT
+        else:
+            timeout = SERVOD_STARTUP_TIMEOUT
+        logging.debug('Wait %s seconds for servod process fully up.', timeout)
+        time.sleep(timeout)
+
+
+    def stop_servod(self):
+        """Stop the servod process on servohost.
+        """
+        logging.debug('Stopping servod on port %s', self.servo_port)
+        self.run('stop servod PORT=%d' % self.servo_port, ignore_status=True)
+        logging.debug('Wait %s seconds for servod process fully teardown.',
+                      SERVOD_TEARDOWN_TIMEOUT)
+        time.sleep(SERVOD_TEARDOWN_TIMEOUT)
+
+
+    def restart_servod(self, quick_startup=False):
+        """Restart the servod process on servohost.
+        """
+        self.stop_servod()
+        self.start_servod(quick_startup)
+
+
     def _lock(self):
         """lock servohost by touching a file.
         """
@@ -273,6 +331,10 @@
                               ' It may caused by servohost went down during'
                               ' the task.')
 
+        # We want always stop servod after task to minimum the impact of bad
+        # servod process interfere other servods.(see crbug.com/1028665)
+        self.stop_servod()
+
         super(ServoHost, self).close()
 
 
@@ -448,6 +510,7 @@
         return None
 
     newhost = ServoHost(**servo_args)
+    newhost.restart_servod(quick_startup=True)
 
     # TODO(gregorynisbet): Clean all of this up.
     logging.debug('create_servo_host: attempt to set info store on '
diff --git a/server/hosts/servo_repair.py b/server/hosts/servo_repair.py
index f553be2..ecddfa1 100644
--- a/server/hosts/servo_repair.py
+++ b/server/hosts/servo_repair.py
@@ -3,7 +3,6 @@
 # found in the LICENSE file.
 
 import logging
-import time
 
 import common
 from autotest_lib.client.common_lib import hosts
@@ -281,35 +280,7 @@
                     'Can\'t restart servod: not running '
                     'embedded Chrome OS.',
                     'servo_not_applicable_to_non_cros_host')
-        host.run('stop servod PORT=%d || true' % host.servo_port)
-        # Wait for existing servod process turned down.
-        time.sleep(3)
-
-        serial = 'SERIAL=%s' % host.servo_serial if host.servo_serial else ''
-        model = 'MODEL=%s' % host.servo_model if host.servo_model else ''
-        if host.servo_board:
-            host.run('start servod BOARD=%s %s PORT=%d %s' %
-                     (host.servo_board, model, host.servo_port, serial))
-        else:
-            # TODO(jrbarnette):  It remains to be seen whether
-            # this action is the right thing to do...
-            logging.warning('Board for DUT is unknown; starting '
-                            'servod assuming a pre-configured '
-                            'board.')
-            host.run('start servod PORT=%d %s' % (host.servo_port, serial))
-        # There's a lag between when `start servod` completes and when
-        # the _ServodConnectionVerifier trigger can actually succeed.
-        # The call to time.sleep() below gives time to make sure that
-        # the trigger won't fail after we return.
-        #
-        # The delay selection was based on empirical testing against
-        # servo V3 on a desktop:
-        #   + 10 seconds was usually too slow; 11 seconds was
-        #     usually fast enough.
-        #   + So, the 20 second delay is about double what we
-        #     expect to need.
-        time.sleep(20)
-
+        host.restart_servod()
 
     @property
     def description(self):