autotest: add network_WiFi_LinkMonitorFailure test

This test checks how quickly the DUT detects a link failure when the
network link goes down (simulated by restarting the local server with a
different gateway IP), and how quickly it reconnects after the failure.

BUG=chromium:469761
TEST=Run this test
CQ-DEPEND=CL:264459

Change-Id: Ibf8c74f0361798b6f099ba9c9282d96d4b0d741c
Reviewed-on: https://chromium-review.googlesource.com/264457
Reviewed-by: Zeping Qiu <zqiu@chromium.org>
Commit-Queue: Zeping Qiu <zqiu@chromium.org>
Tested-by: Zeping Qiu <zqiu@chromium.org>
diff --git a/client/common_lib/cros/network/iw_event_logger.py b/client/common_lib/cros/network/iw_event_logger.py
index 597baed..81a2c57 100644
--- a/client/common_lib/cros/network/iw_event_logger.py
+++ b/client/common_lib/cros/network/iw_event_logger.py
@@ -19,6 +19,7 @@
         self._command_iw = command_iw
         self._local_file = local_file
         self._pid = None
+        self._start_time = 0
 
 
     def __enter__(self):
@@ -44,7 +45,10 @@
         """
         command = '%s event -t > %s & echo $!' % (self._command_iw,
                                                   IW_REMOTE_EVENT_LOG_FILE)
-        self._pid = int(self._host.run(command).stdout)
+        command += ';date +%s'
+        out_lines = self._host.run(command).stdout.splitlines()
+        self._pid = int(out_lines[0])
+        self._start_time = float(out_lines[1])
 
 
     def stop(self):
@@ -138,3 +142,20 @@
         """
         return [entry.message.startswith('disconnected')
                 for entry in self.get_log_entries()].count(True)
+
+
+    def get_time_to_disconnected(self):
+        """Return disconnect time.
+
+        This function searches the iw event log to determine the number of
+        seconds between the time the iw event logger was started and the
+        time the first "disconnected" event was received.
+
+        @return float number of seconds between the time the iw event logger
+                was started and the time the first "disconnected" event was
+                received, or None if the log has no "disconnected" event.
+        """
+        for entry in self.get_log_entries():
+            if entry.message.startswith('disconnected'):
+                return entry.timestamp - self._start_time
+        return None
diff --git a/server/site_linux_router.py b/server/site_linux_router.py
index 5684338..74d109b 100644
--- a/server/site_linux_router.py
+++ b/server/site_linux_router.py
@@ -179,6 +179,7 @@
 
         self._total_hostapd_instances = 0
         self.local_servers = []
+        self.server_address_index = []
         self.hostapd_instances = []
         self.station_instances = []
         self.dhcp_low = 1
@@ -469,10 +470,46 @@
         return iface.mac_address
 
 
-    def start_local_server(self, interface):
+    def _get_unused_server_address_index(self):
+        """@return an unused server address index."""
+        for address_index in range(0, 256):
+            if address_index not in self.server_address_index:
+                return address_index
+        raise error.TestFail('No available server address index')
+
+
+    def change_server_address_index(self, ap_num=0, server_address_index=None):
+        """Restart the local server with a different server address index.
+
+        This will restart the local server with different gateway IP address
+        and DHCP address ranges.
+
+        @param ap_num: int hostapd instance number.
+        @param server_address_index: int server address index.
+
+        """
+        interface = self.local_servers[ap_num]['interface'];
+        # Get an unused server address index if one is not specified, which
+        # will be different from the one that's currently in use.
+        if server_address_index is None:
+            server_address_index = self._get_unused_server_address_index()
+
+        # Restart local server with the new server address index.
+        self.stop_local_server(self.local_servers[ap_num])
+        self.start_local_server(interface,
+                                ap_num=ap_num,
+                                server_address_index=server_address_index)
+
+
+    def start_local_server(self,
+                           interface,
+                           ap_num=None,
+                           server_address_index=None):
         """Start a local server on an interface.
 
         @param interface string (e.g. wlan0)
+        @param ap_num int the ap instance to start the server for
+        @param server_address_index int server address index
 
         """
         logging.info('Starting up local server...')
@@ -480,11 +517,20 @@
         if len(self.local_servers) >= 256:
             raise error.TestFail('Exhausted available local servers')
 
+        # Get an unused server address index if one is not specified.
+        # Validate server address index if one is specified.
+        if server_address_index is None:
+            server_address_index = self._get_unused_server_address_index()
+        elif server_address_index in self.server_address_index:
+            raise error.TestFail('Server address index %d already in used' %
+                                 server_address_index)
+
         server_addr = netblock.from_addr(
-                self.local_server_address(len(self.local_servers)),
+                self.local_server_address(server_address_index),
                 prefix_len=24)
 
         params = {}
+        params['address_index'] = server_address_index
         params['netblock'] = server_addr
         params['dhcp_range'] = ' '.join(
             (server_addr.get_addr_in_block(1),
@@ -494,7 +540,11 @@
                                (server_addr.netblock,
                                 server_addr.broadcast,
                                 interface))
-        self.local_servers.append(params)
+        if ap_num is None:
+            self.local_servers.append(params)
+        else:
+            self.local_servers.insert(ap_num, params)
+        self.server_address_index.append(server_address_index)
 
         self.router.run('%s addr flush %s' %
                         (self.cmd_ip, interface))
@@ -505,6 +555,20 @@
         self.start_dhcp_server(interface)
 
 
+    def stop_local_server(self, server):
+        """Stop a local server on the router.
+
+        @param server dict of server configuration parameters.
+
+        """
+        self.stop_dhcp_server(server['interface'])
+        self.router.run("%s addr del %s" %
+                        (self.cmd_ip, server['ip_params']),
+                        ignore_status=True)
+        self.server_address_index.remove(server['address_index'])
+        self.local_servers.remove(server)
+
+
     def start_dhcp_server(self, interface):
         """Start a dhcp server on an interface.
 
@@ -663,13 +727,11 @@
                 for server in self.local_servers:
                     if server['interface'] == instances[0].interface:
                         local_servers = [server]
-                        self.local_servers.remove(server)
                         break
             else:
                 instances = self.hostapd_instances
                 self.hostapd_instances = []
                 local_servers = self.local_servers
-                self.local_servers = []
 
             for instance in instances:
                 if silent:
@@ -681,7 +743,6 @@
                 self.release_interface(instance.interface)
         if self.station_instances:
             local_servers = self.local_servers
-            self.local_servers = []
             instance = self.station_instances.pop()
             if instance.dev_type == 'ibss':
                 self.iw_runner.ibss_leave(instance.interface)
@@ -694,10 +755,7 @@
                             (self.cmd_ip, instance.interface))
 
         for server in local_servers:
-            self.stop_dhcp_server(server['interface'])
-            self.router.run("%s addr del %s" %
-                            (self.cmd_ip, server['ip_params']),
-                             ignore_status=True)
+            self.stop_local_server(server)
 
 
     def set_ap_interface_down(self, instance=0):
diff --git a/server/site_tests/network_WiFi_LinkMonitorFailure/control b/server/site_tests/network_WiFi_LinkMonitorFailure/control
new file mode 100644
index 0000000..ed55066
--- /dev/null
+++ b/server/site_tests/network_WiFi_LinkMonitorFailure/control
@@ -0,0 +1,24 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+AUTHOR = 'zqiu, wiley, pstew, quiche'
+NAME = 'network_WiFi_LinkMonitorFailure'
+TIME = 'SHORT'
+TEST_TYPE = 'Server'
+SUITE = 'wifi_matfunc'
+DEPENDENCIES = 'wificell'
+
+DOC = """
+This test checks how fast the DUT detects the link failure when an AP changes
+its DHCP configuration, and how fast the DUT reconnects after the failure.
+"""
+
+
+def run(machine):
+    job.run_test('network_WiFi_LinkMonitorFailure',
+                 host=hosts.create_host(machine),
+                 raw_cmdline_args=args)
+
+
+parallel_simple(run, machines)
diff --git a/server/site_tests/network_WiFi_LinkMonitorFailure/network_WiFi_LinkMonitorFailure.py b/server/site_tests/network_WiFi_LinkMonitorFailure/network_WiFi_LinkMonitorFailure.py
new file mode 100644
index 0000000..2ee5dec
--- /dev/null
+++ b/server/site_tests/network_WiFi_LinkMonitorFailure/network_WiFi_LinkMonitorFailure.py
@@ -0,0 +1,70 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import logging
+import time
+
+from autotest_lib.client.common_lib import error
+from autotest_lib.client.common_lib.cros.network import xmlrpc_datatypes
+from autotest_lib.server.cros.network import hostap_config
+from autotest_lib.server.cros.network import wifi_cell_test_base
+
+
+class network_WiFi_LinkMonitorFailure(wifi_cell_test_base.WiFiCellTestBase):
+    """Test how a DUT behaves when the network link disappears.
+
+    Connects a DUT to an AP, then silently changes the gateway IP on the AP
+    to simulate network link disappearance. Determines the time the DUT takes
+    to detect link failure and the time for the subsequent reassociation
+    request.
+
+    """
+
+    version = 1
+
+    # Passive link monitor takes 25 seconds to fail, active link monitor
+    # takes up to 50 seconds to fail (unicast ARP failures don't count since
+    # unicast ARP gateway support is not established).
+    LINK_FAILURE_MAX_SECONDS = 80
+    REASSOCIATE_TIMEOUT_SECONDS = 10
+
+    def run_once(self):
+        """Body of the test."""
+        # Establish a connection with an AP.
+        ap_config = hostap_config.HostapConfig(channel=1)
+        self.context.configure(ap_config)
+        ssid = self.context.router.get_ssid()
+        client_config = xmlrpc_datatypes.AssociationParameters(ssid=ssid)
+        self.context.assert_connect_wifi(client_config)
+        self.context.assert_ping_from_dut()
+
+        # Restart local server with a different address index. This will
+        # simulate the disappearance of the network link from the client's
+        # point of view.
+        logging.info("Restart local server with different address")
+        self.context.router.change_server_address_index()
+        with self.context.client.iw_runner.get_event_logger() as logger:
+            logger.start()
+            # Wait long enough for link failure detection and reassociation
+            # to complete.
+            time.sleep(self.LINK_FAILURE_MAX_SECONDS +
+                       self.REASSOCIATE_TIMEOUT_SECONDS)
+            logger.stop()
+
+            # Link failure detection time.
+            link_failure_time = logger.get_time_to_disconnected()
+            if (link_failure_time is None or
+                link_failure_time > self.LINK_FAILURE_MAX_SECONDS):
+                raise error.TestFail(
+                        'Failed to detect link failure within given timeout')
+            logging.info('Link failure detection time: %.2f seconds',
+                         link_failure_time)
+
+            # Reassociation time.
+            reassociate_time = logger.get_reassociation_time()
+            if (reassociate_time is None or
+                reassociate_time > self.REASSOCIATE_TIMEOUT_SECONDS):
+                raise error.TestFail(
+                        'Failed to reassociate within given timeout')
+            logging.info('Reassociate time: %.2f seconds', reassociate_time)