blob: 2e9821e549e67ec65ac950f199574c68467de25a [file] [log] [blame]
Ilja H. Friedelbee84a72016-09-28 15:57:06 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5# repohooks/pre-upload.py currently does not run pylint. But for developers who
6# want to check their code manually we disable several harmless pylint warnings
7# which just distract from more serious remaining issues.
8#
9# The instance variables _host and _install_paths are not defined in __init__().
10# pylint: disable=attribute-defined-outside-init
11#
12# Many short variable names don't follow the naming convention.
13# pylint: disable=invalid-name
14#
15# _parse_result() and _dir_size() don't access self and could be functions.
16# pylint: disable=no-self-use
17#
18# _ChromeLogin and _TradefedLogCollector have no public methods.
19# pylint: disable=too-few-public-methods
20
21import contextlib
22import errno
23import hashlib
24import logging
25import os
26import pipes
27import random
28import re
29import shutil
30import stat
31import tempfile
32import urlparse
33
34from autotest_lib.client.bin import utils as client_utils
35from autotest_lib.client.common_lib import error
36from autotest_lib.client.common_lib.cros import dev_server
37from autotest_lib.server import afe_utils
38from autotest_lib.server import autotest
39from autotest_lib.server import test
40from autotest_lib.server import utils
41from autotest_lib.site_utils import lxc
42
43try:
44 import lockfile
45except ImportError:
46 if utils.is_in_container():
47 # Ensure the container has the required packages installed.
48 lxc.install_packages(python_packages=['lockfile'])
49 import lockfile
50 else:
51 raise
52
53
# Google Storage bucket holding a pinned build of the SDK tools (aapt).
_SDK_TOOLS_DIR = ('gs://chromeos-arc-images/builds/'
                  'git_mnc-dr-arc-dev-linux-static_sdk_tools/3264272')
_SDK_TOOLS_FILES = ['aapt']
# To stabilize adb behavior, we use dynamically linked adb.
_ADB_DIR = ('gs://chromeos-arc-images/builds/'
            'git_mnc-dr-arc-dev-linux-cheets_arm-user/3264272')
_ADB_FILES = ['adb']

# Polling cadence and deadline while waiting for adb to accept connections.
_ADB_POLLING_INTERVAL_SECONDS = 1
_ADB_READY_TIMEOUT_SECONDS = 60
# Location (inside the Android container) of the authorized adb public keys.
_ANDROID_ADB_KEYS_PATH = '/data/misc/adb/adb_keys'

# Polling cadence and deadline while waiting for ARC to finish booting.
_ARC_POLLING_INTERVAL_SECONDS = 1
_ARC_READY_TIMEOUT_SECONDS = 60

# Prefix for the per-job temporary install directory (unshared).
_TRADEFED_PREFIX = 'autotest-tradefed-install_'
# Cache roots: local path when running outside the lab, shared container
# path (plus its lock file) when running inside an lxc container.
_TRADEFED_CACHE_LOCAL = '/tmp/autotest-tradefed-cache'
_TRADEFED_CACHE_CONTAINER = '/usr/local/autotest/results/shared/cache'
_TRADEFED_CACHE_CONTAINER_LOCK = '/usr/local/autotest/results/shared/lock'

# According to dshi a drone has 500GB of disk space. It is ok for now to use
# 10GB of disk space, as no more than 10 tests should run in parallel.
# TODO(ihf): Investigate tighter cache size.
_TRADEFED_CACHE_MAX_SIZE = (10 * 1024 * 1024 * 1024)
78
79
class _ChromeLogin(object):
    """Context manager to handle Chrome login state.

    Entering runs the cheets_CTSHelper client test to log in and bring up
    Android; exiting reboots the DUT so no login state leaks to later runs.
    """

    def __init__(self, host):
        self._host = host

    def __enter__(self):
        """Logs in to the Chrome."""
        logging.info('Ensure Android is running...')
        autotest.Autotest(self._host).run_test('cheets_CTSHelper',
                                               check_client_result=True)

    def __exit__(self, exc_type, exc_value, traceback):
        """On exit, to wipe out all the login state, reboot the machine.

        @param exc_type: Exception type if an exception is raised from the
                         with-block.
        @param exc_value: Exception instance if an exception is raised from
                          the with-block.
        @param traceback: Stack trace info if an exception is raised from
                          the with-block.
        @return None, indicating not to ignore an exception from the
                with-block if raised.
        """
        logging.info('Rebooting...')
        try:
            self._host.reboot()
        except Exception:
            # A failed reboot is only swallowed (and logged) when the
            # with-block itself raised, so the original exception is not
            # masked; otherwise the reboot failure is surfaced.
            if exc_type is not None:
                logging.exception('Rebooting failed.')
            else:
                raise
114
115
@contextlib.contextmanager
def lock(filename):
    """Prevents other autotest/tradefed instances from accessing cache."""
    filelock = lockfile.FileLock(filename)
    # It is tempting just to call filelock.acquire(3600). But the
    # implementation has very poor temporal granularity (timeout/10), which
    # is unsuitable for our needs. Instead retry with short random
    # timeouts. See /usr/lib64/python2.7/site-packages/lockfile/
    while not filelock.i_am_locking():
        logging.info('Waiting for cache lock...')
        try:
            filelock.acquire(random.randint(1, 5))
        except (lockfile.AlreadyLocked, lockfile.LockTimeout):
            continue
        logging.info('Acquired cache lock.')
    try:
        yield
    finally:
        filelock.release()
        logging.info('Released cache lock.')
136
137
class TradefedTest(test.test):
    """Base class to prepare DUT to run tests via tradefed."""
    version = 1

    def initialize(self, host=None):
        """Sets up the tools and binary bundles for the test.

        @param host: Host object representing the DUT.
        """
        logging.info('Hostname: %s', host.hostname)
        self._host = host
        self._install_paths = []
        # Tests in the lab run within individual lxc container instances.
        if utils.is_in_container():
            # Ensure the container has the required packages installed.
            lxc.install_packages(packages=['unzip', 'default-jre'])
            cache_root = _TRADEFED_CACHE_CONTAINER
        else:
            cache_root = _TRADEFED_CACHE_LOCAL
        # The content of the cache survives across jobs.
        self._safe_makedirs(cache_root)
        self._tradefed_cache = os.path.join(cache_root, 'cache')
        self._tradefed_cache_lock = os.path.join(cache_root, 'lock')
        # The content of the install location does not survive across jobs
        # and is isolated (by using a unique path) against other autotest
        # instances. This is not needed for the lab, but if somebody wants
        # to run multiple TradefedTest instances.
        self._tradefed_install = tempfile.mkdtemp(prefix=_TRADEFED_PREFIX)
        # Under lxc the cache is shared between multiple autotest/tradefed
        # instances. We need to synchronize access to it. All binaries are
        # installed through the (shared) cache into the local (unshared)
        # lxc/autotest instance storage.
        # If clearing the cache it must happen before all downloads.
        self._clear_download_cache_if_needed()
        # Set permissions (rwxr-xr-x) to the executable binaries.
        permission = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                      | stat.S_IROTH | stat.S_IXOTH)
        self._install_files(_ADB_DIR, _ADB_FILES, permission)
        self._install_files(_SDK_TOOLS_DIR, _SDK_TOOLS_FILES, permission)

    def cleanup(self):
        """Cleans up any dirtied state."""
        # Kill any lingering adb servers.
        self._run('adb', verbose=True, args=('kill-server',))
        logging.info('Cleaning up %s.', self._tradefed_install)
        shutil.rmtree(self._tradefed_install)

    def _login_chrome(self):
        """Returns Chrome log-in context manager.

        Please see also cheets_CTSHelper for details about how this works.
        """
        return _ChromeLogin(self._host)

    def _try_adb_connect(self):
        """Attempts to connect to adb on the DUT.

        @return boolean indicating if adb connected successfully.
        """
        # This may fail return failure due to a race condition in adb
        # connect (b/29370989). If adb is already connected, this command
        # will immediately return success.
        hostport = '{}:{}'.format(self._host.hostname, self._host.port)
        result = self._run(
            'adb',
            args=('connect', hostport),
            verbose=True,
            ignore_status=True)
        logging.info('adb connect {}:\n{}'.format(hostport, result.stdout))
        if result.exit_status != 0:
            return False

        result = self._run('adb', args=('devices',))
        logging.info('adb devices:\n%s', result.stdout)
        if not re.search(
                r'{}\s+(device|unauthorized)'.format(re.escape(hostport)),
                result.stdout):
            return False

        # Actually test the connection with an adb command as there can be
        # a race between detecting the connected device and actually being
        # able to run a command with authenticated adb.
        result = self._run('adb', args=('shell', 'exit'), ignore_status=True)
        return result.exit_status == 0

    def _android_shell(self, command):
        """Run a command remotely on the device in an android shell

        This function is strictly for internal use only, as commands do not
        run in a fully consistent Android environment. Prefer adb shell
        instead.

        @param command: Shell command line to run inside the container.
        """
        self._host.run('android-sh -c ' + pipes.quote(command))

    def _write_android_file(self, filename, data):
        """Writes a file to a location relative to the android container.

        This is an internal function used to bootstrap adb.
        Tests should use adb push to write files.

        @param filename: Destination path inside the Android container.
        @param data: File content to write.
        """
        android_cmd = 'echo %s > %s' % (pipes.quote(data),
                                        pipes.quote(filename))
        self._android_shell(android_cmd)

    def _connect_adb(self):
        """Sets up ADB connection to the ARC container."""
        logging.info('Setting up adb connection.')
        # Generate and push keys for adb.
        # TODO(elijahtaylor): Extract this code to arc_common and
        # de-duplicate code in arc.py on the client side tests.
        key_path = os.path.join(self.tmpdir, 'test_key')
        pubkey_path = key_path + '.pub'
        self._run('adb', verbose=True, args=('keygen', pipes.quote(key_path)))
        with open(pubkey_path, 'r') as f:
            self._write_android_file(_ANDROID_ADB_KEYS_PATH, f.read())
        self._android_shell('restorecon ' +
                            pipes.quote(_ANDROID_ADB_KEYS_PATH))
        os.environ['ADB_VENDOR_KEYS'] = key_path

        # Kill existing adb server to ensure that the env var is picked up.
        self._run('adb', verbose=True, args=('kill-server',))

        # This starts adbd.
        self._android_shell('setprop sys.usb.config mtp,adb')

        # adbd may take some time to come up. Repeatedly try to connect.
        utils.poll_for_condition(lambda: self._try_adb_connect(),
                                 exception=error.TestError('Failed to set up '
                                                           'adb connection'),
                                 timeout=_ADB_READY_TIMEOUT_SECONDS,
                                 sleep_interval=_ADB_POLLING_INTERVAL_SECONDS)

        logging.info('Successfully setup adb connection.')

    def _wait_for_arc_boot(self):
        """Wait until ARC is fully booted.

        Tests for the presence of the intent helper app to determine whether
        ARC has finished booting.
        """
        def intent_helper_running():
            result = self._run('adb', args=('shell', 'pgrep',
                                            'org.chromium.arc.intent_helper'))
            return bool(result.stdout)
        utils.poll_for_condition(
            intent_helper_running,
            exception=error.TestError('Timed out waiting for intent helper.'),
            timeout=_ARC_READY_TIMEOUT_SECONDS,
            sleep_interval=_ARC_POLLING_INTERVAL_SECONDS)

    def _disable_adb_install_dialog(self):
        """Disables a dialog shown on adb install execution.

        By default, on adb install execution, "Allow Google to regularly
        check device activity ... " dialog is shown. It requires manual user
        action so that tests are blocked at the point.
        This method disables it.
        """
        logging.info('Disabling the adb install dialog.')
        result = self._run(
            'adb',
            verbose=True,
            args=(
                'shell',
                'settings',
                'put',
                'global',
                'verifier_verify_adb_installs',
                '0'))
        logging.info('Disable adb dialog: %s', result.stdout)

    def _ready_arc(self):
        """Ready ARC and adb for running tests via tradefed."""
        self._connect_adb()
        self._disable_adb_install_dialog()
        self._wait_for_arc_boot()

    def _safe_makedirs(self, path):
        """Creates a directory at |path| and its ancestors.

        Unlike os.makedirs(), ignore errors even if directories exist.

        @param path: Directory path to create.
        """
        try:
            os.makedirs(path)
        except OSError as e:
            # Only swallow "already exists and is a directory"; re-raise
            # real failures (permissions, file in the way, ...).
            if not (e.errno == errno.EEXIST and os.path.isdir(path)):
                raise

    def _unzip(self, filename):
        """Unzip the file.

        The destination directory name will be the stem of filename.
        E.g., _unzip('foo/bar/baz.zip') will create directory at
        'foo/bar/baz', and then will inflate zip's content under the
        directory. If there is already a directory at the stem, that
        directory will be used.

        @param filename: Path to the zip archive.
        @return Path to the inflated directory.
        """
        destination = os.path.splitext(filename)[0]
        # An existing directory is assumed to hold a previous inflation.
        if os.path.isdir(destination):
            return destination
        self._safe_makedirs(destination)
        utils.run('unzip', args=('-d', destination, filename))
        return destination

    def _dir_size(self, directory):
        """Compute recursive size in bytes of directory.

        @param directory: Root directory to measure.
        @return Total size in bytes of all files below |directory|.
        """
        size = 0
        for root, _, files in os.walk(directory):
            size += sum(os.path.getsize(os.path.join(root, name))
                        for name in files)
        return size

    def _clear_download_cache_if_needed(self):
        """Invalidates cache to prevent it from growing too large."""
        # If the cache is large enough to hold a working set, we can simply
        # delete everything without thrashing.
        # TODO(ihf): Investigate strategies like LRU.
        with lock(self._tradefed_cache_lock):
            size = self._dir_size(self._tradefed_cache)
            if size > _TRADEFED_CACHE_MAX_SIZE:
                logging.info('Current cache size=%d got too large. '
                             'Clearing %s.', size, self._tradefed_cache)
                shutil.rmtree(self._tradefed_cache)
                self._safe_makedirs(self._tradefed_cache)
            else:
                logging.info('Current cache size=%d of %s.', size,
                             self._tradefed_cache)

    def _download_to_cache(self, uri):
        """Downloads the uri from the storage server.

        It always checks the cache for available binaries first and skips
        download if binaries are already in cache.

        The caller of this function is responsible for holding the cache
        lock.

        @param uri: The Google Storage or dl.google.com uri.
        @return Path to the downloaded object, name.
        """
        # Split uri into 3 pieces for use by gsutil and also by wget.
        parsed = urlparse.urlparse(uri)
        filename = os.path.basename(parsed.path)
        # We are hashing the uri instead of the binary. This is acceptable,
        # as the uris are supposed to contain version information and an
        # object is not supposed to be changed once created.
        output_dir = os.path.join(self._tradefed_cache,
                                  hashlib.md5(uri).hexdigest())
        output = os.path.join(output_dir, filename)
        # Check for existence of file.
        if os.path.exists(output):
            logging.info('Skipping download of %s, reusing %s.', uri, output)
            return output
        self._safe_makedirs(output_dir)

        if parsed.scheme not in ['gs', 'http', 'https']:
            raise error.TestError('Unknown download scheme %s' %
                                  parsed.scheme)
        if parsed.scheme in ['http', 'https']:
            logging.info('Using wget to download %s to %s.', uri, output_dir)
            # We are downloading 1 file at a time, hence using -O over -P.
            # We also limit the rate to 20MBytes/s
            utils.run(
                'wget',
                args=(
                    '--report-speed=bits',
                    '--limit-rate=20M',
                    '-O',
                    output,
                    uri),
                verbose=True)
            return output

        if not client_utils.is_moblab():
            # If the machine can access to the storage server directly,
            # defer to "gsutil" for downloading.
            logging.info('Host %s not in lab. Downloading %s directly to %s.',
                         self._host.hostname, uri, output)
            # b/17445576: gsutil rsync of individual files is not
            # implemented.
            utils.run('gsutil', args=('cp', uri, output), verbose=True)
            return output

        # We are in the moblab. Because the machine cannot access the
        # storage server directly, use dev server to proxy.
        logging.info('Host %s is in lab. Downloading %s by staging to %s.',
                     self._host.hostname, uri, output)

        dirname = os.path.dirname(parsed.path)
        archive_url = '%s://%s%s' % (parsed.scheme, parsed.netloc, dirname)

        # First, request the devserver to download files into the lab
        # network.
        # TODO(ihf): Switch stage_artifacts to honor rsync. Then we don't
        # have to shuffle files inside of tarballs.
        build = afe_utils.get_build(self._host)
        ds = dev_server.ImageServer.resolve(build)
        ds.stage_artifacts(build, files=[filename], archive_url=archive_url)

        # Then download files from the dev server.
        # TODO(ihf): use rsync instead of wget. Are there 3 machines
        # involved? Itself, dev_server plus DUT? Or is there just no rsync
        # in moblab?
        ds_src = '/'.join([ds.url(), 'static', dirname, filename])
        logging.info('dev_server URL: %s', ds_src)
        # Calls into DUT to pull uri from dev_server.
        # Note: tuple(ds_src) would have exploded the URL string into
        # single characters; the URL must be appended as one argument.
        utils.run(
            'wget',
            args=(
                '--report-speed=bits',
                '--limit-rate=20M',
                '-O',
                output,
                ds_src),
            verbose=True)
        return output

    def _instance_copy(self, cache_path):
        """Makes a copy of a file from the (shared) cache to a wholly owned
        local instance. Also copies one level of cache directory (MD5
        named).

        @param cache_path: Path of the file inside the shared cache.
        @return Path of the private per-instance copy.
        """
        filename = os.path.basename(cache_path)
        dirname = os.path.basename(os.path.dirname(cache_path))
        instance_dir = os.path.join(self._tradefed_install, dirname)
        # Make sure destination directory is named the same.
        self._safe_makedirs(instance_dir)
        instance_path = os.path.join(instance_dir, filename)
        shutil.copyfile(cache_path, instance_path)
        return instance_path

    def _install_bundle(self, gs_uri):
        """Downloads a zip file, installs it and returns the local path.

        @param gs_uri: Google Storage uri of a .zip bundle.
        @return Path to the inflated bundle directory.
        """
        if not gs_uri.endswith('.zip'):
            # Exceptions do not %-format their arguments; build the message
            # explicitly.
            raise error.TestError('Not a .zip file %s.' % gs_uri)
        # Atomic write through of file.
        with lock(self._tradefed_cache_lock):
            cache_path = self._download_to_cache(gs_uri)
            local = self._instance_copy(cache_path)
        return self._unzip(local)

    def _install_files(self, gs_dir, files, permission):
        """Installs binary tools.

        @param gs_dir: Google Storage directory holding the files.
        @param files: List of file names to download and install.
        @param permission: Mode bits (as for os.chmod) to set on each file.
        """
        for filename in files:
            gs_uri = os.path.join(gs_dir, filename)
            # Atomic write through of file.
            with lock(self._tradefed_cache_lock):
                cache_path = self._download_to_cache(gs_uri)
                local = self._instance_copy(cache_path)
            os.chmod(local, permission)
            # Keep track of PATH.
            self._install_paths.append(os.path.dirname(local))

    def _run(self, *args, **kwargs):
        """Executes the given command line.

        To support SDK tools, such as adb or aapt, this adds _install_paths
        to the extra_paths. Before invoking this, ensure _install_files()
        has been called.

        @return The result object from utils.run.
        """
        kwargs['extra_paths'] = (
            kwargs.get('extra_paths', []) + self._install_paths)
        return utils.run(*args, **kwargs)

    def _parse_tradefed_datetime(self, result, summary=None):
        """Get the tradefed provided result ID consisting of a datetime
        stamp.

        Unfortunately we are unable to tell tradefed where to store the
        results. In the lab we have multiple instances of tradefed running
        in parallel writing results and logs to the same base directory.
        This function finds the identifier which tradefed used during the
        current run and returns it for further processing of result files.

        @param result: The result object from utils.run.
        @param summary: Test result summary from runs so far.
        @return datetime_id: The result ID chosen by tradefed.
                             Example: '2016.07.14_00.34.50'.
        @raise error.TestError: If no result ID can be found in the output.
        """
        # This string is show for both 'run' and 'continue' after all tests.
        match = re.search(r': XML test result file generated at (\S+). '
                          r'Passed', result.stdout)
        if not (match and match.group(1)):
            # TODO(ihf): Find out if we ever recover something interesting
            # in this case. Otherwise delete it.
            # Try harder to find the remains. This string shows before all
            # tests but only with 'run', not 'continue'.
            logging.warning('XML test result file incomplete?')
            match = re.search(r': Created result dir (\S+)', result.stdout)
            if not (match and match.group(1)):
                error_msg = 'Test did not complete due to Chrome or ARC crash.'
                if summary:
                    error_msg += (' Test summary from previous runs: %s'
                                  % summary)
                raise error.TestError(error_msg)
        datetime_id = match.group(1)
        logging.info('Tradefed identified results and logs with %s.',
                     datetime_id)
        return datetime_id

    def _parse_result(self, result):
        """Check the result from the tradefed output.

        This extracts the test pass/fail/executed list from the output of
        tradefed. It is up to the caller to handle inconsistencies.

        @param result: The result object from utils.run.
        @return Tuple (tests, passed, failed, not_executed) of counts.
        @raise error.TestFail: If the output contains no summary line.
        """
        # Parse the stdout to extract test status. In particular step over
        # similar output for each ABI and just look at the final summary.
        match = re.search(r'(XML test result file generated at (\S+). '
                          r'Passed (\d+), Failed (\d+), Not Executed (\d+))',
                          result.stdout)
        if not match:
            # error.Test does not exist; raising it would crash with an
            # AttributeError instead of failing the test cleanly.
            raise error.TestFail('Test log does not contain a summary.')

        passed = int(match.group(3))
        failed = int(match.group(4))
        not_executed = int(match.group(5))
        match = re.search(r'(Start test run of (\d+) packages, containing '
                          r'(\d+(?:,\d+)?) tests)', result.stdout)
        if match and match.group(3):
            tests = int(match.group(3).replace(',', ''))
        else:
            # Unfortunately this happens. Assume it made no other mistakes.
            logging.warning('Tradefed forgot to print number of tests.')
            tests = passed + failed + not_executed
        return (tests, passed, failed, not_executed)

    def _collect_logs(self, repository, datetime, destination):
        """Collects the tradefed logs.

        It is legal to collect the same logs multiple times. This is normal
        after 'tradefed continue' updates existing logs with new results.

        @param repository: Full path to tradefeds output on disk.
        @param datetime: The identifier which tradefed assigned to the run.
                         Currently this looks like '2016.07.14_00.34.50'.
        @param destination: Autotest result directory (destination of logs).
        """
        logging.info('Collecting tradefed testResult.xml and logs to %s.',
                     destination)
        repository_results = os.path.join(repository, 'results')
        repository_logs = os.path.join(repository, 'logs')
        # Because other tools rely on the currently chosen Google storage
        # paths we need to keep destination_results in
        # cheets_CTS.*/results/android-cts/2016.mm.dd_hh.mm.ss(/|.zip)
        # and destination_logs in
        # cheets_CTS.*/results/android-cts/logs/2016.mm.dd_hh.mm.ss/
        destination_results = destination
        destination_results_datetime = os.path.join(destination_results,
                                                    datetime)
        destination_results_datetime_zip = (destination_results_datetime +
                                            '.zip')
        destination_logs = os.path.join(destination, 'logs')
        destination_logs_datetime = os.path.join(destination_logs, datetime)
        # We may have collected the same logs before, clean old versions.
        if os.path.exists(destination_results_datetime_zip):
            os.remove(destination_results_datetime_zip)
        if os.path.exists(destination_results_datetime):
            shutil.rmtree(destination_results_datetime)
        if os.path.exists(destination_logs_datetime):
            shutil.rmtree(destination_logs_datetime)
        shutil.copytree(
            os.path.join(repository_results, datetime),
            destination_results_datetime)
        # Copying the zip file has to happen after the tree so the
        # destination directory is available.
        shutil.copy(
            os.path.join(repository_results, datetime) + '.zip',
            destination_results_datetime_zip)
        shutil.copytree(
            os.path.join(repository_logs, datetime),
            destination_logs_datetime)