blob: 8cfbeae048b4e95c73f7d079f3e35ab60b02e413 [file] [log] [blame]
Dan Shia1ecd5c2013-06-06 11:21:31 -07001# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Dan Shia1ecd5c2013-06-06 11:21:31 -07005
Alex Zamorzaevf0573b52019-04-05 12:07:59 -07006import collections
Paul Hobbs20cc72a2016-08-30 16:57:05 -07007import contextlib
Fang Deng18699fe2015-12-04 16:40:27 -08008import grp
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -08009import httplib
10import json
Alex Millerdadc2c22013-07-08 15:21:21 -070011import logging
MK Ryu35d661e2014-09-25 17:44:10 -070012import os
beeps023afc62014-02-04 16:59:22 -080013import random
Alex Millerdadc2c22013-07-08 15:21:21 -070014import re
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080015import time
Dan Shiffd5b822017-07-14 11:16:23 -070016import traceback
Paul Drewsbef578d2013-09-24 15:10:36 -070017import urllib2
Alex Millerdadc2c22013-07-08 15:21:21 -070018
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080019import common
Dan Shiffd5b822017-07-14 11:16:23 -070020from autotest_lib.client.bin.result_tools import utils as result_utils
21from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
22from autotest_lib.client.bin.result_tools import view as result_view
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -070023from autotest_lib.client.common_lib import lsbrelease_utils
Dan Shief31f032016-05-13 15:51:39 -070024from autotest_lib.client.common_lib import utils
beeps023afc62014-02-04 16:59:22 -080025from autotest_lib.client.common_lib import error
Dan Shiffd5b822017-07-14 11:16:23 -070026from autotest_lib.client.common_lib import file_utils
beeps023afc62014-02-04 16:59:22 -080027from autotest_lib.client.common_lib import global_config
MK Ryu0c1a37d2015-04-30 12:00:55 -070028from autotest_lib.client.common_lib import host_queue_entry_states
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070029from autotest_lib.client.common_lib import host_states
Simran Basi7756a0b2016-03-16 13:10:07 -070030from autotest_lib.server.cros import provision
Dan Shia1ecd5c2013-06-06 11:21:31 -070031from autotest_lib.server.cros.dynamic_suite import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070032from autotest_lib.server.cros.dynamic_suite import job_status
Dan Shia1ecd5c2013-06-06 11:21:31 -070033
Dan Shiffd5b822017-07-14 11:16:23 -070034try:
35 from chromite.lib import metrics
36except ImportError:
37 metrics = utils.metrics_mock
38
Dan Shia1ecd5c2013-06-06 11:21:31 -070039
# Handle to the global configuration. Every setting below is read from it
# once, when this module is imported.
CONFIG = global_config.global_config

# Comma-separated names of the javascript files that get_sheriffs() downloads
# to find the current sheriffs; each name is appended to _CHROMIUM_BUILD_URL.
_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which the lab is
# considered up; see _decode_lab_status().
LAB_GOOD_STATES = ('open', 'throttled')

# Boolean [CROS] enable_drone_in_restricted_subnet setting; False when unset.
ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 minutes (expressed in seconds) for DUTs to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mappings between board name and build target. These special-case the
# Android boards whose board name and build target name do not match.
# The two dicts are inverses of each other and must be kept in sync.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
Dan Shi43274402016-11-04 15:13:43 -070070
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or suite.

    Raised by the lab status checks when the lab is closed or when the
    requested build matches one of the blocked build patterns.
    """
74
75
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a malformed build name."""
79
80
class Singleton(type):
    """Metaclass that hands out one shared instance per class.

    The first call to a class using this metaclass constructs the instance;
    every later call returns that same object, and the arguments of those
    later calls are ignored.
    """
    # Maps each class using this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of `cls`, building it on first use."""
        if cls in cls._instances:
            return cls._instances[cls]
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        cls._instances[cls] = instance
        return instance
91
class EmptyAFEHost(object):
    """Stand-in for an AFE host object when no AFE is available."""

    def __init__(self):
        """
        Create a host that has no attributes and no labels.

        Only the members that callers currently read from a real AFE host
        object (see rpc_interfaces.get_hosts()) are provided. Add further
        members here as they come into use, so that code handed this
        placeholder does not fail on a missing attribute.
        """
        self.labels = []
        self.attributes = {}
106
Fang Dengf08814a2015-08-03 18:12:18 +0000107
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest.

    Only the start of `name` has to match the expected format; anything
    following the manifest (or 'LATEST') is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: build type, e.g. 'release', 'factory', or 'firmware'
    @return milestone: milestone the manifest was associated with, as a
                       string of digits. Will be None for relative build
                       names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    @raises ParseBuildNameException if `name` is not a valid build name.

    """
    build_name_regex = (
            r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
            r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
            r'(?P<manifest>[\d.ab-]+)|LATEST)')
    parsed = re.match(build_name_regex, name)
    if parsed is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    fields = parsed.groupdict()
    return (fields['board'], fields['type'],
            fields['milestone'], fields['manifest'])
130
Alex Millerdadc2c22013-07-08 15:21:21 -0700131
def get_labels_from_afe(hostname, label_prefix, afe):
    """Look up the values of a host's labels that share a prefix.

    Queries the AFE for labels on the host whose names start with
    <label_prefix>, and strips that prefix from each name.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the "<value>" part of every matching label, or
             None if the host has no matching label.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        # Split only once so a value that repeats the prefix stays intact.
        values.append(label.name.split(label_prefix, 1)[1])
    return values
150
151
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of a host's single label with a given prefix.

    Uses get_labels_from_afe() to collect the values of all labels named
    <label_prefix><value> on the host.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the "<value>" part of the label, or None unless exactly one
             label on the host matches the prefix.

    """
    candidates = get_labels_from_afe(hostname, label_prefix, afe)
    if not candidates or len(candidates) != 1:
        return None
    return candidates[0]
Dan Shia1ecd5c2013-06-06 11:21:31 -0700168
169
def get_board_from_afe(hostname, afe):
    """Look up a host's board from its labels in the AFE.

    The board is the value of the host's label that starts with
    constants.BOARD_PREFIX.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None` if the host does not have
             exactly one such label.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
183
184
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its labels in the AFE.

    The build is the value of the host's label named
    "<provision.CROS_VERSION_PREFIX>:<build>".

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    label_prefix = provision.CROS_VERSION_PREFIX + ':'
    build = get_label_from_afe(hostname, label_prefix, afe)
    # Normalize any falsy result (e.g. an empty value) to None.
    return build or None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700201
202
Allen Li6a612392016-08-18 12:09:32 -0700203# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Fetch the email addresses of the current sheriffs.

    Downloads each configured sheriff javascript file from
    _CHROMIUM_BUILD_URL and parses its output. A javascript file can contain
    the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be downloaded or parsed are logged and skipped.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset config option would
        # otherwise cause a pointless fetch of the bare base URL. Stripping
        # also tolerates spaces after the commas in the config value.
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            continue

        try:
            url_content = utils.urlopen('%s%s'% (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        except (urllib2.URLError, httplib.HTTPException) as e:
            # urllib2.URLError derives from IOError and is therefore handled
            # by the clause above; this one is reached for HTTPException.
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue

        ldaps = re.search(r"document.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.warning('Could not retrieve sheriff ldaps for: %s',
                            url_content)
            continue
        sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                        for alias in ldaps.group(1).split(',')]
    return sheriff_ids
beeps46dadc92013-11-07 14:07:10 -0800239
240
def remote_wget(source_url, dest_path, ssh_cmd):
    """Download source_url locally and stream it to a file on a remote host.

    Runs wget on the local machine and pipes its output through ssh_cmd
    into `cat`, which writes it to dest_path on the remote host.

    NOTE(review): the arguments are interpolated into a shell command line
    without quoting, so callers must pass values that are safe for the shell.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    pipeline = "wget -O - %s | %s 'cat >%s'" % (
            source_url, ssh_cmd, dest_path)
    utils.run(pipeline)
beeps46dadc92013-11-07 14:07:10 -0800252
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800253
# Number of times _get_lab_status() tries to download the lab status.
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS download attempts, sleeping one
    second after every failed one.

    @param status_url: URL to download the lab status from.

    @returns The JSON object obtained from the given URL, or None if no
             attempt produced a response with status code 200.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Close the connection whether or not the response is usable, so
        # that retries do not leak open sockets.
        with contextlib.closing(response):
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        time.sleep(retry_waittime)
    return None
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800275
276
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized lab status; its 'general_state' and
                       'message' entries are read.
    @param build: build name that we want to check the status of. If empty
                  or None, only the general lab state is checked.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        # re.match anchors only at the start of the build name.
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette266da2a2013-11-27 15:09:55 -0800307
308
def is_in_lab():
    """Tell whether this Autotest instance runs in the lab.

    The instance counts as being in the lab when the [SERVER] hostname
    setting in the global config starts with 'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    return server_hostname.startswith('cautotest')
316
317
def check_lab_status(build):
    """Verify that the lab status permits scheduling tests for a build.

    Does nothing unless this instance runs in the lab. Otherwise downloads
    the lab status and checks whether the lab is down, or whether testing
    for the requested build should be blocked. If the status cannot be
    downloaded, the lab is treated as open.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab is subject to the lab status.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    lab_status = _get_lab_status(status_url)
    if lab_status is not None:
        _decode_lab_status(lab_status, build)
        return
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
beeps023afc62014-02-04 16:59:22 -0800342
343
def host_in_lab(hostname):
    """Check if the execution is against a host in the lab.

    @param hostname: hostname to check.

    @return: False when running in moblab SSP or on moblab; otherwise the
             result of utils.host_is_in_lab_zone() for the hostname.
    """
    if utils.in_moblab_ssp():
        return False
    if lsbrelease_utils.is_moblab():
        return False
    return utils.host_is_in_lab_zone(hostname)
349
350
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all the given labels and lock it.

    The matching hosts are tried in random order until one is locked.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Randomize the order so that every test run does not end up locking,
    # say, the same packet capturer, which could make a fault in that one
    # device look repeatable.
    random.shuffle(candidates)
    for candidate in candidates:
        hostname = candidate.hostname
        if not lock_manager.lock([hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.',
                     hostname, labels)
        return hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi7e04fa82013-07-25 15:08:48 -0700386
387
def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    Only views accepted by job_status.view_is_relevant() are included.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'dummy_Fail.Error': ['ERROR', 'ERROR'],
              'dummy_Fail.NAError': ['TEST_NA'],
              'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'],
             }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    # Build a real list: a lazy filter() object is always truthy, which
    # would defeat the emptiness check below.
    relevant_views = [view for view in views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views
MK Ryu35d661e2014-09-25 17:44:10 -0700411
412
def get_data_key(prefix, suite, build, board):
    """
    Build a dictionary key string out of the given parameters.

    The key has the form <prefix>.<board>.<build_type>.<branch>.<suite>.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        both are reported as 'Unknown' and the key relies on the
        parameters we're sure of instead (suite, board). eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        parsed = ParseBuildName(build)
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = build_type = 'Unknown'
    else:
        parsed_board, build_type, branch = parsed[:3]
        # For x86-<name>-<extra> boards, <extra> is really part of the
        # build type (e.g. 'pgo' in x86-alex-pgo).
        extra = re.search(r'x86-\w+-(.*)', parsed_board)
        if extra:
            build_type = extra.group(1) + '-' + build_type

    key_format = '%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
    return key_format % {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
MK Ryu83184352014-12-10 14:59:40 -0800453
454
def setup_logging(logfile=None, prefix=False):
    """Reset the root logger to emit plain messages at INFO level.

    By default calls to logging will only show the message; no severity is
    logged. All handlers previously installed on the root logger are
    dropped.

    @param logfile: If specified dump output to a file as well. The file
                    handler accepts records down to DEBUG level.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    root_logger.handlers = []

    log_format = ('%(asctime)s %(levelname)-5s| %(message)s' if prefix
                  else '%(message)s')

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(console)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    to_file = logging.FileHandler(logfile)
    to_file.setFormatter(logging.Formatter(log_format))
    to_file.setLevel(logging.DEBUG)
    root_logger.addHandler(to_file)
Prashanth Balasubramanian8c98ac12014-12-23 11:26:44 -0800486
487
def is_shard():
    """Tell whether this instance is running as a shard.

    An instance is a shard when the global_config value shard_hostname in
    the section SHARD is set to a non-empty value.

    @return True, if shard_hostname is set, False otherwise.
    """
    shard_hostname = CONFIG.get_config_value(
            'SHARD', 'shard_hostname', default=None)
    if shard_hostname:
        return True
    return False
497
498
def get_global_afe_hostname():
    """Return the global AFE's hostname as set in the global configuration.

    @return The [SERVER] global_afe_hostname setting.
    """
    global_afe_hostname = CONFIG.get_config_value(
            'SERVER', 'global_afe_hostname')
    return global_afe_hostname
Fang Deng0cb2a3b2015-12-10 17:59:00 -0800502
503
def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    User in restricted group only have access to master.

    The restricted groups are the comma-separated group names in the
    [AUTOTEST_WEB] restricted_groups setting. Names that do not exist on
    this system are logged at debug level and ignored.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        # Tolerate spaces around the commas in the config value, and skip
        # the empty entry produced when the option is unset.
        group = group.strip()
        if not group:
            continue
        try:
            members = grp.getgrnam(group).gr_mem
        except KeyError:
            logging.debug("%s is not a valid group.", group)
            continue
        if username in members:
            return True
    return False
525
526
def get_special_task_status(is_complete, success, is_active):
    """Map a special task's flags to a host queue entry style status.

    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded; only consulted
                          for completed tasks.
    @param is_active      Boolean if the task is active; only consulted
                          for tasks that are not completed.

    @return The status of a special task: COMPLETED or FAILED for a
            completed task, otherwise RUNNING or QUEUED.
    """
    statuses = host_queue_entry_states.Status
    if not is_complete:
        return statuses.RUNNING if is_active else statuses.QUEUED
    return statuses.COMPLETED if success else statuses.FAILED
547
548
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Compute the results path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_name (name in lower case)
        * Shard: Master_path/time_requested
    The timestamp works around the fact that a shard can fail independent
    of the master, and be replaced by another shard that has the same
    hosts. Without it the logs of the tasks running on the second shard
    would clobber the logs from the first in google storage, because task
    ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time; must provide
                           strftime().

    @return An execution path for the task.
    """
    master_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # Only shards append the timestamp. Doing so on the master would break
    # backward compatibility, as there are tasks that currently don't have
    # timestamps. If a host or job has been sent to a shard, the rpc for
    # that host/job is redirected to the shard, so this global_config check
    # happens on the shard the logs are on.
    if is_shard():
        # A uid disambiguates special task result directories in case this
        # shard fails. The simplest uid would be the job_id, but in rare
        # cases tasks do not have jobs associated with them (eg: frontend
        # verify), so the request timestamp is used. The clocks between a
        # shard and master should always be in sync. Any discrepancies will
        # be brought to our attention in the form of job timeouts.
        # NOTE(review): the format puts the day before the month;
        # presumably existing result paths rely on it - confirm before
        # changing.
        uid = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (master_path, uid)

    return master_path
592
593
def get_job_tag(id, owner):
    """Build the string tag of a job, in the form "<id>-<owner>".

    @param id    Job id
    @param owner Job owner

    @return The tag string.

    """
    tag = '%s-%s' % (id, owner)
    return tag
602
603
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    @return The sub-directory joined onto the tag as a path.

    """
    path_parts = (tag, execution_subdir)
    return os.path.join(*path_parts)
Dan Shi82997b92015-05-06 12:08:02 -0700612
613
def is_inside_chroot():
    """Check if the process is running inside chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside chroot. False if it is
             not, or if cros_build_lib cannot be imported (a warning is
             logged in that case).

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        # logging.warn is a deprecated alias; use logging.warning like the
        # rest of this module.
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()
Dan Shi70647ca2015-07-16 22:52:35 -0700635
636
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty if
             the name follows neither convention, and has no 'board' entry
             if the build name cannot be parsed. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    # Raw strings keep the regex escapes (\d, \.) out of Python's own
    # string-escape processing.
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if not match:
        return info

    info['build'], info['suite'] = match.group(1), match.group(2)
    info['build_version'] = info['build'].split('/')[1]
    try:
        info['board'], _, _, _ = ParseBuildName(info['build'])
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(
                    info['build'])
            build_target, _ = utils.parse_launch_control_target(target)
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
        except ValueError:
            # Not a Launch Control build either; report what we have.
            pass
    return info
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700691
692
def verify_not_root_user():
    """Raise an error when the current process runs with uid 0.

    @raises error.IllegalUser: If os.getuid() reports uid 0.
    """
    running_as_root = (os.getuid() == 0)
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
697
698
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine given as a string or a dict.

    @param machine: Machine description accepted by
            get_host_info_from_machine.

    @returns: The hostname element of the (hostname, afe_host) tuple
            returned by get_host_info_from_machine.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[0]
706
707
def get_host_info_from_machine(machine):
    """Resolve a machine string or dict into its hostname and AFE host.

    @param machine: Either a dict carrying 'hostname' and 'afe_host' keys, or
            any other value, which is used unchanged as the hostname.

    @returns: Tuple of (hostname, afe_host). For a non-dict machine the
            afe_host is the result of calling EmptyAFEHost().
    """
    if not isinstance(machine, dict):
        return (machine, EmptyAFEHost())
    return (machine['hostname'], machine['afe_host'])
717
718
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine given as a string or a dict.

    @param machine: Machine description accepted by
            get_host_info_from_machine.

    @returns: The afe_host element of the (hostname, afe_host) tuple
            returned by get_host_info_from_machine.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[1]
Fang Dengf8a94e22015-12-07 13:39:13 -0800726
727
def get_connection_pool_from_machine(machine):
    """Fetch the 'connection_pool' entry of a machine dict, if there is one.

    @param machine: Machine given as a dict or as any other value.

    @returns: The value stored under 'connection_pool' (expected to be an
            ssh_multiplex.ConnectionPool) when machine is a dict holding that
            key; None otherwise.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
733
734
def get_creds_abspath(creds_file):
    """Resolve a credentials file name to a full path.

    An absolute creds_file is returned untouched. A relative one is joined to
    the 'creds_dir' value read from the 'SERVER' section of CONFIG; when that
    value is empty or names a path that does not exist, common.autotest_dir
    is used as the base directory instead.

    @param creds_file: Path to the credentials file; may be empty or None.

    @return: Path of the credentials file, or None when creds_file is empty
            or None.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file

    base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not (base_dir and os.path.exists(base_dir)):
        # No usable configured directory; fall back to the autotest root.
        base_dir = common.autotest_dir
    return os.path.join(base_dir, creds_file)
Kevin Cheng3b111812015-12-15 11:52:08 -0800753
754
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return: The object returned by ts_mon_config.SetupTsMonGlobalState when
            it has an __exit__ attribute. Otherwise (chromite cannot be
            imported, the setup call raised, or its result has no __exit__)
            a TrivialContextManager().
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        # Only hand the result back if it can be used in a `with` statement.
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        # Monitoring is best effort: a setup failure must not break callers.
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
Paul Hobbs20cc72a2016-08-30 16:57:05 -0700779
780
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager: enters and exits without doing anything.

    The value bound by `with ... as` is None.

    @param *args: Accepted and ignored.
    @param **kwargs: Accepted and ignored.
    """
    yield None
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700789
790
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    Sleeps one second before every poll of the AFE (including the first one).
    The timeout is checked after the sleep and before the AFE is queried, so
    once max_wait is exceeded False is returned without a further query.

    @param duts: List of duts to check for idle state. It is not modified.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # Work on a copy so the caller's list is left untouched.
    active_dut_list = list(duts)
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = set(afe_host.hostname for afe_host in afe_hosts
                        if afe_host.status in host_states.IDLE_STATES)

        # Take out idle duts so we don't needlessly check them next time
        # around. Filtering (instead of list.remove) drops duplicated duts in
        # one pass and tolerates hostnames that are not in the list.
        active_dut_list = [dut for dut in active_dut_list
                           if dut not in idle_duts]

        if active_dut_list:
            logging.info('still waiting for following duts to go idle: %s',
                         active_dut_list)
    return True
826
827
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Duts are locked one by one in sorted order. A dut for which
    afe.lock_host returns a false value is logged as already locked and
    skipped. On exit, afe.unlock_hosts is called with the duts that were
    locked here, even if locking, waiting or the `with` body raised.

    @param duts: List of duts to lock. It is not modified.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean (yielded to the `with` body): the result of
             wait_for_idle_duts for the duts locked by this call, i.e. True
             if they all went idle or False if we timed out waiting too long
             for hosts to go idle.
    """
    # Defined before the try block so the finally clause can always use it.
    locked_duts = []
    try:
        # sorted() instead of duts.sort(): the caller's list must not be
        # reordered as a side effect of taking the locks.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
Dan Shib5b8b4f2016-11-02 14:04:02 -0700852
853
def _get_default_size_info(path):
    """Build the fallback result size information for a result directory.

    Used when the directory summary could not be built: the test result is
    assumed not to be throttled and every size is reported as the current
    total size of the directory.

    @param path: Path of the result directory to measure.

    @return: A result_utils_lib.ResultSizeInfo of result size informations,
            including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of
                    the given path.
            original_result_total_KB: The original size (in KB) of test
                    results before being trimmed. Set to be the total size of
                    the given path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    directory_kb = file_utils.get_directory_size_kibibytes(path)
    # All three size fields share the same value in the fallback case.
    size_fields = ('client_result_collected_KB',
                   'original_result_total_KB',
                   'result_uploaded_KB')
    sizes = dict.fromkeys(size_fields, directory_kb)
    return result_utils_lib.ResultSizeInfo(result_throttled=False, **sizes)
878
879
def _report_result_size_metrics(result_size_info):
    """Push the result size numbers to metrics counters.

    One counter named RESULT_METRICS_PREFIX + <size field> is incremented per
    size field, each tagged with the 'result_throttled' field.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    fields = {'result_throttled' : result_size_info.result_throttled}
    # (size field name, counter description), reported in this order.
    counters = (
        ('client_result_collected_KB',
         'The total size (in KB) of test results '
         'collected from test device. Set to be the total size of '
         'the given path.'),
        ('original_result_total_KB',
         'The original size (in KB) of test results '
         'before being trimmed.'),
        ('result_uploaded_KB',
         'The total size (in KB) of test results to be '
         'uploaded.'),
    )
    for size_name, description in counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + size_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, size_name),
                             fields=fields)
903
904
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    On success the html result summary is written into the result directory
    and the merged summary files are deleted.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory.

    In both cases the sizes are reported to metrics before returning.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
            sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best effort: any ordinary failure falls back to the plain directory
        # size. KeyboardInterrupt and SystemExit are deliberately not caught.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
Richard Barnette9db80682018-04-26 00:55:15 +0000946 return result_size_info