blob: 219fd6ed2ac0742964726725a8c7df71110eab77 [file] [log] [blame]
Dan Shia1ecd5c2013-06-06 11:21:31 -07001# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Dan Shia1ecd5c2013-06-06 11:21:31 -07005
Alex Zamorzaevf0573b52019-04-05 12:07:59 -07006import collections
Paul Hobbs20cc72a2016-08-30 16:57:05 -07007import contextlib
Fang Deng18699fe2015-12-04 16:40:27 -08008import grp
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -08009import httplib
10import json
Alex Millerdadc2c22013-07-08 15:21:21 -070011import logging
MK Ryu35d661e2014-09-25 17:44:10 -070012import os
beeps023afc62014-02-04 16:59:22 -080013import random
Alex Millerdadc2c22013-07-08 15:21:21 -070014import re
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080015import time
Dan Shiffd5b822017-07-14 11:16:23 -070016import traceback
Paul Drewsbef578d2013-09-24 15:10:36 -070017import urllib2
Alex Millerdadc2c22013-07-08 15:21:21 -070018
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080019import common
Dan Shiffd5b822017-07-14 11:16:23 -070020from autotest_lib.client.bin.result_tools import utils as result_utils
21from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
22from autotest_lib.client.bin.result_tools import view as result_view
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -070023from autotest_lib.client.common_lib import lsbrelease_utils
Dan Shief31f032016-05-13 15:51:39 -070024from autotest_lib.client.common_lib import utils
beeps023afc62014-02-04 16:59:22 -080025from autotest_lib.client.common_lib import error
Dan Shiffd5b822017-07-14 11:16:23 -070026from autotest_lib.client.common_lib import file_utils
beeps023afc62014-02-04 16:59:22 -080027from autotest_lib.client.common_lib import global_config
MK Ryu0c1a37d2015-04-30 12:00:55 -070028from autotest_lib.client.common_lib import host_queue_entry_states
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070029from autotest_lib.client.common_lib import host_states
Simran Basi7756a0b2016-03-16 13:10:07 -070030from autotest_lib.server.cros import provision
Dan Shia1ecd5c2013-06-06 11:21:31 -070031from autotest_lib.server.cros.dynamic_suite import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070032from autotest_lib.server.cros.dynamic_suite import job_status
Dan Shia1ecd5c2013-06-06 11:21:31 -070033
Dan Shiffd5b822017-07-14 11:16:23 -070034try:
35 from chromite.lib import metrics
36except ImportError:
37 metrics = utils.metrics_mock
38
Dan Shia1ecd5c2013-06-06 11:21:31 -070039
# Shared handle on the Autotest global configuration.
CONFIG = global_config.global_config

# NOTIFICATIONS settings used to look up sheriffs: comma-separated javascript
# file names, and the base URL those names are appended to.
_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' for which the lab counts as up.
LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet',
        type=bool, default=False)

# Upper bound on how long to wait for duts to go idle: 10 minutes.
IDLE_DUT_WAIT_TIMEOUT = 600
56
# A few Android boards use a build target whose name differs from the board
# name. These tables translate between the two names, in either direction,
# for those special cases.
ANDROID_TARGET_TO_BOARD_MAP = {
    'seed_l8150': 'gm4g_sprout',
    'bat_land': 'bat',
}
# Reverse lookup, derived from the table above so the two cannot drift apart.
ANDROID_BOARD_TO_TARGET_MAP = dict(
    (board, target) for target, board in ANDROID_TARGET_TO_BOARD_MAP.items())

# Common prefix of the metric names that report result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
Dan Shi43274402016-11-04 15:13:43 -070070
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or suite from running."""
74
75
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
79
80
class Singleton(type):
    """Metaclass that hands out a single shared instance per client class."""

    # Maps each class created through this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance of `cls`, constructing it on first use.

        The constructor arguments are only used by the call that actually
        creates the instance; later calls return the existing object.
        """
        if cls in cls._instances:
            return cls._instances[cls]
        cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]
91
class EmptyAFEHost(object):
    """Placeholder standing in for an AFE host object when there is no AFE."""

    def __init__(self):
        """Create the placeholder with empty host data.

        Instance attributes are added here as they come into use. For now
        only `attributes` and `labels` are needed; when code starts relying
        on further attributes of a real AFE Host object (check
        rpc_interfaces.get_hosts()), add them here too so that users are not
        surprised by a missing attribute on their host's afe_host object.
        """
        self.labels = []
        self.attributes = {}
106
Fang Dengf08814a2015-08-03 18:12:18 +0000107
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    The pattern is matched at the start of `name` only; any text after the
    matched portion is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return A tuple (board, type, milestone, manifest):
            board: board the manifest is for, e.g. x86-alex.
            type: build type, e.g. 'release', 'factory', or 'firmware'.
            milestone: digits of the milestone the manifest was associated
                       with, as a string. Will be None for relative build
                       names.
            manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    @raises ParseBuildNameException If `name` does not match the build name
                                    format.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    # The number of groups is fixed by the pattern, so a successful match
    # is the only condition that needs checking.
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    return (match.group('board'), match.group('type'),
            match.group('milestone'), match.group('manifest'))
130
Alex Millerdadc2c22013-07-08 15:21:21 -0700131
def get_labels_from_afe(hostname, label_prefix, afe):
    """Fetch the values of a host's AFE labels that carry a given prefix.

    Host labels of the form <label_prefix><value> are looked up in the AFE
    and the "<value>" part of each one is returned.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the value of every matching label, or None when
             the AFE reports no matching label.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        # Keep everything after the first occurrence of the prefix.
        values.append(label.name.split(label_prefix, 1)[1])
    return values
150
151
def get_label_from_afe(hostname, label_prefix, afe):
    """Fetch the value of the one AFE label of a host with a given prefix.

    Looks for a host label of the form <label_prefix><value> and returns
    its "<value>" part. None is returned unless exactly one label matches.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the value of the single matching label, or None.

    """
    candidates = get_labels_from_afe(hostname, label_prefix, afe)
    if not candidates or len(candidates) != 1:
        return None
    return candidates[0]
Dan Shia1ecd5c2013-06-06 11:21:31 -0700168
169
def get_board_from_afe(hostname, afe):
    """Look up the board of a host from its AFE labels.

    The board is the value of the host's label that starts with
    constants.BOARD_PREFIX. `None` is returned if there is not a single,
    unique label with that prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
183
184
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its AFE labels.

    The build is the value of the host's label that starts with
    provision.CROS_VERSION_PREFIX followed by ':'.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    label_prefix = provision.CROS_VERSION_PREFIX + ':'
    # Any falsy lookup result is reported as None.
    return get_label_from_afe(hostname, label_prefix, afe) or None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700201
202
# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """Look up the email addresses of the current sheriffs.

    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email
    addresses. A javascript file can contain the ldap of more than one
    sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be fetched or parsed are logged and skipped.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset config option shows up
        # here as an empty file name; there is nothing to fetch for it.
        if not sheriff_js:
            continue
        try:
            url_content = utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        # URLError derives from IOError, so it has to be handled before
        # the generic IOError clause or this handler is never reached.
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids
beeps46dadc92013-11-07 14:07:10 -0800239
240
def remote_wget(source_url, dest_path, ssh_cmd):
    """Download source_url locally and stream it to a file on a remote host.

    The download is written to stdout by wget and piped through the given
    ssh command, which stores it at dest_path on the remote side.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    # NOTE(review): the arguments are interpolated without any quoting;
    # presumably utils.run hands this to a shell, so callers should only
    # pass trusted values -- confirm.
    pipeline = "wget -O - %s | %s 'cat >%s'" % (source_url, ssh_cmd, dest_path)
    utils.run(pipeline)
beeps46dadc92013-11-07 14:07:10 -0800252
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800253
# Number of times _get_lab_status() tries to download the status page.
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    The download is attempted up to _MAX_LAB_STATUS_ATTEMPTS times, with a
    pause after every failed attempt.

    @param status_url: URL the lab status is downloaded from.

    @returns The JSON object obtained from the given URL, or None if no
             attempt produced a response with status code 200.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        finally:
            # Release the connection whether or not the response was usable.
            response.close()
        time.sleep(retry_waittime)
    return None
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800275
276
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status page;
                       its 'general_state' and 'message' entries are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    # The bracketed text is a whitespace-separated list of patterns.
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette266da2a2013-11-27 15:09:55 -0800307
308
def is_in_lab():
    """Tell whether the current Autotest instance is in the lab.

    The decision is based on the configured SERVER hostname, which has to
    start with 'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    return server_hostname.startswith('cautotest')
316
317
def check_lab_status(build):
    """Verify that the lab status allows scheduling tests for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked. Nothing is checked when this instance is not in the
    lab, and the lab is treated as open when its status cannot be
    downloaded.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab has a status worth checking.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    lab_status = _get_lab_status(status_url)
    if lab_status is not None:
        _decode_lab_status(lab_status, build)
        return
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
beeps023afc62014-02-04 16:59:22 -0800342
343
def host_in_lab(hostname):
    """Check if the execution is against a host in the lab.

    @param hostname: hostname that is passed to utils.host_is_in_lab_zone().

    @return False when running in moblab SSP or on moblab; otherwise the
            result of utils.host_is_in_lab_zone() for the hostname.
    """
    if utils.in_moblab_ssp():
        return False
    if lsbrelease_utils.is_moblab():
        return False
    return utils.host_is_in_lab_zone(hostname)
349
350
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all the given labels and lock it.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Try the candidates in random order. This prevents errors where a fault
    # might seem repeatable because we lock, say, the same packet capturer
    # for each test run.
    random.shuffle(candidates)
    for candidate in candidates:
        if not lock_manager.lock([candidate.hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         candidate.hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.',
                     candidate.hostname, labels)
        return candidate.hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi7e04fa82013-07-25 15:08:48 -0700386
387
def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    Only the views accepted by job_status.view_is_relevant() are used.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'dummy_Fail.Error': ['ERROR', 'ERROR'],
              'dummy_Fail.NAError': ['TEST_NA'],
              'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'],
             }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    # Build a real list: the emptiness check below would never fire on the
    # lazy iterator that filter() returns under Python 3.
    relevant_views = [view for view in views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views
MK Ryu35d661e2014-09-25 17:44:10 -0700411
412
def get_data_key(prefix, suite, build, board):
    """
    Build a dictionary key string out of the given parameters.

    The key has the form <prefix>.<board>.<build_type>.<branch>.<suite>,
    where build_type and branch are taken from the build string.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use the parameters we're sure of instead (suite,
        board); build_type and branch are then both 'Unknown'. eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        parsed_board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        build_type = branch = 'Unknown'
    else:
        # For x86-<name>-<extra> boards the trailing <extra> part is moved
        # in front of the build type, e.g. 'pgo' + 'release'.
        variant = re.search(r'x86-\w+-(.*)', parsed_board)
        if variant:
            build_type = '%s-%s' % (variant.group(1), build_type)

    return '%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s' % {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
MK Ryu83184352014-12-10 14:59:40 -0800453
454
def setup_logging(logfile=None, prefix=False):
    """Reset the root logger to plain, message-only logging.

    All handlers already installed on the root logger are dropped and
    replaced by a stream handler. Unless `prefix` is set, calls to logging
    will only show the message; no severity is logged.

    @param logfile: If specified dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    root_logger.handlers = []

    log_format = ('%(asctime)s %(levelname)-5s| %(message)s' if prefix
                  else '%(message)s')

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(screen_handler)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    file_handler = logging.FileHandler(logfile)
    file_handler.setFormatter(logging.Formatter(log_format))
    file_handler.setLevel(logging.DEBUG)
    root_logger.addHandler(file_handler)
Prashanth Balasubramanian8c98ac12014-12-23 11:26:44 -0800486
487
def is_shard():
    """Tell whether this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    shard_hostname = CONFIG.get_config_value(
            'SHARD', 'shard_hostname', default=None)
    return True if shard_hostname else False
497
498
def get_global_afe_hostname():
    """Return the global AFE hostname from the SERVER config section."""
    global_afe_hostname = CONFIG.get_config_value(
            'SERVER', 'global_afe_hostname')
    return global_afe_hostname
Fang Deng0cb2a3b2015-12-10 17:59:00 -0800502
503
def is_restricted_user(username):
    """Tell whether a user belongs to one of the restricted groups.

    User in restricted group only have access to master. The restricted
    groups are the comma-separated names in the AUTOTEST_WEB config value
    restricted_groups.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    configured_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='')
    for group in configured_groups.split(','):
        if not group:
            continue
        try:
            members = grp.getgrnam(group).gr_mem
        except KeyError:
            # The configured name has no entry in the group database.
            logging.debug("%s is not a valid group.", group)
            continue
        if username in members:
            return True
    return False
525
526
def get_special_task_status(is_complete, success, is_active):
    """Map the flags of a special task onto a host queue entry status.

    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task: COMPLETED or FAILED for a
            completed task, otherwise RUNNING when active, else QUEUED.
    """
    if not is_complete:
        if is_active:
            return host_queue_entry_states.Status.RUNNING
        return host_queue_entry_states.Status.QUEUED
    if not success:
        return host_queue_entry_states.Status.FAILED
    return host_queue_entry_states.Status.COMPLETED
547
548
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Compute the execution path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independent
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    if is_shard():
        # Disambiguate special task result directories in case this shard
        # fails. The simplest uid is the job_id, however in rare cases tasks
        # do not have jobs associated with them (eg: frontend verify), so
        # the creation timestamp is used. The clocks between a shard and
        # master should always be in sync. Any discrepancies will be brought
        # to our attention in the form of job timeouts.
        # NOTE(review): the format puts the day before the month; it is
        # kept as is because changing it would change existing paths.
        uid = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (results_path, uid)

    # Adding the timestamp on the master would break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so the global_config check above happens
    # on the shard the logs are on.
    return results_path
592
593
def get_job_tag(id, owner):
    """Build the string tag of a job, in the form <id>-<owner>.

    @param id    Job id
    @param owner Job owner

    @return The tag string.
    """
    tag_fields = (id, owner)
    return '%s-%s' % tag_fields
602
603
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to the results of a HQE.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    @return The tag joined with the sub-directory as a path.
    """
    path_parts = (tag, execution_subdir)
    return os.path.join(*path_parts)
Dan Shi82997b92015-05-06 12:08:02 -0700612
613
def is_inside_chroot():
    """Tell whether the process is running inside chroot.

    The check looks for the file /etc/cros_chroot_version.

    @return: True if the process is running inside chroot.

    """
    chroot_marker = '/etc/cros_chroot_version'
    return os.path.exists(chroot_marker)
Dan Shi70647ca2015-07-16 22:52:35 -0700621
622
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty when
             the name follows neither naming convention. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy. Missing when the build
                    can be parsed neither as a regular build name nor as a
                    Launch Control build.
             suite: Name of the test suite, e.g., bvt

    """
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    # The suite job convention takes precedence over the test job one.
    match = re.match(suite_job_regex, name) or re.match(test_job_regex, name)
    if not match:
        return {}

    build = match.group(1)
    info = {
        'build': build,
        'suite': match.group(2),
        'build_version': build.split('/')[1],
    }
    try:
        info['board'], _, _, _ = ParseBuildName(build)
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(build)
            build_target, _ = utils.parse_launch_control_target(target)
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
        except ValueError:
            # Best effort only: leave the board out of the result.
            pass
    return info
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700677
678
def verify_not_root_user():
    """Refuse to continue when the current process runs with uid 0.

    @raises error.IllegalUser: If os.getuid() reports 0.
    """
    running_as_root = (os.getuid() == 0)
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
683
684
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine given as a string or a dict.

    @param machine: Machine string or dict, as accepted by
                    get_host_info_from_machine.

    @returns: The hostname element of the machine's host information.
    """
    host_info = get_host_info_from_machine(machine)
    # host_info is a (hostname, afe_host) pair; only the first is needed.
    return host_info[0]
692
693
def get_host_info_from_machine(machine):
    """Split a machine string or dict into its host information.

    @param machine: Either a dict carrying 'hostname' and 'afe_host' entries,
                    or any other value, which is used as the hostname itself.

    @returns: Tuple of (hostname, afe_host). For a non-dict machine the
              afe_host is a fresh EmptyAFEHost.
    """
    if not isinstance(machine, dict):
        return (machine, EmptyAFEHost())
    hostname = machine['hostname']
    afe_host = machine['afe_host']
    return (hostname, afe_host)
703
704
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine given as a string or a dict.

    @param machine: Machine string or dict, as accepted by
                    get_host_info_from_machine.

    @returns: The afe_host element of the machine's host information.
    """
    host_info = get_host_info_from_machine(machine)
    # host_info is a (hostname, afe_host) pair; only the second is needed.
    return host_info[1]
Fang Dengf8a94e22015-12-07 13:39:13 -0800712
713
def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible.

    @param machine: Machine string or dict.

    @returns: The 'connection_pool' entry of a machine dict, or None when the
              machine is not a dict or has no such entry.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
719
720
def get_creds_abspath(creds_file):
    """Resolve the location of a credentials file.

    An absolute creds_file is returned unchanged. A relative one is joined
    onto the creds directory configured as SERVER/creds_dir in global_config;
    when that setting is empty or the directory does not exist, the autotest
    directory is used instead.

    @param creds_file: Path to the credentials file, absolute or relative.
    @return: The resolved path to the credentials file, or None when
             creds_file is empty.
    """
    if not creds_file:
        return None
    if not os.path.isabs(creds_file):
        base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
        if not (base_dir and os.path.exists(base_dir)):
            base_dir = common.autotest_dir
        creds_file = os.path.join(base_dir, creds_file)
    return creds_file
Kevin Cheng3b111812015-12-15 11:52:08 -0800739
740
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return: The object returned by ts_mon_config.SetupTsMonGlobalState when
             it has an `__exit__` attribute. Otherwise (chromite cannot be
             imported, setup raised, or the result is not a context manager)
             a TrivialContextManager.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        # Monitoring is best effort: a setup failure is logged and the caller
        # still gets a usable (no-op) context manager.
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
Paul Hobbs20cc72a2016-08-30 16:57:05 -0700765
766
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager: entering and leaving it does nothing.

    @param *args: Accepted and ignored.
    @param **kwargs: Accepted and ignored.
    """
    # The value bound by `with ... as` is None.
    yield None
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700775
776
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # Work on a shallow copy so the caller's list is left untouched.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them next time
        # around. Filtering (rather than list.remove) tolerates the AFE
        # reporting a hostname that is not in the pending list.
        active_dut_list = [dut for dut in active_dut_list
                           if dut not in idle_duts]

        # Only report when something is actually still pending.
        if active_dut_list:
            logging.info('still waiting for following duts to go idle: %s',
                         active_dut_list)
    return True
812
813
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Only the duts locked by this call are waited on, and only those are
    unlocked when the context exits.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @yields Boolean result of wait_for_idle_duts for the duts locked here:
            True if they all went idle, False if we timed out waiting too
            long for hosts to go idle.
    """
    locked_duts = []
    try:
        # Lock in sorted order, iterating over a sorted copy so the caller's
        # list is not reordered as a side effect.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        # Runs on normal exit and on error, so our locks are always released.
        afe.unlock_hosts(locked_duts)
Dan Shib5b8b4f2016-11-02 14:04:02 -0700838
839
def _get_default_size_info(path):
    """Build the fallback result size information for a result directory.

    Used when the directory summary could not be built: the test result is
    assumed not to be throttled and every size is reported as the total size
    of the existing test results under the given path.

    @param path: Path of the result directory to measure.

    @return: A namedtuple of result size informations, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    dir_size_kb = file_utils.get_directory_size_kibibytes(path)
    size_fields = dict(
            client_result_collected_KB=dir_size_kb,
            original_result_total_KB=dir_size_kb,
            result_uploaded_KB=dir_size_kb,
            result_throttled=False)
    return result_utils_lib.ResultSizeInfo(**size_fields)
864
865
def _report_result_size_metrics(result_size_info):
    """Send the result size numbers to metrics as counters.

    One counter is incremented per size attribute, each tagged with the
    result_throttled field.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    # (attribute name on result_size_info, counter description); the attribute
    # name doubles as the metric name suffix.
    counters = (
        ('client_result_collected_KB',
         'The total size (in KB) of test results '
         'collected from test device. Set to be the total size of '
         'the given path.'),
        ('original_result_total_KB',
         'The original size (in KB) of test results '
         'before being trimmed.'),
        ('result_uploaded_KB',
         'The total size (in KB) of test results to be '
         'uploaded.'),
    )
    fields = {'result_throttled' : result_size_info.result_throttled}
    for size_name, description in counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + size_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, size_name),
                             fields=fields)
889
890
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory.

    The resulting sizes are also reported to metrics before returning.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
            sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best-effort fallback for any ordinary failure. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed here.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info