blob: 71e3150823fa2583f515069a92e5a8a6ca93d2be [file] [log] [blame]
Dan Shia1ecd5c2013-06-06 11:21:31 -07001# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Dan Shia1ecd5c2013-06-06 11:21:31 -07005
Paul Hobbs20cc72a2016-08-30 16:57:05 -07006import contextlib
Fang Deng18699fe2015-12-04 16:40:27 -08007import grp
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -08008import httplib
9import json
Alex Millerdadc2c22013-07-08 15:21:21 -070010import logging
MK Ryu35d661e2014-09-25 17:44:10 -070011import os
beeps023afc62014-02-04 16:59:22 -080012import random
Alex Millerdadc2c22013-07-08 15:21:21 -070013import re
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080014import time
Dan Shiffd5b822017-07-14 11:16:23 -070015import traceback
Paul Drewsbef578d2013-09-24 15:10:36 -070016import urllib2
Alex Millerdadc2c22013-07-08 15:21:21 -070017
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080018import common
Dan Shiffd5b822017-07-14 11:16:23 -070019from autotest_lib.client.bin.result_tools import utils as result_utils
20from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
21from autotest_lib.client.bin.result_tools import view as result_view
Dan Shief31f032016-05-13 15:51:39 -070022from autotest_lib.client.common_lib import utils
beeps023afc62014-02-04 16:59:22 -080023from autotest_lib.client.common_lib import error
Dan Shiffd5b822017-07-14 11:16:23 -070024from autotest_lib.client.common_lib import file_utils
beeps023afc62014-02-04 16:59:22 -080025from autotest_lib.client.common_lib import global_config
MK Ryu0c1a37d2015-04-30 12:00:55 -070026from autotest_lib.client.common_lib import host_queue_entry_states
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070027from autotest_lib.client.common_lib import host_states
Simran Basi7756a0b2016-03-16 13:10:07 -070028from autotest_lib.server.cros import provision
Dan Shia1ecd5c2013-06-06 11:21:31 -070029from autotest_lib.server.cros.dynamic_suite import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070030from autotest_lib.server.cros.dynamic_suite import job_status
Dan Shia1ecd5c2013-06-06 11:21:31 -070031
Dan Shiffd5b822017-07-14 11:16:23 -070032try:
33 from chromite.lib import metrics
34except ImportError:
35 metrics = utils.metrics_mock
36
Dan Shia1ecd5c2013-06-06 11:21:31 -070037
# Shared handle to the global configuration used throughout this module.
CONFIG = global_config.global_config

# Notification settings.  Each one falls back to an empty string when the
# option is missing from the NOTIFICATIONS section.
_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which the lab is
# considered up.
LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool, default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600
54
# Special-case table for the Android boards whose board name does not match
# the build target name.  Keys are build targets, values are board names.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat',
}
# Reverse lookup (board name -> build target), derived from the table above
# so the two mappings cannot drift apart.
ANDROID_BOARD_TO_TARGET_MAP = dict(
        (board, target)
        for target, board in ANDROID_TARGET_TO_BOARD_MAP.items())
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
Dan Shi43274402016-11-04 15:13:43 -070068
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or suite from running."""
72
73
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
77
78
class Singleton(type):
    """Metaclass that keeps a single instance of each class per process.

    The first call to a class using this metaclass constructs the instance.
    Every later call returns that same object; the arguments of later calls
    are not used.
    """
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of `cls`, creating it on first use."""
        if cls in cls._instances:
            return cls._instances[cls]
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        cls._instances[cls] = instance
        return cls._instances[cls]
89
class EmptyAFEHost(object):
    """Stand-in for an AFE host object for use when there is no AFE."""

    def __init__(self):
        """Create the placeholder with empty `attributes` and `labels`.

        Only the attributes that are actually used are defined here.  As
        more attributes of a real AFE Host object (see
        rpc_interfaces.get_hosts()) come into use, add them here too, so
        that users are not surprised by a host's afe_host object complaining
        that an attribute doesn't exist.
        """
        self.labels = []
        self.attributes = {}
104
Fang Dengf08814a2015-08-03 18:12:18 +0000105
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest num.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    @raises ParseBuildNameException if `name` does not match the expected
            build name pattern.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if not match:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The pattern declares a fixed set of six groups, so any successful
    # match can be unpacked directly by group name.
    return match.group('board', 'type', 'milestone', 'manifest')
128
Alex Millerdadc2c22013-07-08 15:21:21 -0700129
def get_labels_from_afe(hostname, label_prefix, afe):
    """Look up the values of a host's labels that share a prefix.

    Queries the AFE for the labels of the host whose names start with
    <label_prefix>, and strips the prefix from each of them, so a label
    "<label_prefix><value>" contributes "<value>".

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the "<value>" part of every matching label, or
             None if no label matches.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        # Split once only, so a value that repeats the prefix stays intact.
        values.append(label.name.split(label_prefix, 1)[1])
    return values
148
149
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of a host's single label with a given prefix.

    Uses get_labels_from_afe() to find the host's labels starting with
    <label_prefix>, and returns the "<value>" part when exactly one such
    label exists.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the value of the matching label, or None if there is no
             matching label or more than one.

    """
    values = get_labels_from_afe(hostname, label_prefix, afe)
    if not values or len(values) != 1:
        return None
    return values[0]
Dan Shia1ecd5c2013-06-06 11:21:31 -0700166
167
def get_board_from_afe(hostname, afe):
    """Look up a host's board from its labels in the AFE.

    Searches the host's labels for one starting with
    constants.BOARD_PREFIX and returns the remainder of that label.
    `None` is returned if there is not a single, unique label matching
    the prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
181
182
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its labels in the AFE.

    The build is taken from the host's label that starts with
    provision.CROS_VERSION_PREFIX followed by a colon.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    label_prefix = provision.CROS_VERSION_PREFIX + ':'
    # Any false value (no unique label, or an empty build string) is
    # reported as None.
    return get_label_from_afe(hostname, label_prefix, afe) or None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700199
200
Allen Li6a612392016-08-18 12:09:32 -0700201# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    The file names come from the comma separated 'lab_sheriffs' and
    'sheriffs' config values; blank entries (e.g. an unset option) are
    ignored. A file that cannot be fetched or parsed is logged and skipped.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    configured = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        configured.extend(_SHERIFF_JS.split(','))
    # ''.split(',') yields [''], so an unset option would otherwise cause a
    # pointless fetch of the bare base URL. Drop blank entries and stray
    # whitespace around the file names.
    sheriff_js_list = [js.strip() for js in configured if js.strip()]

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s'% (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        # urllib2.URLError is a subclass of IOError, so it has to be handled
        # before the generic IOError clause to be reachable at all.
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids
beeps46dadc92013-11-07 14:07:10 -0800237
238
def remote_wget(source_url, dest_path, ssh_cmd):
    """Fetch source_url on localhost and store it at dest_path over ssh.

    Runs wget locally, writing to stdout, and pipes the data into `cat` on
    the remote side of ssh_cmd. The three arguments are interpolated into
    the shell command line as given, without any quoting.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    pipeline = "wget -O - %s | %s 'cat >%s'" % (
            source_url, ssh_cmd, dest_path)
    utils.run(pipeline)
beeps46dadc92013-11-07 14:07:10 -0800250
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800251
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS attempts, sleeping one second after
    each failed one. An attempt fails when opening the URL raises IOError
    or when the response code is not 200.

    @param status_url: URL to download the lab status from.

    @returns The JSON object obtained from the given URL, or None if every
             attempt failed.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        finally:
            # Always release the connection, whether or not the response
            # was usable; otherwise every attempt leaks an open socket.
            response.close()
        time.sleep(retry_waittime)
    return None
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800273
274
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status page.
                       Its 'general_state' and 'message' entries are read.
    @param build: build name that we want to check the status of.  With a
                  false value only the general lab state is checked.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette266da2a2013-11-27 15:09:55 -0800305
306
def is_in_lab():
    """Tell whether the current Autotest instance is in the lab.

    The decision is based on the 'hostname' option of the SERVER config
    section: a value starting with 'cautotest' counts as being in the lab.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    in_lab = server_hostname.startswith('cautotest')
    return in_lab
314
315
def check_lab_status(build):
    """Verify that the lab status permits scheduling tests for a build.

    Nothing is checked unless this instance is in the lab.  Otherwise the
    lab status is downloaded from the 'lab_status_url' option of the CROS
    config section and decoded; a lab that is down, or a build that is
    blocked, is reported through an exception.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab is subject to the lab status.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is not None:
        _decode_lab_status(json_status, build)
        return
    # Without a status to inspect, the lab is treated as open.
    logging.warning('Could not get a status from %s', status_url)
beeps023afc62014-02-04 16:59:22 -0800340
341
def lock_host_with_labels(afe, lock_manager, labels):
    """Find and lock a single host carrying all of the given labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Try the hosts in random order.  Always locking, say, the same packet
    # capturer for each test run could make a fault look repeatable when it
    # is really tied to one device.
    random.shuffle(candidates)
    for candidate in candidates:
        name = candidate.hostname
        if not lock_manager.lock([name]):
            logging.info('Unable to lock device %s with labels %s.',
                         name, labels)
            continue
        logging.info('Locked device %s with labels %s.', name, labels)
        return name

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi7e04fa82013-07-25 15:08:48 -0700377
378
def get_test_views_from_tko(suite_job_id, tko):
    """Collect test names and results for the given suite job ID.

    Fetches the detailed test views of the job from TKO and keeps those
    that job_status.view_is_relevant() accepts.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    all_views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = [view for view in all_views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    # When a test name occurs more than once, the last view wins.
    return dict((view['test_name'], view['status'])
                for view in relevant_views)
MK Ryu35d661e2014-09-25 17:44:10 -0700399
400
def get_data_key(prefix, suite, build, board):
    """
    Builds a dotted key string out of the parameters.

    The key has the form <prefix>.<board>.<build_type>.<branch>.<suite>.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use 'Unknown' for both, leaving only the parameters
        we're sure of (suite, board). eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        parsed_board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        build_type = branch = 'Unknown'
    else:
        # For x86 boards, whatever follows 'x86-<name>-' in the parsed board
        # is treated as part of the build type (e.g. 'pgo' -> 'pgo-release').
        embedded_str = re.search(r'x86-\w+-(.*)', parsed_board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    key_parts = dict(prefix=prefix, board=board, branch=branch,
                     build_type=build_type, suite=suite)
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % key_parts)
MK Ryu83184352014-12-10 14:59:40 -0800441
442
def setup_logging(logfile=None, prefix=False):
    """Reset the root logger to a basic, mostly unadorned configuration.

    By default calls to logging will only show the message; no severity is
    logged.  The root logger is set to level INFO and writes to a new
    StreamHandler, plus a file handler when a logfile is given.

    @param logfile: If specified dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    root_logger.handlers = []

    log_format = '%(message)s'
    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(screen_handler)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    file_handler = logging.FileHandler(logfile)
    file_handler.setFormatter(logging.Formatter(log_format))
    file_handler.setLevel(logging.DEBUG)
    root_logger.addHandler(file_handler)
Prashanth Balasubramanian8c98ac12014-12-23 11:26:44 -0800474
475
def is_shard():
    """Tell whether this instance is running as a shard.

    Looks at the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    return bool(
            CONFIG.get_config_value('SHARD', 'shard_hostname', default=None))
485
486
def get_global_afe_hostname():
    """Return the global AFE hostname from the SERVER config section."""
    global_afe_hostname = CONFIG.get_config_value(
            'SERVER', 'global_afe_hostname')
    return global_afe_hostname
Fang Deng0cb2a3b2015-12-10 17:59:00 -0800490
491
def is_restricted_user(username):
    """Tell whether a user belongs to one of the restricted groups.

    User in restricted group only have access to master.

    The restricted groups are the comma separated 'restricted_groups' option
    of the AUTOTEST_WEB config section.  Membership is looked up in the
    member list of each group in the system group database.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    group_names = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in group_names:
        # An unset option splits into a single empty name; skip those.
        if not group:
            continue
        try:
            members = grp.getgrnam(group).gr_mem
        except KeyError:
            logging.debug("%s is not a valid group.", group)
            continue
        if username in members:
            return True
    return False
513
514
def get_special_task_status(is_complete, success, is_active):
    """Map the state flags of a special task to a status.

    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses, so a host queue entry status
    is emulated: COMPLETED or FAILED for a finished task, RUNNING for an
    unfinished active one, and QUEUED otherwise.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task.
    """
    status = host_queue_entry_states.Status
    if is_complete:
        return status.COMPLETED if success else status.FAILED
    return status.RUNNING if is_active else status.QUEUED
535
536
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Build the execution path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independent
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # The timestamp is only appended on a shard. Doing it on the master
    # would break backward compatibility, as there are tasks that currently
    # don't have timestamps. If a host or job has been sent to a shard, the
    # rpc for that host/job will be redirected to the shard, so this
    # global_config check will happen on the shard the logs are on.
    if is_shard():
        # Generate a uid to disambiguate special task result directories
        # in case this shard fails. The simplest uid is the job_id, however
        # in rare cases tasks do not have jobs associated with them (eg:
        # frontend verify), so just use the creation timestamp. The clocks
        # between a shard and master should always be in sync. Any
        # discrepancies will be brought to our attention in the form of job
        # timeouts.
        # NOTE(review): the format orders the fields year, day, month.
        # Presumably existing result paths rely on it - confirm before
        # changing.
        uid = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (results_path, uid)

    return results_path
580
581
def get_job_tag(id, owner):
    """Build the string tag of a job, of the form "<id>-<owner>".

    @param id    Job id
    @param owner Job owner

    """
    tag_parts = (id, owner)
    return '%s-%s' % tag_parts
590
591
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to a HQE's results.

    The path is the job tag joined with the execution sub-directory.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    """
    exec_path = os.path.join(tag, execution_subdir)
    return exec_path
Dan Shi82997b92015-05-06 12:08:02 -0700600
601
def is_inside_chroot():
    """Check if the process is running inside chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside chroot. False if it is
             not, or if cros_build_lib cannot be imported, in which case
             the chroot cannot be detected and a warning is logged.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        # logging.warn is a deprecated alias; use logging.warning.
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()
Dan Shi70647ca2015-07-16 22:52:35 -0700623
624
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
        build: Name of the build, e.g., lumpy-release/R46-7272.0.0
        build_version: The version of the build, e.g., R46-7272.0.0
        board: Name of the board, e.g., lumpy
        suite: Name of the test suite, e.g., bvt
        The dictionary is empty if the name follows neither naming
        convention, and lacks 'board' if the build cannot be parsed.

    """
    info = {}
    # Raw strings keep the regex escapes (\d, \.) out of the reach of
    # Python's own string escape processing.
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.group(1)
        info['suite'] = match.group(2)
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as Launch Control build
            # Launch Control builds have name format:
            # branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                # Not a Launch Control build either; leave 'board' unset.
                pass
    return info
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700679
680
def verify_not_root_user():
    """Abort when the current process is running as root.

    @raises error.IllegalUser: If the effective caller has uid 0.
    """
    running_as_root = (os.getuid() == 0)
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
685
686
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine string or machine dict.

    @param machine: Either a plain hostname string or a machine dict, as
            accepted by get_host_info_from_machine.

    @returns: Machine hostname in string format.
    """
    # The host info is a (hostname, afe_host) pair; only the name is needed.
    return get_host_info_from_machine(machine)[0]
694
695
def get_host_info_from_machine(machine):
    """Extract host information from a machine string or machine dict.

    @param machine: Either a dict carrying the 'hostname' and 'afe_host'
            keys, or any other value, which is used as the hostname as-is.

    @returns: Tuple of (hostname, afe_host). When machine is not a dict, the
            afe_host is a freshly created EmptyAFEHost.
    """
    if not isinstance(machine, dict):
        # Non-dict machines carry no AFE data, so pair them with a placeholder.
        return (machine, EmptyAFEHost())
    return (machine['hostname'], machine['afe_host'])
705
706
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine string or machine dict.

    @param machine: Either a plain hostname string or a machine dict, as
            accepted by get_host_info_from_machine.

    @returns: AFE host object.
    """
    # The host info is a (hostname, afe_host) pair; only the host is needed.
    return get_host_info_from_machine(machine)[1]
Fang Dengf8a94e22015-12-07 13:39:13 -0800714
715
def get_connection_pool_from_machine(machine):
    """Return the ssh_multiplex.ConnectionPool stored in machine, if any.

    @param machine: A machine dict, or any other value (e.g. a hostname
            string).

    @returns: The value under the 'connection_pool' key when machine is a
            dict that has one, otherwise None.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
721
722
def get_creds_abspath(creds_file):
    """Resolve the location of a credentials file.

    An absolute creds_file is returned unchanged. A relative one is joined
    onto the 'creds_dir' value from the SERVER section of the global config;
    when that value is unset or does not exist on disk, the autotest
    directory is used as the base instead.

    @param creds_file: Path to the credentials file; may be empty or None.

    @return: The resolved path to the credentials file, or None when
            creds_file is empty.
    """
    if not creds_file:
        return None
    if not os.path.isabs(creds_file):
        base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
        if not (base_dir and os.path.exists(base_dir)):
            # No usable configured directory; fall back to the autotest dir.
            base_dir = common.autotest_dir
        return os.path.join(base_dir, creds_file)
    return creds_file
Kevin Cheng3b111812015-12-15 11:52:08 -0800741
742
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    Never raises because of monitoring: when chromite cannot be imported, or
    when the underlying setup call raises, a warning is logged and a no-op
    context manager is returned instead.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @returns: The object returned by ts_mon_config.SetupTsMonGlobalState when
            it has an __exit__ attribute; otherwise a TrivialContextManager.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        # logging.warn is a deprecated alias; use logging.warning like the
        # handler below does.
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        # Only hand back objects that look usable in a 'with' statement.
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
Paul Hobbs20cc72a2016-08-30 16:57:05 -0700767
768
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager, usable as a stand-in for a real one.

    Entering it binds None to the 'as' target and leaving it does nothing.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield None
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700777
778
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Poll the AFE until every given dut is idle or the wait times out.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    began = time.time()
    # Track the not-yet-idle duts on a copy so the caller's list is untouched.
    pending = duts[:]
    while pending:
        # Sleep first so the AFE is queried at most about once per second.
        time.sleep(1)

        waited = time.time() - began
        if waited > max_wait:
            return False

        # Collect the hostnames the AFE currently reports as idle.
        newly_idle = []
        for afe_host in afe.get_hosts(pending):
            if afe_host.status in host_states.IDLE_STATES:
                newly_idle.append(afe_host.hostname)

        # Drop them so later rounds only query duts that are still busy.
        for hostname in newly_idle:
            pending.remove(hostname)

        logging.info('still waiting for following duts to go idle: %s',
                     pending)
    return True
814
815
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Duts are locked in sorted order (the given list is sorted in place). Duts
    for which afe.lock_host returns a false value are logged as already
    locked and skipped. Every dut locked here is unlocked again on exit, even
    if the body of the 'with' statement raises.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Yields the result of wait_for_idle_duts for the duts locked
             here: True if they all went idle, False if we timed out waiting
             too long for hosts to go idle.
    """
    held = []
    try:
        duts.sort()
        for hostname in duts:
            if not afe.lock_host(hostname, lock_msg, fail_if_locked=True):
                logging.info('%s already locked', hostname)
                continue
            held.append(hostname)
        yield wait_for_idle_duts(held, afe, max_wait)
    finally:
        # Only release what this context manager locked itself.
        afe.unlock_hosts(held)
Dan Shib5b8b4f2016-11-02 14:04:02 -0700840
841
def _get_default_size_info(path):
    """Build the fallback result size information for a directory.

    Used when the directory summary could not be built: the test result is
    then assumed not to be throttled, and every size is reported as the size
    of the existing test results under the given path.

    @param path: Path of the result directory to measure.

    @return: A namedtuple of result size informations, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    dir_size_kb = file_utils.get_directory_size_kibibytes(path)
    # All three size fields share the same value in the fallback case.
    size_fields = dict.fromkeys(
            ('client_result_collected_KB',
             'original_result_total_KB',
             'result_uploaded_KB'),
            dir_size_kb)
    return result_utils_lib.ResultSizeInfo(result_throttled=False,
                                           **size_fields)
866
867
def _report_result_size_metrics(result_size_info):
    """Report result sizes information to metrics.

    One counter per size attribute is incremented by that attribute's value,
    each tagged with whether the results were throttled.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
                             of test result sizes.
    """
    fields = {'result_throttled' : result_size_info.result_throttled}
    # (attribute / metric suffix, counter description), in reporting order.
    counters = (
        ('client_result_collected_KB',
         'The total size (in KB) of test results '
         'collected from test device. Set to be the total size of '
         'the given path.'),
        ('original_result_total_KB',
         'The original size (in KB) of test results '
         'before being trimmed.'),
        ('result_uploaded_KB',
         'The total size (in KB) of test results to be '
         'uploaded.'),
    )
    for name, description in counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, name), fields=fields)
891
892
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory.

    The resulting sizes are also reported to metrics before returning.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
            sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best-effort fallback for ordinary failures only. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed here.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
Richard Barnette9db80682018-04-26 00:55:15 +0000934 return result_size_info