blob: 0891f119d42a145f4286e45dd53da5c47261ebe3 [file] [log] [blame]
Dan Shia1ecd5c2013-06-06 11:21:31 -07001# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Dan Shia1ecd5c2013-06-06 11:21:31 -07005
Paul Hobbs20cc72a2016-08-30 16:57:05 -07006import contextlib
Fang Deng18699fe2015-12-04 16:40:27 -08007import grp
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -08008import httplib
9import json
Alex Millerdadc2c22013-07-08 15:21:21 -070010import logging
MK Ryu35d661e2014-09-25 17:44:10 -070011import os
beeps023afc62014-02-04 16:59:22 -080012import random
Alex Millerdadc2c22013-07-08 15:21:21 -070013import re
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080014import time
Dan Shiffd5b822017-07-14 11:16:23 -070015import traceback
Paul Drewsbef578d2013-09-24 15:10:36 -070016import urllib2
Alex Millerdadc2c22013-07-08 15:21:21 -070017
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080018import common
Dan Shiffd5b822017-07-14 11:16:23 -070019from autotest_lib.client.bin.result_tools import utils as result_utils
20from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
21from autotest_lib.client.bin.result_tools import view as result_view
Dan Shief31f032016-05-13 15:51:39 -070022from autotest_lib.client.common_lib import utils
beeps023afc62014-02-04 16:59:22 -080023from autotest_lib.client.common_lib import error
Dan Shiffd5b822017-07-14 11:16:23 -070024from autotest_lib.client.common_lib import file_utils
beeps023afc62014-02-04 16:59:22 -080025from autotest_lib.client.common_lib import global_config
MK Ryu0c1a37d2015-04-30 12:00:55 -070026from autotest_lib.client.common_lib import host_queue_entry_states
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070027from autotest_lib.client.common_lib import host_states
Simran Basi7756a0b2016-03-16 13:10:07 -070028from autotest_lib.server.cros import provision
Dan Shia1ecd5c2013-06-06 11:21:31 -070029from autotest_lib.server.cros.dynamic_suite import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070030from autotest_lib.server.cros.dynamic_suite import job_status
Dan Shia1ecd5c2013-06-06 11:21:31 -070031
Dan Shiffd5b822017-07-14 11:16:23 -070032try:
33 from chromite.lib import metrics
34except ImportError:
35 metrics = utils.metrics_mock
36
Dan Shia1ecd5c2013-06-06 11:21:31 -070037
# Shared global_config object used for every config lookup in this module.
CONFIG = global_config.global_config

# NOTIFICATIONS settings; each one falls back to '' when it is not configured.
_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which
# _decode_lab_status() does not report the lab as closed.
LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool, default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600
54
# Special-case handling for the Android boards whose board name does not match
# their build target name. Keyed by build target; the value is the board name.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat',
}
# Inverse of ANDROID_TARGET_TO_BOARD_MAP (board name -> build target). It is
# derived rather than spelled out so the two tables cannot drift apart.
ANDROID_BOARD_TO_TARGET_MAP = dict(
        (board, target)
        for target, board in ANDROID_TARGET_TO_BOARD_MAP.items())
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
Dan Shi43274402016-11-04 15:13:43 -070068
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or a suite."""
72
73
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was unable to parse a build name."""
77
78
class Singleton(type):
    """Metaclass that hands out a single shared instance per class."""
    # Maps each class created with this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance for |cls|, creating it on the first call.

        The constructor arguments are only used by the call that actually
        creates the instance; later calls return the stored instance.
        """
        registry = cls._instances
        try:
            return registry[cls]
        except KeyError:
            registry[cls] = super(Singleton, cls).__call__(*args, **kwargs)
            return registry[cls]
89
class EmptyAFEHost(object):
    """Stand-in for an AFE host object, for use when there is no AFE."""

    def __init__(self):
        """Start with an empty attribute dict and an empty label list.

        Only the attributes that are currently used (attributes and labels)
        are provided. As more attributes of a real AFE Host object (see
        rpc_interfaces.get_hosts()) come into use, add them here so that a
        host's afe_host object does not complain about a missing attribute.
        """
        self.labels = list()
        self.attributes = dict()
104
Fang Dengf08814a2015-08-03 18:12:18 +0000105
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    The pattern is matched with re.match, so only the start of |name| has to
    look like a build name; any text after the version (or after 'LATEST')
    is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return: A tuple (board, type, milestone, manifest) of strings:
             board: board the manifest is for, e.g. 'x86-alex'.
             type: build type, e.g. 'release', 'factory', or 'firmware'.
             milestone: digits of the milestone the manifest was associated
                        with, e.g. '20'. None for relative build names.
             manifest: manifest number, e.g. '2015.0.0'.
                       None for relative build names.

    @raises ParseBuildNameException: if |name| does not match the pattern.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    # The number of groups is a property of the pattern, not of the input, so
    # a successful match is the only condition that needs checking.
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    return match.group('board', 'type', 'milestone', 'manifest')
128
Alex Millerdadc2c22013-07-08 15:21:21 -0700129
def get_labels_from_afe(hostname, label_prefix, afe):
    """Look up the values of a host's AFE labels that share a prefix.

    Queries the AFE for the host's labels whose name starts with
    |label_prefix| (labels of the form <label_prefix><value>) and returns
    the "<value>" part of each of them.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the value part of every matching label, or None
             when the AFE reports no matching label.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None

    values = []
    for label in matching:
        # Keep everything after the first occurrence of the prefix.
        values.append(label.name.split(label_prefix, 1)[1])
    return values
148
149
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of the single AFE label of a host with a prefix.

    Uses get_labels_from_afe() to collect the "<value>" part of the host's
    labels of the form <label_prefix><value>, and returns it only if exactly
    one label matched.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the value of the matching label, or None when there is no
             matching label or more than one.

    """
    candidates = get_labels_from_afe(hostname, label_prefix, afe)
    if not candidates or len(candidates) != 1:
        return None
    return candidates[0]
Dan Shia1ecd5c2013-06-06 11:21:31 -0700166
167
def get_board_from_afe(hostname, afe):
    """Look up the board of a host from its labels in the AFE.

    Delegates to get_label_from_afe() with constants.BOARD_PREFIX as the
    label prefix, so `None` is returned unless the host has a single,
    unique label with that prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
181
182
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its labels in the AFE.

    Tries the CrOS version label prefix first and the Android build version
    label prefix second, and returns the first build found.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    version_prefixes = (provision.CROS_VERSION_PREFIX,
                        provision.ANDROID_BUILD_VERSION_PREFIX)
    for version_prefix in version_prefixes:
        label_prefix = version_prefix + ':'
        found = get_label_from_afe(hostname, label_prefix, afe)
        if found:
            return found
    return None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700200
201
# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """Fetch the current sheriffs' email addresses.

    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to build a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             A javascript file that cannot be fetched or parsed is logged
             and contributes nothing, so the list may be empty.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset config value would otherwise
        # trigger a pointless fetch of the bare base URL.
        if not sheriff_js:
            continue
        url = '%s%s' % (_CHROMIUM_BUILD_URL, sheriff_js)
        try:
            url_content = utils.urlopen(url).read()
        # urllib2.URLError is a subclass of IOError, so this clause has to
        # come before the IOError one or it can never handle a URLError.
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s": %s',
                            url, str(e))
            continue
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s: %s',
                            url, str(e))
            continue

        ldaps = re.search(r"document.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.warning('Could not retrieve sheriff ldaps for: %s',
                            url_content)
            continue
        sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                        for alias in ldaps.group(1).split(',')]
    return sheriff_ids
beeps46dadc92013-11-07 14:07:10 -0800238
239
def remote_wget(source_url, dest_path, ssh_cmd):
    """Download source_url locally and stream it to a file on a remote host.

    Runs wget on localhost writing to stdout, and pipes that through
    |ssh_cmd| into `cat >dest_path` on the remote side.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    # NOTE(review): the arguments are interpolated into the command string
    # without any quoting; callers presumably pass trusted values - confirm.
    pipeline = "wget -O - %s | %s 'cat >%s'" % (
            source_url, ssh_cmd, dest_path)
    utils.run(pipeline)
beeps46dadc92013-11-07 14:07:10 -0800251
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800252
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS attempts, waiting one second between
    consecutive attempts. An attempt fails when opening the URL raises
    IOError or when the response code is not 200.

    @param status_url: URL to download the lab status from.

    @returns The JSON object obtained from the given URL, or None if every
             attempt failed.

    """
    retry_waittime = 1
    for attempt in range(_MAX_LAB_STATUS_ATTEMPTS):
        # Wait between attempts only; there is nothing to wait for once the
        # last attempt has failed.
        if attempt:
            time.sleep(retry_waittime)
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            continue
        # Close the connection whether or not the response turns out usable.
        with contextlib.closing(response):
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
    return None
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800274
275
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON lab status; its 'general_state'
                       and 'message' entries are read.
    @param build: build name that we want to check the status of. No
                  per-build check is done when this is empty or None.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    # The bracketed text is a whitespace-separated list of patterns.
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette266da2a2013-11-27 15:09:55 -0800306
307
def is_in_lab():
    """Tell whether the current Autotest instance is in the lab.

    The decision is based on the SERVER/hostname config value starting with
    'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    return server_hostname.startswith('cautotest')
315
316
def check_lab_status(build):
    """Check whether the lab status permits scheduling for a build.

    Does nothing outside of the lab. In the lab, downloads the lab status
    and checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab is subject to the lab status.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is not None:
        _decode_lab_status(json_status, build)
        return

    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
beeps023afc62014-02-04 16:59:22 -0800341
342
def lock_host_with_labels(afe, lock_manager, labels):
    """Find one host carrying all the given labels and lock it.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Try the candidates in random order. This prevents errors where a fault
    # might seem repeatable because we lock, say, the same packet capturer
    # for each test run.
    random.shuffle(candidates)
    for candidate in candidates:
        if not lock_manager.lock([candidate.hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         candidate.hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.',
                     candidate.hostname, labels)
        return candidate.hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi7e04fa82013-07-25 15:08:48 -0700378
379
def get_test_views_from_tko(suite_job_id, tko):
    """Collect test name and status for the given suite job ID.

    Only the views accepted by job_status.view_is_relevant are used.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    all_views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant = [view for view in all_views
                if job_status.view_is_relevant(view)]
    if not relevant:
        raise Exception('Failed to retrieve job results.')

    # When a test name occurs more than once, the last view wins.
    return dict((view['test_name'], view['status']) for view in relevant)
MK Ryu35d661e2014-09-25 17:44:10 -0700400
401
def get_data_key(prefix, suite, build, board):
    """
    Build a dotted key string out of the parameters.

    The key has the form <prefix>.<board>.<build_type>.<branch>.<suite>.

    @param prefix: Prefix for the generating key.
    @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use the parameters we're sure of instead (suite,
        board), with 'Unknown' for build_type and branch. eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        parsed_board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        build_type = 'Unknown'
        branch = 'Unknown'
    else:
        # For x86 boards, whatever follows the 'x86-<name>-' part of the
        # parsed board belongs to the build type (e.g. 'pgo' above).
        x86_suffix = re.search(r'x86-\w+-(.*)', parsed_board)
        if x86_suffix:
            build_type = x86_suffix.group(1) + '-' + build_type

    key_fields = dict(prefix=prefix, board=board, branch=branch,
                      build_type=build_type, suite=suite)
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % key_fields)
MK Ryu83184352014-12-10 14:59:40 -0800442
443
def setup_logging(logfile=None, prefix=False):
    """Reset root logging to a bare, message-only configuration.

    Unless |prefix| is set, calls to logging will only show the message and
    no severity is logged. The root logger level is set to INFO.

    @param logfile: If specified dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    root_logger.handlers = []

    log_format = ('%(asctime)s %(levelname)-5s| %(message)s' if prefix
                  else '%(message)s')

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(console)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    to_file = logging.FileHandler(logfile)
    to_file.setFormatter(logging.Formatter(log_format))
    to_file.setLevel(logging.DEBUG)
    root_logger.addHandler(to_file)
Prashanth Balasubramanian8c98ac12014-12-23 11:26:44 -0800475
476
def is_shard():
    """Tell whether this instance is running as a shard.

    Looks at the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    shard_hostname = CONFIG.get_config_value(
            'SHARD', 'shard_hostname', default=None)
    if shard_hostname:
        return True
    return False
486
487
def get_global_afe_hostname():
    """Return the global AFE's hostname from the global configuration.

    @return The SERVER/global_afe_hostname config value.
    """
    global_afe_hostname = CONFIG.get_config_value(
            'SERVER', 'global_afe_hostname')
    return global_afe_hostname
Fang Deng0cb2a3b2015-12-10 17:59:00 -0800491
492
def is_restricted_user(username):
    """Tell whether a user belongs to one of the restricted groups.

    User in restricted group only have access to master. The restricted
    groups are the comma-separated AUTOTEST_WEB/restricted_groups config
    value, looked up in the system group database.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    configured_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='')
    for group in configured_groups.split(','):
        if not group:
            continue
        try:
            members = grp.getgrnam(group).gr_mem
        except KeyError:
            # The configured group does not exist on this system.
            logging.debug("%s is not a valid group.", group)
            continue
        if username in members:
            return True
    return False
514
515
def get_special_task_status(is_complete, success, is_active):
    """Derive a host-queue-entry style status for a special task.

    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses. A complete task is COMPLETED or
    FAILED depending on |success|; otherwise it is RUNNING when active and
    QUEUED when not.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task.
    """
    statuses = host_queue_entry_states.Status
    if is_complete:
        return statuses.COMPLETED if success else statuses.FAILED
    return statuses.RUNNING if is_active else statuses.QUEUED
536
537
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Build the execution path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independent
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    base_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # The timestamp is only added on a shard. If we do this on the master it
    # will break backward compatibility, as there are tasks that currently
    # don't have timestamps. If a host or job has been sent to a shard, the
    # rpc for that host/job will be redirected to the shard, so this
    # global_config check will happen on the shard the logs are on.
    if is_shard():
        # Generate a uid to disambiguate special task result directories
        # in case this shard fails. The simplest uid is the job_id, however
        # in rare cases tasks do not have jobs associated with them (eg:
        # frontend verify), so just use the creation timestamp. The clocks
        # between a shard and master should always be in sync. Any
        # discrepancies will be brought to our attention in the form of job
        # timeouts.
        # NOTE(review): the format puts the day before the month (%d%m);
        # that looks unintentional, but existing result paths presumably
        # depend on it - confirm before changing.
        timestamp = time_requested.strftime('%Y%d%m%H%M%S')

        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        return '%s/%s' % (base_path, timestamp)

    return base_path
581
582
def get_job_tag(id, owner):
    """Build the string tag of a job, in the form <id>-<owner>.

    @param id    Job id
    @param owner Job owner

    @return The tag string.

    """
    tag_parts = (id, owner)
    return '%s-%s' % tag_parts
591
592
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    @return |tag| and |execution_subdir| joined as path components.

    """
    path_components = (tag, execution_subdir)
    return os.path.join(*path_components)
Dan Shi82997b92015-05-06 12:08:02 -0700601
602
def is_inside_chroot():
    """Check if the process is running inside chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: The result of cros_build_lib.IsInsideChroot(), or False (with a
             logged warning) if cros_build_lib cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        # logging.warn is a deprecated alias; use logging.warning like the
        # rest of this module.
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()
Dan Shi70647ca2015-07-16 22:52:35 -0700624
625
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty when
             |name| follows neither naming convention. The keyvals include:
            build: Name of the build, e.g., lumpy-release/R46-7272.0.0
            build_version: The version of the build, e.g., R46-7272.0.0
            board: Name of the board, e.g., lumpy. Left out when the build
                   can be parsed neither as a ChromeOS build name nor as a
                   Launch Control build.
            suite: Name of the test suite, e.g., bvt

    """
    info = {}
    # Raw strings, so that \d and \. reach the regex engine unmodified.
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if not match:
        return info

    info['build'], info['suite'] = match.group(1, 2)
    info['build_version'] = info['build'].split('/')[1]
    try:
        info['board'], _, _, _ = ParseBuildName(info['build'])
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(
                    info['build'])
            build_target, _ = utils.parse_launch_control_target(target)
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
        except ValueError:
            # Not a Launch Control build either: deliberately best-effort,
            # return what is known and leave 'board' unset.
            pass
    return info
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700680
681
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    The returned decorator appends the decorated function to
    label_function_list. When a label is given and label_list is not None,
    the label is appended to label_list as well. The decorated function is
    handed back unchanged.

    @param label_function_list: List each decorated detection function is
                                appended to.
    @param label_list: List the detectable label is appended to.
                       (Default: None)
    @param label: Label string detectable by the decorated function.
                  (Default: None)

    @return: A decorator that takes the function to register.
    """
    def add_func(func):
        """Register func, and its label if one can be recorded.

        @param func: The function to be added as a detector.

        @return: func itself, unmodified.
        """
        label_function_list.append(func)
        # Nothing to record without both a label and a list to hold it.
        if not label or label_list is None:
            return func
        label_list.append(label)
        return func

    return add_func
Simran Basi9f364a62015-12-07 14:15:19 -0800705
706
def verify_not_root_user():
    """Refuse to continue when the current process runs with uid 0.

    @raises error.IllegalUser: If os.getuid() reports 0.
    """
    running_as_root = os.getuid() == 0
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
711
712
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine string or dict.

    @param machine: Machine given either as a string or as a dict; it is
                    passed straight to get_host_info_from_machine.

    @returns: The first element of the tuple produced by
              get_host_info_from_machine, i.e. the hostname.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[0]
720
721
def get_host_info_from_machine(machine):
    """Split a machine string or dict into hostname and AFE host.

    @param machine: Either a dict holding 'hostname' and 'afe_host' entries,
                    or any other value that is used as the hostname as-is.

    @returns: Tuple of (hostname, afe_host). For a non-dict machine the
              afe_host is a freshly created EmptyAFEHost().
    """
    if not isinstance(machine, dict):
        # No host details were supplied, so pair the value with a placeholder.
        return (machine, EmptyAFEHost())
    return (machine['hostname'], machine['afe_host'])
731
732
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine string or dict.

    @param machine: Machine given either as a string or as a dict; it is
                    passed straight to get_host_info_from_machine.

    @returns: The second element of the tuple produced by
              get_host_info_from_machine, i.e. the AFE host object.
    """
    host_info = get_host_info_from_machine(machine)
    return host_info[1]
Fang Dengf8a94e22015-12-07 13:39:13 -0800740
741
def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible.

    @param machine: Machine given either as a dict or as any other value.

    @returns: The 'connection_pool' entry of a machine dict, or None when
              machine is not a dict or has no such entry.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
747
748
def get_creds_abspath(creds_file):
    """Resolve the location of a credentials file.

    An absolute creds_file is returned untouched. Anything else is joined
    onto the 'creds_dir' value of the SERVER section in global_config, or
    onto common.autotest_dir when that value is unset or does not exist on
    disk.

    @param creds_file: Path to the credentials file; may be empty or None.

    @return: The resolved path to the credentials file, or None when
             creds_file is empty.
    """
    if not creds_file:
        return None
    if not os.path.isabs(creds_file):
        base_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
        if not (base_dir and os.path.exists(base_dir)):
            # Fall back to the autotest directory when no usable creds_dir
            # is configured.
            base_dir = common.autotest_dir
        creds_file = os.path.join(base_dir, creds_file)
    return creds_file
Kevin Cheng3b111812015-12-15 11:52:08 -0800767
768
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    Neither a missing chromite nor a failure inside the ts_mon setup call is
    propagated to the caller; both are logged as warnings and a
    TrivialContextManager() is returned instead.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return: The object returned by ts_mon_config.SetupTsMonGlobalState when
             it has an __exit__ attribute, otherwise a
             TrivialContextManager().
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        # logging.warn is a deprecated alias; use logging.warning like the
        # handler further down.
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        # Only hand back objects usable in a `with` statement; anything else
        # falls through to the trivial context manager below.
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
Paul Hobbs20cc72a2016-08-30 16:57:05 -0700793
794
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager.

    Entering it yields None and leaving it performs no cleanup.

    @param *args: Accepted and ignored.
    @param **kwargs: Accepted and ignored.
    """
    yield None
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700803
804
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    The AFE is polled roughly once per second. Hosts whose status is in
    host_states.IDLE_STATES are dropped from the set being polled.

    @param duts: List of duts to check for idle state. It is not modified.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # Work on a copy so the caller's list is left untouched.
    active_dut_list = list(duts)
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = set(afe_host.hostname for afe_host in afe_hosts
                        if afe_host.status in host_states.IDLE_STATES)

        # Take out idle duts so we don't needlessly check them next time
        # around. Filtering (rather than list.remove) tolerates the AFE
        # reporting a hostname that is not in the active list.
        active_dut_list = [dut for dut in active_dut_list
                           if dut not in idle_duts]

        if active_dut_list:
            logging.info('still waiting for following duts to go idle: %s',
                         active_dut_list)
    return True
840
841
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    Duts are locked in sorted order. Duts for which afe.lock_host reports a
    false result are logged as already locked and are neither waited on nor
    unlocked on exit. On exit, every dut locked here is unlocked again, even
    if the body of the `with` statement raised.

    @param duts: Iterable of duts to lock. It is not modified.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Yields the Boolean result of wait_for_idle_duts for the duts
             locked here: True if they all went idle, False if we timed out
             waiting too long for hosts to go idle.
    """
    # Initialised before the try block so the finally clause can always
    # reference it.
    locked_duts = []
    try:
        # sorted() gives a deterministic locking order without reordering
        # the caller's list in place.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
Dan Shib5b8b4f2016-11-02 14:04:02 -0700866
867
def _get_default_size_info(path):
    """Build the fallback result size information for a directory.

    Used when the directory summary could not be built: the test result is
    assumed not to be throttled, and every size is reported as the current
    total size of the given path.

    @param path: Path of the result directory to measure.

    @return: A ResultSizeInfo namedtuple with:
            client_result_collected_KB: Total size (in KB) of path.
            original_result_total_KB: Total size (in KB) of path.
            result_uploaded_KB: Total size (in KB) of path.
            result_throttled: Always False in this default behavior.
    """
    directory_size_kb = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            result_throttled=False,
            client_result_collected_KB=directory_size_kb,
            original_result_total_KB=directory_size_kb,
            result_uploaded_KB=directory_size_kb)
892
893
def _report_result_size_metrics(result_size_info):
    """Report result sizes information to metrics.

    One counter per size attribute is incremented by that attribute's value.
    Each counter is named RESULT_METRICS_PREFIX plus the attribute name and
    carries the result_throttled flag as its field.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    fields = {'result_throttled' : result_size_info.result_throttled}
    # (attribute name, counter description), reported in this order.
    size_counters = (
            ('client_result_collected_KB',
             'The total size (in KB) of test results '
             'collected from test device. Set to be the total size of '
             'the given path.'),
            ('original_result_total_KB',
             'The original size (in KB) of test results '
             'before being trimmed.'),
            ('result_uploaded_KB',
             'The total size (in KB) of test results to be '
             'uploaded.'),
    )
    for size_name, description in size_counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + size_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, size_name),
                             fields=fields)
917
918
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory.

    The resulting sizes are reported to metrics before being returned.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
            sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best-effort fallback for ordinary failures only. KeyboardInterrupt
        # and SystemExit are allowed to propagate instead of being swallowed.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info