Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 1 | # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 5 | |
Paul Hobbs | 20cc72a | 2016-08-30 16:57:05 -0700 | [diff] [blame] | 6 | import contextlib |
Fang Deng | 18699fe | 2015-12-04 16:40:27 -0800 | [diff] [blame] | 7 | import grp |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 8 | import httplib |
| 9 | import json |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 10 | import logging |
MK Ryu | 35d661e | 2014-09-25 17:44:10 -0700 | [diff] [blame] | 11 | import os |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 12 | import random |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 13 | import re |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 14 | import time |
Dan Shi | ffd5b82 | 2017-07-14 11:16:23 -0700 | [diff] [blame] | 15 | import traceback |
Paul Drews | bef578d | 2013-09-24 15:10:36 -0700 | [diff] [blame] | 16 | import urllib2 |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 17 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 18 | import common |
Dan Shi | ffd5b82 | 2017-07-14 11:16:23 -0700 | [diff] [blame] | 19 | from autotest_lib.client.bin.result_tools import utils as result_utils |
| 20 | from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib |
| 21 | from autotest_lib.client.bin.result_tools import view as result_view |
Prathmesh Prabhu | cbd5ebb | 2018-08-28 17:04:50 -0700 | [diff] [blame] | 22 | from autotest_lib.client.common_lib import lsbrelease_utils |
Dan Shi | ef31f03 | 2016-05-13 15:51:39 -0700 | [diff] [blame] | 23 | from autotest_lib.client.common_lib import utils |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 24 | from autotest_lib.client.common_lib import error |
Dan Shi | ffd5b82 | 2017-07-14 11:16:23 -0700 | [diff] [blame] | 25 | from autotest_lib.client.common_lib import file_utils |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 26 | from autotest_lib.client.common_lib import global_config |
MK Ryu | 0c1a37d | 2015-04-30 12:00:55 -0700 | [diff] [blame] | 27 | from autotest_lib.client.common_lib import host_queue_entry_states |
Kevin Cheng | 5f2ba6c | 2016-09-28 10:20:05 -0700 | [diff] [blame] | 28 | from autotest_lib.client.common_lib import host_states |
Simran Basi | 7756a0b | 2016-03-16 13:10:07 -0700 | [diff] [blame] | 29 | from autotest_lib.server.cros import provision |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 30 | from autotest_lib.server.cros.dynamic_suite import constants |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 31 | from autotest_lib.server.cros.dynamic_suite import job_status |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 32 | |
Dan Shi | ffd5b82 | 2017-07-14 11:16:23 -0700 | [diff] [blame] | 33 | try: |
| 34 | from chromite.lib import metrics |
| 35 | except ImportError: |
| 36 | metrics = utils.metrics_mock |
| 37 | |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 38 | |
# Shared handle to the global configuration used throughout this module.
CONFIG = global_config.global_config

# Options read from the NOTIFICATIONS section, each requested with a default
# of ''. The two *_SHERIFF_JS values are comma-separated lists that
# get_sheriffs() appends to _CHROMIUM_BUILD_URL to form the URLs it polls.
_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which the lab is
# considered up; any other value makes _decode_lab_status() raise.
LAB_GOOD_STATES = ('open', 'throttled')

# Boolean option from the CROS section, requested with a default of False.
ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mappings between build target name and board name, in both directions.
# These handle the special case of certain Android boards whose board name
# does not match the name of their build target.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
Dan Shi | 4327440 | 2016-11-04 15:13:43 -0700 | [diff] [blame] | 69 | |
class TestLabException(Exception):
    """Signals that the Test Lab has blocked a test or suite."""
| 73 | |
| 74 | |
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
| 78 | |
| 79 | |
class Singleton(type):
    """Metaclass that hands out a single shared instance per class."""
    # Maps each class using this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the class's sole instance, constructing it on first use.

        Constructor arguments are only consumed by the call that creates
        the instance; later calls return the cached object as-is.
        """
        registry = cls._instances
        try:
            return registry[cls]
        except KeyError:
            pass
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
| 90 | |
class EmptyAFEHost(object):
    """Stand-in for an AFE host object, used when there is no AFE."""

    def __init__(self):
        """Create a host that has no attributes and no labels.

        Only the fields currently read from a real AFE Host object (see
        rpc_interfaces.get_hosts()) are provided here. As more fields of the
        real object come into use they should be added, so that callers are
        not surprised by a missing attribute on their host's afe_host.
        """
        self.labels = list()
        self.attributes = dict()
| 105 | |
Fang Deng | f08814a | 2015-08-03 18:12:18 +0000 | [diff] [blame] | 106 | |
def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: milestone the manifest was associated with, as a
                       string of digits, e.g. '20'.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    @raises ParseBuildNameException if name does not start with a
            recognizable build name.

    """
    # An optional 'trybot-' prefix and optional '-chrome'/'-chromium'
    # suffixes on the board are accepted but not reported to the caller.
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    # The pattern has a fixed number of groups, so a successful match is the
    # only condition that needs checking.
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    return (match.group('board'), match.group('type'),
            match.group('milestone'), match.group('manifest'))
| 129 | |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 130 | |
def get_labels_from_afe(hostname, label_prefix, afe):
    """Look up the values of a host's AFE labels that share a prefix.

    Host labels of the form <label_prefix><value> are queried from the AFE
    and the "<value>" part of each one is returned.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list with the value of every matching label, or None when
             the AFE reports no matching label.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    if not matching:
        return None
    values = []
    for label in matching:
        # Keep everything after the first occurrence of the prefix.
        values.append(label.name.split(label_prefix, 1)[1])
    return values
| 149 | |
| 150 | |
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of a host's single AFE label with a given prefix.

    Uses get_labels_from_afe() to collect the values of labels of the form
    <label_prefix><value>, and returns the value only when exactly one
    label matched.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the value of the matching label, or None if there is no match
             or more than one match.

    """
    found = get_labels_from_afe(hostname, label_prefix, afe)
    if not found or len(found) != 1:
        return None
    return found[0]
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 167 | |
| 168 | |
def get_board_from_afe(hostname, afe):
    """Look up the board of a host from its labels in the AFE.

    The host's label that starts with constants.BOARD_PREFIX is looked up
    and the remainder of that label is returned. `None` is returned if
    there is not a single, unique label matching the prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
| 182 | |
| 183 | |
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its labels in the AFE.

    The build is taken from the host's label that starts with
    provision.CROS_VERSION_PREFIX followed by ':'.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    label_prefix = provision.CROS_VERSION_PREFIX + ':'
    # An empty value is reported as None, the same as a missing label.
    return get_label_from_afe(hostname, label_prefix, afe) or None
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 200 | |
| 201 | |
Allen Li | 6a61239 | 2016-08-18 12:09:32 -0700 | [diff] [blame] | 202 | # TODO(fdeng): fix get_sheriffs crbug.com/483254 |
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset option shows up here as a
        # blank entry. There is no file to poll for it; skip it rather than
        # fetching a bogus URL and logging a spurious warning.
        if not sheriff_js.strip():
            continue
        try:
            url_content = utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue

        # The '.' is escaped so only a literal document.write(...) matches.
        ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.warning('Could not retrieve sheriff ldaps for: %s',
                            url_content)
            continue
        sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                        for alias in ldaps.group(1).split(',')]
    return sheriff_ids
beeps | 46dadc9 | 2013-11-07 14:07:10 -0800 | [diff] [blame] | 238 | |
| 239 | |
def remote_wget(source_url, dest_path, ssh_cmd):
    """Download source_url locally and stream it to dest_path over ssh.

    wget runs on the local host and its output is piped through ssh_cmd
    into a `cat` that writes dest_path on the remote host.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    # NOTE(review): the arguments are interpolated into a shell pipeline
    # unquoted, so callers must pass values that are safe for the shell.
    pipeline = "wget -O - %s | %s 'cat >%s'" % (
            source_url, ssh_cmd, dest_path)
    utils.run(pipeline)
beeps | 46dadc9 | 2013-11-07 14:07:10 -0800 | [diff] [blame] | 251 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 252 | |
_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS requests, pausing between
    attempts, until one returns an HTTP 200 response.

    @param status_url: URL to download the lab status from.

    @returns The JSON object obtained from the given URL, or None if no
             attempt produced a 200 response.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Make sure the connection is released whether or not the response
        # turns out to be usable.
        with contextlib.closing(response):
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        time.sleep(retry_waittime)
    return None
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 274 | |
| 275 | |
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status page;
                       its 'general_state' and 'message' entries are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    # The bracketed text is a whitespace-separated list of patterns, each
    # matched against the start of the build name.
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette | 266da2a | 2013-11-27 15:09:55 -0800 | [diff] [blame] | 306 | |
| 307 | |
def is_in_lab():
    """Tell whether the current Autotest instance is in the lab.

    The decision is based on the SERVER/hostname config value starting
    with 'cautotest'.

    @return: True if the Autotest instance is in lab.
    """
    server_hostname = CONFIG.get_config_value('SERVER', 'hostname')
    in_lab = server_hostname.startswith('cautotest')
    return in_lab
| 315 | |
| 316 | |
def check_lab_status(build):
    """Verify that the lab status permits scheduling tests for a build.

    Does nothing outside the lab. In the lab, the status is downloaded
    from the CROS/lab_status_url config value and checked for the lab
    being down or the requested build being blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Only the actual lab has a status worth checking.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is not None:
        _decode_lab_status(json_status, build)
        return
    # Without a status, treat the lab as open.
    logging.warning('Could not get a status from %s', status_url)
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 341 | |
| 342 | |
def host_in_lab(hostname):
    """Tell whether a host is in the lab zone, never so on moblab.

    @param hostname: hostname to check.

    @return: False when running in moblab SSP or on a moblab; otherwise
             the result of utils.host_is_in_lab_zone() for the hostname.
    """
    if utils.in_moblab_ssp():
        return False
    if lsbrelease_utils.is_moblab():
        return False
    return utils.host_is_in_lab_zone(hostname)
| 347 | |
| 348 | |
def lock_host_with_labels(afe, lock_manager, labels):
    """Find one host carrying all the given labels and lock it.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Try the hosts in random order. This prevents errors where a fault
    # might seem repeatable because we lock, say, the same packet capturer
    # for each test run.
    random.shuffle(candidates)
    for candidate in candidates:
        if not lock_manager.lock([candidate.hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         candidate.hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.',
                     candidate.hostname, labels)
        return candidate.hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 384 | |
| 385 | |
def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    # Build a real list: the emptiness check below must not depend on
    # filter() returning a list, which it only does on Python 2.
    relevant_views = [view for view in views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    # If a test name appears more than once, the last view wins.
    return dict((view['test_name'], view['status'])
                for view in relevant_views)
MK Ryu | 35d661e | 2014-09-25 17:44:10 -0700 | [diff] [blame] | 406 | |
| 407 | |
def get_data_key(prefix, suite, build, board):
    """
    Builds a dotted key string out of the given parameters.

    The key has the form <prefix>.<board>.<build_type>.<branch>.<suite>.

    @param prefix: Prefix for the generating key.
    @param suite:  a suite name. e.g., bvt-cq, bvt-inline, dummy
    @param build:  The build string. This string should have a consistent
        format eg: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch
        we give up and use the parameters we're sure of instead (suite,
        board), with 'Unknown' for the rest. eg:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board:  The board that this suite ran on.
    @return:       The key string used for a dictionary.
    """
    try:
        parsed_board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        build_type = 'Unknown'
        branch = 'Unknown'
    else:
        # For x86 boards, anything after the second name component is
        # folded into the build type (e.g. 'pgo' in x86-alex-pgo).
        variant = re.search(r'x86-\w+-(.*)', parsed_board)
        if variant:
            build_type = variant.group(1) + '-' + build_type

    key_parts = dict(prefix=prefix, board=board, branch=branch,
                     build_type=build_type, suite=suite)
    key_format = '%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
    return key_format % key_parts
MK Ryu | 8318435 | 2014-12-10 14:59:40 -0800 | [diff] [blame] | 448 | |
| 449 | |
def setup_logging(logfile=None, prefix=False):
    """Reset the root logger to plain, message-only logging.

    By default calls to logging will only show the message; no severity is
    logged. The root logger's level is set to INFO.

    @param logfile: If specified dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add prefix to log
        entries to include timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    root_logger = logging.getLogger()

    # Drop all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. The StreamHandler added below replaces
    # it so that only messages are logged, not severity.
    root_logger.handlers = []

    log_format = ('%(asctime)s %(levelname)-5s| %(message)s' if prefix
                  else '%(message)s')

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(log_format))
    root_logger.addHandler(console)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return
    to_file = logging.FileHandler(logfile)
    to_file.setFormatter(logging.Formatter(log_format))
    to_file.setLevel(logging.DEBUG)
    root_logger.addHandler(to_file)
Prashanth Balasubramanian | 8c98ac1 | 2014-12-23 11:26:44 -0800 | [diff] [blame] | 481 | |
| 482 | |
def is_shard():
    """Tell whether this instance is running as a shard.

    Looks at the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    shard_hostname = CONFIG.get_config_value(
            'SHARD', 'shard_hostname', default=None)
    return True if shard_hostname else False
| 492 | |
| 493 | |
def get_global_afe_hostname():
    """Return the global AFE's hostname, as set in the global configuration.

    The value is read from global_afe_hostname in the section SERVER.
    """
    global_afe_hostname = CONFIG.get_config_value(
            'SERVER', 'global_afe_hostname')
    return global_afe_hostname
Fang Deng | 0cb2a3b | 2015-12-10 17:59:00 -0800 | [diff] [blame] | 497 | |
| 498 | |
def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    User in restricted group only have access to master.

    The restricted groups come from the comma-separated config value
    restricted_groups in the section AUTOTEST_WEB.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        # Tolerate whitespace around the commas and skip blank entries
        # (an unset option splits into a single empty string).
        group = group.strip()
        if not group:
            continue
        try:
            members = grp.getgrnam(group).gr_mem
        except KeyError:
            # getgrnam() raises KeyError for a group the system lacks.
            logging.debug("%s is not a valid group.", group)
            continue
        if username in members:
            return True
    return False
| 520 | |
| 521 | |
def get_special_task_status(is_complete, success, is_active):
    """Map the state flags of a special task onto a queue entry status.

    SpecialTasks are not HostQueueEntries, but presenting them with the same
    set of statuses is helpful to the user.

    @param is_complete Boolean if the task is completed.
    @param success Boolean if the task succeeded.
    @param is_active Boolean if the task is active.

    @return COMPLETED or FAILED for a completed task (depending on success),
            RUNNING for an active task that is not completed, QUEUED
            otherwise.
    """
    statuses = host_queue_entry_states.Status
    if not is_complete:
        # The task has not finished: it is either in progress or pending.
        if is_active:
            return statuses.RUNNING
        return statuses.QUEUED
    if success:
        return statuses.COMPLETED
    return statuses.FAILED
| 542 | |
| 543 | |
def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Build the execution (results) path of a SpecialTask.

    The path depends on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    The extra component on shards works around the fact that a shard can
    fail independently of the master and be replaced by another shard that
    has the same hosts. Task ids are not globally unique, so without the
    timestamp the logs of tasks running on the second shard would clobber
    the logs from the first one in google storage.

    @param hostname Hostname
    @param task_id Special task id
    @param task_name Special task name (e.g., Verify, Repair, etc); it is
                     lower-cased in the path.
    @param time_requested Special task requested time; must provide
                          strftime().

    @return An execution path for the task.
    """
    base_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    if is_shard():
        # The job_id would be the simplest uid to disambiguate result
        # directories in case this shard fails, but in rare cases tasks have
        # no job associated with them (eg: frontend verify), so the creation
        # timestamp is used instead. The clocks between a shard and master
        # should always be in sync; any discrepancies will be brought to our
        # attention in the form of job timeouts.
        #
        # Note that the format below is year, day, month (not month, day).
        #
        # TODO: This is a hack, however it is the easiest way to achieve
        # correctness. There is currently some debate over the future of
        # tasks in our infrastructure and refactoring everything right
        # now isn't worth the time.
        timestamp = time_requested.strftime('%Y%d%m%H%M%S')
        return '%s/%s' % (base_path, timestamp)

    # Appending the timestamp on the master would break backward
    # compatibility, as there are tasks that currently don't have timestamps.
    # If a host or job has been sent to a shard, the rpc for that host/job is
    # redirected to the shard, so the is_shard() check above happens on the
    # shard the logs are on.
    return base_path
| 587 | |
| 588 | |
def get_job_tag(id, owner):
    """Build the string tag of a job, in the form "<id>-<owner>".

    @param id Job id
    @param owner Job owner

    @return The tag string.
    """
    tag_parts = (id, owner)
    return '%s-%s' % tag_parts
| 597 | |
| 598 | |
def get_hqe_exec_path(tag, execution_subdir):
    """Build the execution path to the results of a HQE.

    @param tag Tag string for a job associated with a HQE.
    @param execution_subdir Execution sub-directory string of a HQE.

    @return tag and execution_subdir joined as path components.
    """
    path_parts = (tag, execution_subdir)
    return os.path.join(*path_parts)
Dan Shi | 82997b9 | 2015-05-06 12:08:02 -0700 | [diff] [blame] | 607 | |
| 608 | |
def is_inside_chroot():
    """Check if the process is running inside chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: The result of cros_build_lib.IsInsideChroot(), or False (after
             logging a warning) if cros_build_lib cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        # logging.warning rather than the deprecated logging.warn alias.
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()
Dan Shi | 70647ca | 2015-07-16 22:52:35 -0700 | [diff] [blame] | 630 | |
| 631 | |
def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since lab does
    not run test for these builds, they can be ignored.
    Also, tests for Launch Control builds have different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. It is empty if
            the name follows neither naming convention. Otherwise the
            keyvals include:
            build: Name of the build, e.g., lumpy-release/R46-7272.0.0
            build_version: The version of the build, e.g., R46-7272.0.0
            board: Name of the board, e.g., lumpy. This key is missing if
                   the build can be parsed neither as a regular build name
                   nor as a Launch Control build.
            suite: Name of the test suite, e.g., bvt

    """
    info = {}
    # Raw strings, so that the regex escapes (\d, \.) are not interpreted as
    # (invalid) string escape sequences.
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    # The suite job convention takes precedence over the test job one.
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if not match:
        return info

    info['build'] = match.group(1)
    info['suite'] = match.group(2)
    info['build_version'] = info['build'].split('/')[1]
    try:
        info['board'], _, _, _ = ParseBuildName(info['build'])
    except ParseBuildNameException:
        # Try to parse it as Launch Control build
        # Launch Control builds have name format:
        # branch/build_target-build_type/build_id.
        try:
            _, target, build_id = utils.parse_launch_control_build(
                    info['build'])
            build_target, _ = utils.parse_launch_control_target(target)
            if build_target:
                info['board'] = build_target
                info['build_version'] = build_id
        except ValueError:
            # Not a Launch Control build either; return what we have.
            pass
    return info
Kevin Cheng | 3a4a57a | 2015-09-30 12:09:50 -0700 | [diff] [blame] | 686 | |
| 687 | |
def verify_not_root_user():
    """Refuse to continue when the process runs with uid == 0.

    @raises error.IllegalUser: if the current user id is 0.
    """
    running_as_root = os.getuid() == 0
    if running_as_root:
        raise error.IllegalUser('This script can not be ran as root.')
| 692 | |
| 693 | |
def get_hostname_from_machine(machine):
    """Extract the hostname from a machine string or dict.

    @param machine: Either a hostname string or a machine dict, as accepted
                    by get_host_info_from_machine.

    @returns: Machine hostname in string format.
    """
    host_info = get_host_info_from_machine(machine)
    # host_info is a (hostname, afe_host) pair.
    return host_info[0]
| 701 | |
| 702 | |
def get_host_info_from_machine(machine):
    """Extract host information from a machine string or dict.

    @param machine: Either a dict with the keys 'hostname' and 'afe_host', or
                    any other object, which is used as the hostname itself.

    @returns: Tuple of (hostname, afe_host). For a non-dict machine the
              afe_host is a new EmptyAFEHost instance.
    """
    if not isinstance(machine, dict):
        return (machine, EmptyAFEHost())
    hostname = machine['hostname']
    afe_host = machine['afe_host']
    return (hostname, afe_host)
| 712 | |
| 713 | |
def get_afe_host_from_machine(machine):
    """Extract the afe_host from a machine string or dict.

    @param machine: Either a hostname string or a machine dict, as accepted
                    by get_host_info_from_machine.

    @returns: AFE host object.
    """
    host_info = get_host_info_from_machine(machine)
    # host_info is a (hostname, afe_host) pair.
    return host_info[1]
Fang Deng | f8a94e2 | 2015-12-07 13:39:13 -0800 | [diff] [blame] | 721 | |
| 722 | |
def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible.

    @param machine: A machine string or dict.

    @returns: The value stored under 'connection_pool' in a machine dict, or
              None if the key is missing or machine is not a dict.
    """
    if isinstance(machine, dict):
        return machine.get('connection_pool')
    return None
| 728 | |
| 729 | |
def get_creds_abspath(creds_file):
    """Resolve the path of a credentials file.

    An absolute creds_file is returned unchanged. A relative one is joined
    onto the creds directory given by the global_config option creds_dir in
    the SERVER section; if that option is empty or the directory does not
    exist, the autotest directory is used instead.

    @param creds_file: A path to the credentials file.
    @return: The resolved path to the credentials file, or None if
             creds_file is empty or None.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file

    configured_dir = CONFIG.get_config_value(
            'SERVER', 'creds_dir', default='')
    if configured_dir and os.path.exists(configured_dir):
        base_dir = configured_dir
    else:
        base_dir = common.autotest_dir
    return os.path.join(base_dir, creds_file)
Kevin Cheng | 3b11181 | 2015-12-15 11:52:08 -0800 | [diff] [blame] | 748 | |
| 749 | |
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    Failures are never propagated to the caller: if chromite cannot be
    imported, if the setup function raises, or if what it returns has no
    __exit__ attribute, a TrivialContextManager is returned instead.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.

    @return: The context manager returned by
             ts_mon_config.SetupTsMonGlobalState, or a TrivialContextManager
             as described above.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        # logging.warning rather than the deprecated logging.warn alias.
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        # Deliberately broad: monitoring is best-effort and must not break
        # the caller.
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    # Either the setup failed or it returned something that cannot be used
    # in a with statement.
    return TrivialContextManager()
Paul Hobbs | 20cc72a | 2016-08-30 16:57:05 -0700 | [diff] [blame] | 774 | |
| 775 | |
@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """No-op context manager; entering it yields None.

    @param *args: Accepted and ignored.
    @param **kwargs: Accepted and ignored.
    """
    yield None
Kevin Cheng | 5f2ba6c | 2016-09-28 10:20:05 -0700 | [diff] [blame] | 784 | |
| 785 | |
def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    The AFE is polled about once per second for the duts that are not idle
    yet. A dut counts as idle once its AFE status is one of
    host_states.IDLE_STATES. The given duts list is not modified.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # Work on a copy so that the caller's list is left untouched.
    active_dut_list = list(duts)
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = set(afe_host.hostname for afe_host in afe_hosts
                        if afe_host.status in host_states.IDLE_STATES)

        # Take out idle duts so we don't needlessly check them next time
        # around. Filtering (instead of list.remove) tolerates duplicate or
        # unexpected hostnames in the AFE reply.
        active_dut_list = [dut for dut in active_dut_list
                           if dut not in idle_duts]

        if active_dut_list:
            logging.info('still waiting for following duts to go idle: %s',
                         active_dut_list)
    return True
| 821 | |
| 822 | |
@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    The duts are locked in sorted order. Duts for which afe.lock_host()
    returns a false value are logged as already locked and skipped: they are
    neither waited on nor unlocked on exit. Every dut locked here is unlocked
    when the context exits, whether or not an exception was raised.

    @param duts: List of duts to lock. The list itself is not modified.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Yields the result of wait_for_idle_duts() for the duts locked
             here: True if they all went idle, or False if we timed out
             waiting too long for hosts to go idle.
    """
    # Initialized before the try block so that the finally clause can always
    # rely on it.
    locked_duts = []
    try:
        # sorted() rather than duts.sort(), so the caller's list is not
        # reordered as a side effect.
        for dut in sorted(duts):
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
Dan Shi | b5b8b4f | 2016-11-02 14:04:02 -0700 | [diff] [blame] | 847 | |
| 848 | |
def _get_default_size_info(path):
    """Build the fallback result size information for a result directory.

    Used when the directory summary could not be built: the test result is
    assumed not to be throttled and every size is reported as the current
    total size of the given path.

    @param path: Path of the result directory to measure.

    @return: A namedtuple of result size informations, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    directory_size_kb = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=directory_size_kb,
            original_result_total_KB=directory_size_kb,
            result_uploaded_KB=directory_size_kb,
            result_throttled=False)
| 873 | |
| 874 | |
def _report_result_size_metrics(result_size_info):
    """Report result sizes information to metrics.

    One counter per size field is incremented by the value of that field;
    every counter carries the result_throttled flag as a metric field.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    fields = {'result_throttled' : result_size_info.result_throttled}
    # (size field / counter name suffix, counter description), in reporting
    # order.
    size_counters = (
            ('client_result_collected_KB',
             'The total size (in KB) of test results '
             'collected from test device. Set to be the total size of '
             'the given path.'),
            ('original_result_total_KB',
             'The original size (in KB) of test results '
             'before being trimmed.'),
            ('result_uploaded_KB',
             'The total size (in KB) of test results to be '
             'uploaded.'),
    )
    for size_name, description in size_counters:
        counter = metrics.Counter(RESULT_METRICS_PREFIX + size_name,
                                  description=description)
        counter.increment_by(getattr(result_size_info, size_name),
                             fields=fields)
| 898 | |
| 899 | |
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transfered from the test
            device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    On success an html summary view is written into the result directory and
    the merged summary files are deleted.

    If directory summary merging failed for any reason, fall back to use the
    total size of the given result directory.

    In both cases the sizes are reported to metrics before returning.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
            sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except Exception:
        # Best effort: any failure above falls back to the plain directory
        # size. Exception (rather than a bare except) keeps KeyboardInterrupt
        # and SystemExit from being swallowed.
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info