Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 1 | # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 5 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 6 | import httplib |
| 7 | import json |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 8 | import logging |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 9 | import random |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 10 | import re |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 11 | import time |
Paul Drews | bef578d | 2013-09-24 15:10:36 -0700 | [diff] [blame] | 12 | import urllib2 |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 13 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 14 | import common |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 15 | from autotest_lib.client.common_lib import base_utils |
| 16 | from autotest_lib.client.common_lib import error |
| 17 | from autotest_lib.client.common_lib import global_config |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 18 | from autotest_lib.server.cros.dynamic_suite import constants |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame^] | 19 | from autotest_lib.server.cros.dynamic_suite import job_status |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 20 | |
| 21 | |
# Comma-separated javascript file names (relative to _CHROMIUM_BUILD_URL)
# that name the current sheriffs.  Empty string when not configured.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')

# Same as _SHERIFF_JS, but for the lab sheriffs.
_LAB_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')

# URL prefix that the sheriff javascript file names are appended to.
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which the lab is
# considered to be up.
LAB_GOOD_STATES = ('open', 'throttled')
| 30 | |
| 31 | |
class TestLabException(Exception):
    """Signals that the Test Lab is blocking a test or suite."""
| 35 | |
| 36 | |
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
| 40 | |
| 41 | |
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest num.

    Only the beginning of `name` has to look like a build name; any
    text following the manifest number is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'

    @return a 4-tuple of strings (board, type, milestone, manifest):
        board: board the manifest is for, e.g. 'x86-alex'.
        type: build type, e.g. 'release'.
        milestone: milestone digits that follow the 'R', e.g. '20'.
        manifest: manifest number, e.g. '2015.0.0'.

    @raises ParseBuildNameException if `name` does not start with a
            well-formed build name.

    """
    parsed = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    if parsed is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # All four groups are mandatory in the pattern, so a successful match
    # always yields exactly four values.
    return parsed.groups()
| 57 | |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 58 | |
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of one prefixed label of a host in the AFE.

    Asks the AFE for the labels of `hostname` whose names start with
    `label_prefix`, i.e. labels of the form <label_prefix><value>, and
    returns the "<value>" part.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the part of the label after the prefix, or `None` unless
             exactly one label matches.

    """
    matches = afe.get_labels(name__startswith=label_prefix,
                             host__hostname__in=[hostname])
    # Zero matches and ambiguous (multiple) matches are both "no label".
    if not matches or len(matches) != 1:
        return None
    label_name = matches[0].name
    return label_name.split(label_prefix, 1)[1]
| 76 | |
| 77 | |
def get_board_from_afe(hostname, afe):
    """Look up the board of a host from its labels in the AFE.

    Delegates to get_label_from_afe() with constants.BOARD_PREFIX as
    the label prefix, and returns the part of the label that follows
    the prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None` if there is not a single,
             unique board label on the host.

    """
    board = get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)
    return board
| 91 | |
| 92 | |
def get_build_from_afe(hostname, afe):
    """Look up the current build of a host from its labels in the AFE.

    Delegates to get_label_from_afe() with constants.VERSION_PREFIX as
    the label prefix, and returns the part of the label that follows
    the prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build, or `None` if no build label was found or
             if multiple build labels are assigned to this host.

    """
    build = get_label_from_afe(hostname, constants.VERSION_PREFIX, afe)
    return build
| 105 | |
| 106 | |
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be fetched or parsed are logged and skipped.

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            # ''.split(',') is [''], so an unset config value (or a stray
            # comma) produces an empty entry.  Fetching it would request
            # the bare build URL and log a misleading warning.
            continue
        try:
            url_content = base_utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            # urllib2.URLError derives from IOError, so URL errors are
            # reported here as well.
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            for alias in ldaps.group(1).split(','):
                alias = alias.replace(' ', '')
                # An empty document.write('') or a stray comma must not
                # turn into the bogus address '@chromium.org'.
                if alias:
                    sheriff_ids.append('%s@chromium.org' % alias)
    return sheriff_ids
beeps | 46dadc9 | 2013-11-07 14:07:10 -0800 | [diff] [blame] | 142 | |
| 143 | |
def remote_wget(source_url, dest_path, ssh_cmd):
    """Fetch source_url locally and stream it to dest_path on a remote host.

    Runs wget on the local host, writing the download to stdout, and
    pipes it through `ssh_cmd`, which runs `cat` on the remote side to
    store the data at `dest_path`.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    # NOTE(review): all three values are interpolated into the command
    # string unquoted; callers are presumably expected to pass shell-safe
    # values -- confirm before using with arbitrary input.
    template = "wget -O - %s | %s 'cat >%s'"
    base_utils.run(template % (source_url, ssh_cmd, dest_path))
| 155 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 156 | |
# Number of times _get_lab_status() tries to fetch the status page.
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Tries up to _MAX_LAB_STATUS_ATTEMPTS times, pausing between
    attempts.  Failure to fetch the page, a response code other than
    200, or a body that cannot be decoded as JSON all count as a failed
    attempt.

    @param status_url: URL of the lab status page to fetch.

    @returns The JSON object obtained from the given URL, or `None` if
             no attempt succeeded.

    """
    retry_waittime = 1
    for attempt in range(_MAX_LAB_STATUS_ATTEMPTS):
        # Pause before every retry, but not after the final attempt.
        if attempt:
            time.sleep(retry_waittime)
        try:
            response = urllib2.urlopen(status_url)
        except (IOError, httplib.HTTPException) as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        except (IOError, ValueError, httplib.HTTPException) as e:
            # A truncated or malformed body is treated like any other
            # failed attempt, so that the caller's "assume the lab is
            # open" fallback still applies.
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
        finally:
            response.close()
    return None
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 178 | |
| 179 | |
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status
                       page; its 'general_state' and 'message' entries
                       are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions:
        return
    for build_pattern in build_exceptions.group(1).split():
        try:
            blocked = re.search(build_pattern, build)
        except re.error as e:
            # The message is written by hand, so a token between the
            # brackets may not be a valid regex.  Skip it rather than
            # failing the whole status check.
            logging.warning('Ignoring invalid build pattern "%s" in lab '
                            'status message: %s', build_pattern, e)
            continue
        if blocked:
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette | 266da2a | 2013-11-27 15:09:55 -0800 | [diff] [blame] | 210 | |
| 211 | |
def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.  The check is skipped when the configured server
    hostname does not start with 'cautotest', and the lab is treated as
    open when the status page cannot be retrieved.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    test_server_name = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not test_server_name.startswith('cautotest'):
        return

    # Download the lab status from its home on the web.
    status_url = global_config.global_config.get_config_value(
            'CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame] | 239 | |
| 240 | |
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all of the given labels and lock it.

    Candidate hosts are tried in random order, and the first one that
    the lock manager manages to lock is returned.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Randomizing the order avoids always picking the same device (say,
    # the same packet capturer), which could make a fault in that one
    # device look like a repeatable test failure.
    random.shuffle(candidates)
    for candidate in candidates:
        hostname = candidate.hostname
        if not lock_manager.lock([hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.', hostname, labels)
        return hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame^] | 276 | |
| 277 | |
def get_test_views_from_tko(suite_job_id, tko):
    """Collect the status of each test run for the given suite job ID.

    Fetches the detailed test views of the job from TKO and keeps only
    those accepted by job_status.view_is_relevant().

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    all_views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = [view for view in all_views
                      if job_status.view_is_relevant(view)]
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    # If a test name occurs more than once, the last view wins.
    return dict((view['test_name'], view['status'])
                for view in relevant_views)