Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 1 | # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 5 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 6 | import httplib |
| 7 | import json |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 8 | import logging |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame^] | 9 | import random |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 10 | import re |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 11 | import time |
Paul Drews | bef578d | 2013-09-24 15:10:36 -0700 | [diff] [blame] | 12 | import urllib2 |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 13 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 14 | import common |
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame^] | 15 | from autotest_lib.client.common_lib import base_utils |
| 16 | from autotest_lib.client.common_lib import error |
| 17 | from autotest_lib.client.common_lib import global_config |
Dan Shi | a1ecd5c | 2013-06-06 11:21:31 -0700 | [diff] [blame] | 18 | from autotest_lib.server.cros.dynamic_suite import constants |
| 19 | |
| 20 | |
# Comma-separated names of the javascript files that publish the current
# sheriffs.  Each name is appended to _CHROMIUM_BUILD_URL when it is fetched.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')

# Same format as _SHERIFF_JS, but naming the lab sheriff files.
_LAB_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')

# URL prefix that the sheriff javascript file names are appended to.
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

# Values of the lab status 'general_state' field for which the lab is
# considered up.
LAB_GOOD_STATES = ('open', 'throttled')
| 29 | |
| 30 | |
class TestLabException(Exception):
    """Signals that the Test Lab refuses to run a test or suite.

    Raised when the lab is not in a good state, or when the requested
    build matches one of the blocked-build patterns in the lab status.
    """
| 34 | |
| 35 | |
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
| 39 | |
| 40 | |
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'

    @return a 4-tuple of strings (board, type, milestone, manifest):
            board: board the manifest is for, e.g. 'x86-alex'.
            type: build type, e.g. 'release'.
            milestone: the milestone digits, e.g. '20'.
            manifest: manifest number, e.g. '2015.0.0'.

    @raises ParseBuildNameException if the name does not start with the
            <board>-<type>/R<milestone>-<manifest> layout.

    """
    # re.match() anchors only at the start of the string, so any text that
    # follows the manifest number is tolerated and ignored.
    match = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    if match is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The pattern has exactly four groups, so a successful match always
    # yields four values; no further length check is needed.
    return match.groups()
| 56 | |
Alex Miller | dadc2c2 | 2013-07-08 15:21:21 -0700 | [diff] [blame] | 57 | |
def get_label_from_afe(hostname, label_prefix, afe):
    """Look up the value of one prefixed label on a host in the AFE.

    Queries the AFE for labels on the host whose names start with
    label_prefix, i.e. labels of the form <label_prefix><value>, and
    returns the "<value>" part.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the part of the label name after the prefix, or None unless
             exactly one label matched.

    """
    matching = afe.get_labels(name__startswith=label_prefix,
                              host__hostname__in=[hostname])
    # Zero matches and multiple matches are both treated as "no label".
    if not matching or len(matching) != 1:
        return None
    pieces = matching[0].name.split(label_prefix, 1)
    return pieces[1]
| 75 | |
| 76 | |
def get_board_from_afe(hostname, afe):
    """Return the board of a host, taken from its labels in the AFE.

    Delegates to get_label_from_afe() with constants.BOARD_PREFIX as the
    label prefix, so the result is the part of the host's board label
    that follows that prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None` if there is not a single,
             unique label with the board prefix.

    """
    board_prefix = constants.BOARD_PREFIX
    return get_label_from_afe(hostname, board_prefix, afe)
| 90 | |
| 91 | |
def get_build_from_afe(hostname, afe):
    """Return the current build of a host, taken from its AFE labels.

    Delegates to get_label_from_afe() with constants.VERSION_PREFIX as
    the label prefix.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build, or None if no build label was found or
             if multiple build labels are assigned to this host.

    """
    version_prefix = constants.VERSION_PREFIX
    return get_label_from_afe(hostname, version_prefix, afe)
| 104 | |
| 105 | |
def get_sheriffs(lab_only=False):
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    Files that cannot be fetched or parsed are logged and skipped.

    @param lab_only: if True, only pulls lab sheriffs.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if failed to parse the javascript.
    """
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    sheriff_ids = []
    for sheriff_js in sheriff_js_list:
        # ''.split(',') yields [''], so an unset config option would
        # otherwise trigger a pointless fetch of the bare build URL.
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            continue

        try:
            url_content = base_utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            # urllib2.URLError derives from IOError, so URL errors are
            # reported by this clause rather than the next one.
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue

        ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.warning('Could not retrieve sheriff ldaps for: %s',
                            url_content)
            continue

        for alias in ldaps.group(1).split(','):
            alias = alias.replace(' ', '')
            # An empty payload, or a stray comma, must not produce the
            # bogus address '@chromium.org'.
            if alias:
                sheriff_ids.append('%s@chromium.org' % alias)
    return sheriff_ids
beeps | 46dadc9 | 2013-11-07 14:07:10 -0800 | [diff] [blame] | 141 | |
| 142 | |
def remote_wget(source_url, dest_path, ssh_cmd):
    """Stream a download from localhost into a file on a remote host.

    wget fetches source_url locally and writes it to stdout; that output
    is piped through ssh_cmd into a remote `cat` that writes dest_path.

    NOTE: all three arguments are interpolated into the command line
    as-is, without any quoting or escaping.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    pipeline = "wget -O - %s | %s 'cat >%s'"
    base_utils.run(pipeline % (source_url, ssh_cmd, dest_path))
| 154 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 155 | |
# Number of times the lab status URL is tried before giving up.
_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    Makes up to _MAX_LAB_STATUS_ATTEMPTS attempts, pausing between them.
    An attempt fails if the URL cannot be opened, the response code is
    not 200, or the response body cannot be read and decoded as JSON.

    @param status_url: URL of the lab status page to download.

    @returns The JSON object obtained from the given URL, or None if
             every attempt failed.

    """
    retry_waittime = 1
    for attempt in range(_MAX_LAB_STATUS_ATTEMPTS):
        # Pause before each retry only; there is nothing to wait for
        # once the final attempt has failed.
        if attempt:
            time.sleep(retry_waittime)
        try:
            response = urllib2.urlopen(status_url)
        except (IOError, httplib.HTTPException) as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                return json.load(response)
        except (ValueError, IOError, httplib.HTTPException) as e:
            # A truncated or malformed body counts as a failed attempt;
            # callers treat None as "status unavailable".
            logging.debug('Error occurred when reading the lab status: %s.',
                          e)
        finally:
            response.close()
    return None
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 177 | |
| 178 | |
def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status page;
                       its 'general_state' and 'message' entries are read.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions:
        return
    # The bracketed text is a whitespace-separated list of regexes.
    for build_pattern in build_exceptions.group(1).split():
        if re.search(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
J. Richard Barnette | 266da2a | 2013-11-27 15:09:55 -0800 | [diff] [blame] | 209 | |
| 210 | |
def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.  The check is skipped when the configured server
    hostname does not start with 'cautotest', and the lab is treated as
    open when its status cannot be downloaded.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    test_server_name = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not test_server_name.startswith('cautotest'):
        return

    # Download the lab status from its home on the web.
    status_url = global_config.global_config.get_config_value(
            'CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)
beeps | 023afc6 | 2014-02-04 16:59:22 -0800 | [diff] [blame^] | 238 | |
| 239 | |
def lock_host_with_labels(afe, lock_manager, labels):
    """Find a host carrying all the given labels and lock it.

    Candidate hosts are tried in random order; the first one the lock
    manager succeeds in locking is returned.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname is returned exactly as reported by the
        afe object, e.g. including a .cros suffix if the afe has one.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    candidates = afe.get_hosts(multiple_labels=labels)
    if not candidates:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # Randomizing the order avoids always locking the same device (say,
    # the same packet capturer), which could make a device-specific fault
    # look repeatable across test runs.
    random.shuffle(candidates)
    for candidate in candidates:
        if not lock_manager.lock([candidate.hostname]):
            logging.info('Unable to lock device %s with labels %s.',
                         candidate.hostname, labels)
            continue
        logging.info('Locked device %s with labels %s.',
                     candidate.hostname, labels)
        return candidate.hostname

    raise error.TestError('Could not lock a device with labels %s' % labels)