Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 4 | import json |
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 5 | import logging |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 6 | import os |
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 7 | import re |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 8 | import signal |
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 9 | import socket |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 10 | import time |
beeps | 60aec24 | 2013-06-26 14:47:48 -0700 | [diff] [blame] | 11 | import urllib2 |
Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 12 | |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 13 | from autotest_lib.client.common_lib import base_utils, error, global_config |
beeps | c4fb147 | 2013-05-08 21:49:48 -0700 | [diff] [blame] | 14 | from autotest_lib.client.cros import constants |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 15 | |
| 16 | |
# Number of seconds nuke_pids() sleeps after sending each signal, giving the
# targeted processes time to exit before the next signal is sent.
CHECK_PID_IS_ALIVE_TIMEOUT = 6

# Host names/addresses that externalize_host() treats as "this machine".
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of the 'general_state' field in the lab status response for which
# get_lab_status() reports the lab as up.
LAB_GOOD_STATES = ('open', 'throttled')
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 23 | |
| 24 | |
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was handed a name it could not parse."""
| 28 | |
| 29 | |
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'

    @return a 4-tuple of strings (board, type, milestone, manifest):
            board: board the build is for, e.g. 'x86-alex'.
            type: the build type, e.g. 'release'.
            milestone: the digits following 'R', e.g. '20'.
            manifest: manifest number, e.g. '2015.0.0'.

    @raises ParseBuildNameException if name does not start with a string of
            the form <board>-<type>/R<milestone>-<manifest>.
    """
    parsed = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    if not parsed:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The pattern has exactly four groups, so a match always yields four items.
    return parsed.groups()
| 43 | |
| 44 | |
def ping(host, deadline=None, tries=None, timeout=60):
    """Ping |host| by shelling out to the 'ping' command.

    The command is killed if it runs for more than |timeout| seconds.

    Per 'man ping', when BOTH |deadline| and |tries| are given, ping only
    exits with 0 if |tries| responses arrive within |deadline| seconds.
    With only one of |deadline| or |tries| given, ping should exit with 0 as
    long as some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed; passed
                     to ping as -w. Omitted when falsy.
    @param tries: number of pings to send; passed to ping as -c. Omitted
                  when falsy.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    ping_args = [host]
    for flag_format, value in (('-w%d', deadline), ('-c%d', tries)):
        if value:
            ping_args.append(flag_format % value)
    # ignore_status=True: a non-zero exit is reported through the return
    # value rather than as an exception.
    result = base_utils.run('ping',
                            args=ping_args,
                            ignore_status=True,
                            timeout=timeout,
                            stdout_tee=base_utils.TEE_TO_LOGS,
                            stderr_tee=base_utils.TEE_TO_LOGS)
    return result.exit_status
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 72 | |
| 73 | |
def host_is_in_lab_zone(hostname):
    """Tell whether a host resolves inside the configured CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured zone and looked up via DNS.

    @param hostname: The hostname to check.
    @returns True if <first label>.<dns_zone> resolves, otherwise False.
    """
    short_name = hostname.split('.')[0]
    zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    try:
        socket.gethostbyname('%s.%s' % (short_name, zone))
    except socket.gaierror:
        return False
    return True
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 89 | |
| 90 | |
def get_chrome_version(job_views):
    """
    Look up the chrome binary version recorded for a job.

    The version is read from the 'attributes' dictionary of the job's test
    views, under the key constants.CHROME_VERSION. Views are scanned in
    order and the value from the first view that carries the key is
    returned. If no view has it (e.g. no test in the suite ever ran), a
    warning is logged and None is returned.

    Each view must behave like a dictionary with an optional 'attributes'
    entry, i.e. follow the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """
    # Aborted jobs have no views.
    if not job_views:
        return None

    for view in job_views:
        attributes = view.get('attributes')
        # Skip views with missing/empty attributes before testing the key.
        if attributes and constants.CHROME_VERSION in attributes:
            return attributes.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None
| 122 | |
| 123 | |
def get_current_board():
    """Read the current board name from /etc/lsb-release.

    @return the value of the first CHROMEOS_RELEASE_BOARD=<value> line,
            e.g. "lumpy"; None if the file has no such line.
    """
    board_pattern = re.compile(r'^CHROMEOS_RELEASE_BOARD=(.+)$')
    with open('/etc/lsb-release') as lsb_release:
        for entry in lsb_release:
            found = board_pattern.match(entry)
            if found is not None:
                return found.group(1)
    return None
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 135 | |
| 136 | |
| 137 | # TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in |
| 138 | # //chromite.git/buildbot/prebuilt.py somewhere/somehow |
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Copy a local file to a GS bucket with gsutil.

    If |acl| is one of the canned ACL names it is passed straight to
    'gsutil cp -a'. Otherwise |acl| is taken to be the path of an ACL file:
    the file is uploaded as 'private' and the ACL file is then applied with
    'gsutil setacl'.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When set, command output is written to a
                       file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call that applies the ACL file
                        to the uploaded object.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns False if |acl| is neither a canned ACL nor an existing file
             (nothing is uploaded in that case); True once the gsutil
             command(s) have run.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        setacl_cmd = None
    elif os.path.exists(acl):
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present.
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        setacl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)
    else:
        logging.error('Unable to find ACL File %s.', acl)
        return False

    # Plain upload, no tracing requested.
    if not result_dir:
        base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
        if setacl_cmd:
            base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True)
        return True

    # Traced upload: tee the output of every command into the trace file.
    with open(os.path.join(result_dir, 'tracing'), 'w') as trace_file:
        trace_file.write('Preamble\n')
        base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                       stdout_tee=trace_file, stderr_tee=trace_file)
        if setacl_cmd:
            trace_file.write('\nACL setting\n')
            # Apply the passed in ACL xml file to the uploaded object.
            base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True,
                           stdout_tee=trace_file, stderr_tee=trace_file)
        trace_file.write('Postamble\n')
    return True
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 190 | |
| 191 | |
def gs_ls(uri_pattern):
    """List the GS URIs matching a pattern by running 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list with one entry per non-empty line of the gsutil output,
            with trailing whitespace removed.

    @raise CmdError: the gsutil command failed.

    """
    command = ' '.join(('gsutil', 'ls', uri_pattern))
    matches = []
    for line in base_utils.system_output(command).splitlines():
        if line:
            matches.append(line.rstrip())
    return matches
| 205 | |
| 206 | |
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent, in order, to every pid in
    |pid_list|, with a pause of CHECK_PID_IS_ALIVE_TIMEOUT seconds after
    each signal. If SIGKILL is one of the signals, no further check is made.
    Otherwise any pid that is still alive afterwards is reported through an
    exception.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Queue of signals to send the PID's to terminate them.

    @raises error.AutoservRunError if SIGKILL was not among the signals and
            at least one pid is still alive after all signals were sent.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        # Give the processes time to exit before escalating.
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    # When SIGKILL was sent, survivors are not checked for.
    if signal.SIGKILL in signal_queue:
        return

    # Both values must be passed to the format string as one tuple; passing
    # them separately made this line raise TypeError instead of reporting.
    failed_list = ['Could not kill %d for process name: %s.' %
                   (pid, base_utils.get_process_name(pid))
                   for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 235 | |
| 236 | |
def externalize_host(host):
    """Map a local-only host name to one that other machines can use.

    @param host: a host name or address (string)

    @return socket.gethostname() when |host| is one of the entries in
            _LOCAL_HOST_LIST; otherwise |host| unchanged.

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 246 | |
| 247 | |
def get_lab_status():
    """Grabs the current lab status and message.

    The status is fetched from the URL configured as CROS.lab_status_url,
    which is expected to return a JSON object with the keys 'general_state'
    and 'message'. Up to five attempts are made, one second apart. If none
    of them yields an HTTP 200 response, the lab is reported as up with an
    empty message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
                                                              'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                result['lab_is_up'] = data['general_state'] in LAB_GOOD_STATES
                result['message'] = data['message']
                return result
        finally:
            # Always release the connection, whether we return, retry or
            # fail while decoding the body.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
| 277 | |
| 278 | |
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    The check is skipped entirely unless the configured SERVER.hostname
    starts with 'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
            disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    server_hostname = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not server_hostname.startswith('cautotest'):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    if not board:
        return

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    boards_are_disabled = re.search(r'\[(.*)\]', lab_status['message'])
    if not boards_are_disabled:
        return

    # Compare against whole board names. A substring test on the raw text
    # would wrongly block e.g. 'alex' or 'x86' when only 'x86-alex' is listed.
    disabled_boards = [name for name in
                       re.split(r'[,\s]+', boards_are_disabled.group(1))
                       if name]
    if board in disabled_boards:
        raise error.BoardIsDisabledException('Chromium OS Lab is '
                'currently not allowing suites to be scheduled on board '
                '%s: %s' % (board, lab_status['message']))
| 315 | |
| 316 | |
def urlopen_socket_timeout(url, data=None, timeout=5):
    """
    Wrapper to urllib2.urlopen with a socket timeout.

    This method converts a URLError caused by a socket timeout into a
    TimeoutException, so we can use it in conjunction with the rpc retry
    decorator and continue to handle other URLErrors as we see fit.

    The timeout is passed to urlopen for this call only; the process-wide
    default socket timeout is left untouched, so sockets opened concurrently
    by other threads are not affected.

    @param url: The url to open.
    @param data: The data to send to the url (eg: the urlencoded dictionary
                 used with a POST call).
    @param timeout: The timeout for this urlopen call, in seconds.

    @return: The response of the urlopen call.

    @raises: error.TimeoutException when a socket timeout occurs.
             urllib2.URLError for errors not caused by a timeout.
             urllib2.HTTPError for errors like 404 url not found.
    """
    try:
        return urllib2.urlopen(url, data=data, timeout=timeout)
    except urllib2.URLError as e:
        # getattr guards against URLError subclasses that carry no 'reason'.
        if isinstance(getattr(e, 'reason', None), socket.timeout):
            raise error.TimeoutException(str(e))
        raise