Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 4 | import json |
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 5 | import logging |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 6 | import os |
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 7 | import re |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 8 | import signal |
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 9 | import socket |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 10 | import time |
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 11 | import urllib |
Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 12 | |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 13 | from autotest_lib.client.common_lib import base_utils, error, global_config |
beeps | c4fb147 | 2013-05-08 21:49:48 -0700 | [diff] [blame^] | 14 | from autotest_lib.client.cros import constants |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 15 | |
| 16 | |
# Number of seconds nuke_pids() sleeps after delivering each signal, giving
# the signalled processes time to exit before the next signal is sent.
CHECK_PID_IS_ALIVE_TIMEOUT = 6


# Names/addresses that externalize_host() treats as "this machine".
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of the lab status 'general_state' field for which the lab counts
# as being up.
LAB_GOOD_STATES = ('open', 'throttled')

# Sheriff lookup settings, read once at import time from the NOTIFICATIONS
# section of the global config. Both fall back to '' when not configured.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')
| 30 | |
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 31 | |
def ping(host, deadline=None, tries=None, timeout=60):
    """Ping |host| and report how the 'ping' command exited.

    Shells out to 'ping' and lets it run for at most |timeout| seconds.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    exits with 0 if responses to |tries| pings arrive within |deadline|
    seconds.

    Specifying |deadline| or |tries| alone should yield 0 as long as some
    packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed.
                     Passed to ping as -w; omitted when falsy.
    @param tries: number of pings to send. Passed to ping as -c; omitted
                  when falsy.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    # Flag order matters for byte-identical command lines: -w before -c.
    optional_flags = (('-w%d', deadline), ('-c%d', tries))
    ping_args = [host]
    ping_args.extend(flag % value for flag, value in optional_flags if value)

    # ignore_status=True: a failed ping is reported through the exit code
    # instead of an exception.
    outcome = base_utils.run('ping', args=ping_args,
                             ignore_status=True, timeout=timeout,
                             stdout_tee=base_utils.TEE_TO_LOGS,
                             stderr_tee=base_utils.TEE_TO_LOGS)
    return outcome.exit_status
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 59 | |
| 60 | |
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured zone and the resulting name is resolved.

    @param hostname: The hostname to check.
    @returns True if <first label of hostname>.<dns_zone> resolves,
             otherwise False. Also False when no dns_zone is configured.
    """
    dns_zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    if not dns_zone:
        # Without a configured zone there is nothing to check against.
        # Formatting None into the name would trigger a pointless DNS
        # lookup for the literal '<host>.None'.
        return False

    short_name = hostname.split('.')[0]
    fqdn = '%s.%s' % (short_name, dns_zone)
    try:
        socket.gethostbyname(fqdn)
    except socket.gaierror:
        # Name resolution failed: the host is not in the lab zone.
        return False
    return True
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 76 | |
| 77 | |
def get_chrome_version(job_views):
    """
    Retrieves the version of the chrome binary associated with a job.

    When a test runs we query the chrome binary for its version and drop
    that value into a client keyval. To retrieve the chrome version we get
    all the views associated with a test from the db, including those of the
    server and client jobs, and parse the version out of the first test view
    that has it. If we never ran a single test in the suite the job_views
    will not contain a chrome version.

    This method cannot retrieve the chrome version from views that do not
    conform to the structure of an autotest tko view: each view is expected
    to be a dictionary whose 'attributes' entry is itself a dictionary.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """
    if job_views:
        for view in job_views:
            attributes = view.get('attributes')
            # The first view carrying the keyval wins.
            if attributes and constants.CHROME_VERSION in attributes:
                return attributes.get(constants.CHROME_VERSION)

        logging.warning('Could not find chrome version for failure.')

    # Either no view had the keyval, or there were no views at all
    # (aborted jobs have no views; that case is not logged).
    return None
| 109 | |
| 110 | |
def get_current_board(lsb_release_path='/etc/lsb-release'):
    """Return the current board name.

    Scans the lsb-release file for the first CHROMEOS_RELEASE_BOARD=<name>
    line and returns <name>.

    @param lsb_release_path: path of the lsb-release file to read. Defaults
                             to the system file, /etc/lsb-release.
    @return current board name, e.g "lumpy". None if the file cannot be
            read or contains no CHROMEOS_RELEASE_BOARD entry.
    """
    board_line = re.compile(r'^CHROMEOS_RELEASE_BOARD=(.+)$')
    try:
        with open(lsb_release_path) as lsb_release_file:
            for line in lsb_release_file:
                m = board_line.match(line)
                if m:
                    return m.group(1)
    except (IOError, OSError):
        # A missing or unreadable file counts as "board unknown", which
        # honours the documented "None on fail" contract instead of raising.
        return None
    return None
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 122 | |
| 123 | |
| 124 | # TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in |
| 125 | # //chromite.git/buildbot/prebuilt.py somewhere/somehow |
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Upload a file to a GS bucket using gsutil.

    If |acl| names one of the canned ACLs it is applied as part of the copy.
    Otherwise |acl| is treated as the path of an ACL file: the object is
    uploaded as 'private' and the ACL file is applied with a second gsutil
    call.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When given, the gsutil output is written to
                       a file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns True if the upload commands ran; False if |acl| is neither a
             canned ACL nor an existing file.
    """
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')
    gsutil = 'gsutil'

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        set_acl_cmd = None
    elif os.path.exists(acl):
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present.
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        set_acl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)
    else:
        logging.error('Unable to find ACL File %s.', acl)
        return False

    if not result_dir:
        # No tracing requested: run the commands with default output
        # handling.
        base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
        if set_acl_cmd:
            base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True)
        return True

    trace_path = os.path.join(result_dir, 'tracing')
    with open(trace_path, 'w') as trace_file:
        trace_file.write('Preamble\n')
        base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                       stdout_tee=trace_file, stderr_tee=trace_file)
        if set_acl_cmd:
            trace_file.write('\nACL setting\n')
            # Apply the passed in ACL xml file to the uploaded object.
            base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True,
                           stdout_tee=trace_file, stderr_tee=trace_file)
        trace_file.write('Postamble\n')
        return True
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 177 | |
| 178 | |
def gs_ls(uri_pattern):
    """List the GS URIs that match a pattern by running 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern, one per non-empty
            line of gsutil output, with trailing whitespace removed.

    @raise CmdError: the gsutil command failed.

    """
    listing = base_utils.system_output('gsutil ls ' + uri_pattern)
    matches = []
    for line in listing.splitlines():
        if line:
            matches.append(line.rstrip())
    return matches
| 192 | |
| 193 | |
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent, in order, to every pid in
    |pid_list|; after each round the function sleeps for
    CHECK_PID_IS_ALIVE_TIMEOUT seconds. If SIGKILL is part of
    |signal_queue| the survivors are not checked afterwards.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Queue of signals to send the PID's to terminate them.

    @raise error.AutoservRunError: |signal_queue| did not contain SIGKILL
            and at least one pid was still alive after all signals were
            sent.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    if signal.SIGKILL in signal_queue:
        return

    # Both format arguments must be passed as one tuple; the message needs
    # the pid and the process name.
    failed_list = [
            'Could not kill %d for process name: %s.' %
            (pid, base_utils.get_process_name(pid))
            for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 222 | |
| 223 | |
def externalize_host(host):
    """Map a local-only host name to one that is visible externally.

    @param host: a host name or address (string)

    @return This machine's host name if |host| is one of the local names in
            _LOCAL_HOST_LIST, otherwise |host| unchanged.

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 233 | |
| 234 | |
def get_lab_status():
    """Grabs the current lab status and message.

    The status is fetched from the URL configured as CROS.lab_status_url.
    Up to 5 attempts are made, one second apart. If no attempt yields a
    usable status, the lab is assumed to be up.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
                                                              'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                # Read both fields before touching |result| so a partial
                # payload cannot leave it half-updated.
                lab_is_up = data['general_state'] in LAB_GOOD_STATES
                message = data['message']
                result['lab_is_up'] = lab_is_up
                result['message'] = message
                return result
        except (ValueError, KeyError, TypeError) as e:
            # Invalid JSON or missing fields: treat like any other failed
            # attempt instead of crashing the caller.
            logging.debug('Malformed lab status from %s: %s.', status_url, e)
        finally:
            # Always release the connection, including on the return above.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
| 264 | |
| 265 | |
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    The check is skipped entirely unless the configured SERVER.hostname
    starts with 'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    server_hostname = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not server_hostname.startswith('cautotest'):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    # Non-greedy, so that only the first bracketed list is captured even if
    # the comment contains brackets of its own.
    disabled_match = re.search(r'\[(.*?)\]', lab_status['message'])
    if not (board and disabled_match):
        return

    # Compare against the individual board names. A substring test on the
    # whole list would wrongly treat e.g. 'alex' as disabled by 'x86-alex'.
    disabled_boards = [name.strip()
                       for name in disabled_match.group(1).split(',')]
    if board in disabled_boards:
        raise error.BoardIsDisabledException('Chromium OS Lab is '
                'currently not allowing suites to be scheduled on board '
                '%s: %s' % (board, lab_status['message']))
| 302 | |
| 303 | |
def get_sheriffs():
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    The file names come from the comma separated _SHERIFF_JS setting and are
    appended to _CHROMIUM_BUILD_URL. Files that cannot be fetched or parsed
    are logged and skipped.

    @return: A list of chromium.org sheriff email addresses. Empty if no
             sheriff file is configured or none could be retrieved.
    """
    sheriff_ids = []
    for sheriff_js in _SHERIFF_JS.split(','):
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            # An unset or blank setting still yields one empty entry after
            # split(); fetching the bare build url would be meaningless.
            continue
        try:
            url_content = base_utils.urlopen('%s%s'% (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.error('could not parse sheriff from url %s%s: %s',
                          _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue

        ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.error('Could not retrieve sheriff ldaps for: %s',
                          url_content)
            continue

        for alias in ldaps.group(1).split(','):
            alias = alias.replace(' ', '')
            # Skip blanks so an empty document.write('') does not produce a
            # bare '@chromium.org' address.
            if alias:
                sheriff_ids.append('%s@chromium.org'% alias)
    return sheriff_ids