Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 4 | import json |
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 5 | import logging |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 6 | import os |
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 7 | import re |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 8 | import signal |
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 9 | import socket |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 10 | import time |
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 11 | import urllib |
Chris Masone | 6a0680f | 2012-03-02 08:40:00 -0800 | [diff] [blame] | 12 | |
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 13 | from autotest_lib.client.common_lib import base_utils, error, global_config |
| 14 | |
| 15 | |
# Number of seconds nuke_pids() sleeps after sending each signal, giving the
# targeted processes time to exit before the next signal is tried.
CHECK_PID_IS_ALIVE_TIMEOUT = 6



# Host names/addresses that refer to the local machine; externalize_host()
# swaps any of these for the machine's own host name.
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of 'general_state' in the lab status JSON that count as the lab
# being up (see get_lab_status()).
LAB_GOOD_STATES = ('open', 'throttled')

# Comma-separated sheriff javascript file names, read from the 'sheriffs' key
# of the NOTIFICATIONS config section; empty string when not configured.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
# Prefix that each sheriff javascript file name is appended to in order to
# form the URL fetched by get_sheriffs(); empty string when not configured.
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')
| 29 | |
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 30 | |
def ping(host, deadline=None, tries=None, timeout=60):
    """Ping |host| by shelling out to the 'ping' command.

    The command is killed if it runs for longer than |timeout| seconds, and
    its exit code is handed back to the caller.

    Per 'man ping', passing BOTH |deadline| and |tries| makes ping exit with
    0 only if |tries| replies arrive within |deadline| seconds. Passing just
    one of |deadline| or |tries| should give 0 as long as some packets
    receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed.
    @param tries: number of pings to send.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    # A falsy value (None or 0) leaves the corresponding flag off entirely.
    optional_flags = (('-w%d', deadline), ('-c%d', tries))
    ping_args = [host] + [flag % value
                          for flag, value in optional_flags if value]
    result = base_utils.run('ping', args=ping_args,
                            ignore_status=True, timeout=timeout,
                            stdout_tee=base_utils.TEE_TO_LOGS,
                            stderr_tee=base_utils.TEE_TO_LOGS)
    return result.exit_status
Scott Zawalski | 347a0b8 | 2012-03-30 16:39:21 -0400 | [diff] [blame] | 58 | |
| 59 | |
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first label of |hostname| (the text before the first '.') is
    used; it is joined with the configured zone and the result is resolved.

    @param hostname: The hostname to check.
    @returns True if <first label of hostname>.<dns_zone> resolves,
             otherwise False. Also False when no dns_zone is configured.
    """
    host_parts = hostname.split('.')
    dns_zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    if not dns_zone:
        # No zone configured: nothing can be "in" it. Without this guard the
        # code below would issue a DNS query for the bogus name '<host>.None'.
        return False
    fqdn = '%s.%s' % (host_parts[0], dns_zone)
    try:
        socket.gethostbyname(fqdn)
    except socket.gaierror:
        return False
    return True
Fang Deng | 7c2be10 | 2012-08-27 16:20:25 -0700 | [diff] [blame] | 75 | |
| 76 | |
def get_current_board(lsb_release_path='/etc/lsb-release'):
    """Return the current board name.

    Scans the lsb-release file line by line for an entry of the form
    CHROMEOS_RELEASE_BOARD=<board> and returns the first one found.

    @param lsb_release_path: path of the lsb-release file to read; defaults
                             to the system's /etc/lsb-release.
    @return current board name, e.g "lumpy", None on fail (the file cannot
            be opened/read, or it holds no CHROMEOS_RELEASE_BOARD entry).
    """
    try:
        with open(lsb_release_path) as lsb_release_file:
            for line in lsb_release_file:
                m = re.match(r'^CHROMEOS_RELEASE_BOARD=(.+)$', line)
                if m:
                    return m.group(1)
    except IOError:
        # Honor the documented "None on fail" contract instead of letting a
        # missing or unreadable file propagate as an exception.
        return None
    return None
Simran Basi | 87d7a21 | 2012-09-27 10:41:05 -0700 | [diff] [blame] | 88 | |
| 89 | |
# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Copy a local file into a GS bucket with gsutil.

    When |acl| is one of the canned ACL names it is passed straight to
    'gsutil cp -a'. Otherwise |acl| is treated as the path of an ACL file:
    the file is uploaded as 'private' and the ACL file is then applied to the
    uploaded object with 'gsutil setacl'.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When given, the gsutil output is written to a
                       file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns True once the gsutil commands have run; False if |acl| is
             neither a canned ACL nor an existing file.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ['project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control']

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        setacl_cmd = None
    elif os.path.exists(acl):
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present.
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        setacl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)
    else:
        logging.error('Unable to find ACL File %s.', acl)
        return False

    if result_dir:
        # Traced upload: tee everything gsutil prints into the tracing file.
        with open(os.path.join(result_dir, 'tracing'), 'w') as trace_file:
            trace_file.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                           stdout_tee=trace_file, stderr_tee=trace_file)
            if setacl_cmd:
                trace_file.write('\nACL setting\n')
                # Apply the passed in ACL xml file to the uploaded object.
                base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True,
                               stdout_tee=trace_file, stderr_tee=trace_file)
            trace_file.write('Postamble\n')
        return True

    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if setacl_cmd:
        base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True)
    return True
Simran Basi | af9b8e7 | 2012-10-12 15:02:36 -0700 | [diff] [blame] | 143 | |
| 144 | |
def gs_ls(uri_pattern):
    """List the GS URIs that match a pattern by running 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern.

    @raise CmdError: the gsutil command failed.

    """
    listing = base_utils.system_output('gsutil ls ' + uri_pattern)
    uris = []
    for line in listing.splitlines():
        # Drop empty output lines; trim trailing whitespace from the rest.
        if line:
            uris.append(line.rstrip())
    return uris
| 158 | |
| 159 | |
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent to every pid in turn, followed by
    a pause of CHECK_PID_IS_ALIVE_TIMEOUT seconds before the next signal is
    tried.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Sequence of signals to send the PID's to terminate
                         them, in the order in which they should be tried.

    @raise error.AutoservRunError: SIGKILL was not one of the signals and at
                                   least one pid is still alive afterwards.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)
    if signal.SIGKILL in signal_queue:
        # Once SIGKILL has been sent, survivors are not checked for.
        return
    # The format values must be a tuple; the previous code passed only |pid|
    # to the two-placeholder format string (and a second positional argument
    # to append()), which raised TypeError instead of reporting the failure.
    failed_list = ['Could not kill %d for process name: %s.' % (
                           pid, base_utils.get_process_name(pid))
                   for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold | 0ed760c | 2012-11-05 23:42:53 -0800 | [diff] [blame] | 188 | |
| 189 | |
def externalize_host(host):
    """Map a local-only host name to one that is visible externally.

    @param host: a host name or address (string)

    @return This machine's host name if |host| is one of the local aliases
            in _LOCAL_HOST_LIST, otherwise |host| unchanged.

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 199 | |
| 200 | |
def get_lab_status():
    """Grabs the current lab status and message.

    Fetches the URL configured as CROS.lab_status_url, making up to five
    attempts with a one second pause after each failed one. An attempt fails
    if the URL cannot be opened, the response code is not 200, or the body is
    not JSON carrying the 'general_state' and 'message' keys.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string. If no attempt
             succeeds, the lab is reported as up with an empty message.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
                                                              'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                # Read both fields before touching |result| so that a
                # partial payload cannot leave it half-updated.
                lab_is_up = data['general_state'] in LAB_GOOD_STATES
                message = data['message']
                result['lab_is_up'] = lab_is_up
                result['message'] = message
                return result
        except (ValueError, KeyError) as e:
            # A malformed payload is handled like any other failed attempt,
            # in line with the policy below of treating the lab as open when
            # its status cannot be obtained.
            logging.debug('Could not parse the lab status response: %s.', e)
        finally:
            # Always release the connection, whatever the attempt's outcome.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warn('Could not get a status from %s', status_url)
    return result
| 230 | |
| 231 | |
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    Nothing is checked unless the configured SERVER.hostname starts with
    'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    if not (global_config.global_config.get_config_value('SERVER',
            'hostname').startswith('cautotest')):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    boards_are_disabled = re.search(r'\[(.*)\]', lab_status['message'])
    if board and boards_are_disabled:
        # Compare against the individual names. A plain substring test on the
        # bracketed text would also disable e.g. 'alex' when only 'x86-alex'
        # is listed.
        disabled_boards = [name.strip() for name in
                           boards_are_disabled.group(1).split(',')]
        if board in disabled_boards:
            raise error.BoardIsDisabledException('Chromium OS Lab is '
                    'currently not allowing suites to be scheduled on board '
                    '%s: %s' % (board, lab_status['message']))
| 268 | |
| 269 | |
def get_sheriffs():
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    The file names come from the comma-separated _SHERIFF_JS setting and are
    each appended to _CHROMIUM_BUILD_URL to form the URL that is fetched.
    Files that cannot be fetched or parsed are logged and skipped.

    @return: A list of chromium.org sheriff email addresses, one per ldap
             found. An empty list if no sheriff file is configured or none
             of them yielded an ldap.
    """
    sheriff_ids = []
    for sheriff_js in _SHERIFF_JS.split(','):
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            # ''.split(',') yields [''], so an unset config would otherwise
            # trigger a fetch of a bogus URL and an error log on every call.
            continue
        try:
            url_content = base_utils.urlopen('%s%s'% (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.error('could not parse sheriff from url %s%s: %s',
                          _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.error('Could not retrieve sheriff ldaps for: %s',
                          url_content)
            continue
        for alias in ldaps.group(1).split(','):
            alias = alias.replace(' ', '')
            # Skip blanks so that document.write('') or a stray comma does
            # not produce the bare address '@chromium.org'.
            if alias:
                sheriff_ids.append('%s@chromium.org' % alias)
    return sheriff_ids