blob: 517ad84e2b0014b8ff2239287eb690e107bb015b [file] [log] [blame]
Chris Masone6a0680f2012-03-02 08:40:00 -08001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Simran Basi22aa9fe2012-12-07 16:37:09 -08004import json
Simran Basi87d7a212012-09-27 10:41:05 -07005import logging
Simran Basiaf9b8e72012-10-12 15:02:36 -07006import os
Fang Deng7c2be102012-08-27 16:20:25 -07007import re
Simran Basiaf9b8e72012-10-12 15:02:36 -07008import signal
Scott Zawalski347a0b82012-03-30 16:39:21 -04009import socket
Simran Basiaf9b8e72012-10-12 15:02:36 -070010import time
Simran Basi22aa9fe2012-12-07 16:37:09 -080011import urllib
Chris Masone6a0680f2012-03-02 08:40:00 -080012
Simran Basiaf9b8e72012-10-12 15:02:36 -070013from autotest_lib.client.common_lib import base_utils, error, global_config
beepsc4fb1472013-05-08 21:49:48 -070014from autotest_lib.client.cros import constants
Simran Basiaf9b8e72012-10-12 15:02:36 -070015
16
# Number of seconds nuke_pids() sleeps after sending each signal in its
# signal queue, giving the signalled processes time to exit.
CHECK_PID_IS_ALIVE_TIMEOUT = 6


# Host names/addresses that externalize_host() replaces with this machine's
# own host name.
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of the lab status 'general_state' field for which get_lab_status()
# reports the lab as up.
LAB_GOOD_STATES = ('open', 'throttled')

# Comma-separated list of sheriff javascript file names; get_sheriffs()
# appends each one to _CHROMIUM_BUILD_URL to form the URL it fetches.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'sheriffs', default='')
# URL prefix that the sheriff javascript file names are appended to.
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')
30
Gilad Arnold0ed760c2012-11-05 23:42:53 -080031
def ping(host, deadline=None, tries=None, timeout=60):
    """Shell out to 'ping' and report its exit code.

    The command is killed if it runs for longer than |timeout| seconds.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    returns 0 if we get responses to |tries| pings within |deadline| seconds.

    Specifying |deadline| or |tries| alone should return 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed;
                     passed to ping as -w. Omitted when falsy.
    @param tries: number of pings to send; passed to ping as -c. Omitted
                  when falsy.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    ping_args = [host]
    # Each option is only added when the caller supplied a truthy value.
    for option_format, value in (('-w%d', deadline), ('-c%d', tries)):
        if value:
            ping_args.append(option_format % value)
    result = base_utils.run('ping',
                            args=ping_args,
                            ignore_status=True,
                            timeout=timeout,
                            stdout_tee=base_utils.TEE_TO_LOGS,
                            stderr_tee=base_utils.TEE_TO_LOGS)
    return result.exit_status
Scott Zawalski347a0b82012-03-30 16:39:21 -040059
60
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured dns_zone and the result is resolved via DNS.

    @param hostname: The hostname to check.
    @returns True if hostname.dns_zone resolves, otherwise False. Also False
             when no dns_zone is configured.
    """
    short_name = hostname.split('.')[0]
    dns_zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    if not dns_zone:
        # Without a configured zone there is nothing meaningful to resolve;
        # avoid looking up a bogus '<host>.None' name.
        return False
    fqdn = '%s.%s' % (short_name, dns_zone)
    try:
        socket.gethostbyname(fqdn)
    except socket.gaierror:
        return False
    return True
Fang Deng7c2be102012-08-27 16:20:25 -070076
77
def get_chrome_version(job_views):
    """
    Look up the chrome version recorded in a job's test views.

    Each view is expected to be a dictionary with an optional 'attributes'
    dictionary. The views are scanned in order and the value stored under
    constants.CHROME_VERSION in the first view that has that key is returned.
    If no view carries the key (for example because no test in the suite
    ever ran), a warning is logged and None is returned.

    This method cannot retrieve the chrome version from a dictionary that
    does not conform to the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """
    # Aborted jobs have no views.
    if not job_views:
        return None

    for view in job_views:
        attributes = view.get('attributes')
        # Views without (or with empty) attributes cannot hold a version.
        if not attributes:
            continue
        if constants.CHROME_VERSION in attributes:
            return attributes.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None
109
110
def get_current_board(lsb_release_path='/etc/lsb-release'):
    """Return the current board name.

    Scans the lsb-release file for a CHROMEOS_RELEASE_BOARD=<board> line.

    @param lsb_release_path: path of the lsb-release file to read; defaults
                             to the system file.
    @return current board name, e.g "lumpy", None on fail (the file cannot
            be opened/read, or it has no CHROMEOS_RELEASE_BOARD entry).
    """
    try:
        with open(lsb_release_path) as lsb_release_file:
            for line in lsb_release_file:
                m = re.match(r'^CHROMEOS_RELEASE_BOARD=(.+)$', line)
                if m:
                    return m.group(1)
    except IOError as e:
        # Honor the documented "None on fail" contract instead of letting a
        # missing or unreadable file propagate as an exception.
        logging.debug('Could not read %s: %s', lsb_release_path, e)
    return None
Simran Basi87d7a212012-09-27 10:41:05 -0700122
123
124# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
125# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Upload a local file to a GS bucket using gsutil.

    If |acl| is one of the canned ACL names it is passed straight to
    'gsutil cp -a'. Otherwise |acl| is treated as the path of an ACL file:
    the file is uploaded as 'private' and the ACL file is then applied with
    'gsutil setacl'.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When given, command output is written to
                       <result_dir>/tracing.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the setacl call that applies the ACL
                        file to the uploaded object.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns False if |acl| is neither a canned ACL nor an existing file,
             True once the gsutil commands have run.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        set_acl_cmd = None
    else:
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present, if not this script
        # errors
        if not os.path.exists(acl):
            logging.error('Unable to find ACL File %s.', acl)
            return False
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        set_acl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)

    if result_dir:
        trace_path = os.path.join(result_dir, 'tracing')
        with open(trace_path, 'w') as trace_file:
            # Both gsutil invocations tee their output into the trace file.
            tee_kwargs = {'stdout_tee': trace_file, 'stderr_tee': trace_file}
            trace_file.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                           **tee_kwargs)
            if set_acl_cmd:
                trace_file.write('\nACL setting\n')
                # Apply the passed in ACL xml file to the uploaded object.
                base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True,
                               **tee_kwargs)
            trace_file.write('Postamble\n')
        return True

    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if set_acl_cmd:
        base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True)
    return True
Simran Basiaf9b8e72012-10-12 15:02:36 -0700177
178
def gs_ls(uri_pattern):
    """List the GS URIs that match a pattern by running 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern; empty output lines
            are dropped and trailing whitespace is stripped from each entry.

    @raise CmdError: the gsutil command failed.

    """
    listing = base_utils.system_output(
            ' '.join(('gsutil', 'ls', uri_pattern)))
    matches = []
    for entry in listing.splitlines():
        if entry:
            matches.append(entry.rstrip())
    return matches
192
193
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent, in order, to every pid; after
    each signal the function sleeps CHECK_PID_IS_ALIVE_TIMEOUT seconds.
    If SIGKILL is part of the queue no survivor check is performed;
    otherwise any pid that is still alive is reported via an exception.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Queue of signals to send the PID's to terminate them.

    @raise error.AutoservRunError: SIGKILL was not in |signal_queue| and at
            least one pid is still alive after all signals were sent.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    if signal.SIGKILL in signal_queue:
        return

    # Both values must be passed to '%' as one tuple; formatting with the
    # pid alone raises TypeError instead of building the message.
    failed_list = ['Could not kill %d for process name: %s.' %
                   (pid, base_utils.get_process_name(pid))
                   for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800222
223
def externalize_host(host):
    """Map a local-only host name to one that is visible externally.

    @param host: a host name or address (string)

    @return This machine's host name if |host| is listed in
            _LOCAL_HOST_LIST, otherwise |host| unchanged.

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
Simran Basi22aa9fe2012-12-07 16:37:09 -0800233
234
def get_lab_status():
    """Grabs the current lab status and message.

    Fetches the CROS.lab_status_url up to five times, waiting one second
    after each failed attempt. A response with HTTP code 200 is parsed as
    JSON and its 'general_state' and 'message' fields are used.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string. If no status could
             be fetched, the lab is reported as up with an empty message.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
                                                              'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                result['lab_is_up'] = data['general_state'] in LAB_GOOD_STATES
                result['message'] = data['message']
                return result
        finally:
            # Always release the connection, whether we return, retry or
            # fail while parsing.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
264
265
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    The check is skipped entirely unless the configured SERVER.hostname
    starts with 'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    if not (global_config.global_config.get_config_value('SERVER',
            'hostname').startswith('cautotest')):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    # Only the first bracketed group is taken, so brackets appearing later in
    # the comment are not mistaken for part of the board list.
    boards_are_disabled = re.search(r'\[([^\]]*)\]', lab_status['message'])
    if board and boards_are_disabled:
        # Compare against whole board names; a substring test would make
        # e.g. 'alex' match 'x86-alex'.
        disabled_boards = [name.strip() for name in
                           boards_are_disabled.group(1).split(',')]
        if board in disabled_boards:
            raise error.BoardIsDisabledException('Chromium OS Lab is '
                    'currently not allowing suites to be scheduled on board '
                    '%s: %s' % (board, lab_status['message']))
302
303
def get_sheriffs():
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    The file names come from the comma-separated _SHERIFF_JS setting and are
    fetched relative to _CHROMIUM_BUILD_URL. Files that cannot be fetched or
    parsed are logged and skipped.

    @return: A list of chromium.org sheriff email addresses; empty if no
             sheriff could be determined.
    """
    sheriff_ids = []
    for sheriff_js in _SHERIFF_JS.split(','):
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            # Splitting an empty/unset setting yields ''; there is no file
            # to fetch, so don't hit the bare build URL and log an error.
            continue
        try:
            url_content = base_utils.urlopen('%s%s'% (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.error('could not parse sheriff from url %s%s: %s',
                          _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.error('Could not retrieve sheriff ldaps for: %s',
                          url_content)
            continue
        for alias in ldaps.group(1).split(','):
            alias = alias.replace(' ', '')
            # An empty alias would yield the bogus address '@chromium.org'.
            if alias:
                sheriff_ids.append('%s@chromium.org' % alias)
    return sheriff_ids