blob: 62caf1bbff7142f4ee4b670afe05b42938e849d4 [file] [log] [blame]
Chris Masone6a0680f2012-03-02 08:40:00 -08001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Simran Basi22aa9fe2012-12-07 16:37:09 -08004import json
Simran Basi87d7a212012-09-27 10:41:05 -07005import logging
Simran Basiaf9b8e72012-10-12 15:02:36 -07006import os
Fang Deng7c2be102012-08-27 16:20:25 -07007import re
Simran Basiaf9b8e72012-10-12 15:02:36 -07008import signal
Scott Zawalski347a0b82012-03-30 16:39:21 -04009import socket
Simran Basiaf9b8e72012-10-12 15:02:36 -070010import time
beeps60aec242013-06-26 14:47:48 -070011import urllib2
Chris Masone6a0680f2012-03-02 08:40:00 -080012
Simran Basiaf9b8e72012-10-12 15:02:36 -070013from autotest_lib.client.common_lib import base_utils, error, global_config
beepsc4fb1472013-05-08 21:49:48 -070014from autotest_lib.client.cros import constants
Simran Basiaf9b8e72012-10-12 15:02:36 -070015
16
17# Number of seconds nuke_pids() sleeps after sending each signal, giving the
# processes time to exit before the next signal / liveness check.
18CHECK_PID_IS_ALIVE_TIMEOUT = 6
19
Simran Basi22aa9fe2012-12-07 16:37:09 -080020_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')
21
22LAB_GOOD_STATES = ('open', 'throttled')
Gilad Arnold0ed760c2012-11-05 23:42:53 -080023
24
def ping(host, deadline=None, tries=None, timeout=60):
    """Ping |host| by shelling out to the 'ping' command.

    The command is given |timeout| seconds to finish; its exit code is
    handed back to the caller.

    Per 'man ping', when BOTH |deadline| and |tries| are given, ping only
    exits with 0 if |tries| responses arrive within |deadline| seconds.

    Giving just one of |deadline| or |tries| should yield 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed.
    @param tries: number of pings to send.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    # Falsy values (None, 0) mean "do not pass the flag at all".
    deadline_flag = ['-w%d' % deadline] if deadline else []
    count_flag = ['-c%d' % tries] if tries else []
    ping_result = base_utils.run('ping',
                                 args=[host] + deadline_flag + count_flag,
                                 ignore_status=True, timeout=timeout,
                                 stdout_tee=base_utils.TEE_TO_LOGS,
                                 stderr_tee=base_utils.TEE_TO_LOGS)
    return ping_result.exit_status
Scott Zawalski347a0b82012-03-30 16:39:21 -040052
53
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured zone and resolved through DNS.

    @param hostname: The hostname to check.
    @returns True if hostname.dns_zone resolves, otherwise False. Also
            False when no CROS.dns_zone is configured.
    """
    dns_zone = global_config.global_config.get_config_value('CROS', 'dns_zone',
                                                            default=None)
    if not dns_zone:
        # Without a zone we would end up resolving the bogus name
        # '<host>.None'; there is nothing meaningful to look up.
        return False

    short_name = hostname.split('.')[0]
    fqdn = '%s.%s' % (short_name, dns_zone)
    try:
        socket.gethostbyname(fqdn)
        return True
    except socket.gaierror:
        return False
Fang Deng7c2be102012-08-27 16:20:25 -070069
70
def get_chrome_version(job_views):
    """
    Look up the chrome binary version recorded for a job.

    When a test runs we query the chrome binary for its version and drop
    that value into a client keyval. To retrieve the chrome version we get all
    the views associated with a test from the db, including those of the
    server and client jobs, and parse the version out of the first test view
    that has it. If we never ran a single test in the suite the job_views
    dictionary will not contain a chrome version.

    This method cannot retrieve the chrome version from a dictionary that
    does not conform to the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """
    # Aborted jobs have no views.
    if not job_views:
        return None

    for test_view in job_views:
        keyvals = test_view.get('attributes')
        if not keyvals:
            # Missing or empty attributes: this view cannot hold a version.
            continue
        if constants.CHROME_VERSION in keyvals.keys():
            return keyvals.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None
102
103
def get_current_board():
    """Read the board name of this machine out of /etc/lsb-release.

    @return current board name, e.g "lumpy"; None when the file has no
            CHROMEOS_RELEASE_BOARD entry.
    """
    board_pattern = re.compile(r'^CHROMEOS_RELEASE_BOARD=(.+)$')
    with open('/etc/lsb-release') as release_info:
        # Lazily match line by line so we stop reading at the first hit.
        candidates = (board_pattern.match(entry) for entry in release_info)
        for found in candidates:
            if found is not None:
                return found.group(1)
    return None
Simran Basi87d7a212012-09-27 10:41:05 -0700115
116
117# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
118# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Copy a local file into a GS bucket using gsutil.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns True/False - depending on if the upload succeeded or failed.
             False is returned when |acl| is neither a canned ACL name nor
             an existing file.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        setacl_cmd = None
    elif os.path.exists(acl):
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present: upload privately
        # first, then apply the ACL file to the uploaded object.
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        setacl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)
    else:
        logging.error('Unable to find ACL File %s.', acl)
        return False

    if result_dir:
        # Tracing requested: tee the output of both commands into a file.
        with open(os.path.join(result_dir, 'tracing'), 'w') as trace_log:
            trace_log.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                           stdout_tee=trace_log, stderr_tee=trace_log)
            if setacl_cmd:
                trace_log.write('\nACL setting\n')
                base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True,
                               stdout_tee=trace_log, stderr_tee=trace_log)
            trace_log.write('Postamble\n')
        return True

    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if setacl_cmd:
        base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True)
    return True
Simran Basiaf9b8e72012-10-12 15:02:36 -0700170
171
def gs_ls(uri_pattern):
    """List the GS URIs matching a pattern via 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern.

    @raise CmdError: the gsutil command failed.

    """
    listing_cmd = ' '.join(('gsutil', 'ls', uri_pattern))
    matches = []
    for entry in base_utils.system_output(listing_cmd).splitlines():
        # Skip blank output lines; drop trailing whitespace from the rest.
        if entry:
            matches.append(entry.rstrip())
    return matches
185
186
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent to every pid in turn, followed by
    a CHECK_PID_IS_ALIVE_TIMEOUT second pause. If SIGKILL was one of the
    signals no further checking is done; otherwise each pid is checked and
    any survivors are reported through an exception.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Sequence of signals to send the PID's to terminate
                         them, in the order they should be tried.

    @raise error.AutoservRunError: SIGKILL was not in |signal_queue| and at
            least one pid is still alive after all signals were sent.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    # Nothing is expected to survive SIGKILL, so skip the liveness check.
    if signal.SIGKILL in signal_queue:
        return

    failed_list = []
    for pid in pid_list:
        if base_utils.pid_is_alive(pid):
            # Both values must go to the format operator as one tuple.
            failed_list.append(
                    'Could not kill %d for process name: %s.' %
                    (pid, base_utils.get_process_name(pid)))
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800215
216
def externalize_host(host):
    """Map a loopback host name to one that is reachable from outside.

    @param host: a host name or address (string)

    @return An externally visible host name or address: this machine's
            host name when |host| is a local alias, otherwise |host| itself.

    """
    if host not in _LOCAL_HOST_LIST:
        return host
    return socket.gethostname()
Simran Basi22aa9fe2012-12-07 16:37:09 -0800226
227
def get_lab_status():
    """Grabs the current lab status and message.

    The status URL (CROS.lab_status_url) is fetched up to five times with a
    one second pause between attempts. If no attempt yields an HTTP 200
    response the lab is reported as up with an empty message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
            'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                result['lab_is_up'] = data['general_state'] in LAB_GOOD_STATES
                result['message'] = data['message']
                return result
        finally:
            # Always release the connection, whether or not we got an answer.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
257
258
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    Nothing is checked unless the configured SERVER.hostname starts with
    'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
            disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    server_name = global_config.global_config.get_config_value('SERVER',
                                                               'hostname')
    if not server_name.startswith('cautotest'):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    boards_are_disabled = re.search(r'\[(.*)\]', lab_status['message'])
    if board and boards_are_disabled:
        # Compare against the individual names; a plain substring test would
        # e.g. treat 'alex' as disabled whenever 'x86-alex' is listed.
        disabled_boards = [name.strip() for name in
                           boards_are_disabled.group(1).split(',')]
        if board in disabled_boards:
            raise error.BoardIsDisabledException('Chromium OS Lab is '
                    'currently not allowing suites to be scheduled on board '
                    '%s: %s' % (board, lab_status['message']))
295
296
def urlopen_socket_timeout(url, data=None, timeout=5):
    """
    Wrapper to urllib2.urlopen with a socket timeout.

    This method will convert all socket timeouts to
    TimeoutExceptions, so we can use it in conjunction
    with the rpc retry decorator and continue to handle
    other URLErrors as we see fit.

    The timeout is applied by temporarily changing the process-wide default
    socket timeout, which is restored before returning or raising.

    @param url: The url to open.
    @param data: The data to send to the url (eg: the urlencoded dictionary
                 used with a POST call).
    @param timeout: The timeout for this urlopen call.

    @return: The response of the urlopen call.

    @raises: error.TimeoutException when a socket timeout occurs.
    @raises: urllib2.URLError for any failure that is not a socket timeout.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(url, data=data)
    except urllib2.URLError as e:
        # Not every URLError flavour is guaranteed to carry a reason.
        if isinstance(getattr(e, 'reason', None), socket.timeout):
            raise error.TimeoutException(str(e))
        # Anything else is the caller's to handle; do not swallow it and
        # silently return None.
        raise
    except socket.timeout as e:
        # A timeout can also surface unwrapped, outside of a URLError.
        raise error.TimeoutException(str(e))
    finally:
        socket.setdefaulttimeout(old_timeout)
323 socket.setdefaulttimeout(old_timeout)