blob: 245e9bad7faf1758059a16ce87f339957be41255 [file] [log] [blame]
Chris Masone6a0680f2012-03-02 08:40:00 -08001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Simran Basi22aa9fe2012-12-07 16:37:09 -08004import json
Simran Basi87d7a212012-09-27 10:41:05 -07005import logging
Simran Basiaf9b8e72012-10-12 15:02:36 -07006import os
Fang Deng7c2be102012-08-27 16:20:25 -07007import re
Simran Basiaf9b8e72012-10-12 15:02:36 -07008import signal
Scott Zawalski347a0b82012-03-30 16:39:21 -04009import socket
Simran Basiaf9b8e72012-10-12 15:02:36 -070010import time
Simran Basi22aa9fe2012-12-07 16:37:09 -080011import urllib
Chris Masone6a0680f2012-03-02 08:40:00 -080012
Simran Basiaf9b8e72012-10-12 15:02:36 -070013from autotest_lib.client.common_lib import base_utils, error, global_config
14
15
# Number of seconds nuke_pids() sleeps after each round of signals, giving
# the signalled processes time to exit.
CHECK_PID_IS_ALIVE_TIMEOUT = 6


# Host names/addresses that externalize_host() replaces with this machine's
# own host name.
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of the lab status 'general_state' field for which the lab is
# considered up.
LAB_GOOD_STATES = ('open', 'throttled')
Gilad Arnold0ed760c2012-11-05 23:42:53 -080024
# Comma-separated names of the javascript files that identify the current
# sheriffs (NOTIFICATIONS.sheriffs); empty when not configured.
_SHERIFF_JS = global_config.global_config.get_config_value(
        'NOTIFICATIONS',
        'sheriffs',
        default='')
# URL prefix each sheriff javascript file name is appended to
# (NOTIFICATIONS.chromium_build_url); empty when not configured.
_CHROMIUM_BUILD_URL = global_config.global_config.get_config_value(
        'NOTIFICATIONS',
        'chromium_build_url',
        default='')
29
Gilad Arnold0ed760c2012-11-05 23:42:53 -080030
def ping(host, deadline=None, tries=None, timeout=60):
    """Ping |host| by shelling out to the 'ping' command.

    The command is killed after |timeout| seconds, and its exit code is
    returned whether or not it succeeded.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    returns 0 if we get responses to |tries| pings within |deadline| seconds.

    Specifying |deadline| or |tries| alone should return 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed;
                     passed to ping as -w. Omitted when falsy.
    @param tries: number of pings to send; passed to ping as -c. Omitted
                  when falsy.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    # Each optional flag is only emitted when its value is truthy.
    optional_flags = (('-w%d', deadline), ('-c%d', tries))
    ping_args = [host] + [flag % value
                          for flag, value in optional_flags if value]
    result = base_utils.run('ping',
                            args=ping_args,
                            ignore_status=True,
                            timeout=timeout,
                            stdout_tee=base_utils.TEE_TO_LOGS,
                            stderr_tee=base_utils.TEE_TO_LOGS)
    return result.exit_status
Scott Zawalski347a0b82012-03-30 16:39:21 -040058
59
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured CROS.dns_zone and the result is resolved.

    @param hostname: The hostname to check.
    @returns True if <first label>.<dns_zone> resolves, otherwise False.
             Also False when no CROS.dns_zone is configured.
    """
    dns_zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    if dns_zone is None:
        # Without a configured zone there is nothing to be "in"; do not
        # format None into the name and look up '<host>.None'.
        return False
    short_name = hostname.split('.')[0]
    fqdn = '%s.%s' % (short_name, dns_zone)
    try:
        socket.gethostbyname(fqdn)
    except socket.gaierror:
        return False
    return True
Fang Deng7c2be102012-08-27 16:20:25 -070075
76
def get_current_board(lsb_release_path='/etc/lsb-release'):
    """Return the current board name.

    Scans the lsb-release file for the first CHROMEOS_RELEASE_BOARD=<name>
    line.

    @param lsb_release_path: path of the lsb-release style file to read.
                             Defaults to the system file.
    @return current board name, e.g "lumpy", or None if the file has no
            CHROMEOS_RELEASE_BOARD line.
    @raise IOError: if the file cannot be opened.
    """
    with open(lsb_release_path) as lsb_release_file:
        for line in lsb_release_file:
            # '.' does not match the trailing newline, so the captured
            # value excludes it.
            m = re.match(r'^CHROMEOS_RELEASE_BOARD=(.+)$', line)
            if m:
                return m.group(1)
    return None
Simran Basi87d7a212012-09-27 10:41:05 -070088
89
# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Upload a local file to a GS bucket with gsutil.

    If |acl| is one of the canned ACL names it is applied by the copy
    itself. Otherwise |acl| is treated as the path of an ACL file: the file
    is copied as 'private' and the ACL file is applied afterwards with
    'gsutil setacl'.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When set, gsutil output is written to
                       <result_dir>/tracing.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call.

    @raise CmdError: the exit code of the gsutil call was not 0
                     (presumably raised by base_utils.run).

    @returns False if |acl| is not a canned ACL and no such ACL file
             exists; True once the gsutil command(s) have been run.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')

    set_acl_cmd = None
    if acl not in canned_acls:
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present, if not this script
        # errors
        if not os.path.exists(acl):
            logging.error('Unable to find ACL File %s.', acl)
            return False
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        set_acl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)
    else:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)

    if result_dir:
        # Traced upload: send the output of every gsutil call to the
        # tracing file, framed by marker lines.
        trace_path = os.path.join(result_dir, 'tracing')
        with open(trace_path, 'w') as trace_file:
            trace_file.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout,
                           verbose=True, stdout_tee=trace_file,
                           stderr_tee=trace_file)
            if set_acl_cmd:
                trace_file.write('\nACL setting\n')
                # Apply the passed in ACL xml file to the uploaded object.
                base_utils.run(set_acl_cmd, timeout=acl_timeout,
                               verbose=True, stdout_tee=trace_file,
                               stderr_tee=trace_file)
            trace_file.write('Postamble\n')
        return True

    # Untraced upload.
    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if set_acl_cmd:
        base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True)
    return True
Simran Basiaf9b8e72012-10-12 15:02:36 -0700143
144
def gs_ls(uri_pattern):
    """List the GS URIs matching a pattern by running 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern, one per non-empty
            output line, with trailing whitespace removed.

    @raise CmdError: the gsutil command failed.

    """
    listing = base_utils.system_output('gsutil ls ' + uri_pattern)
    matches = []
    for line in listing.splitlines():
        if line:
            matches.append(line.rstrip())
    return matches
158
159
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Every signal in |signal_queue| is sent, in order, to every pid; after
    each round the function sleeps CHECK_PID_IS_ALIVE_TIMEOUT seconds. If
    SIGKILL is one of the signals, no survivor check is made afterwards.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Queue of signals to send the PID's to terminate them.

    @raise error.AutoservRunError: if SIGKILL was not among the signals and
                                   some pid is still alive afterwards.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)
    if signal.SIGKILL in signal_queue:
        return
    # Both values must be supplied to the format string as one tuple;
    # formatting with the pid alone raises TypeError.
    failed_list = ['Could not kill %d for process name: %s.' %
                   (pid, base_utils.get_process_name(pid))
                   for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800188
189
def externalize_host(host):
    """Map a local host name to one that is visible externally.

    @param host: a host name or address (string)

    @return This machine's host name if |host| is listed in
            _LOCAL_HOST_LIST, otherwise |host| unchanged.

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
Simran Basi22aa9fe2012-12-07 16:37:09 -0800199
200
def get_lab_status():
    """Grabs the current lab status and message.

    Fetches CROS.lab_status_url up to five times, waiting one second after
    each failed attempt. A failed attempt is a connection error, a non-200
    response, or a 200 response whose body is not the expected JSON object.
    If no attempt succeeds, the lab is reported as up with an empty message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value(
            'CROS', 'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                # Read both fields before touching |result| so a malformed
                # payload cannot leave it half-updated.
                lab_is_up = data['general_state'] in LAB_GOOD_STATES
                message = data['message']
                result['lab_is_up'] = lab_is_up
                result['message'] = message
                return result
        except (ValueError, KeyError, TypeError) as e:
            # Body was not JSON or lacked the expected keys; treat it like
            # any other failed attempt and retry.
            logging.debug('Malformed lab status from %s: %s.', status_url, e)
        finally:
            # Always release the connection, including on the return above.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
230
231
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    Nothing is checked unless the configured SERVER.hostname starts with
    'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    server_hostname = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not server_hostname.startswith('cautotest'):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    boards_are_disabled = re.search(r'\[(.*)\]', lab_status['message'])
    if board and boards_are_disabled:
        # Compare against the individual board names; a substring test on
        # the raw text would also disable e.g. 'alex' when only 'x86-alex'
        # is listed.
        disabled_boards = [name.strip() for name in
                           boards_are_disabled.group(1).split(',')]
        if board in disabled_boards:
            raise error.BoardIsDisabledException('Chromium OS Lab is '
                    'currently not allowing suites to be scheduled on board '
                    '%s: %s' % (board, lab_status['message']))
268
269
def get_sheriffs():
    """
    Polls the javascript files that hold the identity of the sheriffs and
    parses their output to return a list of chromium sheriff email addresses.
    A javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    The file names come from the comma-separated _SHERIFF_JS setting and are
    appended to _CHROMIUM_BUILD_URL. Files that cannot be fetched or parsed
    are logged and skipped.

    @return: A list of chromium.org sheriff email addresses. An empty list
             if no sheriff file is configured or none yields an alias.
    """
    sheriff_ids = []
    for sheriff_js in _SHERIFF_JS.split(','):
        # An unset setting splits into [''], and "a, b" style lists carry
        # padding; neither must end up in the URL.
        sheriff_js = sheriff_js.strip()
        if not sheriff_js:
            continue
        try:
            url_content = base_utils.urlopen('%s%s'% (
                _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.error('could not parse sheriff from url %s%s: %s',
                          _CHROMIUM_BUILD_URL, sheriff_js, str(e))
            continue
        ldaps = re.search(r"document.write\('(.*)'\)", url_content)
        if not ldaps:
            logging.error('Could not retrieve sheriff ldaps for: %s',
                          url_content)
            continue
        for alias in ldaps.group(1).split(','):
            alias = alias.replace(' ', '')
            # document.write('') or a stray comma yields an empty alias;
            # do not turn it into a bare '@chromium.org' address.
            if alias:
                sheriff_ids.append('%s@chromium.org' % alias)
    return sheriff_ids