blob: 38dfd2ec76b9d7bc55d023ab71333bd348ee22d2 [file] [log] [blame]
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5"""Module for swarming execution."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
Xixuan Wu98e5de32018-05-29 17:23:16 -070011import collections
Xixuan Wucb469512018-06-08 15:17:23 -070012import json
Xixuan Wu53d15712018-06-12 10:52:55 -070013import logging
Xixuan Wu0bea9522018-05-08 17:49:19 -070014import os
Xixuan Wucb469512018-06-08 15:17:23 -070015import urllib
16
17from lucifer import autotest
Xixuan Wu0bea9522018-05-08 17:49:19 -070018
19
# Service account file and server handed to the swarming client by
# get_basic_swarming_cmd().
SERVICE_ACCOUNT = (
    '/creds/skylab_swarming_bot/skylab_bot_service_account.json')
SWARMING_SERVER = 'chrome-swarming.appspot.com'

# Swarming pool names.
SKYLAB_DRONE_POOL = 'ChromeOSSkylab'
SKYLAB_SUITE_POOL = 'ChromeOSSkylab-suite'
Xixuan Wucb469512018-06-08 15:17:23 -070024
# Raw values of a swarming task's 'state' field.
TASK_COMPLETED = 'COMPLETED'
TASK_EXPIRED = 'EXPIRED'
TASK_CANCELED = 'CANCELED'
TASK_TIMEDOUT = 'TIMED_OUT'
TASK_RUNNING = 'RUNNING'
TASK_PENDING = 'PENDING'

# Refined labels that get_task_final_state() reports for a COMPLETED task.
TASK_COMPLETED_SUCCESS = 'COMPLETED (SUCCESS)'
TASK_COMPLETED_FAILURE = 'COMPLETED (FAILURE)'

# States of a task that ended without completing.
TASK_FAILED_STATUS = [TASK_EXPIRED, TASK_CANCELED, TASK_TIMEDOUT]

# Every state of a task that has ended, completed or not.
TASK_FINISHED_STATUS = [TASK_COMPLETED] + TASK_FAILED_STATUS
Xixuan Wu0bea9522018-05-08 17:49:19 -070040
# Default values, in seconds, of the keyword arguments of
# make_fallback_request_dict().
DEFAULT_EXPIRATION_SECS = 30
DEFAULT_TIMEOUT_SECS = 60 * 60  # One hour.
43
# TODO (xixuan): Use proto library or some future APIs instead of hardcoding.
# Maps short pool names to their DUT_POOL_* identifiers.
SWARMING_DUT_POOL_MAP = {
    'cq': 'DUT_POOL_CQ',
    'bvt': 'DUT_POOL_BVT',
    'suites': 'DUT_POOL_SUITES',
    'cts': 'DUT_POOL_CTS',
    'arc-presubmit': 'DUT_POOL_CTS_PERBUILD',
}

SWARMING_DUT_READY_STATUS = 'ready'
Xixuan Wu98e5de32018-05-29 17:23:16 -070053
# The structure of fallback swarming task request is:
#   NewTaskRequest:
#     ...
#     task_slices -> NewTaskSlice:
#       ...
#       properties -> TaskProperties
#         ...

# Innermost level: what one task slice runs and under which limits.
TaskProperties = collections.namedtuple(
    'TaskProperties',
    ('command',
     'dimensions',
     'execution_timeout_secs',
     'grace_period_secs',
     'io_timeout_secs'))
70
# One slice of a request: a TaskProperties plus how long the slice may wait
# before it expires.
NewTaskSlice = collections.namedtuple(
    'NewTaskSlice',
    ('expiration_secs',
     'properties'))
77
# Top level of a request; task_slices holds a list of NewTaskSlice.
NewTaskRequest = collections.namedtuple(
    'NewTaskRequest',
    ('name',
     'parent_task_id',
     'priority',
     'tags',
     'user',
     'task_slices'))
88
Xixuan Wu0bea9522018-05-08 17:49:19 -070089
def _get_client():
    """Return the path of swarming.py under the user's home directory."""
    home_dir = os.path.expanduser('~')
    client_relpath = (
        'chromiumos/chromite/third_party/swarming.client/swarming.py')
    return os.path.join(home_dir, client_relpath)
94
95
def get_basic_swarming_cmd(command):
    """Build the argument list shared by all swarming client calls.

    @param command: The swarming client subcommand, e.g. 'query' or 'cancel'.

    @return a new list: the client path, the subcommand, then the service
            account and server flags. Callers append their own arguments.
    """
    auth_args = ['--auth-service-account-json', SERVICE_ACCOUNT]
    server_args = ['--swarming', SWARMING_SERVER]
    return [_get_client(), command] + auth_args + server_args
Xixuan Wu9af95a22018-05-18 10:46:42 -0700100
101
def make_fallback_request_dict(cmds, slices_dimensions, task_name, priority,
                               tags, user,
                               parent_task_id='',
                               expiration_secs=DEFAULT_EXPIRATION_SECS,
                               grace_period_secs=DEFAULT_TIMEOUT_SECS,
                               execution_timeout_secs=DEFAULT_TIMEOUT_SECS,
                               io_timeout_secs=DEFAULT_TIMEOUT_SECS):
    """Form a json-compatible dict for fallback swarming call.

    The n-th cmd is paired with the n-th dimensions dict to form the n-th
    task slice. All slices share the same expiration and timeout settings.

    @param cmds: A list of cmd to run on swarming bots, one per slice.
    @param slices_dimensions: A list of dicts, the dimensions of each slice.
            Must have the same length as |cmds|.
    @param task_name: The request's name.
    @param priority: The request's priority. An integer.
    @param tags: Stored as the request's 'tags' field.
    @param user: Stored as the request's 'user' field.
    @param parent_task_id: Stored as the request's 'parent_task_id' field.
    @param expiration_secs: The seconds each slice may wait before it is
            expired.
    @param grace_period_secs: The seconds to wait after sending a task a
            SIGTERM before sending it a SIGKILL.
    @param execution_timeout_secs: The seconds to run before a task gets
            terminated.
    @param io_timeout_secs: The seconds to wait before a task is considered
            hung.

    @return a json-compatible dict, as a request for swarming call.
    """
    assert len(cmds) == len(slices_dimensions)

    task_slices = [
        NewTaskSlice(
            expiration_secs=expiration_secs,
            properties=TaskProperties(
                command=slice_cmd,
                dimensions=slice_dimensions,
                execution_timeout_secs=execution_timeout_secs,
                grace_period_secs=grace_period_secs,
                io_timeout_secs=io_timeout_secs))
        for slice_cmd, slice_dimensions in zip(cmds, slices_dimensions)
    ]

    return _to_raw_request(
        NewTaskRequest(
            name=task_name,
            parent_task_id=parent_task_id,
            priority=priority,
            tags=tags,
            user=user,
            task_slices=task_slices))
150
151
def _namedtuple_to_dict(value):
    """Recursively converts a namedtuple to a dict.

    A field holding a namedtuple is converted recursively. A field holding a
    list or tuple becomes a list in which namedtuple elements are converted
    and all other elements are kept. Any other field is copied as is.

    Args:
        value: a namedtuple object.

    Returns:
        A plain dict object with the same value.
    """
    def _convert(item):
        # namedtuples are recognised by their _asdict method.
        if hasattr(item, '_asdict'):
            return _namedtuple_to_dict(item)
        return item

    out = {}
    # items() exists on both Python 2 and 3, unlike iteritems(). The result
    # goes into a fresh dict so nothing is rewritten while being iterated.
    for field, field_value in value._asdict().items():
        if isinstance(field_value, (list, tuple)) and not hasattr(
                field_value, '_asdict'):
            out[field] = [_convert(elem) for elem in field_value]
        else:
            out[field] = _convert(field_value)

    return out
175
176
def _to_raw_request(request):
    """Returns the json-compatible dict expected by the server.

    The request is converted to nested dicts, then each slice's dimensions
    dict is replaced by a list of {'key': ..., 'value': ...} dicts sorted by
    key.

    Args:
        request: a NewTaskRequest object. The 'dimensions' of each of its
            task slices must be a dict.

    Returns:
        A json-compatible dict, which could be parsed by swarming proxy
        service.
    """
    out = _namedtuple_to_dict(request)
    for task_slice in out['task_slices']:
        properties = task_slice['properties']
        # items() exists on both Python 2 and 3, unlike iteritems().
        properties['dimensions'] = sorted(
            ({'key': k, 'value': v}
             for k, v in properties['dimensions'].items()),
            key=lambda dimension: dimension['key'])
    return out
195
196
def get_task_link(task_id):
    """Return the URL of a task's page on the swarming server.

    @param task_id: A string swarming task id.
    """
    url_parts = (SWARMING_SERVER, task_id)
    return 'https://%s/user/task/%s' % url_parts
199
200
def get_task_final_state(task):
    """Get the final state of a swarming task.

    A COMPLETED task is reported as COMPLETED (FAILURE) or
    COMPLETED (SUCCESS) depending on its 'failure' field. Any other state
    is returned unchanged.

    @param task: the json output of a swarming task fetched by API tasks.list.

    @return the state string.
    """
    raw_state = task['state']
    if raw_state != TASK_COMPLETED:
        return raw_state

    if task['failure']:
        return TASK_COMPLETED_FAILURE
    return TASK_COMPLETED_SUCCESS
Xixuan Wu415e8212018-06-04 17:01:12 -0700212
213
def get_task_dut_name(task):
    """Get the name of the DUT that ran this task.

    @param task: the json output of a swarming task fetched by API tasks.list.

    @return the first value of the task's first 'dut_name' bot dimension,
            or None if the task has no such dimension.
    """
    dut_names = (dim['value'][0]
                 for dim in task['bot_dimensions']
                 if dim['key'] == 'dut_name')
    return next(dut_names, None)
Xixuan Wucb469512018-06-08 15:17:23 -0700224
225
def query_bots_count(dimensions):
    """Get bots count for given requirements.

    Runs the swarming client's 'query' subcommand against bots/count, with
    one 'dimensions=key:value' query parameter per entry of |dimensions|.

    @param dimensions: A dict of dimensions for swarming bots.

    @return a dict, which contains counts for different status of bots.
    """
    swarming_cmd = get_basic_swarming_cmd('query')
    query_string = urllib.urlencode(
        [('dimensions', '%s:%s' % dimension)
         for dimension in dimensions.iteritems()])
    swarming_cmd.append('bots/count?%s' % query_string)

    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(swarming_cmd, capture_output=True)
    return json.loads(result.output)
241
242
def get_idle_bots_count(outputs):
    """Get the idle bots count.

    @param outputs: The outputs of |query_bots_count|. Its 'count', 'busy',
                    'dead' and 'quarantined' values are passed to int().

    @return an integer: the count minus the busy, dead and quarantined bots.
    """
    total = int(outputs['count'])
    unavailable = sum(int(outputs[status])
                      for status in ('busy', 'dead', 'quarantined'))
    return total - unavailable
Xixuan Wu53d15712018-06-12 10:52:55 -0700250
251
def query_task_by_tags(tags):
    """Get tasks for given tags.

    Runs the swarming client's 'query' subcommand against tasks/list, with
    one 'tags=key:value' query parameter per entry of |tags|.

    @param tags: A dict of tags for swarming tasks.

    @return a list of the tasks queried by the given tags; an empty list if
            the response has no 'items' entry.
    """
    basic_swarming_cmd = get_basic_swarming_cmd('query')
    conditions = [('tags', '%s:%s' % (k, v)) for k, v in tags.iteritems()]
    swarming_cmd = basic_swarming_cmd + ['tasks/list?%s' %
                                         urllib.urlencode(conditions)]
    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(swarming_cmd, capture_output=True)
    # A query that matches nothing may come back without an 'items' key.
    # Treat that as "no tasks" rather than raising KeyError.
    return json.loads(result.output).get('items', [])
266
267
def abort_task(task_id):
    """Abort a swarming task by its id.

    Runs the swarming client's 'cancel' subcommand with --kill-running. If
    the command fails, the failure is logged and not raised.

    @param task_id: A string swarming task id.
    """
    swarming_cmd = get_basic_swarming_cmd('cancel')
    swarming_cmd.extend(['--kill-running', task_id])

    cros_build_lib = autotest.chromite_load('cros_build_lib')
    try:
        cros_build_lib.RunCommand(swarming_cmd, log_output=True)
    except cros_build_lib.RunCommandError:
        # Best effort: a failed cancel is logged and otherwise ignored.
        logging.error('Task %s probably already gone, skip canceling it.',
                      task_id)
Xixuan Wu0c01b092018-06-13 14:12:55 -0700281
282
def query_bots_list(dimensions):
    """Get bots list for given requirements.

    Runs the swarming client's 'query' subcommand against bots/list, with
    one 'dimensions=key:value' query parameter per entry of |dimensions|.

    @param dimensions: A dict of dimensions for swarming bots.

    @return a list of bot dicts; an empty list if the response has no
            'items' entry.
    """
    basic_swarming_cmd = get_basic_swarming_cmd('query')
    conditions = [('dimensions', '%s:%s' % (k, v))
                  for k, v in dimensions.iteritems()]
    swarming_cmd = basic_swarming_cmd + ['bots/list?%s' %
                                         urllib.urlencode(conditions)]
    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(swarming_cmd, capture_output=True)
    # A query that matches no bot may come back without an 'items' key.
    # Treat that as "no bots" rather than raising KeyError.
    return json.loads(result.output).get('items', [])
298
299
def bot_available(bot):
    """Check whether a bot is available.

    @param bot: A dict describes a bot's dimensions, i.e. an element in return
                list of |query_bots_list|. Its 'is_dead' and 'quarantined'
                entries are read.

    @return True if the bot is neither dead nor quarantined, otherwise False.
    """
    if bot['is_dead']:
        return False
    return not bot['quarantined']