blob: 79ab256736cb5ef11c8bfbad413474873e6ae4c2 [file] [log] [blame]
Mike Frysingerd03e6b52019-08-03 12:49:01 -04001#!/usr/bin/python2
Prashanth B923ca262014-03-14 12:36:29 -07002#
3# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import datetime as datetime_base
8import logging
9from datetime import datetime
10
11import common
12
Allen Li6a612392016-08-18 12:09:32 -070013from autotest_lib.client.common_lib import global_config
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -080014from autotest_lib.client.common_lib import time_utils
Dan Shi20952c12014-05-14 17:07:38 -070015from autotest_lib.server import utils
Prashanth B923ca262014-03-14 12:36:29 -070016from autotest_lib.server.cros.dynamic_suite import reporting_utils
Aviv Keshet7ee95862016-08-30 15:18:27 -070017from autotest_lib.server.lib import status_history
Prashanth B923ca262014-03-14 12:36:29 -070018
Allen Li6a612392016-08-18 12:09:32 -070019CONFIG = global_config.global_config
20
Prashanth B923ca262014-03-14 12:36:29 -070021
Richard Barnetteae9eaa42018-09-12 10:28:07 -070022class DUTsNotAvailableError(utils.TestLabException):
23 """Raised when a DUT label combination is not available in the lab."""
Fang Deng6197da32014-09-25 10:18:48 -070024
25
26class NotEnoughDutsError(utils.TestLabException):
27 """Rasied when the lab doesn't have the minimum number of duts."""
28
Richard Barnetteae9eaa42018-09-12 10:28:07 -070029 def __init__(self, labels, num_available, num_required, hosts):
Allen Li6a612392016-08-18 12:09:32 -070030 """Initialize instance.
31
32 Please pass arguments by keyword.
33
Richard Barnetteae9eaa42018-09-12 10:28:07 -070034 @param labels: Labels required, including board an pool labels.
Allen Li6a612392016-08-18 12:09:32 -070035 @param num_available: Number of available hosts.
36 @param num_required: Number of hosts required.
37 @param hosts: Sequence of Host instances for given board and pool.
38 """
Richard Barnetteae9eaa42018-09-12 10:28:07 -070039 self.labels = labels
Allen Li6a612392016-08-18 12:09:32 -070040 self.num_available = num_available
41 self.num_required = num_required
42 self.hosts = hosts
43 self.bug_id = None
44 self.suite_name = None
45 self.build = None
46
47
48 def __repr__(self):
49 return (
50 '<{cls} at 0x{id:x} with'
Richard Barnetteae9eaa42018-09-12 10:28:07 -070051 ' labels={this.labels!r},'
Allen Li6a612392016-08-18 12:09:32 -070052 ' num_available={this.num_available!r},'
53 ' num_required={this.num_required!r},'
54 ' bug_id={this.bug_id!r},'
55 ' suite_name={this.suite_name!r},'
56 ' build={this.build!r}>'
57 .format(cls=type(self).__name__, id=id(self), this=self)
58 )
59
60
61 def __str__(self):
62 msg_parts = [
Richard Barnetteae9eaa42018-09-12 10:28:07 -070063 'Not enough DUTs for requirements: {this.labels};'
Allen Li6a612392016-08-18 12:09:32 -070064 ' required: {this.num_required}, found: {this.num_available}'
65 ]
Allen Lidc2c69a2016-09-14 19:05:47 -070066 format_dict = {'this': self}
Allen Li6a612392016-08-18 12:09:32 -070067 if self.bug_id is not None:
Allen Lidc2c69a2016-09-14 19:05:47 -070068 msg_parts.append('bug: {bug_url}')
69 format_dict['bug_url'] = reporting_utils.link_crbug(self.bug_id)
Allen Li6a612392016-08-18 12:09:32 -070070 if self.suite_name is not None:
Allen Lidc2c69a2016-09-14 19:05:47 -070071 msg_parts.append('suite: {this.suite_name}')
Allen Li6a612392016-08-18 12:09:32 -070072 if self.build is not None:
Allen Lidc2c69a2016-09-14 19:05:47 -070073 msg_parts.append('build: {this.build}')
74 return ', '.join(msg_parts).format(**format_dict)
Allen Li6a612392016-08-18 12:09:32 -070075
76
77 def add_bug_id(self, bug_id):
78 """Add crbug id associated with this exception.
79
80 @param bug_id crbug id whose str() value is used in a crbug URL.
81 """
82 self.bug_id = bug_id
83
84
85 def add_suite_name(self, suite_name):
86 """Add name of test suite that needed the DUTs.
87
88 @param suite_name Name of test suite.
89 """
90 self.suite_name = suite_name
91
92
93 def add_build(self, build):
94 """Add name of build of job that needed the DUTs.
95
96 @param build Name of build.
97 """
98 self.build = build
99
Fang Deng6197da32014-09-25 10:18:48 -0700100
Prashanth Ba7be2072014-07-15 15:03:21 -0700101class SimpleTimer(object):
102 """A simple timer used to periodically check if a deadline has passed."""
103
104 def _reset(self):
105 """Reset the deadline."""
106 if not self.interval_hours or self.interval_hours < 0:
107 logging.error('Bad interval %s', self.interval_hours)
108 self.deadline = None
109 return
110 self.deadline = datetime.now() + datetime_base.timedelta(
111 hours=self.interval_hours)
112
113
114 def __init__(self, interval_hours=0.5):
115 """Initialize a simple periodic deadline timer.
116
117 @param interval_hours: Interval of the deadline.
118 """
119 self.interval_hours = interval_hours
120 self._reset()
121
122
123 def poll(self):
124 """Poll the timer to see if we've hit the deadline.
125
126 This method resets the deadline if it has passed. If the deadline
127 hasn't been set, or the current time is less than the deadline, the
128 method returns False.
129
130 @return: True if the deadline has passed, False otherwise.
131 """
132 if not self.deadline or datetime.now() < self.deadline:
133 return False
134 self._reset()
135 return True
136
137
Prashanth B923ca262014-03-14 12:36:29 -0700138class JobTimer(object):
139 """Utility class capable of measuring job timeouts.
140 """
141
142 # Format used in datetime - string conversion.
143 time_format = '%m-%d-%Y [%H:%M:%S]'
144
145 def __init__(self, job_created_time, timeout_mins):
146 """JobTimer constructor.
147
148 @param job_created_time: float representing the time a job was
149 created. Eg: time.time()
150 @param timeout_mins: float representing the timeout in minutes.
151 """
152 self.job_created_time = datetime.fromtimestamp(job_created_time)
153 self.timeout_hours = datetime_base.timedelta(hours=timeout_mins/60.0)
Prashanth Ba7be2072014-07-15 15:03:21 -0700154 self.debug_output_timer = SimpleTimer(interval_hours=0.5)
Prashanth B923ca262014-03-14 12:36:29 -0700155 self.past_halftime = False
156
157
158 @classmethod
159 def format_time(cls, datetime_obj):
160 """Get the string formatted version of the datetime object.
161
162 @param datetime_obj: A datetime.datetime object.
163 Eg: datetime.datetime.now()
164
165 @return: A formatted string containing the date/time of the
166 input datetime.
167 """
168 return datetime_obj.strftime(cls.time_format)
169
170
171 def elapsed_time(self):
172 """Get the time elapsed since this job was created.
173
174 @return: A timedelta object representing the elapsed time.
175 """
176 return datetime.now() - self.job_created_time
177
178
179 def is_suite_timeout(self):
180 """Check if the suite timed out.
181
182 @return: True if more than timeout_hours has elapsed since the suite job
183 was created.
184 """
185 if self.elapsed_time() >= self.timeout_hours:
186 logging.info('Suite timed out. Started on %s, timed out on %s',
187 self.format_time(self.job_created_time),
188 self.format_time(datetime.now()))
189 return True
190 return False
191
192
193 def first_past_halftime(self):
194 """Check if we just crossed half time.
195
196 This method will only return True once, the first time it is called
197 after a job's elapsed time is past half its timeout.
198
199 @return True: If this is the first call of the method after halftime.
200 """
201 if (not self.past_halftime and
202 self.elapsed_time() > self.timeout_hours/2):
203 self.past_halftime = True
204 return True
205 return False
206
207
208class RPCHelper(object):
209 """A class to help diagnose a suite run through the rpc interface.
210 """
211
212 def __init__(self, rpc_interface):
213 """Constructor for rpc helper class.
214
215 @param rpc_interface: An rpc object, eg: A RetryingAFE instance.
216 """
217 self.rpc_interface = rpc_interface
218
219
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700220 def diagnose_pool(self, labels, time_delta_hours, limit=10):
Prashanth B923ca262014-03-14 12:36:29 -0700221 """Log diagnostic information about a timeout for a board/pool.
222
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700223 @param labels: DUT label dependencies, including board and pool
224 labels.
Prashanth B923ca262014-03-14 12:36:29 -0700225 @param time_delta_hours: The time from which we should log information.
226 This is a datetime.timedelta object, as stored by the JobTimer.
227 @param limit: The maximum number of jobs per host, to log.
228
229 @raises proxy.JSONRPCException: For exceptions thrown across the wire.
230 """
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -0800231 end_time = datetime.now()
232 start_time = end_time - time_delta_hours
Prathmesh Prabhu68acc402017-11-09 15:24:15 -0800233 host_histories = status_history.HostJobHistory.get_multiple_histories(
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -0800234 self.rpc_interface,
235 time_utils.to_epoch_time(start_time),
236 time_utils.to_epoch_time(end_time),
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700237 labels,
Prathmesh Prabhu68acc402017-11-09 15:24:15 -0800238 )
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -0800239 if not host_histories:
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700240 logging.error('No hosts found for labels %r', labels)
Prashanth B923ca262014-03-14 12:36:29 -0700241 return
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -0800242 status_map = {
243 status_history.UNUSED: 'Unused',
244 status_history.UNKNOWN: 'No job history',
245 status_history.WORKING: 'Working',
246 status_history.BROKEN: 'Failed repair'
247 }
248 for history in host_histories:
249 count = 0
250 job_info =''
251 for job in history:
252 start_time = (
253 time_utils.epoch_time_to_date_string(job.start_time))
Prashanth B923ca262014-03-14 12:36:29 -0700254 job_info += ('%s %s started on: %s status %s\n' %
J. Richard Barnetteaf1e8262016-03-04 12:55:11 -0800255 (job.id, job.name, start_time, job.job_status))
256 count += 1
257 if count >= limit:
258 break
259 host = history.host
260 logging.error('host: %s, status: %s, locked: %s '
261 'diagnosis: %s\n'
262 'labels: %s\nLast %s jobs within %s:\n'
263 '%s',
264 history.hostname, host.status, host.locked,
265 status_map[history.last_diagnosis()[0]],
266 host.labels, limit, time_delta_hours,
267 job_info)
Prashanth B923ca262014-03-14 12:36:29 -0700268
269
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700270 def check_dut_availability(self, labels, minimum_duts=0,
271 skip_duts_check=False):
Dan Shi20952c12014-05-14 17:07:38 -0700272 """Check if DUT availability for a given board and pool is less than
273 minimum.
274
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700275 @param labels: DUT label dependencies, including board and pool
276 labels.
Dan Shi81363372014-06-03 22:27:37 -0700277 @param minimum_duts: Minimum Number of available machines required to
278 run the suite. Default is set to 0, which means do
279 not force the check of available machines before
280 running the suite.
Ningning Xiaf2c206c2016-04-13 14:15:51 -0700281 @param skip_duts_check: If True, skip minimum available DUTs check.
Fang Deng6197da32014-09-25 10:18:48 -0700282 @raise: NotEnoughDutsError if DUT availability is lower than minimum.
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700283 @raise: DUTsNotAvailableError if no host found for requested
Fang Deng6197da32014-09-25 10:18:48 -0700284 board/pool.
Dan Shi20952c12014-05-14 17:07:38 -0700285 """
Dan Shi8de6d1b2014-06-12 09:10:37 -0700286 if minimum_duts == 0:
Dan Shi94234cb2014-05-23 20:04:31 -0700287 return
288
Dan Shi20952c12014-05-14 17:07:38 -0700289 hosts = self.rpc_interface.get_hosts(
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700290 invalid=False, multiple_labels=labels)
Dan Shi20952c12014-05-14 17:07:38 -0700291 if not hosts:
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700292 raise DUTsNotAvailableError(
293 'No hosts found for labels %r. The test lab '
294 'currently does not cover test for those DUTs.' %
295 (labels,))
Dan Shi20952c12014-05-14 17:07:38 -0700296
Ningning Xiaf2c206c2016-04-13 14:15:51 -0700297 if skip_duts_check:
298 # Bypass minimum avilable DUTs check
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700299 logging.debug('skip_duts_check is on, do not enforce minimum '
300 'DUTs check.')
Ningning Xiaf2c206c2016-04-13 14:15:51 -0700301 return
302
Dan Shi8de6d1b2014-06-12 09:10:37 -0700303 if len(hosts) < minimum_duts:
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700304 logging.debug('The total number of DUTs for %r is %d, '
305 'which is less than %d, the required minimum '
306 'number of available DUTS', labels, len(hosts),
Dan Shi8de6d1b2014-06-12 09:10:37 -0700307 minimum_duts)
Ningning Xiaf2c206c2016-04-13 14:15:51 -0700308
Allen Li6a612392016-08-18 12:09:32 -0700309 available_hosts = 0
Prashanth Balasubramaniana6c03be2014-10-20 18:47:24 -0700310 for host in hosts:
Allen Lif74957d2017-11-20 17:46:48 -0800311 if host.is_available():
Allen Li6a612392016-08-18 12:09:32 -0700312 available_hosts += 1
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700313 logging.debug('%d of %d DUTs are available for %r.',
314 available_hosts, len(hosts), labels)
Allen Li6a612392016-08-18 12:09:32 -0700315 if available_hosts < minimum_duts:
Fang Deng6197da32014-09-25 10:18:48 -0700316 raise NotEnoughDutsError(
Richard Barnetteae9eaa42018-09-12 10:28:07 -0700317 labels=labels,
Allen Li6a612392016-08-18 12:09:32 -0700318 num_available=available_hosts,
319 num_required=minimum_duts,
320 hosts=hosts)
Dan Shi20952c12014-05-14 17:07:38 -0700321
322
MK Ryu4790eec2014-07-31 11:39:02 -0700323 def diagnose_job(self, job_id, instance_server):
Prashanth B923ca262014-03-14 12:36:29 -0700324 """Diagnose a suite job.
325
326 Logs information about the jobs that are still to run in the suite.
327
328 @param job_id: The id of the suite job to get information about.
329 No meaningful information gets logged if the id is for a sub-job.
MK Ryu4790eec2014-07-31 11:39:02 -0700330 @param instance_server: The instance server.
331 Eg: cautotest, cautotest-cq, localhost.
Prashanth B923ca262014-03-14 12:36:29 -0700332 """
333 incomplete_jobs = self.rpc_interface.get_jobs(
334 parent_job_id=job_id, summary=True,
335 hostqueueentry__complete=False)
336 if incomplete_jobs:
337 logging.info('\n%s printing summary of incomplete jobs (%s):\n',
338 JobTimer.format_time(datetime.now()),
339 len(incomplete_jobs))
340 for job in incomplete_jobs:
341 logging.info('%s: %s', job.testname[job.testname.rfind('/')+1:],
MK Ryu4790eec2014-07-31 11:39:02 -0700342 reporting_utils.link_job(job.id, instance_server))
Prashanth B923ca262014-03-14 12:36:29 -0700343 else:
344 logging.info('All jobs in suite have already completed.')