blob: 95e5e6b9d731e953aef896a6817a5d18fead01a4 [file] [log] [blame]
Prashanth B923ca262014-03-14 12:36:29 -07001#!/usr/bin/python
2#
3# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import datetime as datetime_base
8import logging
9from datetime import datetime
10
11import common
12
Dan Shi20952c12014-05-14 17:07:38 -070013from autotest_lib.server import utils
Prashanth B923ca262014-03-14 12:36:29 -070014from autotest_lib.server.cros.dynamic_suite import reporting_utils
15
16
Fang Deng6197da32014-09-25 10:18:48 -070017class BoardNotAvailableError(utils.TestLabException):
18 """Raised when a board is not available in the lab."""
19
20
21class NotEnoughDutsError(utils.TestLabException):
22 """Rasied when the lab doesn't have the minimum number of duts."""
23
24
Prashanth Ba7be2072014-07-15 15:03:21 -070025class SimpleTimer(object):
26 """A simple timer used to periodically check if a deadline has passed."""
27
28 def _reset(self):
29 """Reset the deadline."""
30 if not self.interval_hours or self.interval_hours < 0:
31 logging.error('Bad interval %s', self.interval_hours)
32 self.deadline = None
33 return
34 self.deadline = datetime.now() + datetime_base.timedelta(
35 hours=self.interval_hours)
36
37
38 def __init__(self, interval_hours=0.5):
39 """Initialize a simple periodic deadline timer.
40
41 @param interval_hours: Interval of the deadline.
42 """
43 self.interval_hours = interval_hours
44 self._reset()
45
46
47 def poll(self):
48 """Poll the timer to see if we've hit the deadline.
49
50 This method resets the deadline if it has passed. If the deadline
51 hasn't been set, or the current time is less than the deadline, the
52 method returns False.
53
54 @return: True if the deadline has passed, False otherwise.
55 """
56 if not self.deadline or datetime.now() < self.deadline:
57 return False
58 self._reset()
59 return True
60
61
Prashanth B923ca262014-03-14 12:36:29 -070062class JobTimer(object):
63 """Utility class capable of measuring job timeouts.
64 """
65
66 # Format used in datetime - string conversion.
67 time_format = '%m-%d-%Y [%H:%M:%S]'
68
69 def __init__(self, job_created_time, timeout_mins):
70 """JobTimer constructor.
71
72 @param job_created_time: float representing the time a job was
73 created. Eg: time.time()
74 @param timeout_mins: float representing the timeout in minutes.
75 """
76 self.job_created_time = datetime.fromtimestamp(job_created_time)
77 self.timeout_hours = datetime_base.timedelta(hours=timeout_mins/60.0)
Prashanth Ba7be2072014-07-15 15:03:21 -070078 self.debug_output_timer = SimpleTimer(interval_hours=0.5)
Prashanth B923ca262014-03-14 12:36:29 -070079 self.past_halftime = False
80
81
82 @classmethod
83 def format_time(cls, datetime_obj):
84 """Get the string formatted version of the datetime object.
85
86 @param datetime_obj: A datetime.datetime object.
87 Eg: datetime.datetime.now()
88
89 @return: A formatted string containing the date/time of the
90 input datetime.
91 """
92 return datetime_obj.strftime(cls.time_format)
93
94
95 def elapsed_time(self):
96 """Get the time elapsed since this job was created.
97
98 @return: A timedelta object representing the elapsed time.
99 """
100 return datetime.now() - self.job_created_time
101
102
103 def is_suite_timeout(self):
104 """Check if the suite timed out.
105
106 @return: True if more than timeout_hours has elapsed since the suite job
107 was created.
108 """
109 if self.elapsed_time() >= self.timeout_hours:
110 logging.info('Suite timed out. Started on %s, timed out on %s',
111 self.format_time(self.job_created_time),
112 self.format_time(datetime.now()))
113 return True
114 return False
115
116
117 def first_past_halftime(self):
118 """Check if we just crossed half time.
119
120 This method will only return True once, the first time it is called
121 after a job's elapsed time is past half its timeout.
122
123 @return True: If this is the first call of the method after halftime.
124 """
125 if (not self.past_halftime and
126 self.elapsed_time() > self.timeout_hours/2):
127 self.past_halftime = True
128 return True
129 return False
130
131
132class RPCHelper(object):
133 """A class to help diagnose a suite run through the rpc interface.
134 """
135
136 def __init__(self, rpc_interface):
137 """Constructor for rpc helper class.
138
139 @param rpc_interface: An rpc object, eg: A RetryingAFE instance.
140 """
141 self.rpc_interface = rpc_interface
142
143
144 def diagnose_pool(self, board, pool, time_delta_hours, limit=5):
145 """Log diagnostic information about a timeout for a board/pool.
146
147 @param board: The board for which the current suite was run.
148 @param pool: The pool against which the current suite was run.
149 @param time_delta_hours: The time from which we should log information.
150 This is a datetime.timedelta object, as stored by the JobTimer.
151 @param limit: The maximum number of jobs per host, to log.
152
153 @raises proxy.JSONRPCException: For exceptions thrown across the wire.
154 """
155 hosts = self.rpc_interface.get_hosts(
156 multiple_labels=('pool:%s' % pool, 'board:%s' % board))
157 if not hosts:
Alex Miller8bf50202014-06-02 04:10:51 -0700158 logging.warning('No hosts found for board:%s in pool:%s',
159 board, pool)
Prashanth B923ca262014-03-14 12:36:29 -0700160 return
161 cutoff = datetime.now() - time_delta_hours
162 for host in hosts:
163 jobs = self.rpc_interface.get_host_queue_entries(
164 host__id=host.id, started_on__gte=str(cutoff))
165 job_info = ''
166 for job in jobs[-limit:]:
167 job_info += ('%s %s started on: %s status %s\n' %
168 (job.id, job.job.name, job.started_on, job.status))
169 logging.error('host:%s, status:%s, locked: %s'
170 '\nlabels: %s\nLast %s jobs within %s:\n%s',
171 host.hostname, host.status, host.locked, host.labels,
172 limit, time_delta_hours, job_info)
173
174
Dan Shi8de6d1b2014-06-12 09:10:37 -0700175 def check_dut_availability(self, board, pool, minimum_duts=0):
Dan Shi20952c12014-05-14 17:07:38 -0700176 """Check if DUT availability for a given board and pool is less than
177 minimum.
178
179 @param board: The board to check DUT availability.
180 @param pool: The pool to check DUT availability.
Dan Shi81363372014-06-03 22:27:37 -0700181 @param minimum_duts: Minimum Number of available machines required to
182 run the suite. Default is set to 0, which means do
183 not force the check of available machines before
184 running the suite.
Fang Deng6197da32014-09-25 10:18:48 -0700185 @raise: NotEnoughDutsError if DUT availability is lower than minimum.
186 @raise: BoardNotAvailableError if no host found for requested
187 board/pool.
Dan Shi20952c12014-05-14 17:07:38 -0700188 """
Dan Shi8de6d1b2014-06-12 09:10:37 -0700189 if minimum_duts == 0:
Dan Shi94234cb2014-05-23 20:04:31 -0700190 return
191
Dan Shi20952c12014-05-14 17:07:38 -0700192 hosts = self.rpc_interface.get_hosts(
193 invalid=False,
194 multiple_labels=('pool:%s' % pool, 'board:%s' % board))
195 if not hosts:
Fang Deng6197da32014-09-25 10:18:48 -0700196 raise BoardNotAvailableError(
Alex Miller8bf50202014-06-02 04:10:51 -0700197 'No hosts found for board:%s in pool:%s' %
198 (board, pool))
Dan Shi20952c12014-05-14 17:07:38 -0700199
Dan Shi8de6d1b2014-06-12 09:10:37 -0700200 if len(hosts) < minimum_duts:
Dan Shi94234cb2014-05-23 20:04:31 -0700201 logging.debug('The total number of DUTs for %s in pool:%s is %d, '
202 'which is no more than the required minimum number of'
203 ' available DUTS of %d. Minimum available DUT rule is'
204 ' not enforced.', board, pool, len(hosts),
Dan Shi8de6d1b2014-06-12 09:10:37 -0700205 minimum_duts)
Dan Shi94234cb2014-05-23 20:04:31 -0700206 return
207
Dan Shi20952c12014-05-14 17:07:38 -0700208 # TODO(dshi): Replace the hard coded string with enum value,
209 # models.Host.Status.REPAIRING and REPAIR_FAILED
210 # setup_django_environment can't be imported now as paygen server does
211 # not have django package.
Dan Shif1f77a52014-10-08 14:34:34 -0700212 bad_statuses = ('Repair Failed', 'Repairing', 'Verifying')
Dan Shi20952c12014-05-14 17:07:38 -0700213 available_hosts = [host for host in hosts
Dan Shif1f77a52014-10-08 14:34:34 -0700214 if not host.status in bad_statuses and
215 not host.locked]
Dan Shi20952c12014-05-14 17:07:38 -0700216 logging.debug('%d of %d DUTs are available for board %s pool %s.',
217 len(available_hosts), len(hosts), board, pool)
Dan Shi8de6d1b2014-06-12 09:10:37 -0700218 if len(available_hosts) < minimum_duts:
Fang Deng6197da32014-09-25 10:18:48 -0700219 raise NotEnoughDutsError(
Dan Shi20952c12014-05-14 17:07:38 -0700220 'Number of available DUTs for board %s pool %s is %d, which'
221 ' is less than the minimum value %d.' %
Dan Shi8de6d1b2014-06-12 09:10:37 -0700222 (board, pool, len(available_hosts), minimum_duts))
Dan Shi20952c12014-05-14 17:07:38 -0700223
224
MK Ryu4790eec2014-07-31 11:39:02 -0700225 def diagnose_job(self, job_id, instance_server):
Prashanth B923ca262014-03-14 12:36:29 -0700226 """Diagnose a suite job.
227
228 Logs information about the jobs that are still to run in the suite.
229
230 @param job_id: The id of the suite job to get information about.
231 No meaningful information gets logged if the id is for a sub-job.
MK Ryu4790eec2014-07-31 11:39:02 -0700232 @param instance_server: The instance server.
233 Eg: cautotest, cautotest-cq, localhost.
Prashanth B923ca262014-03-14 12:36:29 -0700234 """
235 incomplete_jobs = self.rpc_interface.get_jobs(
236 parent_job_id=job_id, summary=True,
237 hostqueueentry__complete=False)
238 if incomplete_jobs:
239 logging.info('\n%s printing summary of incomplete jobs (%s):\n',
240 JobTimer.format_time(datetime.now()),
241 len(incomplete_jobs))
242 for job in incomplete_jobs:
243 logging.info('%s: %s', job.testname[job.testname.rfind('/')+1:],
MK Ryu4790eec2014-07-31 11:39:02 -0700244 reporting_utils.link_job(job.id, instance_server))
Prashanth B923ca262014-03-14 12:36:29 -0700245 else:
246 logging.info('All jobs in suite have already completed.')