blob: 380ea92da4900b2acc7e1981c58ac5cbcbe5f0f5 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Matthew Sartoridb550112015-06-02 13:55:32 -07005import logging, math, random, signal, sys, time
6
Chris Masone6f109082012-07-18 14:21:38 -07007from autotest_lib.client.common_lib import error
Chris Masone6f109082012-07-18 14:21:38 -07008
Hung-ying Tyan3a7f3e22015-06-05 11:12:56 +08009try:
10 from chromite.lib import retry_util
11except ImportError:
12 logging.warn('Unable to import chromite for retry_util.')
13 retry_util = None
14
Chris Masone6f109082012-07-18 14:21:38 -070015
def handler(signum, frame):
    """
    SIGALRM callback that aborts the call currently being timed.

    timeout() installs this function as the SIGALRM handler, so an expiring
    alarm surfaces as an exception inside the function being run.

    @param signum: Number of the delivered signal (unused).
    @param frame: Stack frame that was interrupted (unused).
    @raises error.TimeoutException: always.
    """
    timed_out = error.TimeoutException('Call is timed out.')
    raise timed_out
Dan Shi6d31f802013-01-11 14:46:12 -080021
22
def install_sigalarm_handler(new_handler):
    """
    Try installing a sigalarm handler.

    In order to protect apache, wsgi intercepts any attempt to install a
    sigalarm handler, so our function will feel the full force of a sigalarm
    even if we try to install a pacifying signal handler. To avoid this we
    need to confirm that the handler we tried to install really was installed.

    @param new_handler: The new handler to install. This must be a callable
                        object, or signal.SIG_IGN/SIG_DFL which correspond to
                        the numbers 1,0 respectively.
    @return: True if the installation of new_handler succeeded, False
             otherwise (invalid handler, installation refused, or a different
             handler ended up installed).
    """
    is_valid = (new_handler is not None and
                (callable(new_handler) or
                 new_handler == signal.SIG_IGN or
                 new_handler == signal.SIG_DFL))
    if not is_valid:
        logging.warning('Trying to install an invalid sigalarm handler.')
        return False

    try:
        signal.signal(signal.SIGALRM, new_handler)
    except ValueError as e:
        # signal.signal() raises ValueError when it is called from a thread
        # other than the main one. Report that as a failed installation so
        # callers fall back to running without an alarm instead of crashing.
        logging.warning('Unable to install sigalarm handler: %s', e)
        return False

    # Read the handler back: the installation may have been intercepted.
    return signal.getsignal(signal.SIGALRM) == new_handler
47
48
def set_sigalarm_timeout(timeout_secs, default_timeout=60):
    """
    Set the sigalarm timeout.

    This method treats any timeout <= 0 as a possible error and falls back to
    using its default timeout, since negative timeouts can have 'alarming'
    effects. Though 0 is a valid timeout, it is often used to cancel signals; in
    order to set a sigalarm of 0 please call signal.alarm directly as there are
    many situations where a 0 timeout is considered invalid.

    signal.alarm only accepts whole seconds, so a fractional timeout is
    rounded up. Truncating instead would turn a small positive timeout such
    as 0.5 into 0 and silently replace it with default_timeout.

    @param timeout_secs: The new timeout, in seconds.
    @param default_timeout: The default timeout to use, if timeout <= 0.
    @return: The old sigalarm timeout
    """
    # float() keeps accepting everything int() used to accept (ints, floats,
    # numeric strings) before the value is rounded up to whole seconds.
    timeout_sec_n = int(math.ceil(float(timeout_secs)))
    if timeout_sec_n <= 0:
        timeout_sec_n = int(default_timeout)
    return signal.alarm(timeout_sec_n)
67
68
def timeout(func, args=(), kwargs=None, timeout_sec=60.0, default_result=None):
    """
    Run the given function with the given args and kwargs, and return the
    given default value if timeout_sec is exceeded.

    The timeout is only enforced when the SIGALRM handler could be installed;
    otherwise func simply runs to completion. A TimeoutException raised by
    func itself is reported the same way as an expired alarm.

    @param func: function to be called.
    @param args: arguments for function to be called.
    @param kwargs: keyword arguments for function to be called. None (the
                   default) means no keyword arguments.
    @param timeout_sec: timeout setting for call to exit, in seconds.
    @param default_result: default return value for the function call.

    @return: A tuple (is_timeout, result). If is_timeout is True the call
             timed out and result is default_result. If it is False the call
             finished on time and result is the return value of func.
    """
    # Avoid a shared mutable default for kwargs.
    if kwargs is None:
        kwargs = {}

    old_alarm_sec = 0
    old_handler = signal.getsignal(signal.SIGALRM)
    installed_handler = install_sigalarm_handler(handler)
    if installed_handler:
        old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60)

    # If old_timeout_time = 0 we either didn't install a handler, or sigalrm
    # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still
    # need to restore the handler/timeout.
    old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0

    try:
        return False, func(*args, **kwargs)
    except error.TimeoutException:
        return True, default_result
    finally:
        # If we installed a sigalarm handler, cancel it since our function
        # returned on time. If we can successfully restore the old handler,
        # reset the old timeout, or, if the old timeout's deadline has passed,
        # set the sigalarm to fire in one second. If the old_timeout_time is 0
        # we don't need to set the sigalarm timeout since we have already set it
        # as a byproduct of cancelling the current signal.
        if installed_handler:
            signal.alarm(0)
            if install_sigalarm_handler(old_handler) and old_timeout_time:
                set_sigalarm_timeout(int(old_timeout_time - time.time()),
                                     default_timeout=1)
112
Dan Shi6d31f802013-01-11 14:46:12 -0800113
114
def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None):
    """Retry calling the decorated function using a delay with jitter.

    error.CrosDynamicSuiteException and every exception type listed in
    blacklist are raised from the decorated function without retrying.

    original from:
      http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/

    @param ExceptionToCheck: the exception to check.  May be a tuple of
                             exceptions to check.
    @param timeout_min: timeout in minutes until giving up.
    @param delay_sec: pre-jittered delay between retries in seconds.  Actual
                      delays will be centered around this value, ranging up to
                      50% off this midpoint.
    @param blacklist: a list of exceptions that will be raised without retrying
    @raises error.TimeoutException: if the time budget runs out while the last
                                    attempt was timing out, or if timeout_min
                                    leaves no time for a first attempt.
    """
    def deco_retry(func):
        random.seed()


        def delay():
            """
            'Jitter' the delay, up to 50% in either direction.
            """
            random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
            logging.warning('Retrying in %f seconds...', random_delay)
            time.sleep(random_delay)


        def func_retry(*args, **kwargs):
            """Call func repeatedly until it succeeds or time runs out."""
            exception_tuple = () if blacklist is None else tuple(blacklist)
            total_time = timeout_min * 60
            start_time = time.time()
            remaining_time = total_time
            first_attempt = True

            while remaining_time > 0:
                # Only sleep between attempts, never before the first one.
                if not first_attempt:
                    delay()
                first_attempt = False
                try:
                    is_timeout, result = timeout(func, args, kwargs,
                                                 remaining_time)
                    if not is_timeout:
                        return result
                except exception_tuple:
                    raise
                except error.CrosDynamicSuiteException:
                    raise
                except ExceptionToCheck as e:
                    logging.warning('%s(%s)', e.__class__, e)
                    remaining_time = int(total_time -
                                         (time.time() - start_time))
                    if remaining_time <= 0:
                        # Out of time: a bare raise re-raises the exception
                        # with its original backtrace, and unlike the
                        # three-expression raise it is valid syntax on both
                        # Python 2 and Python 3.
                        raise
                    continue

                remaining_time = int(total_time - (time.time() - start_time))

            # Reaching this point means the last attempt timed out, or there
            # was no time budget for an attempt at all.
            raise error.TimeoutException('Call is timed out.')


        return func_retry  # true decorator
    return deco_retry
Matthew Sartoridb550112015-06-02 13:55:32 -0700187
188
def retry_exponential(ExceptionToCheck, timeout_min=1.0, delay_sec=3,
                      backoff_factor=2, blacklist=None):
    """Retry calling the decorated function using an exponential backoff.

    Present an interface consistent with the existing retry function, but
    use instead the chromite retry_util functions to provide exponential
    backoff. If chromite could not be imported (retry_util is None) the
    decorated function is simply called once, without any retrying.

    @param ExceptionToCheck: See retry.
    @param timeout_min: See retry. Values below 0 are treated as 0.
    @param delay_sec: See retry.
    @param backoff_factor: The base used for exponential backoff. A simpler
                           backoff method is used if backoff_factor is not
                           greater than 1.
    @param blacklist: See retry.
    """
    def deco_retry(func):
        """The outer decorator.

        @param func: The function we are decorating.
        """
        exception_tuple = () if blacklist is None else tuple(blacklist)

        # Check the backoff_factor. If backoff is greater than 1,
        # then we use exponential backoff, else, simple backoff.
        backoff = backoff_factor if backoff_factor >= 1 else 1

        # Chromite retry_util uses:
        # max_retry: The number of retry attempts to make.
        # sleep: The multiplier for how long to sleep between attempts.
        # A negative budget would put a negative value under sqrt/log below,
        # so clamp it to "no time left", which yields max_retry == 0.
        total_sleep = max(timeout_min * 60.0, 0.0)
        sleep = abs(delay_sec) if delay_sec != 0 else 1

        if backoff > 1:
            # Estimate the max_retry in the case of exponential backoff:
            # => total_sleep = sleep*sum(r=0..max_retry-1, backoff^r)
            # => total_sleep = sleep( (1-backoff^max_retry) / (1-backoff) )
            # => max_retry*ln(backoff) = ln(1-(total_sleep/sleep)*(1-backoff))
            # => max_retry = ln(1-(total_sleep/sleep)*(1-backoff))/ln(backoff)
            # (log10 is used below; the base cancels out in the ratio.)
            numerator = math.log10(1 - (total_sleep / sleep) * (1 - backoff))
            denominator = math.log10(backoff)
            max_retry = int(math.ceil(numerator / denominator))
        else:
            # Estimate the max_retry in the case of simple backoff:
            # => total_sleep = sleep*sum(1..max_retry)
            # => total_sleep/sleep = max_retry(max_retry+1)/2
            # => max_retry = (-1 + sqrt(1+8K))/2 where K = total_sleep/sleep
            budget = math.ceil(total_sleep / sleep)
            max_retry = int(math.ceil(
                    (-1 + math.sqrt(1 + 8 * budget)) / 2.0))

        def should_retry(exc):
            """Check if exc is an ExceptionToCheck or if it's blacklisted.

            @param exc: An exception.

            @return: True if exc is an ExceptionToCheck and is not
                     blacklisted. False otherwise.
            """
            is_exc_to_check = isinstance(exc, ExceptionToCheck)
            is_blacklisted = isinstance(exc, exception_tuple)
            return is_exc_to_check and not is_blacklisted

        def func_retry(*args, **kwargs):
            """The actual function decorator.

            @params args: The arguments to the function.
            @params kwargs: The keyword arguments to the function.
            """
            if retry_util is None:
                # Without chromite there is nothing to retry with; run the
                # function once and hand back its result.
                logging.warning('Failed to decorate with retry_exponential.')
                return func(*args, **kwargs)

            # The retry settings travel in the same kwargs as the function's
            # own keyword arguments.
            # NOTE(review): presumably GenericRetry strips 'sleep' and
            # 'backoff_factor' before calling func, so a func that takes
            # keywords with those names cannot receive them - confirm.
            kwargs['sleep'] = sleep
            kwargs['backoff_factor'] = backoff
            return retry_util.GenericRetry(should_retry, max_retry, func,
                                           *args, **kwargs)

        return func_retry

    return deco_retry