blob: dc5cb25ead13f126e4ec4fc6e079ba782b21f207 [file] [log] [blame]
Chris Masone6f109082012-07-18 14:21:38 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Matthew Sartoridb550112015-06-02 13:55:32 -07005import logging, math, random, signal, sys, time
6
7from chromite.lib import retry_util
Dan Shi6d31f802013-01-11 14:46:12 -08008
Chris Masone6f109082012-07-18 14:21:38 -07009from autotest_lib.client.common_lib import error
Chris Masone6f109082012-07-18 14:21:38 -070010
11
def handler(signum, frame):
    """
    Signal handler that aborts the interrupted call with a timeout error.

    The signature follows what the signal module passes to Python-level
    handlers; neither argument is inspected.

    @param signum: The number of the signal that was delivered (unused).
    @param frame: The stack frame that was interrupted (unused).
    @raise error.TimeoutException: Always.
    """
    timeout_message = 'Call is timed out.'
    raise error.TimeoutException(timeout_message)
Dan Shi6d31f802013-01-11 14:46:12 -080017
18
def install_sigalarm_handler(new_handler):
    """
    Try installing a sigalarm handler.

    In order to protect apache, wsgi intercepts any attempt to install a
    sigalarm handler, so our function will feel the full force of a sigalarm
    even if we try to install a pacifying signal handler. To avoid this we
    need to confirm that the handler we tried to install really was installed.

    signal.signal() also refuses to work outside of the main thread (it
    raises ValueError). That case is reported as a failed installation
    instead of letting the exception escape, so callers can simply skip
    arming the alarm.

    @param new_handler: The new handler to install. This must be a callable
                        object, or signal.SIG_IGN/SIG_DFL which correspond to
                        the numbers 1,0 respectively.
    @return: True if the installation of new_handler succeeded, False otherwise.
    """
    if (new_handler is None or
        (not callable(new_handler) and
         new_handler != signal.SIG_IGN and
         new_handler != signal.SIG_DFL)):
        logging.warning('Trying to install an invalid sigalarm handler.')
        return False

    try:
        signal.signal(signal.SIGALRM, new_handler)
    except ValueError as e:
        # Raised by signal.signal() when it is called from a thread other
        # than the main thread; the handler was not installed.
        logging.warning('Failed to install sigalarm handler: %s', e)
        return False

    # Read the handler back: the installation may have been silently ignored.
    installed_handler = signal.getsignal(signal.SIGALRM)
    return installed_handler == new_handler
43
44
def set_sigalarm_timeout(timeout_secs, default_timeout=60):
    """
    Set the sigalarm timeout.

    This method treats any timeout <= 0 as a possible error and falls back to
    using its default timeout, since negative timeouts can have 'alarming'
    effects. Though 0 is a valid timeout, it is often used to cancel signals; in
    order to set a sigalarm of 0 please call signal.alarm directly as there are
    many situations where a 0 timeout is considered invalid.

    signal.alarm only accepts whole seconds, so timeout_secs is truncated to
    an integer. A positive timeout shorter than one second is rounded up to
    one second rather than being mistaken for an invalid timeout and replaced
    by default_timeout.

    @param timeout_secs: The new timeout, in seconds.
    @param default_timeout: The default timeout to use, if timeout <= 0.
    @return: The old sigalarm timeout
    """
    timeout_sec_n = int(timeout_secs)
    if timeout_sec_n <= 0:
        if float(timeout_secs) > 0:
            # 0 < timeout_secs < 1: the caller did ask for a (short) timeout,
            # so use the smallest alarm that signal.alarm can express.
            timeout_sec_n = 1
        else:
            timeout_sec_n = int(default_timeout)
    return signal.alarm(timeout_sec_n)
63
64
def timeout(func, args=(), kwargs=None, timeout_sec=60.0,
            default_result=None):
    """
    Run func(*args, **kwargs) and give up once timeout_sec is exceeded.

    The time limit is enforced with SIGALRM. If the sigalarm handler cannot
    be installed, func is still called but without any time limit. A
    previously installed sigalarm handler and any pending alarm are restored
    before returning.

    @param func: function to be called.
    @param args: arguments for function to be called.
    @param kwargs: keyword arguments for function to be called. None is
                   treated as no keyword arguments.
    @param timeout_sec: timeout setting for call to exit, in seconds.
    @param default_result: value returned as the result if the call times out.

    @return 1: is_timeout 2: result of the function call. If
            is_timeout is True, the call is timed out and the result is
            default_result. If the value is False, the call is finished
            on time.
    """
    if kwargs is None:
        kwargs = {}

    old_alarm_sec = 0
    old_handler = signal.getsignal(signal.SIGALRM)
    installed_handler = install_sigalarm_handler(handler)
    if installed_handler:
        old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60)

    # If old_timeout_time = 0 we either didn't install a handler, or sigalrm
    # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still
    # need to restore the handler/timeout.
    old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0

    try:
        return False, func(*args, **kwargs)
    except error.TimeoutException:
        return True, default_result
    finally:
        # If we installed a sigalarm handler, cancel it since our function
        # returned on time. If we can successfully restore the old handler,
        # reset the old timeout, or, if the old timeout's deadline has passed,
        # set the sigalarm to fire in one second. If the old_timeout_time is 0
        # we don't need to set the sigalarm timeout since we have already set it
        # as a byproduct of cancelling the current signal.
        if installed_handler:
            signal.alarm(0)
            if install_sigalarm_handler(old_handler) and old_timeout_time:
                set_sigalarm_timeout(int(old_timeout_time - time.time()),
                                     default_timeout=1)
108
Dan Shi6d31f802013-01-11 14:46:12 -0800109
110
def _reraise(exc_info):
    """
    Re-raise a cached exception together with its original traceback.

    @param exc_info: A (type, value, traceback) tuple as returned by
                     sys.exc_info().
    """
    if sys.version_info[0] >= 3:
        raise exc_info[1].with_traceback(exc_info[2])
    # The three-argument raise statement is a SyntaxError on Python 3, so it
    # is kept inside a string to let this module be parsed by both versions.
    exec('raise exc_info[0], exc_info[1], exc_info[2]')


def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None):
    """Retry calling the decorated function using a delay with jitter.

    Will raise error.CrosDynamicSuiteException exceptions from the decorated
    function without retrying. Will raise exceptions in blacklist as well.

    original from:
      http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/

    @param ExceptionToCheck: the exception to check.  May be a tuple of
                             exceptions to check.
    @param timeout_min: timeout in minutes until giving up.
    @param delay_sec: pre-jittered delay between retries in seconds.  Actual
                      delays will be centered around this value, ranging up to
                      50% off this midpoint.
    @param blacklist: a list of exceptions that will be raised without retrying
    @raise error.TimeoutException: if the time budget ran out and the last
                                   attempt did not raise ExceptionToCheck.
    """
    def deco_retry(func):
        random.seed()


        def delay():
            """
            'Jitter' the delay, up to 50% in either direction.
            """
            random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
            logging.warning('Retrying in %f seconds...', random_delay)
            time.sleep(random_delay)


        def func_retry(*args, **kwargs):
            # Used to cache exception to be raised later.
            exc_info = None
            delayed_enabled = False
            exception_tuple = () if blacklist is None else tuple(blacklist)
            start_time = time.time()
            remaining_time = timeout_min * 60

            while remaining_time > 0:
                # Sleep before every attempt except the very first one.
                if delayed_enabled:
                    delay()
                else:
                    delayed_enabled = True
                try:
                    # Clear the cache
                    exc_info = None
                    is_timeout, result = timeout(func, args, kwargs,
                                                 remaining_time)
                    if not is_timeout:
                        return result
                except exception_tuple:
                    raise
                except error.CrosDynamicSuiteException:
                    raise
                except ExceptionToCheck as e:
                    logging.warning('%s(%s)', e.__class__, e)
                    # Cache the exception to be raised later.
                    exc_info = sys.exc_info()

                remaining_time = int(timeout_min*60 -
                                     (time.time() - start_time))

            # The call must have timed out or raised ExceptionToCheck.
            if not exc_info:
                raise error.TimeoutException('Call is timed out.')
            # Raise the cached exception with original backtrace.
            _reraise(exc_info)


        return func_retry  # true decorator
    return deco_retry
Matthew Sartoridb550112015-06-02 13:55:32 -0700183
184
def retry_exponential(ExceptionToCheck, timeout_min=1.0, delay_sec=3,
                      backoff_factor=2, blacklist=None):
    """Retry calling the decorated function using an exponential backoff.

    Present an interface consistent with the existing retry function, but
    use instead the chromite retry_util functions to provide exponential
    backoff.

    timeout_min is not enforced as a deadline here; it is only used to
    estimate how many retries fit into that much total sleeping time. A
    negative timeout_min is treated as 0, i.e. no retries.

    @param ExceptionToCheck: See retry.
    @param timeout_min: See retry.
    @param delay_sec: See retry. The absolute value is used, and 0 is
                      replaced by 1.
    @param backoff_factor: The base used for exponential backoff. A simpler
                           backoff method is used if backoff_factor is not
                           greater than 1.
    @param blacklist: See retry.
    """
    def deco_retry(func):
        """The outer decorator.

        @param func: The function we are decorating.
        """
        exception_tuple = () if blacklist is None else tuple(blacklist)

        # Check the backoff_factor. If backoff is greater than 1,
        # then we use exponential backoff, else, simple backoff.
        backoff = backoff_factor if backoff_factor >= 1 else 1

        # Chromite retry_util uses:
        # max_retry: The number of retry attempts to make.
        # sleep: The multiplier for how long to sleep between attempts.
        # A negative budget is clamped to 0 so that the sqrt/log below never
        # see a negative argument.
        total_sleep = max(timeout_min * 60.0, 0.0)
        sleep = abs(delay_sec) if delay_sec != 0 else 1

        if backoff > 1:
            # Estimate the max_retry in the case of exponential backoff:
            # => total_sleep = sleep*sum(r=0..max_retry-1, backoff^r)
            # => total_sleep = sleep( (1-backoff^max_retry) / (1-backoff) )
            # => max_retry*ln(backoff) = ln(1-(total_sleep/sleep)*(1-backoff))
            # => max_retry = ln(1-(total_sleep/sleep)*(1-backoff))/ln(backoff)
            numerator = math.log10(1-(total_sleep/sleep)*(1-backoff))
            denominator = math.log10(backoff)
            max_retry = int(math.ceil(numerator/denominator))
        else:
            # Estimate the max_retry in the case of simple backoff:
            # => total_sleep = sleep*sum(1..max_retry)
            # => total_sleep/sleep = max_retry(max_retry+1)/2
            # => max_retry = -1/2 + sqrt(1+8K)/2 where K = total_sleep/sleep
            # NOTE(review): the expression below subtracts 1 where the
            # derivation above gives 1/2, so it can come out one retry lower
            # than the derivation; confirm which one is intended.
            max_retry = int(math.ceil(-1 + math.sqrt(
                    1+8*math.ceil(total_sleep/sleep))/2.0))

        def handler(exc):
            """Check if exc is an ExceptionToCheck or if it's blacklisted.

            @param exc: An exception.

            @return: True if exc is an ExceptionToCheck and is not
                     blacklisted. False otherwise.
            """
            is_exc_to_check = isinstance(exc, ExceptionToCheck)
            is_blacklisted = isinstance(exc, exception_tuple)
            return is_exc_to_check and not is_blacklisted

        def func_retry(*args, **kwargs):
            """The actual function decorator.

            @params args: The arguments to the function.
            @params kwargs: The keyword arguments to the function. Any
                            'sleep' or 'backoff_factor' entries are
                            overwritten with the retry settings.
            """
            # Set keyword arguments
            kwargs['sleep'] = sleep
            kwargs['backoff_factor'] = backoff

            return retry_util.GenericRetry(handler, max_retry, func,
                                           *args, **kwargs)

        return func_retry

    return deco_retry
262 return deco_retry