blob: 4142e2e860c8c58576653ac96ac1aef368ba7c33 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Random variable generators.
Guido van Rossumff03b1a1994-03-09 12:55:02 +00002
Raymond Hettingeref19bad2020-06-25 17:03:50 -07003 bytes
4 -----
5 uniform bytes (values between 0 and 255)
6
Tim Petersd7b5e882001-01-25 03:36:26 +00007 integers
8 --------
9 uniform within range
10
11 sequences
12 ---------
13 pick random element
Raymond Hettingerf24eb352002-11-12 17:41:57 +000014 pick random sample
Raymond Hettingere8f1e002016-09-06 17:15:29 -070015 pick weighted random sample
Tim Petersd7b5e882001-01-25 03:36:26 +000016 generate random permutation
17
Guido van Rossume7b146f2000-02-04 15:28:42 +000018 distributions on the real line:
19 ------------------------------
Tim Petersd7b5e882001-01-25 03:36:26 +000020 uniform
Christian Heimesfe337bf2008-03-23 21:54:12 +000021 triangular
Guido van Rossume7b146f2000-02-04 15:28:42 +000022 normal (Gaussian)
23 lognormal
24 negative exponential
25 gamma
26 beta
Raymond Hettinger40f62172002-12-29 23:03:38 +000027 pareto
28 Weibull
Guido van Rossumff03b1a1994-03-09 12:55:02 +000029
Guido van Rossume7b146f2000-02-04 15:28:42 +000030 distributions on the circle (angles 0 to 2pi)
31 ---------------------------------------------
32 circular uniform
33 von Mises
34
Raymond Hettinger40f62172002-12-29 23:03:38 +000035General notes on the underlying Mersenne Twister core generator:
Guido van Rossume7b146f2000-02-04 15:28:42 +000036
Raymond Hettinger40f62172002-12-29 23:03:38 +000037* The period is 2**19937-1.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000038* It is one of the most extensively tested generators in existence.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000039* The random() method is implemented in C, executes in a single Python step,
40 and is, therefore, threadsafe.
Tim Peterse360d952001-01-26 10:00:39 +000041
Guido van Rossume7b146f2000-02-04 15:28:42 +000042"""
Guido van Rossumd03e1191998-05-29 17:51:31 +000043
Raymond Hettingeref19bad2020-06-25 17:03:50 -070044# Translated by Guido van Rossum from C source provided by
45# Adrian Baddeley. Adapted by Raymond Hettinger for use with
46# the Mersenne Twister and os.urandom() core generators.
47
Raymond Hettinger2f726e92003-10-05 09:09:15 +000048from warnings import warn as _warn
Raymond Hettinger91e27c22005-08-19 01:36:35 +000049from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
Raymond Hettinger26a1ad12020-06-22 19:38:59 -070050from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
Ram Rachumb0dfc752020-09-29 04:32:10 +030051from math import tau as TWOPI, floor as _floor, isfinite as _isfinite
Raymond Hettingerc1c43ca2004-09-05 00:00:42 +000052from os import urandom as _urandom
Christian Heimesf1dc3ee2013-10-13 02:04:20 +020053from _collections_abc import Set as _Set, Sequence as _Sequence
Raymond Hettingera9621bb2020-12-28 11:10:34 -080054from operator import index as _index
Raymond Hettinger53822032019-02-16 13:30:51 -080055from itertools import accumulate as _accumulate, repeat as _repeat
Raymond Hettingercfd31f02019-02-13 02:04:17 -080056from bisect import bisect as _bisect
Antoine Pitrou346cbd32017-05-27 17:50:54 +020057import os as _os
Raymond Hettingeref19bad2020-06-25 17:03:50 -070058import _random
Guido van Rossumff03b1a1994-03-09 12:55:02 +000059
Christian Heimesd9145962019-04-10 22:18:02 +020060try:
61 # hashlib is pretty heavy to load, try lean internal module first
62 from _sha512 import sha512 as _sha512
63except ImportError:
64 # fallback to official implementation
65 from hashlib import sha512 as _sha512
66
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070067__all__ = [
68 "Random",
69 "SystemRandom",
70 "betavariate",
71 "choice",
72 "choices",
73 "expovariate",
74 "gammavariate",
75 "gauss",
76 "getrandbits",
77 "getstate",
78 "lognormvariate",
79 "normalvariate",
80 "paretovariate",
81 "randint",
82 "random",
83 "randrange",
84 "sample",
85 "seed",
86 "setstate",
87 "shuffle",
88 "triangular",
89 "uniform",
90 "vonmisesvariate",
91 "weibullvariate",
92]
Tim Petersd7b5e882001-01-25 03:36:26 +000093
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070094NV_MAGICCONST = 4 * _exp(-0.5) / _sqrt(2.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000095LOG4 = _log(4.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000096SG_MAGICCONST = 1.0 + _log(4.5)
Raymond Hettinger2f726e92003-10-05 09:09:15 +000097BPF = 53 # Number of bits in a float
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070098RECIP_BPF = 2 ** -BPF
Raymond Hettinger768fa142021-01-02 10:24:51 -080099_ONE = 1
Tim Petersd7b5e882001-01-25 03:36:26 +0000100
Raymond Hettinger356a4592004-08-30 06:14:31 +0000101
Raymond Hettinger145a4a02003-01-07 10:25:55 +0000102class Random(_random.Random):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000103 """Random number generator base class used by bound module functions.
104
105 Used to instantiate instances of Random to get generators that don't
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000106 share state.
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000107
108 Class Random can also be subclassed if you want to use a different basic
109 generator of your own devising: in that case, override the following
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000110 methods: random(), seed(), getstate(), and setstate().
Benjamin Petersond18de0e2008-07-31 20:21:46 +0000111 Optionally, implement a getrandbits() method so that randrange()
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000112 can cover arbitrarily large ranges.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000113
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000114 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000115
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000116 VERSION = 3 # used by getstate/setstate
Tim Petersd7b5e882001-01-25 03:36:26 +0000117
118 def __init__(self, x=None):
119 """Initialize an instance.
120
121 Optional argument x controls seeding, as for Random.seed().
122 """
123
124 self.seed(x)
Raymond Hettinger40f62172002-12-29 23:03:38 +0000125 self.gauss_next = None
Tim Petersd7b5e882001-01-25 03:36:26 +0000126
Raymond Hettingerf763a722010-09-07 00:38:15 +0000127 def seed(self, a=None, version=2):
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700128 """Initialize internal state from a seed.
129
130 The only supported seed types are None, int, float,
131 str, bytes, and bytearray.
Tim Petersd7b5e882001-01-25 03:36:26 +0000132
Raymond Hettinger23f12412004-09-13 22:23:21 +0000133 None or no argument seeds from current time or from an operating
134 system specific randomness source if available.
Tim Peters0de88fc2001-02-01 04:59:18 +0000135
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000136 If *a* is an int, all bits are used.
Raymond Hettingerf763a722010-09-07 00:38:15 +0000137
Raymond Hettinger16eb8272016-09-04 11:17:28 -0700138 For version 2 (the default), all of the bits are used if *a* is a str,
139 bytes, or bytearray. For version 1 (provided for reproducing random
140 sequences from older versions of Python), the algorithm for str and
141 bytes generates a narrower range of seeds.
142
Tim Petersd7b5e882001-01-25 03:36:26 +0000143 """
144
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700145 if version == 1 and isinstance(a, (str, bytes)):
Raymond Hettinger132a7d72017-09-17 09:04:30 -0700146 a = a.decode('latin-1') if isinstance(a, bytes) else a
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700147 x = ord(a[0]) << 7 if a else 0
Raymond Hettinger132a7d72017-09-17 09:04:30 -0700148 for c in map(ord, a):
149 x = ((1000003 * x) ^ c) & 0xFFFFFFFFFFFFFFFF
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700150 x ^= len(a)
151 a = -2 if x == -1 else x
152
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700153 elif version == 2 and isinstance(a, (str, bytes, bytearray)):
Raymond Hettinger2f9cc7a2016-08-31 23:00:32 -0700154 if isinstance(a, str):
155 a = a.encode()
156 a += _sha512(a).digest()
157 a = int.from_bytes(a, 'big')
Raymond Hettingerf763a722010-09-07 00:38:15 +0000158
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700159 elif not isinstance(a, (type(None), int, float, str, bytes, bytearray)):
160 _warn('Seeding based on hashing is deprecated\n'
161 'since Python 3.9 and will be removed in a subsequent '
162 'version. The only \n'
163 'supported seed types are: None, '
164 'int, float, str, bytes, and bytearray.',
165 DeprecationWarning, 2)
166
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000167 super().seed(a)
Tim Peters46c04e12002-05-05 20:40:00 +0000168 self.gauss_next = None
169
Tim Peterscd804102001-01-25 20:25:57 +0000170 def getstate(self):
171 """Return internal state; can be passed to setstate() later."""
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000172 return self.VERSION, super().getstate(), self.gauss_next
Tim Peterscd804102001-01-25 20:25:57 +0000173
174 def setstate(self, state):
175 """Restore internal state from object returned by getstate()."""
176 version = state[0]
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000177 if version == 3:
Raymond Hettinger40f62172002-12-29 23:03:38 +0000178 version, internalstate, self.gauss_next = state
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000179 super().setstate(internalstate)
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000180 elif version == 2:
181 version, internalstate, self.gauss_next = state
182 # In version 2, the state was saved as signed ints, which causes
183 # inconsistencies between 32/64-bit systems. The state is
184 # really unsigned 32-bit ints, so we convert negative ints from
185 # version 2 to positive longs for version 3.
186 try:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700187 internalstate = tuple(x % (2 ** 32) for x in internalstate)
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000188 except ValueError as e:
189 raise TypeError from e
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000190 super().setstate(internalstate)
Tim Peterscd804102001-01-25 20:25:57 +0000191 else:
192 raise ValueError("state with version %s passed to "
193 "Random.setstate() of version %s" %
194 (version, self.VERSION))
195
Tim Peterscd804102001-01-25 20:25:57 +0000196
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700197 ## -------------------------------------------------------
198 ## ---- Methods below this point do not need to be overridden or extended
199 ## ---- when subclassing for the purpose of using a different core generator.
Victor Stinner2d875772020-04-29 18:49:00 +0200200
Victor Stinner2d875772020-04-29 18:49:00 +0200201
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700202 ## -------------------- pickle support -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000203
R David Murrayd9ebf4d2013-04-02 13:10:52 -0400204 # Issue 17489: Since __reduce__ was defined to fix #759889 this is no
205 # longer called; we leave it here because it has been here since random was
206 # rewritten back in 2001 and why risk breaking something.
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700207 def __getstate__(self): # for pickle
Tim Peterscd804102001-01-25 20:25:57 +0000208 return self.getstate()
209
210 def __setstate__(self, state): # for pickle
211 self.setstate(state)
212
Raymond Hettinger5f078ff2003-06-24 20:29:04 +0000213 def __reduce__(self):
214 return self.__class__, (), self.getstate()
215
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700216
217 ## ---- internal support method for evenly distributed integers ----
218
219 def __init_subclass__(cls, /, **kwargs):
220 """Control how subclasses generate random integers.
221
222 The algorithm a subclass can use depends on the random() and/or
223 getrandbits() implementation available to it and determines
224 whether it can generate random integers from arbitrarily large
225 ranges.
226 """
227
228 for c in cls.__mro__:
229 if '_randbelow' in c.__dict__:
230 # just inherit it
231 break
232 if 'getrandbits' in c.__dict__:
233 cls._randbelow = cls._randbelow_with_getrandbits
234 break
235 if 'random' in c.__dict__:
236 cls._randbelow = cls._randbelow_without_getrandbits
237 break
238
239 def _randbelow_with_getrandbits(self, n):
240 "Return a random int in the range [0,n). Returns 0 if n==0."
241
242 if not n:
243 return 0
244 getrandbits = self.getrandbits
245 k = n.bit_length() # don't use (n-1) here because n can be 1
246 r = getrandbits(k) # 0 <= r < 2**k
247 while r >= n:
248 r = getrandbits(k)
249 return r
250
251 def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF):
252 """Return a random int in the range [0,n). Returns 0 if n==0.
253
254 The implementation does not use getrandbits, but only random.
255 """
256
257 random = self.random
258 if n >= maxsize:
259 _warn("Underlying random() generator does not supply \n"
260 "enough bits to choose from a population range this large.\n"
261 "To remove the range limitation, add a getrandbits() method.")
262 return _floor(random() * n)
263 if n == 0:
264 return 0
265 rem = maxsize % n
266 limit = (maxsize - rem) / maxsize # int(limit * maxsize) % n == 0
267 r = random()
268 while r >= limit:
269 r = random()
270 return _floor(r * maxsize) % n
271
272 _randbelow = _randbelow_with_getrandbits
273
274
275 ## --------------------------------------------------------
276 ## ---- Methods below this point generate custom distributions
277 ## ---- based on the methods defined above. They do not
278 ## ---- directly touch the underlying generator and only
279 ## ---- access randomness through the methods: random(),
280 ## ---- getrandbits(), or _randbelow().
281
282
283 ## -------------------- bytes methods ---------------------
284
285 def randbytes(self, n):
286 """Generate n random bytes."""
287 return self.getrandbits(n * 8).to_bytes(n, 'little')
288
289
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700290 ## -------------------- integer methods -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000291
Raymond Hettinger768fa142021-01-02 10:24:51 -0800292 def randrange(self, start, stop=None, step=_ONE):
Tim Petersd7b5e882001-01-25 03:36:26 +0000293 """Choose a random item from range(start, stop[, step]).
294
295 This fixes the problem with randint() which includes the
296 endpoint; in Python this is usually not what you want.
Raymond Hettinger3051cc32010-09-07 00:48:40 +0000297
Tim Petersd7b5e882001-01-25 03:36:26 +0000298 """
299
300 # This code is a bit messy to make it fast for the
Tim Peters9146f272002-08-16 03:41:39 +0000301 # common case while still doing adequate error checking.
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800302 try:
303 istart = _index(start)
304 except TypeError:
305 if int(start) == start:
306 istart = int(start)
307 _warn('Float arguments to randrange() have been deprecated\n'
308 'since Python 3.10 and will be removed in a subsequent '
309 'version.',
310 DeprecationWarning, 2)
311 else:
312 _warn('randrange() will raise TypeError in the future',
313 DeprecationWarning, 2)
314 raise ValueError("non-integer arg 1 for randrange()")
Raymond Hettinger768fa142021-01-02 10:24:51 -0800315
Raymond Hettinger3051cc32010-09-07 00:48:40 +0000316 if stop is None:
Raymond Hettinger768fa142021-01-02 10:24:51 -0800317 # We don't check for "step != 1" because it hasn't been
318 # type checked and converted to an integer yet.
319 if step is not _ONE:
320 raise TypeError('Missing a non-None stop argument')
Tim Petersd7b5e882001-01-25 03:36:26 +0000321 if istart > 0:
Raymond Hettinger05156612010-09-07 04:44:52 +0000322 return self._randbelow(istart)
Collin Winterce36ad82007-08-30 01:19:48 +0000323 raise ValueError("empty range for randrange()")
Tim Peters9146f272002-08-16 03:41:39 +0000324
325 # stop argument supplied.
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800326 try:
327 istop = _index(stop)
328 except TypeError:
329 if int(stop) == stop:
330 istop = int(stop)
331 _warn('Float arguments to randrange() have been deprecated\n'
332 'since Python 3.10 and will be removed in a subsequent '
333 'version.',
334 DeprecationWarning, 2)
335 else:
336 _warn('randrange() will raise TypeError in the future',
337 DeprecationWarning, 2)
338 raise ValueError("non-integer stop for randrange()")
339
340 try:
341 istep = _index(step)
342 except TypeError:
343 if int(step) == step:
344 istep = int(step)
345 _warn('Float arguments to randrange() have been deprecated\n'
346 'since Python 3.10 and will be removed in a subsequent '
347 'version.',
348 DeprecationWarning, 2)
349 else:
350 _warn('randrange() will raise TypeError in the future',
351 DeprecationWarning, 2)
352 raise ValueError("non-integer step for randrange()")
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000353 width = istop - istart
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800354 if istep == 1:
Raymond Hettinger8f8de732021-01-02 12:09:56 -0800355 if width > 0:
356 return istart + self._randbelow(width)
Kumar Akshay2433a2a2019-01-22 00:49:59 +0530357 raise ValueError("empty range for randrange() (%d, %d, %d)" % (istart, istop, width))
Tim Peters9146f272002-08-16 03:41:39 +0000358
359 # Non-unit step argument supplied.
Tim Petersd7b5e882001-01-25 03:36:26 +0000360 if istep > 0:
Raymond Hettingerffdb8bb2004-09-27 15:29:05 +0000361 n = (width + istep - 1) // istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000362 elif istep < 0:
Raymond Hettingerffdb8bb2004-09-27 15:29:05 +0000363 n = (width + istep + 1) // istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000364 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000365 raise ValueError("zero step for randrange()")
Tim Petersd7b5e882001-01-25 03:36:26 +0000366 if n <= 0:
Collin Winterce36ad82007-08-30 01:19:48 +0000367 raise ValueError("empty range for randrange()")
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700368 return istart + istep * self._randbelow(n)
Tim Petersd7b5e882001-01-25 03:36:26 +0000369
370 def randint(self, a, b):
Tim Peterscd804102001-01-25 20:25:57 +0000371 """Return random integer in range [a, b], including both end points.
Tim Petersd7b5e882001-01-25 03:36:26 +0000372 """
373
374 return self.randrange(a, b+1)
375
Wolfgang Maierba3a87a2018-04-17 17:16:17 +0200376
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700377 ## -------------------- sequence methods -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000378
Tim Petersd7b5e882001-01-25 03:36:26 +0000379 def choice(self, seq):
380 """Choose a random element from a non-empty sequence."""
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700381 # raises IndexError if seq is empty
382 return seq[self._randbelow(len(seq))]
Tim Petersd7b5e882001-01-25 03:36:26 +0000383
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700384 def shuffle(self, x, random=None):
Antoine Pitrou5e394332012-11-04 02:10:33 +0100385 """Shuffle list x in place, and return None.
Tim Petersd7b5e882001-01-25 03:36:26 +0000386
Antoine Pitrou5e394332012-11-04 02:10:33 +0100387 Optional argument random is a 0-argument function returning a
388 random float in [0.0, 1.0); if it is the default None, the
389 standard random.random will be used.
Senthil Kumaranf8ce51a2013-09-11 22:54:31 -0700390
Tim Petersd7b5e882001-01-25 03:36:26 +0000391 """
392
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700393 if random is None:
394 randbelow = self._randbelow
395 for i in reversed(range(1, len(x))):
396 # pick an element in x[:i+1] with which to exchange x[i]
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700397 j = randbelow(i + 1)
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700398 x[i], x[j] = x[j], x[i]
399 else:
Raymond Hettinger190fac92020-05-02 16:45:32 -0700400 _warn('The *random* parameter to shuffle() has been deprecated\n'
401 'since Python 3.9 and will be removed in a subsequent '
402 'version.',
403 DeprecationWarning, 2)
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700404 floor = _floor
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700405 for i in reversed(range(1, len(x))):
406 # pick an element in x[:i+1] with which to exchange x[i]
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700407 j = floor(random() * (i + 1))
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700408 x[i], x[j] = x[j], x[i]
Tim Petersd7b5e882001-01-25 03:36:26 +0000409
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700410 def sample(self, population, k, *, counts=None):
Raymond Hettinger1acde192008-01-14 01:00:53 +0000411 """Chooses k unique random elements from a population sequence or set.
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000412
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000413 Returns a new list containing elements from the population while
414 leaving the original population unchanged. The resulting list is
415 in selection order so that all sub-slices will also be valid random
416 samples. This allows raffle winners (the sample) to be partitioned
417 into grand prize and second place winners (the subslices).
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000418
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000419 Members of the population need not be hashable or unique. If the
420 population contains repeats, then each occurrence is a possible
421 selection in the sample.
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000422
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700423 Repeated elements can be specified one at a time or with the optional
424 counts parameter. For example:
425
426 sample(['red', 'blue'], counts=[4, 2], k=5)
427
428 is equivalent to:
429
430 sample(['red', 'red', 'red', 'red', 'blue', 'blue'], k=5)
431
432 To choose a sample from a range of integers, use range() for the
433 population argument. This is especially fast and space efficient
434 for sampling from a large population:
435
436 sample(range(10000000), 60)
437
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000438 """
439
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000440 # Sampling without replacement entails tracking either potential
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000441 # selections (the pool) in a list or previous selections in a set.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000442
Jeremy Hylton2b55d352004-02-23 17:27:57 +0000443 # When the number of selections is small compared to the
444 # population, then tracking selections is efficient, requiring
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000445 # only a small set and an occasional reselection. For
Jeremy Hylton2b55d352004-02-23 17:27:57 +0000446 # a larger number of selections, the pool tracking method is
447 # preferred since the list takes less space than the
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000448 # set and it doesn't suffer from frequent reselections.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000449
Raymond Hettinger7fc633f2018-12-04 00:13:38 -0800450 # The number of calls to _randbelow() is kept at or near k, the
451 # theoretical minimum. This is important because running time
452 # is dominated by _randbelow() and because it extracts the
453 # least entropy from the underlying random number generators.
454
455 # Memory requirements are kept to the smaller of a k-length
456 # set or an n-length list.
457
458 # There are other sampling algorithms that do not require
459 # auxiliary memory, but they were rejected because they made
460 # too many calls to _randbelow(), making them slower and
461 # causing them to eat more entropy than necessary.
462
Raymond Hettinger57d1a882011-02-23 00:46:28 +0000463 if not isinstance(population, _Sequence):
masklinn1e27b572020-12-19 05:33:36 +0100464 if isinstance(population, _Set):
465 _warn('Sampling from a set deprecated\n'
466 'since Python 3.9 and will be removed in a subsequent version.',
467 DeprecationWarning, 2)
468 population = tuple(population)
469 else:
470 raise TypeError("Population must be a sequence. For dicts or sets, use sorted(d).")
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000471 n = len(population)
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700472 if counts is not None:
473 cum_counts = list(_accumulate(counts))
474 if len(cum_counts) != n:
475 raise ValueError('The number of counts does not match the population')
476 total = cum_counts.pop()
477 if not isinstance(total, int):
478 raise TypeError('Counts must be integers')
479 if total <= 0:
480 raise ValueError('Total of counts must be greater than zero')
481 selections = sample(range(total), k=k)
482 bisect = _bisect
483 return [population[bisect(cum_counts, s)] for s in selections]
484 randbelow = self._randbelow
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000485 if not 0 <= k <= n:
Raymond Hettingerbf871262016-11-21 14:34:33 -0800486 raise ValueError("Sample larger than population or is negative")
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000487 result = [None] * k
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000488 setsize = 21 # size of a small set minus size of an empty list
489 if k > 5:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700490 setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
Raymond Hettinger1acde192008-01-14 01:00:53 +0000491 if n <= setsize:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700492 # An n-length list is smaller than a k-length set.
493 # Invariant: non-selected at pool[0 : n-i]
Raymond Hettinger311f4192002-11-18 09:01:24 +0000494 pool = list(population)
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700495 for i in range(k):
496 j = randbelow(n - i)
Raymond Hettinger311f4192002-11-18 09:01:24 +0000497 result[i] = pool[j]
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700498 pool[j] = pool[n - i - 1] # move non-selected item into vacancy
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000499 else:
Raymond Hettinger1acde192008-01-14 01:00:53 +0000500 selected = set()
501 selected_add = selected.add
502 for i in range(k):
Raymond Hettinger05a505f2010-09-07 19:19:33 +0000503 j = randbelow(n)
Raymond Hettinger1acde192008-01-14 01:00:53 +0000504 while j in selected:
Raymond Hettinger05a505f2010-09-07 19:19:33 +0000505 j = randbelow(n)
Raymond Hettinger1acde192008-01-14 01:00:53 +0000506 selected_add(j)
507 result[i] = population[j]
Raymond Hettinger311f4192002-11-18 09:01:24 +0000508 return result
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000509
Raymond Hettinger9016f282016-09-26 21:45:57 -0700510 def choices(self, population, weights=None, *, cum_weights=None, k=1):
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700511 """Return a k sized list of population elements chosen with replacement.
512
513 If the relative weights or cumulative weights are not specified,
514 the selections are made with equal probability.
515
516 """
Raymond Hettinger30d00e52016-10-29 16:55:36 -0700517 random = self.random
Raymond Hettingere69cd162018-07-04 15:28:20 -0700518 n = len(population)
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700519 if cum_weights is None:
520 if weights is None:
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700521 floor = _floor
Raymond Hettinger0a18e052018-11-09 02:39:50 -0800522 n += 0.0 # convert to float for a small speed improvement
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700523 return [population[floor(random() * n)] for i in _repeat(None, k)]
Raymond Hettingercfd31f02019-02-13 02:04:17 -0800524 cum_weights = list(_accumulate(weights))
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700525 elif weights is not None:
Raymond Hettinger24e42392016-11-13 00:42:56 -0500526 raise TypeError('Cannot specify both weights and cumulative weights')
Raymond Hettingere69cd162018-07-04 15:28:20 -0700527 if len(cum_weights) != n:
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700528 raise ValueError('The number of weights does not match the population')
Raymond Hettinger0a18e052018-11-09 02:39:50 -0800529 total = cum_weights[-1] + 0.0 # convert to float
Raymond Hettinger041d8b42019-11-23 02:22:13 -0800530 if total <= 0.0:
531 raise ValueError('Total of weights must be greater than zero')
Ram Rachumb0dfc752020-09-29 04:32:10 +0300532 if not _isfinite(total):
533 raise ValueError('Total of weights must be finite')
Raymond Hettinger041d8b42019-11-23 02:22:13 -0800534 bisect = _bisect
Raymond Hettingere69cd162018-07-04 15:28:20 -0700535 hi = n - 1
Raymond Hettingerddf71712018-06-27 01:08:31 -0700536 return [population[bisect(cum_weights, random() * total, 0, hi)]
Raymond Hettinger53822032019-02-16 13:30:51 -0800537 for i in _repeat(None, k)]
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700538
Tim Peterscd804102001-01-25 20:25:57 +0000539
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700540 ## -------------------- real-valued distributions -------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000541
542 def uniform(self, a, b):
Raymond Hettingerbe40db02009-06-11 23:12:14 +0000543 "Get a random number in the range [a, b) or [a, b] depending on rounding."
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700544 return a + (b - a) * self.random()
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000545
Christian Heimesfe337bf2008-03-23 21:54:12 +0000546 def triangular(self, low=0.0, high=1.0, mode=None):
547 """Triangular distribution.
548
549 Continuous distribution bounded by given lower and upper limits,
550 and having a given mode value in-between.
551
552 http://en.wikipedia.org/wiki/Triangular_distribution
553
554 """
555 u = self.random()
Raymond Hettinger978c6ab2014-05-25 17:25:27 -0700556 try:
557 c = 0.5 if mode is None else (mode - low) / (high - low)
558 except ZeroDivisionError:
559 return low
Christian Heimesfe337bf2008-03-23 21:54:12 +0000560 if u > c:
561 u = 1.0 - u
562 c = 1.0 - c
563 low, high = high, low
Raymond Hettingerf5ea83f2017-09-04 16:51:06 -0700564 return low + (high - low) * _sqrt(u * c)
Christian Heimesfe337bf2008-03-23 21:54:12 +0000565
Tim Petersd7b5e882001-01-25 03:36:26 +0000566 def normalvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000567 """Normal distribution.
568
569 mu is the mean, and sigma is the standard deviation.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000570
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000571 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000572 # Uses Kinderman and Monahan method. Reference: Kinderman,
573 # A.J. and Monahan, J.F., "Computer generation of random
574 # variables using the ratio of uniform deviates", ACM Trans
575 # Math Software, 3, (1977), pp257-260.
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000576
Tim Petersd7b5e882001-01-25 03:36:26 +0000577 random = self.random
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700578 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000579 u1 = random()
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000580 u2 = 1.0 - random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700581 z = NV_MAGICCONST * (u1 - 0.5) / u2
582 zz = z * z / 4.0
Tim Petersd7b5e882001-01-25 03:36:26 +0000583 if zz <= -_log(u2):
584 break
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700585 return mu + z * sigma
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000586
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700587 def gauss(self, mu, sigma):
588 """Gaussian distribution.
589
590 mu is the mean, and sigma is the standard deviation. This is
591 slightly faster than the normalvariate() function.
592
593 Not thread-safe without a lock around calls.
594
595 """
596 # When x and y are two variables from [0, 1), uniformly
597 # distributed, then
598 #
599 # cos(2*pi*x)*sqrt(-2*log(1-y))
600 # sin(2*pi*x)*sqrt(-2*log(1-y))
601 #
602 # are two *independent* variables with normal distribution
603 # (mu = 0, sigma = 1).
604 # (Lambert Meertens)
605 # (corrected version; bug discovered by Mike Miller, fixed by LM)
606
607 # Multithreading note: When two threads call this function
608 # simultaneously, it is possible that they will receive the
609 # same return value. The window is very small though. To
610 # avoid this, you have to use a lock around all calls. (I
611 # didn't want to slow this down in the serial case by using a
612 # lock here.)
613
614 random = self.random
615 z = self.gauss_next
616 self.gauss_next = None
617 if z is None:
618 x2pi = random() * TWOPI
619 g2rad = _sqrt(-2.0 * _log(1.0 - random()))
620 z = _cos(x2pi) * g2rad
621 self.gauss_next = _sin(x2pi) * g2rad
622
623 return mu + z * sigma
Tim Petersd7b5e882001-01-25 03:36:26 +0000624
625 def lognormvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000626 """Log normal distribution.
627
628 If you take the natural logarithm of this distribution, you'll get a
629 normal distribution with mean mu and standard deviation sigma.
630 mu can have any value, and sigma must be greater than zero.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000631
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000632 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000633 return _exp(self.normalvariate(mu, sigma))
634
Tim Petersd7b5e882001-01-25 03:36:26 +0000635 def expovariate(self, lambd):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000636 """Exponential distribution.
637
Mark Dickinson2f947362009-01-07 17:54:07 +0000638 lambd is 1.0 divided by the desired mean. It should be
639 nonzero. (The parameter would be called "lambda", but that is
640 a reserved word in Python.) Returned values range from 0 to
641 positive infinity if lambd is positive, and from negative
642 infinity to 0 if lambd is negative.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000643
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000644 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000645 # lambd: rate lambd = 1/mean
646 # ('lambda' is a Python reserved word)
647
Raymond Hettinger5279fb92011-06-25 11:30:53 +0200648 # we use 1-random() instead of random() to preclude the
649 # possibility of taking the log of zero.
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700650 return -_log(1.0 - self.random()) / lambd
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000651
Tim Petersd7b5e882001-01-25 03:36:26 +0000652 def vonmisesvariate(self, mu, kappa):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000653 """Circular data distribution.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000654
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000655 mu is the mean angle, expressed in radians between 0 and 2*pi, and
656 kappa is the concentration parameter, which must be greater than or
657 equal to zero. If kappa is equal to zero, this distribution reduces
658 to a uniform random angle over the range 0 to 2*pi.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000659
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000660 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000661 # Based upon an algorithm published in: Fisher, N.I.,
662 # "Statistical Analysis of Circular Data", Cambridge
663 # University Press, 1993.
664
665 # Thanks to Magnus Kessler for a correction to the
666 # implementation of step 4.
667
668 random = self.random
669 if kappa <= 1e-6:
670 return TWOPI * random()
671
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200672 s = 0.5 / kappa
673 r = s + _sqrt(1.0 + s * s)
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000674
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700675 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000676 u1 = random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000677 z = _cos(_pi * u1)
Tim Petersd7b5e882001-01-25 03:36:26 +0000678
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200679 d = z / (r + z)
Tim Petersd7b5e882001-01-25 03:36:26 +0000680 u2 = random()
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200681 if u2 < 1.0 - d * d or u2 <= (1.0 - d) * _exp(d):
Tim Peters0c9886d2001-01-15 01:18:21 +0000682 break
Tim Petersd7b5e882001-01-25 03:36:26 +0000683
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200684 q = 1.0 / r
685 f = (q + z) / (1.0 + q * z)
Tim Petersd7b5e882001-01-25 03:36:26 +0000686 u3 = random()
687 if u3 > 0.5:
Mark Dickinsonbe5f9192013-02-10 14:16:10 +0000688 theta = (mu + _acos(f)) % TWOPI
Tim Petersd7b5e882001-01-25 03:36:26 +0000689 else:
Mark Dickinsonbe5f9192013-02-10 14:16:10 +0000690 theta = (mu - _acos(f)) % TWOPI
Tim Petersd7b5e882001-01-25 03:36:26 +0000691
692 return theta
693
Tim Petersd7b5e882001-01-25 03:36:26 +0000694 def gammavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000695 """Gamma distribution. Not the gamma function!
696
697 Conditions on the parameters are alpha > 0 and beta > 0.
698
Raymond Hettingera8e4d6e2011-03-22 15:55:51 -0700699 The probability distribution function is:
700
701 x ** (alpha - 1) * math.exp(-x / beta)
702 pdf(x) = --------------------------------------
703 math.gamma(alpha) * beta ** alpha
704
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000705 """
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000706 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
Tim Peters8ac14952002-05-23 15:15:30 +0000707
Guido van Rossum570764d2002-05-14 14:08:12 +0000708 # Warning: a few older sources define the gamma distribution in terms
709 # of alpha > -1.0
710 if alpha <= 0.0 or beta <= 0.0:
Collin Winterce36ad82007-08-30 01:19:48 +0000711 raise ValueError('gammavariate: alpha and beta must be > 0.0')
Tim Peters8ac14952002-05-23 15:15:30 +0000712
Tim Petersd7b5e882001-01-25 03:36:26 +0000713 random = self.random
Tim Petersd7b5e882001-01-25 03:36:26 +0000714 if alpha > 1.0:
715
716 # Uses R.C.H. Cheng, "The generation of Gamma
717 # variables with non-integral shape parameters",
718 # Applied Statistics, (1977), 26, No. 1, p71-74
719
Raymond Hettingerca6cdc22002-05-13 23:40:14 +0000720 ainv = _sqrt(2.0 * alpha - 1.0)
721 bbb = alpha - LOG4
722 ccc = alpha + ainv
Tim Peters8ac14952002-05-23 15:15:30 +0000723
Raymond Hettinger6a613f92020-08-02 12:03:32 -0700724 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000725 u1 = random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700726 if not 1e-7 < u1 < 0.9999999:
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000727 continue
728 u2 = 1.0 - random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700729 v = _log(u1 / (1.0 - u1)) / ainv
730 x = alpha * _exp(v)
731 z = u1 * u1 * u2
732 r = bbb + ccc * v - x
733 if r + SG_MAGICCONST - 4.5 * z >= 0.0 or r >= _log(z):
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000734 return x * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000735
736 elif alpha == 1.0:
leodema9f396b62017-06-04 07:41:41 +0100737 # expovariate(1/beta)
leodema63d15222018-12-24 07:54:25 +0100738 return -_log(1.0 - random()) * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000739
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700740 else:
741 # alpha is between 0 and 1 (exclusive)
Tim Petersd7b5e882001-01-25 03:36:26 +0000742 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700743 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000744 u = random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700745 b = (_e + alpha) / _e
746 p = b * u
Tim Petersd7b5e882001-01-25 03:36:26 +0000747 if p <= 1.0:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700748 x = p ** (1.0 / alpha)
Tim Petersd7b5e882001-01-25 03:36:26 +0000749 else:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700750 x = -_log((b - p) / alpha)
Tim Petersd7b5e882001-01-25 03:36:26 +0000751 u1 = random()
Raymond Hettinger42406e62005-04-30 09:02:51 +0000752 if p > 1.0:
753 if u1 <= x ** (alpha - 1.0):
754 break
755 elif u1 <= _exp(-x):
Tim Petersd7b5e882001-01-25 03:36:26 +0000756 break
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000757 return x * beta
758
Tim Petersd7b5e882001-01-25 03:36:26 +0000759 def betavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000760 """Beta distribution.
761
Thomas Woutersb2137042007-02-01 18:02:27 +0000762 Conditions on the parameters are alpha > 0 and beta > 0.
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000763 Returned values range between 0 and 1.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000764
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000765 """
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700766 ## See
767 ## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html
768 ## for Ivan Frohne's insightful analysis of why the original implementation:
769 ##
770 ## def betavariate(self, alpha, beta):
771 ## # Discrete Event Simulation in C, pp 87-88.
772 ##
773 ## y = self.expovariate(alpha)
774 ## z = self.expovariate(1.0/beta)
775 ## return z/(y+z)
776 ##
777 ## was dead wrong, and how it probably got that way.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000778
Tim Peters85e2e472001-01-26 06:49:56 +0000779 # This version due to Janne Sinkkonen, and matches all the std
780 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
Raymond Hettinger650c1c92016-06-25 05:36:42 +0300781 y = self.gammavariate(alpha, 1.0)
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700782 if y:
Raymond Hettinger650c1c92016-06-25 05:36:42 +0300783 return y / (y + self.gammavariate(beta, 1.0))
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700784 return 0.0
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000785
Tim Petersd7b5e882001-01-25 03:36:26 +0000786 def paretovariate(self, alpha):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000787 """Pareto distribution. alpha is the shape parameter."""
Tim Petersd7b5e882001-01-25 03:36:26 +0000788 # Jain, pg. 495
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000789
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000790 u = 1.0 - self.random()
Raymond Hettinger5c327092020-08-01 01:18:26 -0700791 return u ** (-1.0 / alpha)
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000792
Tim Petersd7b5e882001-01-25 03:36:26 +0000793 def weibullvariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000794 """Weibull distribution.
795
796 alpha is the scale parameter and beta is the shape parameter.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000797
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000798 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000799 # Jain, pg. 499; bug fix courtesy Bill Arms
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000800
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000801 u = 1.0 - self.random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700802 return alpha * (-_log(u)) ** (1.0 / beta)
803
Guido van Rossum6c395ba1999-08-18 13:53:28 +0000804
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700805## ------------------------------------------------------------------
Raymond Hettinger23f12412004-09-13 22:23:21 +0000806## --------------- Operating System Random Source ------------------
Raymond Hettinger356a4592004-08-30 06:14:31 +0000807
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700808
Raymond Hettinger23f12412004-09-13 22:23:21 +0000809class SystemRandom(Random):
810 """Alternate random number generator using sources provided
811 by the operating system (such as /dev/urandom on Unix or
812 CryptGenRandom on Windows).
Raymond Hettinger356a4592004-08-30 06:14:31 +0000813
814 Not available on all systems (see os.urandom() for details).
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700815
Raymond Hettinger356a4592004-08-30 06:14:31 +0000816 """
817
818 def random(self):
819 """Get the next random number in the range [0.0, 1.0)."""
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000820 return (int.from_bytes(_urandom(7), 'big') >> 3) * RECIP_BPF
Raymond Hettinger356a4592004-08-30 06:14:31 +0000821
822 def getrandbits(self, k):
Serhiy Storchaka95949422013-08-27 19:40:23 +0300823 """getrandbits(k) -> x. Generates an int with k random bits."""
Antoine Pitrou75a33782020-04-17 19:32:14 +0200824 if k < 0:
825 raise ValueError('number of bits must be non-negative')
Raymond Hettinger63b17672010-09-08 19:27:59 +0000826 numbytes = (k + 7) // 8 # bits / 8 and rounded up
827 x = int.from_bytes(_urandom(numbytes), 'big')
828 return x >> (numbytes * 8 - k) # trim excess bits
Raymond Hettinger356a4592004-08-30 06:14:31 +0000829
Victor Stinner9f5fe792020-04-17 19:05:35 +0200830 def randbytes(self, n):
831 """Generate n random bytes."""
832 # os.urandom(n) fails with ValueError for n < 0
833 # and returns an empty bytes string for n == 0.
834 return _urandom(n)
835
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000836 def seed(self, *args, **kwds):
Raymond Hettinger23f12412004-09-13 22:23:21 +0000837 "Stub method. Not used for a system random number generator."
Raymond Hettinger356a4592004-08-30 06:14:31 +0000838 return None
Raymond Hettinger356a4592004-08-30 06:14:31 +0000839
840 def _notimplemented(self, *args, **kwds):
Raymond Hettinger23f12412004-09-13 22:23:21 +0000841 "Method should not be called for a system random number generator."
842 raise NotImplementedError('System entropy source does not have state.')
Raymond Hettinger356a4592004-08-30 06:14:31 +0000843 getstate = setstate = _notimplemented
844
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700845
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700846# ----------------------------------------------------------------------
847# Create one instance, seeded from current time, and export its methods
848# as module-level functions. The functions share state across all uses
849# (both in the user's code and in the Python libraries), but that's fine
850# for most programs and is easier for the casual user than making them
851# instantiate their own Random() instance.
852
853_inst = Random()
854seed = _inst.seed
855random = _inst.random
856uniform = _inst.uniform
857triangular = _inst.triangular
858randint = _inst.randint
859choice = _inst.choice
860randrange = _inst.randrange
861sample = _inst.sample
862shuffle = _inst.shuffle
863choices = _inst.choices
864normalvariate = _inst.normalvariate
865lognormvariate = _inst.lognormvariate
866expovariate = _inst.expovariate
867vonmisesvariate = _inst.vonmisesvariate
868gammavariate = _inst.gammavariate
869gauss = _inst.gauss
870betavariate = _inst.betavariate
871paretovariate = _inst.paretovariate
872weibullvariate = _inst.weibullvariate
873getstate = _inst.getstate
874setstate = _inst.setstate
875getrandbits = _inst.getrandbits
876randbytes = _inst.randbytes
877
878
879## ------------------------------------------------------
880## ----------------- test program -----------------------
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000881
Raymond Hettinger62297132003-08-30 01:24:19 +0000882def _test_generator(n, func, args):
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700883 from statistics import stdev, fmean as mean
884 from time import perf_counter
885
886 t0 = perf_counter()
887 data = [func(*args) for i in range(n)]
888 t1 = perf_counter()
889
890 xbar = mean(data)
891 sigma = stdev(data, xbar)
892 low = min(data)
893 high = max(data)
894
895 print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}')
896 print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high))
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000897
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000898
899def _test(N=2000):
Raymond Hettinger62297132003-08-30 01:24:19 +0000900 _test_generator(N, random, ())
901 _test_generator(N, normalvariate, (0.0, 1.0))
902 _test_generator(N, lognormvariate, (0.0, 1.0))
903 _test_generator(N, vonmisesvariate, (0.0, 1.0))
904 _test_generator(N, gammavariate, (0.01, 1.0))
905 _test_generator(N, gammavariate, (0.1, 1.0))
906 _test_generator(N, gammavariate, (0.1, 2.0))
907 _test_generator(N, gammavariate, (0.5, 1.0))
908 _test_generator(N, gammavariate, (0.9, 1.0))
909 _test_generator(N, gammavariate, (1.0, 1.0))
910 _test_generator(N, gammavariate, (2.0, 1.0))
911 _test_generator(N, gammavariate, (20.0, 1.0))
912 _test_generator(N, gammavariate, (200.0, 1.0))
913 _test_generator(N, gauss, (0.0, 1.0))
914 _test_generator(N, betavariate, (3.0, 3.0))
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700915 _test_generator(N, triangular, (0.0, 1.0, 1.0 / 3.0))
Tim Peterscd804102001-01-25 20:25:57 +0000916
Raymond Hettinger40f62172002-12-29 23:03:38 +0000917
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700918## ------------------------------------------------------
919## ------------------ fork support ---------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000920
Antoine Pitrou346cbd32017-05-27 17:50:54 +0200921if hasattr(_os, "fork"):
Gregory P. Smith163468a2017-05-29 10:03:41 -0700922 _os.register_at_fork(after_in_child=_inst.seed)
Antoine Pitrou346cbd32017-05-27 17:50:54 +0200923
924
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000925if __name__ == '__main__':
Tim Petersd7b5e882001-01-25 03:36:26 +0000926 _test()