blob: 3a835aef0bc1d4388056e94a39d3a286cbdc67e6 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Random variable generators.
Guido van Rossumff03b1a1994-03-09 12:55:02 +00002
Raymond Hettingeref19bad2020-06-25 17:03:50 -07003 bytes
4 -----
5 uniform bytes (values between 0 and 255)
6
Tim Petersd7b5e882001-01-25 03:36:26 +00007 integers
8 --------
9 uniform within range
10
11 sequences
12 ---------
13 pick random element
Raymond Hettingerf24eb352002-11-12 17:41:57 +000014 pick random sample
Raymond Hettingere8f1e002016-09-06 17:15:29 -070015 pick weighted random sample
Tim Petersd7b5e882001-01-25 03:36:26 +000016 generate random permutation
17
Guido van Rossume7b146f2000-02-04 15:28:42 +000018 distributions on the real line:
19 ------------------------------
Tim Petersd7b5e882001-01-25 03:36:26 +000020 uniform
Christian Heimesfe337bf2008-03-23 21:54:12 +000021 triangular
Guido van Rossume7b146f2000-02-04 15:28:42 +000022 normal (Gaussian)
23 lognormal
24 negative exponential
25 gamma
26 beta
Raymond Hettinger40f62172002-12-29 23:03:38 +000027 pareto
28 Weibull
Guido van Rossumff03b1a1994-03-09 12:55:02 +000029
Guido van Rossume7b146f2000-02-04 15:28:42 +000030 distributions on the circle (angles 0 to 2pi)
31 ---------------------------------------------
32 circular uniform
33 von Mises
34
Raymond Hettinger40f62172002-12-29 23:03:38 +000035General notes on the underlying Mersenne Twister core generator:
Guido van Rossume7b146f2000-02-04 15:28:42 +000036
Raymond Hettinger40f62172002-12-29 23:03:38 +000037* The period is 2**19937-1.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000038* It is one of the most extensively tested generators in existence.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000039* The random() method is implemented in C, executes in a single Python step,
40 and is, therefore, threadsafe.
Tim Peterse360d952001-01-26 10:00:39 +000041
Guido van Rossume7b146f2000-02-04 15:28:42 +000042"""
Guido van Rossumd03e1191998-05-29 17:51:31 +000043
Raymond Hettingeref19bad2020-06-25 17:03:50 -070044# Translated by Guido van Rossum from C source provided by
45# Adrian Baddeley. Adapted by Raymond Hettinger for use with
46# the Mersenne Twister and os.urandom() core generators.
47
Raymond Hettinger2f726e92003-10-05 09:09:15 +000048from warnings import warn as _warn
Raymond Hettinger91e27c22005-08-19 01:36:35 +000049from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
Raymond Hettinger26a1ad12020-06-22 19:38:59 -070050from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
Ram Rachumb0dfc752020-09-29 04:32:10 +030051from math import tau as TWOPI, floor as _floor, isfinite as _isfinite
Raymond Hettingerc1c43ca2004-09-05 00:00:42 +000052from os import urandom as _urandom
Christian Heimesf1dc3ee2013-10-13 02:04:20 +020053from _collections_abc import Set as _Set, Sequence as _Sequence
Raymond Hettingera9621bb2020-12-28 11:10:34 -080054from operator import index as _index
Raymond Hettinger53822032019-02-16 13:30:51 -080055from itertools import accumulate as _accumulate, repeat as _repeat
Raymond Hettingercfd31f02019-02-13 02:04:17 -080056from bisect import bisect as _bisect
Antoine Pitrou346cbd32017-05-27 17:50:54 +020057import os as _os
Raymond Hettingeref19bad2020-06-25 17:03:50 -070058import _random
Guido van Rossumff03b1a1994-03-09 12:55:02 +000059
Christian Heimesd9145962019-04-10 22:18:02 +020060try:
61 # hashlib is pretty heavy to load, try lean internal module first
62 from _sha512 import sha512 as _sha512
63except ImportError:
64 # fallback to official implementation
65 from hashlib import sha512 as _sha512
66
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070067__all__ = [
68 "Random",
69 "SystemRandom",
70 "betavariate",
71 "choice",
72 "choices",
73 "expovariate",
74 "gammavariate",
75 "gauss",
76 "getrandbits",
77 "getstate",
78 "lognormvariate",
79 "normalvariate",
80 "paretovariate",
Setrak Balian998ae1f2021-01-15 17:50:42 +000081 "randbytes",
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070082 "randint",
83 "random",
84 "randrange",
85 "sample",
86 "seed",
87 "setstate",
88 "shuffle",
89 "triangular",
90 "uniform",
91 "vonmisesvariate",
92 "weibullvariate",
93]
Tim Petersd7b5e882001-01-25 03:36:26 +000094
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070095NV_MAGICCONST = 4 * _exp(-0.5) / _sqrt(2.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000096LOG4 = _log(4.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000097SG_MAGICCONST = 1.0 + _log(4.5)
Raymond Hettinger2f726e92003-10-05 09:09:15 +000098BPF = 53 # Number of bits in a float
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -070099RECIP_BPF = 2 ** -BPF
Raymond Hettinger768fa142021-01-02 10:24:51 -0800100_ONE = 1
Tim Petersd7b5e882001-01-25 03:36:26 +0000101
Raymond Hettinger356a4592004-08-30 06:14:31 +0000102
Raymond Hettinger145a4a02003-01-07 10:25:55 +0000103class Random(_random.Random):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000104 """Random number generator base class used by bound module functions.
105
106 Used to instantiate instances of Random to get generators that don't
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000107 share state.
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000108
109 Class Random can also be subclassed if you want to use a different basic
110 generator of your own devising: in that case, override the following
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000111 methods: random(), seed(), getstate(), and setstate().
Benjamin Petersond18de0e2008-07-31 20:21:46 +0000112 Optionally, implement a getrandbits() method so that randrange()
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000113 can cover arbitrarily large ranges.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000114
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000115 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000116
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000117 VERSION = 3 # used by getstate/setstate
Tim Petersd7b5e882001-01-25 03:36:26 +0000118
119 def __init__(self, x=None):
120 """Initialize an instance.
121
122 Optional argument x controls seeding, as for Random.seed().
123 """
124
125 self.seed(x)
Raymond Hettinger40f62172002-12-29 23:03:38 +0000126 self.gauss_next = None
Tim Petersd7b5e882001-01-25 03:36:26 +0000127
Raymond Hettingerf763a722010-09-07 00:38:15 +0000128 def seed(self, a=None, version=2):
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700129 """Initialize internal state from a seed.
130
131 The only supported seed types are None, int, float,
132 str, bytes, and bytearray.
Tim Petersd7b5e882001-01-25 03:36:26 +0000133
Raymond Hettinger23f12412004-09-13 22:23:21 +0000134 None or no argument seeds from current time or from an operating
135 system specific randomness source if available.
Tim Peters0de88fc2001-02-01 04:59:18 +0000136
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000137 If *a* is an int, all bits are used.
Raymond Hettingerf763a722010-09-07 00:38:15 +0000138
Raymond Hettinger16eb8272016-09-04 11:17:28 -0700139 For version 2 (the default), all of the bits are used if *a* is a str,
140 bytes, or bytearray. For version 1 (provided for reproducing random
141 sequences from older versions of Python), the algorithm for str and
142 bytes generates a narrower range of seeds.
143
Tim Petersd7b5e882001-01-25 03:36:26 +0000144 """
145
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700146 if version == 1 and isinstance(a, (str, bytes)):
Raymond Hettinger132a7d72017-09-17 09:04:30 -0700147 a = a.decode('latin-1') if isinstance(a, bytes) else a
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700148 x = ord(a[0]) << 7 if a else 0
Raymond Hettinger132a7d72017-09-17 09:04:30 -0700149 for c in map(ord, a):
150 x = ((1000003 * x) ^ c) & 0xFFFFFFFFFFFFFFFF
Raymond Hettingerc7bab7c2016-08-31 15:01:08 -0700151 x ^= len(a)
152 a = -2 if x == -1 else x
153
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700154 elif version == 2 and isinstance(a, (str, bytes, bytearray)):
Raymond Hettinger2f9cc7a2016-08-31 23:00:32 -0700155 if isinstance(a, str):
156 a = a.encode()
157 a += _sha512(a).digest()
158 a = int.from_bytes(a, 'big')
Raymond Hettingerf763a722010-09-07 00:38:15 +0000159
Raymond Hettingerd0cdeaa2019-08-22 09:19:36 -0700160 elif not isinstance(a, (type(None), int, float, str, bytes, bytearray)):
161 _warn('Seeding based on hashing is deprecated\n'
162 'since Python 3.9 and will be removed in a subsequent '
163 'version. The only \n'
164 'supported seed types are: None, '
165 'int, float, str, bytes, and bytearray.',
166 DeprecationWarning, 2)
167
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000168 super().seed(a)
Tim Peters46c04e12002-05-05 20:40:00 +0000169 self.gauss_next = None
170
Tim Peterscd804102001-01-25 20:25:57 +0000171 def getstate(self):
172 """Return internal state; can be passed to setstate() later."""
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000173 return self.VERSION, super().getstate(), self.gauss_next
Tim Peterscd804102001-01-25 20:25:57 +0000174
175 def setstate(self, state):
176 """Restore internal state from object returned by getstate()."""
177 version = state[0]
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000178 if version == 3:
Raymond Hettinger40f62172002-12-29 23:03:38 +0000179 version, internalstate, self.gauss_next = state
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000180 super().setstate(internalstate)
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000181 elif version == 2:
182 version, internalstate, self.gauss_next = state
183 # In version 2, the state was saved as signed ints, which causes
184 # inconsistencies between 32/64-bit systems. The state is
185 # really unsigned 32-bit ints, so we convert negative ints from
186 # version 2 to positive longs for version 3.
187 try:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700188 internalstate = tuple(x % (2 ** 32) for x in internalstate)
Christian Heimescbf3b5c2007-12-03 21:02:03 +0000189 except ValueError as e:
190 raise TypeError from e
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000191 super().setstate(internalstate)
Tim Peterscd804102001-01-25 20:25:57 +0000192 else:
193 raise ValueError("state with version %s passed to "
194 "Random.setstate() of version %s" %
195 (version, self.VERSION))
196
Tim Peterscd804102001-01-25 20:25:57 +0000197
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700198 ## -------------------------------------------------------
199 ## ---- Methods below this point do not need to be overridden or extended
200 ## ---- when subclassing for the purpose of using a different core generator.
Victor Stinner2d875772020-04-29 18:49:00 +0200201
Victor Stinner2d875772020-04-29 18:49:00 +0200202
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700203 ## -------------------- pickle support -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000204
R David Murrayd9ebf4d2013-04-02 13:10:52 -0400205 # Issue 17489: Since __reduce__ was defined to fix #759889 this is no
206 # longer called; we leave it here because it has been here since random was
207 # rewritten back in 2001 and why risk breaking something.
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700208 def __getstate__(self): # for pickle
Tim Peterscd804102001-01-25 20:25:57 +0000209 return self.getstate()
210
211 def __setstate__(self, state): # for pickle
212 self.setstate(state)
213
Raymond Hettinger5f078ff2003-06-24 20:29:04 +0000214 def __reduce__(self):
215 return self.__class__, (), self.getstate()
216
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700217
218 ## ---- internal support method for evenly distributed integers ----
219
220 def __init_subclass__(cls, /, **kwargs):
221 """Control how subclasses generate random integers.
222
223 The algorithm a subclass can use depends on the random() and/or
224 getrandbits() implementation available to it and determines
225 whether it can generate random integers from arbitrarily large
226 ranges.
227 """
228
229 for c in cls.__mro__:
230 if '_randbelow' in c.__dict__:
231 # just inherit it
232 break
233 if 'getrandbits' in c.__dict__:
234 cls._randbelow = cls._randbelow_with_getrandbits
235 break
236 if 'random' in c.__dict__:
237 cls._randbelow = cls._randbelow_without_getrandbits
238 break
239
240 def _randbelow_with_getrandbits(self, n):
241 "Return a random int in the range [0,n). Returns 0 if n==0."
242
243 if not n:
244 return 0
245 getrandbits = self.getrandbits
246 k = n.bit_length() # don't use (n-1) here because n can be 1
247 r = getrandbits(k) # 0 <= r < 2**k
248 while r >= n:
249 r = getrandbits(k)
250 return r
251
252 def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF):
253 """Return a random int in the range [0,n). Returns 0 if n==0.
254
255 The implementation does not use getrandbits, but only random.
256 """
257
258 random = self.random
259 if n >= maxsize:
260 _warn("Underlying random() generator does not supply \n"
261 "enough bits to choose from a population range this large.\n"
262 "To remove the range limitation, add a getrandbits() method.")
263 return _floor(random() * n)
264 if n == 0:
265 return 0
266 rem = maxsize % n
267 limit = (maxsize - rem) / maxsize # int(limit * maxsize) % n == 0
268 r = random()
269 while r >= limit:
270 r = random()
271 return _floor(r * maxsize) % n
272
273 _randbelow = _randbelow_with_getrandbits
274
275
276 ## --------------------------------------------------------
277 ## ---- Methods below this point generate custom distributions
278 ## ---- based on the methods defined above. They do not
279 ## ---- directly touch the underlying generator and only
280 ## ---- access randomness through the methods: random(),
281 ## ---- getrandbits(), or _randbelow().
282
283
284 ## -------------------- bytes methods ---------------------
285
286 def randbytes(self, n):
287 """Generate n random bytes."""
288 return self.getrandbits(n * 8).to_bytes(n, 'little')
289
290
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700291 ## -------------------- integer methods -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000292
Raymond Hettinger768fa142021-01-02 10:24:51 -0800293 def randrange(self, start, stop=None, step=_ONE):
Tim Petersd7b5e882001-01-25 03:36:26 +0000294 """Choose a random item from range(start, stop[, step]).
295
296 This fixes the problem with randint() which includes the
297 endpoint; in Python this is usually not what you want.
Raymond Hettinger3051cc32010-09-07 00:48:40 +0000298
Tim Petersd7b5e882001-01-25 03:36:26 +0000299 """
300
301 # This code is a bit messy to make it fast for the
Tim Peters9146f272002-08-16 03:41:39 +0000302 # common case while still doing adequate error checking.
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800303 try:
304 istart = _index(start)
305 except TypeError:
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200306 istart = int(start)
307 if istart != start:
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800308 _warn('randrange() will raise TypeError in the future',
309 DeprecationWarning, 2)
310 raise ValueError("non-integer arg 1 for randrange()")
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200311 _warn('non-integer arguments to randrange() have been deprecated '
312 'since Python 3.10 and will be removed in a subsequent '
313 'version',
314 DeprecationWarning, 2)
Raymond Hettinger3051cc32010-09-07 00:48:40 +0000315 if stop is None:
Raymond Hettinger768fa142021-01-02 10:24:51 -0800316 # We don't check for "step != 1" because it hasn't been
317 # type checked and converted to an integer yet.
318 if step is not _ONE:
319 raise TypeError('Missing a non-None stop argument')
Tim Petersd7b5e882001-01-25 03:36:26 +0000320 if istart > 0:
Raymond Hettinger05156612010-09-07 04:44:52 +0000321 return self._randbelow(istart)
Collin Winterce36ad82007-08-30 01:19:48 +0000322 raise ValueError("empty range for randrange()")
Tim Peters9146f272002-08-16 03:41:39 +0000323
324 # stop argument supplied.
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800325 try:
326 istop = _index(stop)
327 except TypeError:
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200328 istop = int(stop)
329 if istop != stop:
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800330 _warn('randrange() will raise TypeError in the future',
331 DeprecationWarning, 2)
332 raise ValueError("non-integer stop for randrange()")
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200333 _warn('non-integer arguments to randrange() have been deprecated '
334 'since Python 3.10 and will be removed in a subsequent '
335 'version',
336 DeprecationWarning, 2)
337 width = istop - istart
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800338 try:
339 istep = _index(step)
340 except TypeError:
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200341 istep = int(step)
342 if istep != step:
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800343 _warn('randrange() will raise TypeError in the future',
344 DeprecationWarning, 2)
345 raise ValueError("non-integer step for randrange()")
Serhiy Storchakaf066bd92021-01-25 23:02:04 +0200346 _warn('non-integer arguments to randrange() have been deprecated '
347 'since Python 3.10 and will be removed in a subsequent '
348 'version',
349 DeprecationWarning, 2)
350 # Fast path.
Raymond Hettingera9621bb2020-12-28 11:10:34 -0800351 if istep == 1:
Raymond Hettinger8f8de732021-01-02 12:09:56 -0800352 if width > 0:
353 return istart + self._randbelow(width)
Kumar Akshay2433a2a2019-01-22 00:49:59 +0530354 raise ValueError("empty range for randrange() (%d, %d, %d)" % (istart, istop, width))
Tim Peters9146f272002-08-16 03:41:39 +0000355
356 # Non-unit step argument supplied.
Tim Petersd7b5e882001-01-25 03:36:26 +0000357 if istep > 0:
Raymond Hettingerffdb8bb2004-09-27 15:29:05 +0000358 n = (width + istep - 1) // istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000359 elif istep < 0:
Raymond Hettingerffdb8bb2004-09-27 15:29:05 +0000360 n = (width + istep + 1) // istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000361 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000362 raise ValueError("zero step for randrange()")
Tim Petersd7b5e882001-01-25 03:36:26 +0000363 if n <= 0:
Collin Winterce36ad82007-08-30 01:19:48 +0000364 raise ValueError("empty range for randrange()")
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700365 return istart + istep * self._randbelow(n)
Tim Petersd7b5e882001-01-25 03:36:26 +0000366
367 def randint(self, a, b):
Tim Peterscd804102001-01-25 20:25:57 +0000368 """Return random integer in range [a, b], including both end points.
Tim Petersd7b5e882001-01-25 03:36:26 +0000369 """
370
371 return self.randrange(a, b+1)
372
Wolfgang Maierba3a87a2018-04-17 17:16:17 +0200373
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700374 ## -------------------- sequence methods -------------------
Tim Peterscd804102001-01-25 20:25:57 +0000375
Tim Petersd7b5e882001-01-25 03:36:26 +0000376 def choice(self, seq):
377 """Choose a random element from a non-empty sequence."""
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700378 # raises IndexError if seq is empty
379 return seq[self._randbelow(len(seq))]
Tim Petersd7b5e882001-01-25 03:36:26 +0000380
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700381 def shuffle(self, x, random=None):
Antoine Pitrou5e394332012-11-04 02:10:33 +0100382 """Shuffle list x in place, and return None.
Tim Petersd7b5e882001-01-25 03:36:26 +0000383
Antoine Pitrou5e394332012-11-04 02:10:33 +0100384 Optional argument random is a 0-argument function returning a
385 random float in [0.0, 1.0); if it is the default None, the
386 standard random.random will be used.
Senthil Kumaranf8ce51a2013-09-11 22:54:31 -0700387
Tim Petersd7b5e882001-01-25 03:36:26 +0000388 """
389
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700390 if random is None:
391 randbelow = self._randbelow
392 for i in reversed(range(1, len(x))):
393 # pick an element in x[:i+1] with which to exchange x[i]
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700394 j = randbelow(i + 1)
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700395 x[i], x[j] = x[j], x[i]
396 else:
Raymond Hettinger190fac92020-05-02 16:45:32 -0700397 _warn('The *random* parameter to shuffle() has been deprecated\n'
398 'since Python 3.9 and will be removed in a subsequent '
399 'version.',
400 DeprecationWarning, 2)
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700401 floor = _floor
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700402 for i in reversed(range(1, len(x))):
403 # pick an element in x[:i+1] with which to exchange x[i]
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700404 j = floor(random() * (i + 1))
Raymond Hettinger8fe47c32013-10-05 21:48:21 -0700405 x[i], x[j] = x[j], x[i]
Tim Petersd7b5e882001-01-25 03:36:26 +0000406
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700407 def sample(self, population, k, *, counts=None):
Raymond Hettinger1acde192008-01-14 01:00:53 +0000408 """Chooses k unique random elements from a population sequence or set.
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000409
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000410 Returns a new list containing elements from the population while
411 leaving the original population unchanged. The resulting list is
412 in selection order so that all sub-slices will also be valid random
413 samples. This allows raffle winners (the sample) to be partitioned
414 into grand prize and second place winners (the subslices).
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000415
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000416 Members of the population need not be hashable or unique. If the
417 population contains repeats, then each occurrence is a possible
418 selection in the sample.
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000419
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700420 Repeated elements can be specified one at a time or with the optional
421 counts parameter. For example:
422
423 sample(['red', 'blue'], counts=[4, 2], k=5)
424
425 is equivalent to:
426
427 sample(['red', 'red', 'red', 'red', 'blue', 'blue'], k=5)
428
429 To choose a sample from a range of integers, use range() for the
430 population argument. This is especially fast and space efficient
431 for sampling from a large population:
432
433 sample(range(10000000), 60)
434
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000435 """
436
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000437 # Sampling without replacement entails tracking either potential
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000438 # selections (the pool) in a list or previous selections in a set.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000439
Jeremy Hylton2b55d352004-02-23 17:27:57 +0000440 # When the number of selections is small compared to the
441 # population, then tracking selections is efficient, requiring
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000442 # only a small set and an occasional reselection. For
Jeremy Hylton2b55d352004-02-23 17:27:57 +0000443 # a larger number of selections, the pool tracking method is
444 # preferred since the list takes less space than the
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000445 # set and it doesn't suffer from frequent reselections.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000446
Raymond Hettinger7fc633f2018-12-04 00:13:38 -0800447 # The number of calls to _randbelow() is kept at or near k, the
448 # theoretical minimum. This is important because running time
449 # is dominated by _randbelow() and because it extracts the
450 # least entropy from the underlying random number generators.
451
452 # Memory requirements are kept to the smaller of a k-length
453 # set or an n-length list.
454
455 # There are other sampling algorithms that do not require
456 # auxiliary memory, but they were rejected because they made
457 # too many calls to _randbelow(), making them slower and
458 # causing them to eat more entropy than necessary.
459
Raymond Hettinger57d1a882011-02-23 00:46:28 +0000460 if not isinstance(population, _Sequence):
masklinn1e27b572020-12-19 05:33:36 +0100461 if isinstance(population, _Set):
462 _warn('Sampling from a set deprecated\n'
463 'since Python 3.9 and will be removed in a subsequent version.',
464 DeprecationWarning, 2)
465 population = tuple(population)
466 else:
467 raise TypeError("Population must be a sequence. For dicts or sets, use sorted(d).")
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000468 n = len(population)
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700469 if counts is not None:
470 cum_counts = list(_accumulate(counts))
471 if len(cum_counts) != n:
472 raise ValueError('The number of counts does not match the population')
473 total = cum_counts.pop()
474 if not isinstance(total, int):
475 raise TypeError('Counts must be integers')
476 if total <= 0:
477 raise ValueError('Total of counts must be greater than zero')
jonanifrancof7b5bacd2021-01-18 19:04:29 +0100478 selections = self.sample(range(total), k=k)
Raymond Hettinger81a5fc32020-05-08 07:53:15 -0700479 bisect = _bisect
480 return [population[bisect(cum_counts, s)] for s in selections]
481 randbelow = self._randbelow
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000482 if not 0 <= k <= n:
Raymond Hettingerbf871262016-11-21 14:34:33 -0800483 raise ValueError("Sample larger than population or is negative")
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000484 result = [None] * k
Raymond Hettinger91e27c22005-08-19 01:36:35 +0000485 setsize = 21 # size of a small set minus size of an empty list
486 if k > 5:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700487 setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
Raymond Hettinger1acde192008-01-14 01:00:53 +0000488 if n <= setsize:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700489 # An n-length list is smaller than a k-length set.
490 # Invariant: non-selected at pool[0 : n-i]
Raymond Hettinger311f4192002-11-18 09:01:24 +0000491 pool = list(population)
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700492 for i in range(k):
493 j = randbelow(n - i)
Raymond Hettinger311f4192002-11-18 09:01:24 +0000494 result[i] = pool[j]
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700495 pool[j] = pool[n - i - 1] # move non-selected item into vacancy
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000496 else:
Raymond Hettinger1acde192008-01-14 01:00:53 +0000497 selected = set()
498 selected_add = selected.add
499 for i in range(k):
Raymond Hettinger05a505f2010-09-07 19:19:33 +0000500 j = randbelow(n)
Raymond Hettinger1acde192008-01-14 01:00:53 +0000501 while j in selected:
Raymond Hettinger05a505f2010-09-07 19:19:33 +0000502 j = randbelow(n)
Raymond Hettinger1acde192008-01-14 01:00:53 +0000503 selected_add(j)
504 result[i] = population[j]
Raymond Hettinger311f4192002-11-18 09:01:24 +0000505 return result
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000506
Raymond Hettinger9016f282016-09-26 21:45:57 -0700507 def choices(self, population, weights=None, *, cum_weights=None, k=1):
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700508 """Return a k sized list of population elements chosen with replacement.
509
510 If the relative weights or cumulative weights are not specified,
511 the selections are made with equal probability.
512
513 """
Raymond Hettinger30d00e52016-10-29 16:55:36 -0700514 random = self.random
Raymond Hettingere69cd162018-07-04 15:28:20 -0700515 n = len(population)
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700516 if cum_weights is None:
517 if weights is None:
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700518 floor = _floor
Raymond Hettinger0a18e052018-11-09 02:39:50 -0800519 n += 0.0 # convert to float for a small speed improvement
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700520 return [population[floor(random() * n)] for i in _repeat(None, k)]
Raymond Hettinger2a36b092021-04-19 20:29:48 -0700521 try:
522 cum_weights = list(_accumulate(weights))
523 except TypeError:
524 if not isinstance(weights, int):
525 raise
526 k = weights
527 raise TypeError(
528 f'The number of choices must be a keyword argument: {k=}'
529 ) from None
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700530 elif weights is not None:
Raymond Hettinger24e42392016-11-13 00:42:56 -0500531 raise TypeError('Cannot specify both weights and cumulative weights')
Raymond Hettingere69cd162018-07-04 15:28:20 -0700532 if len(cum_weights) != n:
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700533 raise ValueError('The number of weights does not match the population')
Raymond Hettinger0a18e052018-11-09 02:39:50 -0800534 total = cum_weights[-1] + 0.0 # convert to float
Raymond Hettinger041d8b42019-11-23 02:22:13 -0800535 if total <= 0.0:
536 raise ValueError('Total of weights must be greater than zero')
Ram Rachumb0dfc752020-09-29 04:32:10 +0300537 if not _isfinite(total):
538 raise ValueError('Total of weights must be finite')
Raymond Hettinger041d8b42019-11-23 02:22:13 -0800539 bisect = _bisect
Raymond Hettingere69cd162018-07-04 15:28:20 -0700540 hi = n - 1
Raymond Hettingerddf71712018-06-27 01:08:31 -0700541 return [population[bisect(cum_weights, random() * total, 0, hi)]
Raymond Hettinger53822032019-02-16 13:30:51 -0800542 for i in _repeat(None, k)]
Raymond Hettingere8f1e002016-09-06 17:15:29 -0700543
Tim Peterscd804102001-01-25 20:25:57 +0000544
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700545 ## -------------------- real-valued distributions -------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000546
547 def uniform(self, a, b):
Raymond Hettingerbe40db02009-06-11 23:12:14 +0000548 "Get a random number in the range [a, b) or [a, b] depending on rounding."
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700549 return a + (b - a) * self.random()
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000550
Christian Heimesfe337bf2008-03-23 21:54:12 +0000551 def triangular(self, low=0.0, high=1.0, mode=None):
552 """Triangular distribution.
553
554 Continuous distribution bounded by given lower and upper limits,
555 and having a given mode value in-between.
556
557 http://en.wikipedia.org/wiki/Triangular_distribution
558
559 """
560 u = self.random()
Raymond Hettinger978c6ab2014-05-25 17:25:27 -0700561 try:
562 c = 0.5 if mode is None else (mode - low) / (high - low)
563 except ZeroDivisionError:
564 return low
Christian Heimesfe337bf2008-03-23 21:54:12 +0000565 if u > c:
566 u = 1.0 - u
567 c = 1.0 - c
568 low, high = high, low
Raymond Hettingerf5ea83f2017-09-04 16:51:06 -0700569 return low + (high - low) * _sqrt(u * c)
Christian Heimesfe337bf2008-03-23 21:54:12 +0000570
Tim Petersd7b5e882001-01-25 03:36:26 +0000571 def normalvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000572 """Normal distribution.
573
574 mu is the mean, and sigma is the standard deviation.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000575
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000576 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000577 # Uses Kinderman and Monahan method. Reference: Kinderman,
578 # A.J. and Monahan, J.F., "Computer generation of random
579 # variables using the ratio of uniform deviates", ACM Trans
580 # Math Software, 3, (1977), pp257-260.
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000581
Tim Petersd7b5e882001-01-25 03:36:26 +0000582 random = self.random
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700583 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000584 u1 = random()
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000585 u2 = 1.0 - random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700586 z = NV_MAGICCONST * (u1 - 0.5) / u2
587 zz = z * z / 4.0
Tim Petersd7b5e882001-01-25 03:36:26 +0000588 if zz <= -_log(u2):
589 break
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700590 return mu + z * sigma
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000591
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700592 def gauss(self, mu, sigma):
593 """Gaussian distribution.
594
595 mu is the mean, and sigma is the standard deviation. This is
596 slightly faster than the normalvariate() function.
597
598 Not thread-safe without a lock around calls.
599
600 """
601 # When x and y are two variables from [0, 1), uniformly
602 # distributed, then
603 #
604 # cos(2*pi*x)*sqrt(-2*log(1-y))
605 # sin(2*pi*x)*sqrt(-2*log(1-y))
606 #
607 # are two *independent* variables with normal distribution
608 # (mu = 0, sigma = 1).
609 # (Lambert Meertens)
610 # (corrected version; bug discovered by Mike Miller, fixed by LM)
611
612 # Multithreading note: When two threads call this function
613 # simultaneously, it is possible that they will receive the
614 # same return value. The window is very small though. To
615 # avoid this, you have to use a lock around all calls. (I
616 # didn't want to slow this down in the serial case by using a
617 # lock here.)
618
619 random = self.random
620 z = self.gauss_next
621 self.gauss_next = None
622 if z is None:
623 x2pi = random() * TWOPI
624 g2rad = _sqrt(-2.0 * _log(1.0 - random()))
625 z = _cos(x2pi) * g2rad
626 self.gauss_next = _sin(x2pi) * g2rad
627
628 return mu + z * sigma
Tim Petersd7b5e882001-01-25 03:36:26 +0000629
630 def lognormvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000631 """Log normal distribution.
632
633 If you take the natural logarithm of this distribution, you'll get a
634 normal distribution with mean mu and standard deviation sigma.
635 mu can have any value, and sigma must be greater than zero.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000636
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000637 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000638 return _exp(self.normalvariate(mu, sigma))
639
Tim Petersd7b5e882001-01-25 03:36:26 +0000640 def expovariate(self, lambd):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000641 """Exponential distribution.
642
Mark Dickinson2f947362009-01-07 17:54:07 +0000643 lambd is 1.0 divided by the desired mean. It should be
644 nonzero. (The parameter would be called "lambda", but that is
645 a reserved word in Python.) Returned values range from 0 to
646 positive infinity if lambd is positive, and from negative
647 infinity to 0 if lambd is negative.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000648
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000649 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000650 # lambd: rate lambd = 1/mean
651 # ('lambda' is a Python reserved word)
652
Raymond Hettinger5279fb92011-06-25 11:30:53 +0200653 # we use 1-random() instead of random() to preclude the
654 # possibility of taking the log of zero.
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700655 return -_log(1.0 - self.random()) / lambd
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000656
Tim Petersd7b5e882001-01-25 03:36:26 +0000657 def vonmisesvariate(self, mu, kappa):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000658 """Circular data distribution.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000659
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000660 mu is the mean angle, expressed in radians between 0 and 2*pi, and
661 kappa is the concentration parameter, which must be greater than or
662 equal to zero. If kappa is equal to zero, this distribution reduces
663 to a uniform random angle over the range 0 to 2*pi.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000664
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000665 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000666 # Based upon an algorithm published in: Fisher, N.I.,
667 # "Statistical Analysis of Circular Data", Cambridge
668 # University Press, 1993.
669
670 # Thanks to Magnus Kessler for a correction to the
671 # implementation of step 4.
672
673 random = self.random
674 if kappa <= 1e-6:
675 return TWOPI * random()
676
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200677 s = 0.5 / kappa
678 r = s + _sqrt(1.0 + s * s)
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000679
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700680 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000681 u1 = random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000682 z = _cos(_pi * u1)
Tim Petersd7b5e882001-01-25 03:36:26 +0000683
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200684 d = z / (r + z)
Tim Petersd7b5e882001-01-25 03:36:26 +0000685 u2 = random()
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200686 if u2 < 1.0 - d * d or u2 <= (1.0 - d) * _exp(d):
Tim Peters0c9886d2001-01-15 01:18:21 +0000687 break
Tim Petersd7b5e882001-01-25 03:36:26 +0000688
Serhiy Storchaka6c22b1d2013-02-10 19:28:56 +0200689 q = 1.0 / r
690 f = (q + z) / (1.0 + q * z)
Tim Petersd7b5e882001-01-25 03:36:26 +0000691 u3 = random()
692 if u3 > 0.5:
Mark Dickinsonbe5f9192013-02-10 14:16:10 +0000693 theta = (mu + _acos(f)) % TWOPI
Tim Petersd7b5e882001-01-25 03:36:26 +0000694 else:
Mark Dickinsonbe5f9192013-02-10 14:16:10 +0000695 theta = (mu - _acos(f)) % TWOPI
Tim Petersd7b5e882001-01-25 03:36:26 +0000696
697 return theta
698
Tim Petersd7b5e882001-01-25 03:36:26 +0000699 def gammavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000700 """Gamma distribution. Not the gamma function!
701
702 Conditions on the parameters are alpha > 0 and beta > 0.
703
Raymond Hettingera8e4d6e2011-03-22 15:55:51 -0700704 The probability distribution function is:
705
706 x ** (alpha - 1) * math.exp(-x / beta)
707 pdf(x) = --------------------------------------
708 math.gamma(alpha) * beta ** alpha
709
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000710 """
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000711 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
Tim Peters8ac14952002-05-23 15:15:30 +0000712
Guido van Rossum570764d2002-05-14 14:08:12 +0000713 # Warning: a few older sources define the gamma distribution in terms
714 # of alpha > -1.0
715 if alpha <= 0.0 or beta <= 0.0:
Collin Winterce36ad82007-08-30 01:19:48 +0000716 raise ValueError('gammavariate: alpha and beta must be > 0.0')
Tim Peters8ac14952002-05-23 15:15:30 +0000717
Tim Petersd7b5e882001-01-25 03:36:26 +0000718 random = self.random
Tim Petersd7b5e882001-01-25 03:36:26 +0000719 if alpha > 1.0:
720
721 # Uses R.C.H. Cheng, "The generation of Gamma
722 # variables with non-integral shape parameters",
723 # Applied Statistics, (1977), 26, No. 1, p71-74
724
Raymond Hettingerca6cdc22002-05-13 23:40:14 +0000725 ainv = _sqrt(2.0 * alpha - 1.0)
726 bbb = alpha - LOG4
727 ccc = alpha + ainv
Tim Peters8ac14952002-05-23 15:15:30 +0000728
Raymond Hettinger6a613f92020-08-02 12:03:32 -0700729 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000730 u1 = random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700731 if not 1e-7 < u1 < 0.9999999:
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000732 continue
733 u2 = 1.0 - random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700734 v = _log(u1 / (1.0 - u1)) / ainv
735 x = alpha * _exp(v)
736 z = u1 * u1 * u2
737 r = bbb + ccc * v - x
738 if r + SG_MAGICCONST - 4.5 * z >= 0.0 or r >= _log(z):
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000739 return x * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000740
741 elif alpha == 1.0:
leodema9f396b62017-06-04 07:41:41 +0100742 # expovariate(1/beta)
leodema63d15222018-12-24 07:54:25 +0100743 return -_log(1.0 - random()) * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000744
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700745 else:
746 # alpha is between 0 and 1 (exclusive)
Tim Petersd7b5e882001-01-25 03:36:26 +0000747 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700748 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000749 u = random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700750 b = (_e + alpha) / _e
751 p = b * u
Tim Petersd7b5e882001-01-25 03:36:26 +0000752 if p <= 1.0:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700753 x = p ** (1.0 / alpha)
Tim Petersd7b5e882001-01-25 03:36:26 +0000754 else:
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700755 x = -_log((b - p) / alpha)
Tim Petersd7b5e882001-01-25 03:36:26 +0000756 u1 = random()
Raymond Hettinger42406e62005-04-30 09:02:51 +0000757 if p > 1.0:
758 if u1 <= x ** (alpha - 1.0):
759 break
760 elif u1 <= _exp(-x):
Tim Petersd7b5e882001-01-25 03:36:26 +0000761 break
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000762 return x * beta
763
Tim Petersd7b5e882001-01-25 03:36:26 +0000764 def betavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000765 """Beta distribution.
766
Thomas Woutersb2137042007-02-01 18:02:27 +0000767 Conditions on the parameters are alpha > 0 and beta > 0.
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000768 Returned values range between 0 and 1.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000769
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000770 """
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700771 ## See
772 ## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html
773 ## for Ivan Frohne's insightful analysis of why the original implementation:
774 ##
775 ## def betavariate(self, alpha, beta):
776 ## # Discrete Event Simulation in C, pp 87-88.
777 ##
778 ## y = self.expovariate(alpha)
779 ## z = self.expovariate(1.0/beta)
780 ## return z/(y+z)
781 ##
782 ## was dead wrong, and how it probably got that way.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000783
Tim Peters85e2e472001-01-26 06:49:56 +0000784 # This version due to Janne Sinkkonen, and matches all the std
785 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
Raymond Hettinger650c1c92016-06-25 05:36:42 +0300786 y = self.gammavariate(alpha, 1.0)
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700787 if y:
Raymond Hettinger650c1c92016-06-25 05:36:42 +0300788 return y / (y + self.gammavariate(beta, 1.0))
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700789 return 0.0
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000790
Tim Petersd7b5e882001-01-25 03:36:26 +0000791 def paretovariate(self, alpha):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000792 """Pareto distribution. alpha is the shape parameter."""
Tim Petersd7b5e882001-01-25 03:36:26 +0000793 # Jain, pg. 495
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000794
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000795 u = 1.0 - self.random()
Raymond Hettinger5c327092020-08-01 01:18:26 -0700796 return u ** (-1.0 / alpha)
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000797
Tim Petersd7b5e882001-01-25 03:36:26 +0000798 def weibullvariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000799 """Weibull distribution.
800
801 alpha is the scale parameter and beta is the shape parameter.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000802
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000803 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000804 # Jain, pg. 499; bug fix courtesy Bill Arms
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000805
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000806 u = 1.0 - self.random()
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700807 return alpha * (-_log(u)) ** (1.0 / beta)
808
Guido van Rossum6c395ba1999-08-18 13:53:28 +0000809
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700810## ------------------------------------------------------------------
Raymond Hettinger23f12412004-09-13 22:23:21 +0000811## --------------- Operating System Random Source ------------------
Raymond Hettinger356a4592004-08-30 06:14:31 +0000812
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700813
Raymond Hettinger23f12412004-09-13 22:23:21 +0000814class SystemRandom(Random):
815 """Alternate random number generator using sources provided
816 by the operating system (such as /dev/urandom on Unix or
817 CryptGenRandom on Windows).
Raymond Hettinger356a4592004-08-30 06:14:31 +0000818
819 Not available on all systems (see os.urandom() for details).
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700820
Raymond Hettinger356a4592004-08-30 06:14:31 +0000821 """
822
823 def random(self):
824 """Get the next random number in the range [0.0, 1.0)."""
Raymond Hettinger183cd1f2010-09-08 18:48:21 +0000825 return (int.from_bytes(_urandom(7), 'big') >> 3) * RECIP_BPF
Raymond Hettinger356a4592004-08-30 06:14:31 +0000826
827 def getrandbits(self, k):
Serhiy Storchaka95949422013-08-27 19:40:23 +0300828 """getrandbits(k) -> x. Generates an int with k random bits."""
Antoine Pitrou75a33782020-04-17 19:32:14 +0200829 if k < 0:
830 raise ValueError('number of bits must be non-negative')
Raymond Hettinger63b17672010-09-08 19:27:59 +0000831 numbytes = (k + 7) // 8 # bits / 8 and rounded up
832 x = int.from_bytes(_urandom(numbytes), 'big')
833 return x >> (numbytes * 8 - k) # trim excess bits
Raymond Hettinger356a4592004-08-30 06:14:31 +0000834
Victor Stinner9f5fe792020-04-17 19:05:35 +0200835 def randbytes(self, n):
836 """Generate n random bytes."""
837 # os.urandom(n) fails with ValueError for n < 0
838 # and returns an empty bytes string for n == 0.
839 return _urandom(n)
840
Raymond Hettinger28de64f2008-01-13 23:40:30 +0000841 def seed(self, *args, **kwds):
Raymond Hettinger23f12412004-09-13 22:23:21 +0000842 "Stub method. Not used for a system random number generator."
Raymond Hettinger356a4592004-08-30 06:14:31 +0000843 return None
Raymond Hettinger356a4592004-08-30 06:14:31 +0000844
845 def _notimplemented(self, *args, **kwds):
Raymond Hettinger23f12412004-09-13 22:23:21 +0000846 "Method should not be called for a system random number generator."
847 raise NotImplementedError('System entropy source does not have state.')
Raymond Hettinger356a4592004-08-30 06:14:31 +0000848 getstate = setstate = _notimplemented
849
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700850
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700851# ----------------------------------------------------------------------
852# Create one instance, seeded from current time, and export its methods
853# as module-level functions. The functions share state across all uses
854# (both in the user's code and in the Python libraries), but that's fine
855# for most programs and is easier for the casual user than making them
856# instantiate their own Random() instance.
857
858_inst = Random()
859seed = _inst.seed
860random = _inst.random
861uniform = _inst.uniform
862triangular = _inst.triangular
863randint = _inst.randint
864choice = _inst.choice
865randrange = _inst.randrange
866sample = _inst.sample
867shuffle = _inst.shuffle
868choices = _inst.choices
869normalvariate = _inst.normalvariate
870lognormvariate = _inst.lognormvariate
871expovariate = _inst.expovariate
872vonmisesvariate = _inst.vonmisesvariate
873gammavariate = _inst.gammavariate
874gauss = _inst.gauss
875betavariate = _inst.betavariate
876paretovariate = _inst.paretovariate
877weibullvariate = _inst.weibullvariate
878getstate = _inst.getstate
879setstate = _inst.setstate
880getrandbits = _inst.getrandbits
881randbytes = _inst.randbytes
882
883
884## ------------------------------------------------------
885## ----------------- test program -----------------------
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000886
Raymond Hettinger62297132003-08-30 01:24:19 +0000887def _test_generator(n, func, args):
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700888 from statistics import stdev, fmean as mean
889 from time import perf_counter
890
891 t0 = perf_counter()
Raymond Hettingerd9dda322021-02-04 21:36:03 -0800892 data = [func(*args) for i in _repeat(None, n)]
Raymond Hettinger26a1ad12020-06-22 19:38:59 -0700893 t1 = perf_counter()
894
895 xbar = mean(data)
896 sigma = stdev(data, xbar)
897 low = min(data)
898 high = max(data)
899
900 print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}')
901 print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high))
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000902
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000903
904def _test(N=2000):
Raymond Hettinger62297132003-08-30 01:24:19 +0000905 _test_generator(N, random, ())
906 _test_generator(N, normalvariate, (0.0, 1.0))
907 _test_generator(N, lognormvariate, (0.0, 1.0))
908 _test_generator(N, vonmisesvariate, (0.0, 1.0))
909 _test_generator(N, gammavariate, (0.01, 1.0))
910 _test_generator(N, gammavariate, (0.1, 1.0))
911 _test_generator(N, gammavariate, (0.1, 2.0))
912 _test_generator(N, gammavariate, (0.5, 1.0))
913 _test_generator(N, gammavariate, (0.9, 1.0))
914 _test_generator(N, gammavariate, (1.0, 1.0))
915 _test_generator(N, gammavariate, (2.0, 1.0))
916 _test_generator(N, gammavariate, (20.0, 1.0))
917 _test_generator(N, gammavariate, (200.0, 1.0))
918 _test_generator(N, gauss, (0.0, 1.0))
919 _test_generator(N, betavariate, (3.0, 3.0))
Raymond Hettinger9db5b8d2020-06-13 09:46:47 -0700920 _test_generator(N, triangular, (0.0, 1.0, 1.0 / 3.0))
Tim Peterscd804102001-01-25 20:25:57 +0000921
Raymond Hettinger40f62172002-12-29 23:03:38 +0000922
Raymond Hettingeref19bad2020-06-25 17:03:50 -0700923## ------------------------------------------------------
924## ------------------ fork support ---------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000925
Antoine Pitrou346cbd32017-05-27 17:50:54 +0200926if hasattr(_os, "fork"):
Gregory P. Smith163468a2017-05-29 10:03:41 -0700927 _os.register_at_fork(after_in_child=_inst.seed)
Antoine Pitrou346cbd32017-05-27 17:50:54 +0200928
929
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000930if __name__ == '__main__':
Tim Petersd7b5e882001-01-25 03:36:26 +0000931 _test()