blob: 0047c9137a07ec537cc0d7f1e1e40d0406dd20fd [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""Random variable generators.
Guido van Rossumff03b1a1994-03-09 12:55:02 +00002
Tim Petersd7b5e882001-01-25 03:36:26 +00003 integers
4 --------
5 uniform within range
6
7 sequences
8 ---------
9 pick random element
Raymond Hettingerf24eb352002-11-12 17:41:57 +000010 pick random sample
Tim Petersd7b5e882001-01-25 03:36:26 +000011 generate random permutation
12
Guido van Rossume7b146f2000-02-04 15:28:42 +000013 distributions on the real line:
14 ------------------------------
Tim Petersd7b5e882001-01-25 03:36:26 +000015 uniform
Guido van Rossume7b146f2000-02-04 15:28:42 +000016 normal (Gaussian)
17 lognormal
18 negative exponential
19 gamma
20 beta
Raymond Hettinger40f62172002-12-29 23:03:38 +000021 pareto
22 Weibull
Guido van Rossumff03b1a1994-03-09 12:55:02 +000023
Guido van Rossume7b146f2000-02-04 15:28:42 +000024 distributions on the circle (angles 0 to 2pi)
25 ---------------------------------------------
26 circular uniform
27 von Mises
28
Raymond Hettinger40f62172002-12-29 23:03:38 +000029General notes on the underlying Mersenne Twister core generator:
Guido van Rossume7b146f2000-02-04 15:28:42 +000030
Raymond Hettinger40f62172002-12-29 23:03:38 +000031* The period is 2**19937-1.
32* It is one of the most extensively tested generators in existence
33* Without a direct way to compute N steps forward, the
34 semantics of jumpahead(n) are weakened to simply jump
35 to another distant state and rely on the large period
36 to avoid overlapping sequences.
37* The random() method is implemented in C, executes in
38 a single Python step, and is, therefore, threadsafe.
Tim Peterse360d952001-01-26 10:00:39 +000039
Guido van Rossume7b146f2000-02-04 15:28:42 +000040"""
Guido van Rossumd03e1191998-05-29 17:51:31 +000041
Raymond Hettinger2f726e92003-10-05 09:09:15 +000042from warnings import warn as _warn
43from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
Tim Petersd7b5e882001-01-25 03:36:26 +000044from math import log as _log, exp as _exp, pi as _pi, e as _e
45from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
Tim Peters7c2a85b2004-08-31 02:19:55 +000046from math import floor as _floor
Guido van Rossumff03b1a1994-03-09 12:55:02 +000047
Raymond Hettingerf24eb352002-11-12 17:41:57 +000048__all__ = ["Random","seed","random","uniform","randint","choice","sample",
Skip Montanaro0de65802001-02-15 22:15:14 +000049 "randrange","shuffle","normalvariate","lognormvariate",
Raymond Hettingerf8a52d32003-08-05 12:23:19 +000050 "expovariate","vonmisesvariate","gammavariate",
51 "gauss","betavariate","paretovariate","weibullvariate",
Raymond Hettinger356a4592004-08-30 06:14:31 +000052 "getstate","setstate","jumpahead", "WichmannHill", "getrandbits",
53 "HardwareRandom"]
Tim Petersd7b5e882001-01-25 03:36:26 +000054
55NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000056TWOPI = 2.0*_pi
Tim Petersd7b5e882001-01-25 03:36:26 +000057LOG4 = _log(4.0)
Tim Petersd7b5e882001-01-25 03:36:26 +000058SG_MAGICCONST = 1.0 + _log(4.5)
Raymond Hettinger2f726e92003-10-05 09:09:15 +000059BPF = 53 # Number of bits in a float
Tim Peters7c2a85b2004-08-31 02:19:55 +000060RECIP_BPF = 2**-BPF
Tim Petersd7b5e882001-01-25 03:36:26 +000061
Raymond Hettinger356a4592004-08-30 06:14:31 +000062try:
63 from os import urandom as _urandom
64 from binascii import hexlify as _hexlify
65except ImportError:
66 _urandom = None
Raymond Hettinger356a4592004-08-30 06:14:31 +000067
68
Tim Petersd7b5e882001-01-25 03:36:26 +000069# Translated by Guido van Rossum from C source provided by
Raymond Hettinger40f62172002-12-29 23:03:38 +000070# Adrian Baddeley. Adapted by Raymond Hettinger for use with
Raymond Hettinger3fa19d72004-08-31 01:05:15 +000071# the Mersenne Twister and os.urandom() core generators.
Tim Petersd7b5e882001-01-25 03:36:26 +000072
Raymond Hettinger145a4a02003-01-07 10:25:55 +000073import _random
Raymond Hettinger40f62172002-12-29 23:03:38 +000074
Raymond Hettinger145a4a02003-01-07 10:25:55 +000075class Random(_random.Random):
Raymond Hettingerc32f0332002-05-23 19:44:49 +000076 """Random number generator base class used by bound module functions.
77
78 Used to instantiate instances of Random to get generators that don't
79 share state. Especially useful for multi-threaded programs, creating
80 a different instance of Random for each thread, and using the jumpahead()
81 method to ensure that the generated sequences seen by each thread don't
82 overlap.
83
84 Class Random can also be subclassed if you want to use a different basic
85 generator of your own devising: in that case, override the following
86 methods: random(), seed(), getstate(), setstate() and jumpahead().
Raymond Hettinger2f726e92003-10-05 09:09:15 +000087 Optionally, implement a getrandombits() method so that randrange()
88 can cover arbitrarily large ranges.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +000089
Raymond Hettingerc32f0332002-05-23 19:44:49 +000090 """
Tim Petersd7b5e882001-01-25 03:36:26 +000091
Raymond Hettinger40f62172002-12-29 23:03:38 +000092 VERSION = 2 # used by getstate/setstate
Tim Petersd7b5e882001-01-25 03:36:26 +000093
94 def __init__(self, x=None):
95 """Initialize an instance.
96
97 Optional argument x controls seeding, as for Random.seed().
98 """
99
100 self.seed(x)
Raymond Hettinger40f62172002-12-29 23:03:38 +0000101 self.gauss_next = None
Tim Petersd7b5e882001-01-25 03:36:26 +0000102
Tim Peters0de88fc2001-02-01 04:59:18 +0000103 def seed(self, a=None):
104 """Initialize internal state from hashable object.
Tim Petersd7b5e882001-01-25 03:36:26 +0000105
Raymond Hettinger356a4592004-08-30 06:14:31 +0000106 None or no argument seeds from current time or from a hardware
107 randomness source if available.
Tim Peters0de88fc2001-02-01 04:59:18 +0000108
Tim Petersbcd725f2001-02-01 10:06:53 +0000109 If a is not None or an int or long, hash(a) is used instead.
Tim Petersd7b5e882001-01-25 03:36:26 +0000110 """
111
Raymond Hettinger3081d592003-08-09 18:30:57 +0000112 if a is None:
Raymond Hettinger356a4592004-08-30 06:14:31 +0000113 if _urandom is None:
114 import time
115 a = long(time.time() * 256) # use fractional seconds
116 else:
117 a = long(_hexlify(_urandom(16)), 16)
118
Raymond Hettinger145a4a02003-01-07 10:25:55 +0000119 super(Random, self).seed(a)
Tim Peters46c04e12002-05-05 20:40:00 +0000120 self.gauss_next = None
121
Tim Peterscd804102001-01-25 20:25:57 +0000122 def getstate(self):
123 """Return internal state; can be passed to setstate() later."""
Raymond Hettinger145a4a02003-01-07 10:25:55 +0000124 return self.VERSION, super(Random, self).getstate(), self.gauss_next
Tim Peterscd804102001-01-25 20:25:57 +0000125
126 def setstate(self, state):
127 """Restore internal state from object returned by getstate()."""
128 version = state[0]
Raymond Hettinger40f62172002-12-29 23:03:38 +0000129 if version == 2:
130 version, internalstate, self.gauss_next = state
Raymond Hettinger145a4a02003-01-07 10:25:55 +0000131 super(Random, self).setstate(internalstate)
Tim Peterscd804102001-01-25 20:25:57 +0000132 else:
133 raise ValueError("state with version %s passed to "
134 "Random.setstate() of version %s" %
135 (version, self.VERSION))
136
Tim Peterscd804102001-01-25 20:25:57 +0000137## ---- Methods below this point do not need to be overridden when
138## ---- subclassing for the purpose of using a different core generator.
139
140## -------------------- pickle support -------------------
141
142 def __getstate__(self): # for pickle
143 return self.getstate()
144
145 def __setstate__(self, state): # for pickle
146 self.setstate(state)
147
Raymond Hettinger5f078ff2003-06-24 20:29:04 +0000148 def __reduce__(self):
149 return self.__class__, (), self.getstate()
150
Tim Peterscd804102001-01-25 20:25:57 +0000151## -------------------- integer methods -------------------
152
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000153 def randrange(self, start, stop=None, step=1, int=int, default=None,
154 maxwidth=1L<<BPF):
Tim Petersd7b5e882001-01-25 03:36:26 +0000155 """Choose a random item from range(start, stop[, step]).
156
157 This fixes the problem with randint() which includes the
158 endpoint; in Python this is usually not what you want.
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000159 Do not supply the 'int', 'default', and 'maxwidth' arguments.
Tim Petersd7b5e882001-01-25 03:36:26 +0000160 """
161
162 # This code is a bit messy to make it fast for the
Tim Peters9146f272002-08-16 03:41:39 +0000163 # common case while still doing adequate error checking.
Tim Petersd7b5e882001-01-25 03:36:26 +0000164 istart = int(start)
165 if istart != start:
166 raise ValueError, "non-integer arg 1 for randrange()"
167 if stop is default:
168 if istart > 0:
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000169 if istart >= maxwidth:
170 return self._randbelow(istart)
Tim Petersd7b5e882001-01-25 03:36:26 +0000171 return int(self.random() * istart)
172 raise ValueError, "empty range for randrange()"
Tim Peters9146f272002-08-16 03:41:39 +0000173
174 # stop argument supplied.
Tim Petersd7b5e882001-01-25 03:36:26 +0000175 istop = int(stop)
176 if istop != stop:
177 raise ValueError, "non-integer stop for randrange()"
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000178 width = istop - istart
179 if step == 1 and width > 0:
Tim Peters76ca1d42003-06-19 03:46:46 +0000180 # Note that
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000181 # int(istart + self.random()*width)
Tim Peters76ca1d42003-06-19 03:46:46 +0000182 # instead would be incorrect. For example, consider istart
183 # = -2 and istop = 0. Then the guts would be in
184 # -2.0 to 0.0 exclusive on both ends (ignoring that random()
185 # might return 0.0), and because int() truncates toward 0, the
186 # final result would be -1 or 0 (instead of -2 or -1).
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000187 # istart + int(self.random()*width)
Tim Peters76ca1d42003-06-19 03:46:46 +0000188 # would also be incorrect, for a subtler reason: the RHS
189 # can return a long, and then randrange() would also return
190 # a long, but we're supposed to return an int (for backward
191 # compatibility).
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000192
193 if width >= maxwidth:
Tim Peters58eb11c2004-01-18 20:29:55 +0000194 return int(istart + self._randbelow(width))
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000195 return int(istart + int(self.random()*width))
Tim Petersd7b5e882001-01-25 03:36:26 +0000196 if step == 1:
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000197 raise ValueError, "empty range for randrange() (%d,%d, %d)" % (istart, istop, width)
Tim Peters9146f272002-08-16 03:41:39 +0000198
199 # Non-unit step argument supplied.
Tim Petersd7b5e882001-01-25 03:36:26 +0000200 istep = int(step)
201 if istep != step:
202 raise ValueError, "non-integer step for randrange()"
203 if istep > 0:
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000204 n = (width + istep - 1) / istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000205 elif istep < 0:
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000206 n = (width + istep + 1) / istep
Tim Petersd7b5e882001-01-25 03:36:26 +0000207 else:
208 raise ValueError, "zero step for randrange()"
209
210 if n <= 0:
211 raise ValueError, "empty range for randrange()"
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000212
213 if n >= maxwidth:
214 return istart + self._randbelow(n)
Tim Petersd7b5e882001-01-25 03:36:26 +0000215 return istart + istep*int(self.random() * n)
216
217 def randint(self, a, b):
Tim Peterscd804102001-01-25 20:25:57 +0000218 """Return random integer in range [a, b], including both end points.
Tim Petersd7b5e882001-01-25 03:36:26 +0000219 """
220
221 return self.randrange(a, b+1)
222
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000223 def _randbelow(self, n, _log=_log, int=int, _maxwidth=1L<<BPF,
224 _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
225 """Return a random int in the range [0,n)
226
227 Handles the case where n has more bits than returned
228 by a single call to the underlying generator.
229 """
230
231 try:
232 getrandbits = self.getrandbits
233 except AttributeError:
234 pass
235 else:
236 # Only call self.getrandbits if the original random() builtin method
237 # has not been overridden or if a new getrandbits() was supplied.
238 # This assures that the two methods correspond.
239 if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
240 k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2)
241 r = getrandbits(k)
242 while r >= n:
243 r = getrandbits(k)
244 return r
245 if n >= _maxwidth:
246 _warn("Underlying random() generator does not supply \n"
247 "enough bits to choose from a population range this large")
248 return int(self.random() * n)
249
Tim Peterscd804102001-01-25 20:25:57 +0000250## -------------------- sequence methods -------------------
251
Tim Petersd7b5e882001-01-25 03:36:26 +0000252 def choice(self, seq):
253 """Choose a random element from a non-empty sequence."""
Raymond Hettinger5dae5052004-06-07 02:07:15 +0000254 return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
Tim Petersd7b5e882001-01-25 03:36:26 +0000255
256 def shuffle(self, x, random=None, int=int):
257 """x, random=random.random -> shuffle list x in place; return None.
258
259 Optional arg random is a 0-argument function returning a random
260 float in [0.0, 1.0); by default, the standard random.random.
261
262 Note that for even rather small len(x), the total number of
263 permutations of x is larger than the period of most random number
264 generators; this implies that "most" permutations of a long
265 sequence can never be generated.
266 """
267
268 if random is None:
269 random = self.random
Raymond Hettinger85c20a42003-11-06 14:06:48 +0000270 for i in reversed(xrange(1, len(x))):
Tim Peterscd804102001-01-25 20:25:57 +0000271 # pick an element in x[:i+1] with which to exchange x[i]
Tim Petersd7b5e882001-01-25 03:36:26 +0000272 j = int(random() * (i+1))
273 x[i], x[j] = x[j], x[i]
274
Raymond Hettingerfdbe5222003-06-13 07:01:51 +0000275 def sample(self, population, k):
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000276 """Chooses k unique random elements from a population sequence.
277
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000278 Returns a new list containing elements from the population while
279 leaving the original population unchanged. The resulting list is
280 in selection order so that all sub-slices will also be valid random
281 samples. This allows raffle winners (the sample) to be partitioned
282 into grand prize and second place winners (the subslices).
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000283
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000284 Members of the population need not be hashable or unique. If the
285 population contains repeats, then each occurrence is a possible
286 selection in the sample.
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000287
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000288 To choose a sample in a range of integers, use xrange as an argument.
289 This is especially fast and space efficient for sampling from a
290 large population: sample(xrange(10000000), 60)
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000291 """
292
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000293 # Sampling without replacement entails tracking either potential
Raymond Hettinger8b9aa8d2003-01-04 05:20:33 +0000294 # selections (the pool) in a list or previous selections in a
295 # dictionary.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000296
Jeremy Hylton2b55d352004-02-23 17:27:57 +0000297 # When the number of selections is small compared to the
298 # population, then tracking selections is efficient, requiring
299 # only a small dictionary and an occasional reselection. For
300 # a larger number of selections, the pool tracking method is
301 # preferred since the list takes less space than the
302 # dictionary and it doesn't suffer from frequent reselections.
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000303
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000304 n = len(population)
305 if not 0 <= k <= n:
306 raise ValueError, "sample larger than population"
Raymond Hettinger8b9aa8d2003-01-04 05:20:33 +0000307 random = self.random
Raymond Hettingerfdbe5222003-06-13 07:01:51 +0000308 _int = int
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000309 result = [None] * k
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000310 if n < 6 * k: # if n len list takes less space than a k len dict
Raymond Hettinger311f4192002-11-18 09:01:24 +0000311 pool = list(population)
312 for i in xrange(k): # invariant: non-selected at [0,n-i)
Raymond Hettingerfdbe5222003-06-13 07:01:51 +0000313 j = _int(random() * (n-i))
Raymond Hettinger311f4192002-11-18 09:01:24 +0000314 result[i] = pool[j]
Raymond Hettinger8b9aa8d2003-01-04 05:20:33 +0000315 pool[j] = pool[n-i-1] # move non-selected item into vacancy
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000316 else:
Raymond Hettinger66d09f12003-09-06 04:25:54 +0000317 try:
318 n > 0 and (population[0], population[n//2], population[n-1])
319 except (TypeError, KeyError): # handle sets and dictionaries
320 population = tuple(population)
Raymond Hettinger311f4192002-11-18 09:01:24 +0000321 selected = {}
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000322 for i in xrange(k):
Raymond Hettingerfdbe5222003-06-13 07:01:51 +0000323 j = _int(random() * n)
Raymond Hettinger311f4192002-11-18 09:01:24 +0000324 while j in selected:
Raymond Hettingerfdbe5222003-06-13 07:01:51 +0000325 j = _int(random() * n)
Raymond Hettingerc0b40342002-11-13 15:26:37 +0000326 result[i] = selected[j] = population[j]
Raymond Hettinger311f4192002-11-18 09:01:24 +0000327 return result
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000328
Tim Peterscd804102001-01-25 20:25:57 +0000329## -------------------- real-valued distributions -------------------
330
331## -------------------- uniform distribution -------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000332
333 def uniform(self, a, b):
334 """Get a random number in the range [a, b)."""
335 return a + (b-a) * self.random()
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000336
Tim Peterscd804102001-01-25 20:25:57 +0000337## -------------------- normal distribution --------------------
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000338
Tim Petersd7b5e882001-01-25 03:36:26 +0000339 def normalvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000340 """Normal distribution.
341
342 mu is the mean, and sigma is the standard deviation.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000343
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000344 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000345 # mu = mean, sigma = standard deviation
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000346
Tim Petersd7b5e882001-01-25 03:36:26 +0000347 # Uses Kinderman and Monahan method. Reference: Kinderman,
348 # A.J. and Monahan, J.F., "Computer generation of random
349 # variables using the ratio of uniform deviates", ACM Trans
350 # Math Software, 3, (1977), pp257-260.
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000351
Tim Petersd7b5e882001-01-25 03:36:26 +0000352 random = self.random
Raymond Hettinger311f4192002-11-18 09:01:24 +0000353 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000354 u1 = random()
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000355 u2 = 1.0 - random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000356 z = NV_MAGICCONST*(u1-0.5)/u2
357 zz = z*z/4.0
358 if zz <= -_log(u2):
359 break
360 return mu + z*sigma
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000361
Tim Peterscd804102001-01-25 20:25:57 +0000362## -------------------- lognormal distribution --------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000363
364 def lognormvariate(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000365 """Log normal distribution.
366
367 If you take the natural logarithm of this distribution, you'll get a
368 normal distribution with mean mu and standard deviation sigma.
369 mu can have any value, and sigma must be greater than zero.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000370
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000371 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000372 return _exp(self.normalvariate(mu, sigma))
373
Tim Peterscd804102001-01-25 20:25:57 +0000374## -------------------- exponential distribution --------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000375
376 def expovariate(self, lambd):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000377 """Exponential distribution.
378
379 lambd is 1.0 divided by the desired mean. (The parameter would be
380 called "lambda", but that is a reserved word in Python.) Returned
381 values range from 0 to positive infinity.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000382
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000383 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000384 # lambd: rate lambd = 1/mean
385 # ('lambda' is a Python reserved word)
386
387 random = self.random
Tim Peters0c9886d2001-01-15 01:18:21 +0000388 u = random()
389 while u <= 1e-7:
390 u = random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000391 return -_log(u)/lambd
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000392
Tim Peterscd804102001-01-25 20:25:57 +0000393## -------------------- von Mises distribution --------------------
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000394
Tim Petersd7b5e882001-01-25 03:36:26 +0000395 def vonmisesvariate(self, mu, kappa):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000396 """Circular data distribution.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000397
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000398 mu is the mean angle, expressed in radians between 0 and 2*pi, and
399 kappa is the concentration parameter, which must be greater than or
400 equal to zero. If kappa is equal to zero, this distribution reduces
401 to a uniform random angle over the range 0 to 2*pi.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000402
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000403 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000404 # mu: mean angle (in radians between 0 and 2*pi)
405 # kappa: concentration parameter kappa (>= 0)
406 # if kappa = 0 generate uniform random angle
407
408 # Based upon an algorithm published in: Fisher, N.I.,
409 # "Statistical Analysis of Circular Data", Cambridge
410 # University Press, 1993.
411
412 # Thanks to Magnus Kessler for a correction to the
413 # implementation of step 4.
414
415 random = self.random
416 if kappa <= 1e-6:
417 return TWOPI * random()
418
419 a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
420 b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
421 r = (1.0 + b * b)/(2.0 * b)
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000422
Raymond Hettinger311f4192002-11-18 09:01:24 +0000423 while True:
Tim Peters0c9886d2001-01-15 01:18:21 +0000424 u1 = random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000425
426 z = _cos(_pi * u1)
427 f = (1.0 + r * z)/(r + z)
428 c = kappa * (r - f)
429
430 u2 = random()
431
432 if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)):
Tim Peters0c9886d2001-01-15 01:18:21 +0000433 break
Tim Petersd7b5e882001-01-25 03:36:26 +0000434
435 u3 = random()
436 if u3 > 0.5:
437 theta = (mu % TWOPI) + _acos(f)
438 else:
439 theta = (mu % TWOPI) - _acos(f)
440
441 return theta
442
Tim Peterscd804102001-01-25 20:25:57 +0000443## -------------------- gamma distribution --------------------
Tim Petersd7b5e882001-01-25 03:36:26 +0000444
445 def gammavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000446 """Gamma distribution. Not the gamma function!
447
448 Conditions on the parameters are alpha > 0 and beta > 0.
449
450 """
Tim Peters8ac14952002-05-23 15:15:30 +0000451
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000452 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
Tim Peters8ac14952002-05-23 15:15:30 +0000453
Guido van Rossum570764d2002-05-14 14:08:12 +0000454 # Warning: a few older sources define the gamma distribution in terms
455 # of alpha > -1.0
456 if alpha <= 0.0 or beta <= 0.0:
457 raise ValueError, 'gammavariate: alpha and beta must be > 0.0'
Tim Peters8ac14952002-05-23 15:15:30 +0000458
Tim Petersd7b5e882001-01-25 03:36:26 +0000459 random = self.random
Tim Petersd7b5e882001-01-25 03:36:26 +0000460 if alpha > 1.0:
461
462 # Uses R.C.H. Cheng, "The generation of Gamma
463 # variables with non-integral shape parameters",
464 # Applied Statistics, (1977), 26, No. 1, p71-74
465
Raymond Hettingerca6cdc22002-05-13 23:40:14 +0000466 ainv = _sqrt(2.0 * alpha - 1.0)
467 bbb = alpha - LOG4
468 ccc = alpha + ainv
Tim Peters8ac14952002-05-23 15:15:30 +0000469
Raymond Hettinger311f4192002-11-18 09:01:24 +0000470 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000471 u1 = random()
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000472 if not 1e-7 < u1 < .9999999:
473 continue
474 u2 = 1.0 - random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000475 v = _log(u1/(1.0-u1))/ainv
476 x = alpha*_exp(v)
477 z = u1*u1*u2
478 r = bbb+ccc*v-x
479 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000480 return x * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000481
482 elif alpha == 1.0:
483 # expovariate(1)
484 u = random()
485 while u <= 1e-7:
486 u = random()
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000487 return -_log(u) * beta
Tim Petersd7b5e882001-01-25 03:36:26 +0000488
489 else: # alpha is between 0 and 1 (exclusive)
490
491 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
492
Raymond Hettinger311f4192002-11-18 09:01:24 +0000493 while True:
Tim Petersd7b5e882001-01-25 03:36:26 +0000494 u = random()
495 b = (_e + alpha)/_e
496 p = b*u
497 if p <= 1.0:
498 x = pow(p, 1.0/alpha)
499 else:
500 # p > 1
501 x = -_log((b-p)/alpha)
502 u1 = random()
503 if not (((p <= 1.0) and (u1 > _exp(-x))) or
504 ((p > 1) and (u1 > pow(x, alpha - 1.0)))):
505 break
Raymond Hettingerb760efb2002-05-14 06:40:34 +0000506 return x * beta
507
Tim Peterscd804102001-01-25 20:25:57 +0000508## -------------------- Gauss (faster alternative) --------------------
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000509
Tim Petersd7b5e882001-01-25 03:36:26 +0000510 def gauss(self, mu, sigma):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000511 """Gaussian distribution.
512
513 mu is the mean, and sigma is the standard deviation. This is
514 slightly faster than the normalvariate() function.
515
516 Not thread-safe without a lock around calls.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000517
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000518 """
Guido van Rossumcc32ac91994-03-15 16:10:24 +0000519
Tim Petersd7b5e882001-01-25 03:36:26 +0000520 # When x and y are two variables from [0, 1), uniformly
521 # distributed, then
522 #
523 # cos(2*pi*x)*sqrt(-2*log(1-y))
524 # sin(2*pi*x)*sqrt(-2*log(1-y))
525 #
526 # are two *independent* variables with normal distribution
527 # (mu = 0, sigma = 1).
528 # (Lambert Meertens)
529 # (corrected version; bug discovered by Mike Miller, fixed by LM)
Guido van Rossumcc32ac91994-03-15 16:10:24 +0000530
Tim Petersd7b5e882001-01-25 03:36:26 +0000531 # Multithreading note: When two threads call this function
532 # simultaneously, it is possible that they will receive the
533 # same return value. The window is very small though. To
534 # avoid this, you have to use a lock around all calls. (I
535 # didn't want to slow this down in the serial case by using a
536 # lock here.)
Guido van Rossumd03e1191998-05-29 17:51:31 +0000537
Tim Petersd7b5e882001-01-25 03:36:26 +0000538 random = self.random
539 z = self.gauss_next
540 self.gauss_next = None
541 if z is None:
542 x2pi = random() * TWOPI
543 g2rad = _sqrt(-2.0 * _log(1.0 - random()))
544 z = _cos(x2pi) * g2rad
545 self.gauss_next = _sin(x2pi) * g2rad
Guido van Rossumcc32ac91994-03-15 16:10:24 +0000546
Tim Petersd7b5e882001-01-25 03:36:26 +0000547 return mu + z*sigma
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000548
Tim Peterscd804102001-01-25 20:25:57 +0000549## -------------------- beta --------------------
Tim Peters85e2e472001-01-26 06:49:56 +0000550## See
551## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
552## for Ivan Frohne's insightful analysis of why the original implementation:
553##
554## def betavariate(self, alpha, beta):
555## # Discrete Event Simulation in C, pp 87-88.
556##
557## y = self.expovariate(alpha)
558## z = self.expovariate(1.0/beta)
559## return z/(y+z)
560##
561## was dead wrong, and how it probably got that way.
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000562
Tim Petersd7b5e882001-01-25 03:36:26 +0000563 def betavariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000564 """Beta distribution.
565
566 Conditions on the parameters are alpha > -1 and beta} > -1.
567 Returned values range between 0 and 1.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000568
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000569 """
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000570
Tim Peters85e2e472001-01-26 06:49:56 +0000571 # This version due to Janne Sinkkonen, and matches all the std
572 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
573 y = self.gammavariate(alpha, 1.)
574 if y == 0:
575 return 0.0
576 else:
577 return y / (y + self.gammavariate(beta, 1.))
Guido van Rossum95bfcda1994-03-09 14:21:05 +0000578
Tim Peterscd804102001-01-25 20:25:57 +0000579## -------------------- Pareto --------------------
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000580
Tim Petersd7b5e882001-01-25 03:36:26 +0000581 def paretovariate(self, alpha):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000582 """Pareto distribution. alpha is the shape parameter."""
Tim Petersd7b5e882001-01-25 03:36:26 +0000583 # Jain, pg. 495
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000584
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000585 u = 1.0 - self.random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000586 return 1.0 / pow(u, 1.0/alpha)
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000587
Tim Peterscd804102001-01-25 20:25:57 +0000588## -------------------- Weibull --------------------
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000589
Tim Petersd7b5e882001-01-25 03:36:26 +0000590 def weibullvariate(self, alpha, beta):
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000591 """Weibull distribution.
592
593 alpha is the scale parameter and beta is the shape parameter.
Raymond Hettingeref4d4bd2002-05-23 23:58:17 +0000594
Raymond Hettingerc32f0332002-05-23 19:44:49 +0000595 """
Tim Petersd7b5e882001-01-25 03:36:26 +0000596 # Jain, pg. 499; bug fix courtesy Bill Arms
Guido van Rossumcf4559a1997-12-02 02:47:39 +0000597
Raymond Hettinger73ced7e2003-01-04 09:26:32 +0000598 u = 1.0 - self.random()
Tim Petersd7b5e882001-01-25 03:36:26 +0000599 return alpha * pow(-_log(u), 1.0/beta)
Guido van Rossum6c395ba1999-08-18 13:53:28 +0000600
Raymond Hettinger40f62172002-12-29 23:03:38 +0000601## -------------------- Wichmann-Hill -------------------
602
603class WichmannHill(Random):
604
605 VERSION = 1 # used by getstate/setstate
606
607 def seed(self, a=None):
608 """Initialize internal state from hashable object.
609
Raymond Hettinger356a4592004-08-30 06:14:31 +0000610 None or no argument seeds from current time or from a hardware
611 randomness source if available.
Raymond Hettinger40f62172002-12-29 23:03:38 +0000612
613 If a is not None or an int or long, hash(a) is used instead.
614
615 If a is an int or long, a is used directly. Distinct values between
616 0 and 27814431486575L inclusive are guaranteed to yield distinct
617 internal states (this guarantee is specific to the default
618 Wichmann-Hill generator).
619 """
620
621 if a is None:
Raymond Hettinger356a4592004-08-30 06:14:31 +0000622 if _urandom is None:
623 import time
624 a = long(time.time() * 256) # use fractional seconds
625 else:
626 a = long(_hexlify(_urandom(16)), 16)
Raymond Hettinger40f62172002-12-29 23:03:38 +0000627
628 if not isinstance(a, (int, long)):
629 a = hash(a)
630
631 a, x = divmod(a, 30268)
632 a, y = divmod(a, 30306)
633 a, z = divmod(a, 30322)
634 self._seed = int(x)+1, int(y)+1, int(z)+1
635
636 self.gauss_next = None
637
638 def random(self):
639 """Get the next random number in the range [0.0, 1.0)."""
640
641 # Wichman-Hill random number generator.
642 #
643 # Wichmann, B. A. & Hill, I. D. (1982)
644 # Algorithm AS 183:
645 # An efficient and portable pseudo-random number generator
646 # Applied Statistics 31 (1982) 188-190
647 #
648 # see also:
649 # Correction to Algorithm AS 183
650 # Applied Statistics 33 (1984) 123
651 #
652 # McLeod, A. I. (1985)
653 # A remark on Algorithm AS 183
654 # Applied Statistics 34 (1985),198-200
655
656 # This part is thread-unsafe:
657 # BEGIN CRITICAL SECTION
658 x, y, z = self._seed
659 x = (171 * x) % 30269
660 y = (172 * y) % 30307
661 z = (170 * z) % 30323
662 self._seed = x, y, z
663 # END CRITICAL SECTION
664
665 # Note: on a platform using IEEE-754 double arithmetic, this can
666 # never return 0.0 (asserted by Tim; proof too long for a comment).
667 return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
668
669 def getstate(self):
670 """Return internal state; can be passed to setstate() later."""
671 return self.VERSION, self._seed, self.gauss_next
672
673 def setstate(self, state):
674 """Restore internal state from object returned by getstate()."""
675 version = state[0]
676 if version == 1:
677 version, self._seed, self.gauss_next = state
678 else:
679 raise ValueError("state with version %s passed to "
680 "Random.setstate() of version %s" %
681 (version, self.VERSION))
682
683 def jumpahead(self, n):
684 """Act as if n calls to random() were made, but quickly.
685
686 n is an int, greater than or equal to 0.
687
688 Example use: If you have 2 threads and know that each will
689 consume no more than a million random numbers, create two Random
690 objects r1 and r2, then do
691 r2.setstate(r1.getstate())
692 r2.jumpahead(1000000)
693 Then r1 and r2 will use guaranteed-disjoint segments of the full
694 period.
695 """
696
697 if not n >= 0:
698 raise ValueError("n must be >= 0")
699 x, y, z = self._seed
700 x = int(x * pow(171, n, 30269)) % 30269
701 y = int(y * pow(172, n, 30307)) % 30307
702 z = int(z * pow(170, n, 30323)) % 30323
703 self._seed = x, y, z
704
705 def __whseed(self, x=0, y=0, z=0):
706 """Set the Wichmann-Hill seed from (x, y, z).
707
708 These must be integers in the range [0, 256).
709 """
710
711 if not type(x) == type(y) == type(z) == int:
712 raise TypeError('seeds must be integers')
713 if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
714 raise ValueError('seeds must be in range(0, 256)')
715 if 0 == x == y == z:
716 # Initialize from current time
717 import time
718 t = long(time.time() * 256)
719 t = int((t&0xffffff) ^ (t>>24))
720 t, x = divmod(t, 256)
721 t, y = divmod(t, 256)
722 t, z = divmod(t, 256)
723 # Zero is a poor seed, so substitute 1
724 self._seed = (x or 1, y or 1, z or 1)
725
726 self.gauss_next = None
727
728 def whseed(self, a=None):
729 """Seed from hashable object's hash code.
730
731 None or no argument seeds from current time. It is not guaranteed
732 that objects with distinct hash codes lead to distinct internal
733 states.
734
735 This is obsolete, provided for compatibility with the seed routine
736 used prior to Python 2.1. Use the .seed() method instead.
737 """
738
739 if a is None:
740 self.__whseed()
741 return
742 a = hash(a)
743 a, x = divmod(a, 256)
744 a, y = divmod(a, 256)
745 a, z = divmod(a, 256)
746 x = (x + a) % 256 or 1
747 y = (y + a) % 256 or 1
748 z = (z + a) % 256 or 1
749 self.__whseed(x, y, z)
750
Raymond Hettinger356a4592004-08-30 06:14:31 +0000751## -------------------- Hardware Random Source -------------------
752
753class HardwareRandom(Random):
754 """Alternate random number generator using hardware sources.
755
756 Not available on all systems (see os.urandom() for details).
757 """
758
759 def random(self):
760 """Get the next random number in the range [0.0, 1.0)."""
761 if _urandom is None:
762 raise NotImplementedError('Cannot find hardware entropy source')
Tim Peters7c2a85b2004-08-31 02:19:55 +0000763 return (long(_hexlify(_urandom(7)), 16) >> 3) * RECIP_BPF
Raymond Hettinger356a4592004-08-30 06:14:31 +0000764
765 def getrandbits(self, k):
766 """getrandbits(k) -> x. Generates a long int with k random bits."""
767 if _urandom is None:
768 raise NotImplementedError('Cannot find hardware entropy source')
769 if k <= 0:
770 raise ValueError('number of bits must be greater than zero')
771 if k != int(k):
772 raise TypeError('number of bits should be an integer')
773 bytes = (k + 7) // 8 # bits / 8 and rounded up
774 x = long(_hexlify(_urandom(bytes)), 16)
775 return x >> (bytes * 8 - k) # trim excess bits
776
777 def _stub(self, *args, **kwds):
778 "Stub method. Not used for a hardware random number generator."
779 return None
780 seed = jumpahead = _stub
781
782 def _notimplemented(self, *args, **kwds):
783 "Method should not be called for a hardware random number generator."
784 raise NotImplementedError('Hardware entropy source does not have state.')
785 getstate = setstate = _notimplemented
786
Tim Peterscd804102001-01-25 20:25:57 +0000787## -------------------- test program --------------------
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000788
Raymond Hettinger62297132003-08-30 01:24:19 +0000789def _test_generator(n, func, args):
Tim Peters0c9886d2001-01-15 01:18:21 +0000790 import time
Raymond Hettinger62297132003-08-30 01:24:19 +0000791 print n, 'times', func.__name__
Raymond Hettingerb98154e2003-05-24 17:26:02 +0000792 total = 0.0
Tim Peters0c9886d2001-01-15 01:18:21 +0000793 sqsum = 0.0
794 smallest = 1e10
795 largest = -1e10
796 t0 = time.time()
797 for i in range(n):
Raymond Hettinger62297132003-08-30 01:24:19 +0000798 x = func(*args)
Raymond Hettingerb98154e2003-05-24 17:26:02 +0000799 total += x
Tim Peters0c9886d2001-01-15 01:18:21 +0000800 sqsum = sqsum + x*x
801 smallest = min(x, smallest)
802 largest = max(x, largest)
803 t1 = time.time()
804 print round(t1-t0, 3), 'sec,',
Raymond Hettingerb98154e2003-05-24 17:26:02 +0000805 avg = total/n
Tim Petersd7b5e882001-01-25 03:36:26 +0000806 stddev = _sqrt(sqsum/n - avg*avg)
Tim Peters0c9886d2001-01-15 01:18:21 +0000807 print 'avg %g, stddev %g, min %g, max %g' % \
808 (avg, stddev, smallest, largest)
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000809
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000810
811def _test(N=2000):
Raymond Hettinger62297132003-08-30 01:24:19 +0000812 _test_generator(N, random, ())
813 _test_generator(N, normalvariate, (0.0, 1.0))
814 _test_generator(N, lognormvariate, (0.0, 1.0))
815 _test_generator(N, vonmisesvariate, (0.0, 1.0))
816 _test_generator(N, gammavariate, (0.01, 1.0))
817 _test_generator(N, gammavariate, (0.1, 1.0))
818 _test_generator(N, gammavariate, (0.1, 2.0))
819 _test_generator(N, gammavariate, (0.5, 1.0))
820 _test_generator(N, gammavariate, (0.9, 1.0))
821 _test_generator(N, gammavariate, (1.0, 1.0))
822 _test_generator(N, gammavariate, (2.0, 1.0))
823 _test_generator(N, gammavariate, (20.0, 1.0))
824 _test_generator(N, gammavariate, (200.0, 1.0))
825 _test_generator(N, gauss, (0.0, 1.0))
826 _test_generator(N, betavariate, (3.0, 3.0))
Tim Peterscd804102001-01-25 20:25:57 +0000827
Tim Peters715c4c42001-01-26 22:56:56 +0000828# Create one instance, seeded from current time, and export its methods
Raymond Hettinger40f62172002-12-29 23:03:38 +0000829# as module-level functions. The functions share state across all uses
830#(both in the user's code and in the Python libraries), but that's fine
831# for most programs and is easier for the casual user than making them
832# instantiate their own Random() instance.
833
Tim Petersd7b5e882001-01-25 03:36:26 +0000834_inst = Random()
835seed = _inst.seed
836random = _inst.random
837uniform = _inst.uniform
838randint = _inst.randint
839choice = _inst.choice
840randrange = _inst.randrange
Raymond Hettingerf24eb352002-11-12 17:41:57 +0000841sample = _inst.sample
Tim Petersd7b5e882001-01-25 03:36:26 +0000842shuffle = _inst.shuffle
843normalvariate = _inst.normalvariate
844lognormvariate = _inst.lognormvariate
Tim Petersd7b5e882001-01-25 03:36:26 +0000845expovariate = _inst.expovariate
846vonmisesvariate = _inst.vonmisesvariate
847gammavariate = _inst.gammavariate
Tim Petersd7b5e882001-01-25 03:36:26 +0000848gauss = _inst.gauss
849betavariate = _inst.betavariate
850paretovariate = _inst.paretovariate
851weibullvariate = _inst.weibullvariate
852getstate = _inst.getstate
853setstate = _inst.setstate
Tim Petersd52269b2001-01-25 06:23:18 +0000854jumpahead = _inst.jumpahead
Raymond Hettinger2f726e92003-10-05 09:09:15 +0000855getrandbits = _inst.getrandbits
Tim Petersd7b5e882001-01-25 03:36:26 +0000856
Guido van Rossumff03b1a1994-03-09 12:55:02 +0000857if __name__ == '__main__':
Tim Petersd7b5e882001-01-25 03:36:26 +0000858 _test()