blob: 997110732a17657e106717621d41907407516646 [file] [log] [blame]
"""Test suite for statistics module, including helper NumericTestCase and
approx_equal function.

"""
5
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07006import bisect
Larry Hastingsf5e987b2013-10-19 11:50:09 -07007import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +03008import collections.abc
Raymond Hettinger11c79532019-02-23 14:44:07 -08009import copy
Larry Hastingsf5e987b2013-10-19 11:50:09 -070010import decimal
11import doctest
12import math
Raymond Hettinger11c79532019-02-23 14:44:07 -080013import pickle
Larry Hastingsf5e987b2013-10-19 11:50:09 -070014import random
Serhiy Storchakab12cb6a2013-12-08 18:16:18 +020015import sys
Larry Hastingsf5e987b2013-10-19 11:50:09 -070016import unittest
Neil Schemenauer52a48e62019-07-30 11:08:18 -070017from test import support
Hai Shi79bb2c92020-08-06 19:51:29 +080018from test.support import import_helper
Larry Hastingsf5e987b2013-10-19 11:50:09 -070019
20from decimal import Decimal
21from fractions import Fraction
22
23
24# Module to be tested.
25import statistics
26
27
28# === Helper functions and class ===
29
def sign(x):
    """Give the sign of ``x`` as a float of magnitude one.

    The sign is copied from ``x`` onto 1, so negative numbers -- and
    negative zero, -0.0 -- produce -1.0, while positive numbers and +0.0
    produce +1.0.
    """
    magnitude = 1
    return math.copysign(magnitude, x)
33
def _nan_equal(a, b):
    """Tell whether a and b are NANs of the same type and the same kind.

    Two Decimal NANs match only when both are quiet or both are signalling:

    >>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
    True
    >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
    True
    >>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
    False
    >>> _nan_equal(Decimal(42), Decimal('NAN'))
    False

    Floats only have to both be NANs:

    >>> _nan_equal(float('NAN'), float('NAN'))
    True
    >>> _nan_equal(float('NAN'), 0.5)
    False

    Arguments of different types never match:

    >>> _nan_equal(float('NAN'), Decimal('NAN'))
    False

    The payload carried by a NAN is ignored.
    """
    if type(a) is type(b):
        if isinstance(a, float):
            return math.isnan(a) and math.isnan(b)
        # Anything that is not a float is treated as a Decimal.  The third
        # field of as_tuple() is the exponent, which is 'n' for a quiet NAN
        # and 'N' for a signalling NAN.
        exp_a = a.as_tuple()[2]
        exp_b = b.as_tuple()[2]
        same_kind = (exp_a == exp_b)
        return same_kind and (exp_a in ('n', 'N'))
    return False
63
64
def _calc_errors(actual, expected):
    """Compute how far apart two numbers are, absolutely and relatively.

    >>> _calc_errors(100, 75)
    (25, 0.25)
    >>> _calc_errors(100, 100)
    (0, 0.0)

    The result is the tuple (absolute error, relative error).  The relative
    error is measured against whichever argument has the larger magnitude;
    when both arguments are zero it is reported as infinity.
    """
    abs_err = abs(actual - expected)
    base = max(abs(actual), abs(expected))
    if base:
        rel_err = abs_err/base
    else:
        rel_err = float('inf')
    return (abs_err, rel_err)
79
80
def approx_equal(x, y, tol=1e-12, rel=1e-7):
    """approx_equal(x, y [, tol [, rel]]) => True|False

    Decide whether the numbers x and y are equal to within a margin of
    error.  Numbers that compare equal are always approximately equal.

    The margin is the larger of the absolute error ``tol`` and the relative
    error ``rel`` scaled by the larger magnitude of x and y; x and y match
    when abs(x-y) does not exceed that margin.

    Both tol and rel must be non-negative (a negative value raises
    ValueError) and are expected to be finite.  The defaults are tol=1e-12
    and rel=1e-7.

    >>> approx_equal(1.2589, 1.2587, tol=0.0003, rel=0)
    True
    >>> approx_equal(1.2589, 1.2587, tol=0.0001, rel=0)
    False

    The absolute error is abs(x-y).  The relative error is the smaller of
    abs((x-y)/x) and abs((x-y)/y), provided x and y are not zero.  Either
    one being within its limit is enough for a match.

    Complex numbers are not supported directly; compare their real and
    imaginary parts separately instead.

    A NAN is never approximately equal to anything, itself included.  Two
    infinities match only when they have the same sign, and an infinity
    never matches a finite number.
    """
    if tol < 0 or rel < 0:
        raise ValueError('error tolerances must be non-negative')
    # A NAN on either side rules out any kind of equality.
    if any(map(math.isnan, (x, y))):
        return False
    # Exact equality (which covers same-signed infinities) always passes.
    if x == y:
        return True
    # Unequal values where at least one is infinite: opposite-signed
    # infinities, or an infinity against a finite number.
    if any(map(math.isinf, (x, y))):
        return False
    # Both values are finite from here on.
    difference = abs(x - y)
    largest = max(abs(x), abs(y))
    margin = max(tol, rel*largest)
    return difference <= margin
132
133
# This class exists only as somewhere to stick a docstring containing
# doctests. The following docstring and tests were originally in a separate
# module. Now that it has been merged in here, I need somewhere to hang the
# docstring. Ultimately, this class will die, and the information below will
# either become redundant, or be moved into more appropriate places.
class _DoNothing:
    """
    Floating point results carry round-off error, so testing numeric values
    for exact equality is frequently the wrong thing to do. The usual
    alternative is to compare them while allowing for some (hopefully
    small!) amount of error.

    With ``approx_equal`` the allowance can be given as an absolute error
    tolerance, as a relative error, or as both.

    An absolute tolerance is easy to reason about, but it only works if you
    know how large the compared quantities are:

    >>> approx_equal(12.345, 12.346, tol=1e-3)
    True
    >>> approx_equal(12.345e6, 12.346e6, tol=1e-3) # tol is too small.
    False

    A relative error copes better when the magnitude of the values varies:

    >>> approx_equal(12.345, 12.346, rel=1e-4)
    True
    >>> approx_equal(12.345e6, 12.346e6, rel=1e-4)
    True

    though a naive relative error test can get into trouble close to zero.

    When both an absolute tolerance and a relative error are supplied, it
    is enough for either one of the two tests to succeed:

    >>> approx_equal(12.345e6, 12.346e6, tol=1e-3, rel=1e-4)
    True

    """
176
177
178
179# We prefer this for testing numeric values that may not be exactly equal,
180# and avoid using TestCase.assertAlmostEqual, because it sucks :-)
181
# Import two independent copies of the statistics module so that the tests
# can look at both variants of helpers that may be accelerated:
#   * py_statistics is imported with the '_statistics' module blocked;
#   * c_statistics is imported together with a fresh '_statistics'.
# NOTE(review): c_statistics is presumably falsy when '_statistics' cannot
# be imported (TestModules skips its C test on that basis) -- confirm
# against the import_fresh_module documentation.
py_statistics = import_helper.import_fresh_module('statistics',
                                                  blocked=['_statistics'])
c_statistics = import_helper.import_fresh_module('statistics',
                                                 fresh=['_statistics'])
Dong-hee Na8ad22a42019-08-25 02:51:20 +0900186
187
class TestModules(unittest.TestCase):
    # Check which module each listed function reports as its home, for both
    # the copy imported as py_statistics and the copy imported as
    # c_statistics.
    func_names = ['_normal_dist_inv_cdf']

    def _check_defining_module(self, module, expected):
        # Every listed function found on ``module`` must report ``expected``
        # as its __module__.
        for fname in self.func_names:
            func = getattr(module, fname)
            self.assertEqual(func.__module__, expected)

    def test_py_functions(self):
        self._check_defining_module(py_statistics, 'statistics')

    @unittest.skipUnless(c_statistics, 'requires _statistics')
    def test_c_functions(self):
        self._check_defining_module(c_statistics, '_statistics')
199
200
class NumericTestCase(unittest.TestCase):
    """TestCase subclass tailored to numeric work.

    Everything ``unittest.TestCase`` provides is still available (including
    ``TestCase.assertAlmostEqual``); this class adds ``assertApproxEqual``.
    """
    # Both tolerances default to zero, i.e. exact equality is required
    # unless a subclass or an individual call says otherwise.
    tol = rel = 0

    def assertApproxEqual(
            self, first, second, tol=None, rel=None, msg=None
            ):
        """Fail unless ``first`` and ``second`` are approximately equal.

        The values must agree to within the absolute error ``tol`` or the
        relative error ``rel``.

        A ``tol`` or ``rel`` that is None or omitted falls back to the
        attribute of the same name on the test (0 unless overridden).

        ``first`` and ``second`` may be numbers or sequences of numbers.
        Two sequences are compared one element at a time.

        >>> class MyTest(NumericTestCase):
        ...     def test_number(self):
        ...         x = 1.0/6
        ...         y = sum([x]*6)
        ...         self.assertApproxEqual(y, 1.0, tol=1e-15)
        ...     def test_sequence(self):
        ...         a = [1.001, 1.001e-10, 1.001e10]
        ...         b = [1.0, 1e-10, 1e10]
        ...         self.assertApproxEqual(a, b, rel=1e-3)
        ...
        >>> import unittest
        >>> from io import StringIO  # Suppress test runner output.
        >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest)
        >>> unittest.TextTestRunner(stream=StringIO()).run(suite)
        <unittest.runner.TextTestResult run=2 errors=0 failures=0>

        """
        tol = self.tol if tol is None else tol
        rel = self.rel if rel is None else rel
        Sequence = collections.abc.Sequence
        # Element-wise comparison is used only when *both* arguments are
        # sequences; any other combination is compared as plain numbers.
        if isinstance(first, Sequence) and isinstance(second, Sequence):
            self._check_approx_seq(first, second, tol, rel, msg)
        else:
            self._check_approx_num(first, second, tol, rel, msg)

    def _check_approx_seq(self, first, second, tol, rel, msg):
        # Compare two sequences: the lengths must agree, and then every pair
        # of corresponding items must be approximately equal.
        size_first, size_second = len(first), len(second)
        if size_first != size_second:
            standardMsg = (
                "sequences differ in length: %d items != %d items"
                % (size_first, size_second)
                )
            raise self.failureException(self._formatMessage(msg, standardMsg))
        for index, pair in enumerate(zip(first, second)):
            actual, expected = pair
            self._check_approx_num(actual, expected, tol, rel, msg, index)

    def _check_approx_num(self, first, second, tol, rel, msg, idx=None):
        # Compare two numbers; ``idx`` is the position within a sequence
        # when called on behalf of _check_approx_seq.
        if not approx_equal(first, second, tol, rel):
            standardMsg = self._make_std_err_msg(first, second, tol, rel, idx)
            raise self.failureException(self._formatMessage(msg, standardMsg))

    @staticmethod
    def _make_std_err_msg(first, second, tol, rel, idx):
        # Build the standard failure message for an approx_equal mismatch.
        assert first != second
        header = ''
        if idx is not None:
            # The mismatch was found while walking a sequence.
            header = 'numeric sequences first differ at index %d.\n' % idx
        template = header + (
            ' %r != %r\n'
            ' values differ by more than tol=%r and rel=%r\n'
            ' -> absolute error = %r\n'
            ' -> relative error = %r'
            )
        # Report the errors that were actually observed.
        abs_err, rel_err = _calc_errors(first, second)
        return template % (first, second, tol, rel, abs_err, rel_err)
290
291
292# ========================
293# === Test the helpers ===
294# ========================
295
class TestSign(unittest.TestCase):
    """Check the behaviour of the sign() helper."""
    def testZeroes(self):
        # Positive and negative zero must be told apart by their sign.
        positive_zero = 0.0
        negative_zero = -0.0
        self.assertEqual(sign(positive_zero), 1)
        self.assertEqual(sign(negative_zero), -1)
302
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700303
304# --- Tests for approx_equal ---
305
class ApproxEqualSymmetryTest(unittest.TestCase):
    # Test symmetry of approx_equal: swapping the two compared values must
    # never change the outcome.

    def test_relative_symmetry(self):
        # Check that approx_equal treats relative error symmetrically.
        # (a-b)/a is usually not equal to (a-b)/b. Ensure that this
        # doesn't matter.
        #
        # Note: the reason for this test is that an early version
        # of approx_equal was not symmetric. A relative error test
        # would pass, or fail, depending on which value was passed
        # as the first argument.
        #
        args1 = [2456, 37.8, -12.45, Decimal('2.54'), Fraction(17, 54)]
        args2 = [2459, 37.2, -12.41, Decimal('2.59'), Fraction(15, 54)]
        # Sanity check of the test data itself; a real assertion is used so
        # that the check is not stripped when running under -O.
        self.assertEqual(len(args1), len(args2))
        for a, b in zip(args1, args2):
            self.do_relative_symmetry(a, b)

    def do_relative_symmetry(self, a, b):
        # Helper: compare a and b in both orders, using a relative error
        # that lies between the two possible relative errors.
        a, b = min(a, b), max(a, b)
        # The values must be distinct for the test to mean anything.
        self.assertLess(a, b)
        delta = b - a  # The absolute difference between the values.
        rel_err1, rel_err2 = abs(delta/a), abs(delta/b)
        # Choose an error margin halfway between the two.
        rel = (rel_err1 + rel_err2)/2
        # Now see that values a and b compare approx equal regardless of
        # which is given first.
        self.assertTrue(approx_equal(a, b, tol=0, rel=rel))
        self.assertTrue(approx_equal(b, a, tol=0, rel=rel))

    def test_symmetry(self):
        # Test that approx_equal(a, b) == approx_equal(b, a)
        args = [-23, -2, 5, 107, 93568]
        delta = 2
        for a in args:
            for type_ in (int, float, Decimal, Fraction):
                x = type_(a)*100
                y = x + delta
                r = abs(delta/max(x, y))
                # There are five cases to check:
                # 1) actual error <= tol, <= rel
                self.do_symmetry_test(x, y, tol=delta, rel=r)
                self.do_symmetry_test(x, y, tol=delta+1, rel=2*r)
                # 2) actual error > tol, > rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r/2)
                # 3) actual error <= tol, > rel
                self.do_symmetry_test(x, y, tol=delta, rel=r/2)
                # 4) actual error > tol, <= rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r)
                self.do_symmetry_test(x, y, tol=delta-1, rel=2*r)
                # 5) exact equality test
                self.do_symmetry_test(x, x, tol=0, rel=0)
                self.do_symmetry_test(x, y, tol=0, rel=0)

    def do_symmetry_test(self, a, b, tol, rel):
        # Helper: approx_equal must give the same answer for (a, b) and for
        # (b, a) under the given tolerances.
        template = "approx_equal comparisons don't match for %r"
        flag1 = approx_equal(a, b, tol, rel)
        flag2 = approx_equal(b, a, tol, rel)
        # The template is a %-style template, so it has to be filled in with
        # the % operator; str.format() would leave the "%r" untouched and
        # drop the arguments from the failure message.  The arguments are
        # wrapped in a 1-tuple so that they are shown as a single tuple.
        self.assertEqual(flag1, flag2, template % ((a, b, tol, rel),))
366
367
class ApproxEqualExactTest(unittest.TestCase):
    # Exercise approx_equal with values that are exactly equal.  Such
    # values must compare approximately equal whatever error tolerances
    # are supplied.

    def do_exactly_equal_test(self, x, tol, rel):
        # Compare x with itself, and then -x with itself.
        for value in (x, -x):
            outcome = approx_equal(value, value, tol=tol, rel=rel)
            self.assertTrue(outcome, 'equality failure for x=%r' % value)

    def test_exactly_equal_ints(self):
        # Equal ints, zero tolerances.
        ints = [42, 19740, 14974, 230, 1795, 700245, 36587]
        for value in ints:
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_floats(self):
        # Equal floats, zero tolerances.
        floats = [0.42, 1.9740, 1497.4, 23.0, 179.5, 70.0245, 36.587]
        for value in floats:
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_fractions(self):
        # Equal Fractions, zero tolerances.
        pairs = [(1, 2), (0, 1), (5, 3), (9, 7), (35, 36), (3, 7)]
        for numerator, denominator in pairs:
            value = Fraction(numerator, denominator)
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_decimals(self):
        # Equal Decimals, zero tolerances.
        for text in "8.2 31.274 912.04 16.745 1.2047".split():
            self.do_exactly_equal_test(Decimal(text), 0, 0)

    def test_exactly_equal_absolute(self):
        # Equal values with only an absolute error given; each number is
        # tried as an int, as a float and as a Fraction.
        for n in [16, 1013, 1372, 1198, 971, 4]:
            for value in (n, n/10, Fraction(n, 1234)):
                self.do_exactly_equal_test(value, 0.01, 0)

    def test_exactly_equal_absolute_decimals(self):
        # Equal Decimals with only an absolute error given.
        tol = Decimal("0.01")
        self.do_exactly_equal_test(Decimal("3.571"), tol, 0)
        self.do_exactly_equal_test(-Decimal("81.3971"), tol, 0)

    def test_exactly_equal_relative(self):
        # Equal values with only a relative error given.
        values = [8347, 101.3, -7910.28, Fraction(5, 21)]
        for value in values:
            self.do_exactly_equal_test(value, 0, 0.01)
        self.do_exactly_equal_test(Decimal("11.68"), 0, Decimal("0.01"))

    def test_exactly_equal_both(self):
        # Equal values with both an absolute and a relative error given.
        values = [41017, 16.742, -813.02, Fraction(3, 8)]
        for value in values:
            self.do_exactly_equal_test(value, 0.1, 0.01)
        self.do_exactly_equal_test(
            Decimal("7.2"), Decimal("0.1"), Decimal("0.01")
            )
430
431
class ApproxEqualUnequalTest(unittest.TestCase):
    # With both tolerances set to zero approx_equal is an exact equality
    # test, so values that differ must be reported as unequal.

    def do_exactly_unequal_test(self, x):
        # Compare x with x+1, and -x with -x+1, using zero tolerances.
        for start in (x, -x):
            outcome = approx_equal(start, start+1, tol=0, rel=0)
            self.assertFalse(outcome, 'inequality failure for x=%r' % start)

    def test_exactly_unequal_ints(self):
        # Differing ints, zero tolerances.
        ints = [951, 572305, 478, 917, 17240]
        for value in ints:
            self.do_exactly_unequal_test(value)

    def test_exactly_unequal_floats(self):
        # Differing floats, zero tolerances.
        floats = [9.51, 5723.05, 47.8, 9.17, 17.24]
        for value in floats:
            self.do_exactly_unequal_test(value)

    def test_exactly_unequal_fractions(self):
        # Differing Fractions, zero tolerances.
        pairs = [(1, 5), (7, 9), (12, 11), (101, 99023)]
        for numerator, denominator in pairs:
            self.do_exactly_unequal_test(Fraction(numerator, denominator))

    def test_exactly_unequal_decimals(self):
        # Differing Decimals, zero tolerances.
        for text in "3.1415 298.12 3.47 18.996 0.00245".split():
            self.do_exactly_unequal_test(Decimal(text))
461
462
class ApproxEqualInexactTest(unittest.TestCase):
    # Inexact test cases for approx_equal: the two compared values are not
    # exactly equal, so the result depends on the tolerances supplied.

    # === Absolute error tests ===

    def do_approx_equal_abs_test(self, x, delta):
        # x is compared with x+delta and with x-delta.  An absolute error
        # of twice delta must accept the pair, half of delta must reject it.
        template = "Test failure for x={!r}, y={!r}"
        for y in (x + delta, x - delta):
            msg = template.format(x, y)
            accepted = approx_equal(x, y, tol=2*delta, rel=0)
            self.assertTrue(accepted, msg)
            rejected = approx_equal(x, y, tol=delta/2, rel=0)
            self.assertFalse(rejected, msg)

    def test_approx_equal_absolute_ints(self):
        # Ints compared with an absolute error.
        ints = [-10737, -1975, -7, -2, 0, 1, 9, 37, 423, 9874, 23789110]
        for n in ints:
            for delta in (10, 2):
                self.do_approx_equal_abs_test(n, delta)

    def test_approx_equal_absolute_floats(self):
        # Floats compared with an absolute error.
        floats = [-284.126, -97.1, -3.4, -2.15, 0.5, 1.0, 7.8, 4.23, 3817.4]
        for x in floats:
            for delta in (1.5, 0.01, 0.0001):
                self.do_approx_equal_abs_test(x, delta)

    def test_approx_equal_absolute_fractions(self):
        # Fractions compared with an absolute error, the error being given
        # first as a Fraction and then as a float.
        delta = Fraction(1, 29)
        deltas = (delta, float(delta))
        for n in [-84, -15, -2, -1, 0, 1, 5, 17, 23, 34, 71]:
            f = Fraction(n, 29)
            for d in deltas:
                self.do_approx_equal_abs_test(f, d)

    def test_approx_equal_absolute_decimals(self):
        # Decimals (and their negations) compared with an absolute error.
        delta = Decimal("0.01")
        for text in "1.0 3.5 36.08 61.79 7912.3648".split():
            d = Decimal(text)
            for value in (d, -d):
                self.do_approx_equal_abs_test(value, delta)

    def test_cross_zero(self):
        # The two values lie on opposite sides of zero.
        outcome = approx_equal(1e-5, -1e-5, tol=1e-4, rel=0)
        self.assertTrue(outcome)

    # === Relative error tests ===

    def do_approx_equal_rel_test(self, x, delta):
        # x is compared with x*(1+delta) and with x*(1-delta).  A relative
        # error of twice delta must accept the pair, half of delta must
        # reject it.
        template = "Test failure for x={!r}, y={!r}"
        for factor in (1+delta, 1-delta):
            y = x*factor
            msg = template.format(x, y)
            accepted = approx_equal(x, y, tol=0, rel=2*delta)
            self.assertTrue(accepted, msg)
            rejected = approx_equal(x, y, tol=0, rel=delta/2)
            self.assertFalse(rejected, msg)

    def test_approx_equal_relative_ints(self):
        # Ints compared with a relative error.
        for rel in (0.36, 0.37):
            self.assertTrue(approx_equal(64, 47, tol=0, rel=rel))
        # ---
        for n, expected in ((449, True), (448, True), (447, False)):
            check = self.assertTrue if expected else self.assertFalse
            check(approx_equal(n, 512, tol=0, rel=0.125))

    def test_approx_equal_relative_floats(self):
        # Floats compared with a relative error.
        floats = [-178.34, -0.1, 0.1, 1.0, 36.97, 2847.136, 9145.074]
        for x in floats:
            for delta in (0.02, 0.0001):
                self.do_approx_equal_rel_test(x, delta)

    def test_approx_equal_relative_fractions(self):
        # Fractions (and their negations) compared with a relative error,
        # the error being given first as a Fraction and then as a float.
        delta = Fraction(3, 8)
        pairs = [(3, 84), (17, 30), (49, 50), (92, 85)]
        for numerator, denominator in pairs:
            f = Fraction(numerator, denominator)
            for d in (delta, float(delta)):
                for value in (f, -f):
                    self.do_approx_equal_rel_test(value, d)

    def test_approx_equal_relative_decimals(self):
        # Decimals compared with a relative error.
        for text in "0.02 1.0 5.7 13.67 94.138 91027.9321".split():
            d = Decimal(text)
            self.do_approx_equal_rel_test(d, Decimal("0.001"))
            self.do_approx_equal_rel_test(-d, Decimal("0.05"))

    # === Both absolute and relative error tests ===

    # There are four cases to consider:
    #   1) actual error <= both absolute and relative error
    #   2) actual error <= absolute error but > relative error
    #   3) actual error <= relative error but > absolute error
    #   4) actual error > both absolute and relative error

    def do_check_both(self, a, b, tol, rel, tol_flag, rel_flag):
        # tol_flag and rel_flag state whether the absolute-only and the
        # relative-only comparisons are expected to succeed; the combined
        # comparison is expected to succeed when either of them does.
        def checker(expected):
            return self.assertTrue if expected else self.assertFalse

        checker(tol_flag)(approx_equal(a, b, tol=tol, rel=0))
        checker(rel_flag)(approx_equal(a, b, tol=0, rel=rel))
        checker(tol_flag or rel_flag)(approx_equal(a, b, tol=tol, rel=rel))

    def test_approx_equal_both1(self):
        # Case 1: within both the absolute and the relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.8e-4, True, True)
        self.do_check_both(-7.387, -7.386, 0.002, 0.0002, True, True)

    def test_approx_equal_both2(self):
        # Case 2: within the absolute error only.
        self.do_check_both(7.955, 7.952, 0.004, 3.7e-4, True, False)

    def test_approx_equal_both3(self):
        # Case 3: within the relative error only.
        self.do_check_both(7.955, 7.952, 0.001, 3.8e-4, False, True)

    def test_approx_equal_both4(self):
        # Case 4: outside both the absolute and the relative error.
        self.do_check_both(2.78, 2.75, 0.01, 0.001, False, False)
        self.do_check_both(971.44, 971.47, 0.02, 3e-5, False, False)
580
581
class ApproxEqualSpecialsTest(unittest.TestCase):
    # approx_equal applied to the special values: INFs, NANs and zeroes.

    def test_inf(self):
        # Same-signed infinities match; an infinity matches neither the
        # opposite infinity nor a finite number.
        for make in (float, Decimal):
            inf = make('inf')
            self.assertTrue(approx_equal(inf, inf))
            self.assertTrue(approx_equal(inf, inf, 0, 0))
            self.assertTrue(approx_equal(inf, inf, 1, 0.01))
            self.assertTrue(approx_equal(-inf, -inf))
            self.assertFalse(approx_equal(inf, -inf))
            self.assertFalse(approx_equal(inf, 1000))

    def test_nan(self):
        # A NAN matches nothing: not itself, not an INF, not a finite value.
        for make in (float, Decimal):
            nan = make('nan')
            others = (nan, make('inf'), 1000)
            for other in others:
                self.assertFalse(approx_equal(nan, other))

    def test_float_zeroes(self):
        # Negative and positive float zero match.
        negative_zero = math.copysign(0.0, -1)
        outcome = approx_equal(negative_zero, 0.0, tol=0.1, rel=0.1)
        self.assertTrue(outcome)

    def test_decimal_zeroes(self):
        # Negative and positive Decimal zero match.
        negative_zero = Decimal("-0.0")
        outcome = approx_equal(negative_zero, Decimal(0), tol=0.1, rel=0.1)
        self.assertTrue(outcome)
608
609
class TestApproxEqualErrors(unittest.TestCase):
    # Error conditions of approx_equal.

    def test_bad_tol(self):
        # A negative absolute tolerance must raise ValueError.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, -1, 0.1)

    def test_bad_rel(self):
        # A negative relative error must raise ValueError.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, 1, -0.1)
620
621
622# --- Tests for NumericTestCase ---
623
624# The formatting routine that generates the error messages is complex enough
625# that it too needs testing.
626
class TestNumericTestCase(unittest.TestCase):
    # The precise wording of the error messages produced by NumericTestCase
    # is deliberately *not* guaranteed.  They still need some checking, so
    # as a compromise these tests only look for particular substrings that
    # should survive a rewording of the overall message.

    def do_test(self, args):
        # Build the message for ``args`` and check that every expected
        # substring occurs in it.
        message = NumericTestCase._make_std_err_msg(*args)
        for fragment in self.generate_substrings(*args):
            self.assertIn(fragment, message)

    def test_numerictestcase_is_testcase(self):
        # NumericTestCase must really be a TestCase.
        is_testcase = issubclass(NumericTestCase, unittest.TestCase)
        self.assertTrue(is_testcase)

    def test_error_msg_numeric(self):
        # Message for a comparison of two plain numbers (no index).
        self.do_test((2.5, 4.0, 0.5, 0.25, None))

    def test_error_msg_sequence(self):
        # Message for a comparison inside a sequence (index given).
        self.do_test((3.75, 8.25, 1.25, 0.5, 7))

    def generate_substrings(self, first, second, tol, rel, idx):
        """Return substrings we expect to see in error messages."""
        abs_err, rel_err = _calc_errors(first, second)
        fragments = ['tol=%r' % tol, 'rel=%r' % rel]
        fragments.append('absolute error = %r' % abs_err)
        fragments.append('relative error = %r' % rel_err)
        if idx is None:
            return fragments
        # Sequence comparisons additionally mention the failing index.
        return fragments + ['differ at index %d' % idx]
665
666
667# =======================================
668# === Tests for the statistics module ===
669# =======================================
670
671
class GlobalsTest(unittest.TestCase):
    # Checks on the module-level metadata of the statistics module.
    module = statistics
    expected_metadata = ["__doc__", "__all__"]

    def test_meta(self):
        # Every expected metadata attribute must be present on the module.
        for meta in self.expected_metadata:
            present = hasattr(self.module, meta)
            self.assertTrue(present, "%s not present" % meta)

    def test_check_all(self):
        # Each name listed in __all__ has to be public and has to exist.
        target = self.module
        for name in target.__all__:
            # Private names do not belong in __all__.
            is_private = name.startswith("_")
            self.assertFalse(is_private,
                             'private name "%s" in __all__' % name)
            # A name listed in __all__ must be defined by the module.
            is_defined = hasattr(target, name)
            self.assertTrue(is_defined,
                            'missing name "%s" in __all__' % name)
692
693
class DocTests(unittest.TestCase):
    # Run the doctests embedded in the statistics module.
    @unittest.skipIf(sys.flags.optimize >= 2,
                     "Docstrings are omitted with -OO and above")
    def test_doc_tests(self):
        outcome = doctest.testmod(statistics, optionflags=doctest.ELLIPSIS)
        # At least one example must have run, and none may have failed.
        self.assertGreater(outcome.attempted, 0)
        self.assertEqual(outcome.failed, 0)
701
class StatisticsErrorTest(unittest.TestCase):
    # statistics.StatisticsError must exist and derive from ValueError.
    def test_has_exception(self):
        self.assertTrue(hasattr(statistics, 'StatisticsError'))
        exc_type = statistics.StatisticsError
        errmsg = (
            "Expected StatisticsError to be a ValueError, but got a"
            " subclass of %r instead."
            ) % exc_type.__base__
        self.assertTrue(issubclass(exc_type, ValueError), errmsg)
713
714
715# === Tests for private utility functions ===
716
class ExactRatioTest(unittest.TestCase):
    # Exercise the private helper statistics._exact_ratio.

    def test_int(self):
        # An int n is expected back as the pair (n, 1).
        for n in (-20, -3, 0, 5, 99, 10**20):
            self.assertEqual(statistics._exact_ratio(n), (n, 1))

    def test_fraction(self):
        # A Fraction is expected back as (numerator, denominator).
        for numerator in (-5, 1, 12, 38):
            ratio = statistics._exact_ratio(Fraction(numerator, 37))
            self.assertEqual(ratio, (numerator, 37))

    def test_float(self):
        # Two fixed cases first, then random floats, for which dividing
        # the returned pair must give the original value back.
        for value, expected in ((0.125, (1, 8)), (1.125, (9, 8))):
            self.assertEqual(statistics._exact_ratio(value), expected)
        samples = [random.uniform(-100, 100) for _ in range(100)]
        for x in samples:
            num, den = statistics._exact_ratio(x)
            self.assertEqual(x, num/den)

    def test_decimal(self):
        # Finite Decimals with known ratios.
        exact_ratio = statistics._exact_ratio
        cases = (
            ("0.125", (1, 8)),
            ("12.345", (2469, 200)),
            ("-1.98", (-99, 50)),
        )
        for text, expected in cases:
            self.assertEqual(exact_ratio(Decimal(text)), expected)

    def test_inf(self):
        # An infinity is expected back as (x, None), with the type of the
        # argument preserved, including for subclasses.
        class MyFloat(float):
            pass
        class MyDecimal(Decimal):
            pass
        INF = float("INF")
        for inf in (INF, -INF):
            for type_ in (float, MyFloat, Decimal, MyDecimal):
                x = type_(inf)
                ratio = statistics._exact_ratio(x)
                self.assertEqual(ratio, (x, None))
                numerator = ratio[0]
                self.assertEqual(type(numerator), type_)
                self.assertTrue(math.isinf(numerator))

    def test_float_nan(self):
        # A float NAN is expected back as (NAN, None), type preserved.
        class MyFloat(float):
            pass
        NAN = float("NAN")
        for nan in (NAN, MyFloat(NAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(math.isnan(ratio[0]))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))

    def test_decimal_nan(self):
        # Quiet and signalling Decimal NANs are expected back as
        # (the same kind of NAN, None), type preserved.
        class MyDecimal(Decimal):
            pass
        NAN = Decimal("NAN")
        sNAN = Decimal("sNAN")
        for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(_nan_equal(ratio[0], nan))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))
779
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700780
class DecimalToRatioTest(unittest.TestCase):
    # Exercise the private function _exact_ratio with Decimal arguments.

    def test_infinity(self):
        # Each INF is expected back as (INF, None).
        inf = Decimal('INF')
        for value in (inf, -inf):
            self.assertEqual(statistics._exact_ratio(value), (value, None))

    def test_nan(self):
        # Each NAN is expected back as (NAN of the same kind, None).
        for nan in (Decimal('NAN'), Decimal('sNAN')):
            num, den = statistics._exact_ratio(nan)
            # assertEqual is no use here because NANs always compare
            # non-equal, and nothing is guaranteed about object identity,
            # so an identity test is out as well.
            self.assertTrue(_nan_equal(num, nan))
            self.assertIs(den, None)

    def test_sign(self):
        # The sign belongs to the numerator; the denominator stays positive.
        for d in [Decimal("9.8765e12"), Decimal("9.8765e-12")]:
            # Positive value first.
            assert d > 0
            num, den = statistics._exact_ratio(d)
            self.assertGreaterEqual(num, 0)
            self.assertGreater(den, 0)
            # Then the negated value.
            num, den = statistics._exact_ratio(-d)
            self.assertLessEqual(num, 0)
            self.assertGreater(den, 0)

    def test_negative_exponent(self):
        # A Decimal with a negative exponent.
        ratio = statistics._exact_ratio(Decimal("0.1234"))
        self.assertEqual(ratio, (617, 5000))

    def test_positive_exponent(self):
        # A Decimal with a positive exponent.
        ratio = statistics._exact_ratio(Decimal("1.234e7"))
        self.assertEqual(ratio, (12340000, 1))

    def test_regression_20536(self):
        # Regression test for issue 20536.
        # See http://bugs.python.org/issue20536
        cases = (("1e2", (100, 1)), ("1.47e5", (147000, 1)))
        for text, expected in cases:
            ratio = statistics._exact_ratio(Decimal(text))
            self.assertEqual(ratio, expected)
831
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700832
class IsFiniteTest(unittest.TestCase):
    # Exercise the private statistics._isfinite function.

    def test_finite(self):
        # An int, a Fraction, a float and a Decimal are all reported finite.
        finite_values = (5, Fraction(1, 3), 2.5, Decimal("5.5"))
        for value in finite_values:
            self.assertTrue(statistics._isfinite(value))

    def test_infinity(self):
        # Float and Decimal infinities are reported as not finite.
        for make in (float, Decimal):
            self.assertFalse(statistics._isfinite(make("inf")))

    def test_nan(self):
        # Float NAN and both kinds of Decimal NAN are reported as not finite.
        not_numbers = (float("nan"), Decimal("NAN"), Decimal("sNAN"))
        for value in not_numbers:
            self.assertFalse(statistics._isfinite(value))
850
851
class CoerceTest(unittest.TestCase):
    # Check how the private function _coerce combines pairs of types.
    #
    # The coercion rules are currently an implementation detail, although at
    # some point that should change.  The tests and comments here define the
    # correct implementation.
    #
    # Pre-conditions of _coerce:
    #
    #   - (A note starting "The first time _sum calls _coerce, the" breaks
    #     off unfinished at this point.)
    #
    #   - coerce(T, S) will never be called with bool as the first argument;
    #     this is a pre-condition, guarded with an assertion.
    #
    #   - coerce(T, T) will always return T; we assume T is a valid numeric
    #     type.  Violate this assumption at your own risk.
    #
    #   - Apart from as above, bool is treated as if it were actually int.
    #
    #   - coerce(int, X) and coerce(X, int) return X.

    def test_bool(self):
        # bool gets a test of its own: it is never given as the first
        # argument to _coerce (see the pre-conditions) and it cannot be
        # subclassed.
        for base in (int, float, Fraction, Decimal):
            self.assertIs(statistics._coerce(base, bool), base)

            class MyClass(base):
                pass

            self.assertIs(statistics._coerce(MyClass, bool), MyClass)

    def assertCoerceTo(self, A, B):
        """Assert that A and B coerce to B, whichever is given first."""
        for first, second in ((A, B), (B, A)):
            self.assertIs(statistics._coerce(first, second), B)

    def check_coerce_to(self, A, B):
        """Check that A and its subclasses coerce to B and its subclasses."""
        class SubclassOfA(A):
            pass

        class SubclassOfB(B):
            pass

        # First against B itself, then against a subclass of B; each time
        # with A and then with a subclass of A.
        for target in (B, SubclassOfB):
            for source in (A, SubclassOfA):
                self.assertCoerceTo(source, target)

    def assertCoerceRaises(self, A, B):
        """Assert that coercing A to B, or vice versa, raises TypeError."""
        # NOTE(review): each pair is handed to assertRaises as ONE tuple, so
        # _coerce is called with a single positional argument rather than
        # two.  Confirm whether the TypeError observed here comes from the
        # coercion rules or only from the argument count.
        for pair in ((A, B), (B, A)):
            self.assertRaises(TypeError, statistics._coerce, pair)

    def check_type_coercions(self, T):
        """Check that type T coerces correctly with subclasses of itself."""
        assert T is not bool
        # A type coerced with itself is handed straight back.
        self.assertIs(statistics._coerce(T, T), T)

        # Small hierarchy:  T <- U <- W  and  T <- V.
        class U(T):
            pass

        class V(T):
            pass

        class W(U):
            pass

        # A type coerced with one of its subclasses gives the subclass.
        for subclass in (U, V, W):
            self.assertCoerceTo(T, subclass)
        self.assertCoerceTo(U, W)
        # Two subclasses that are not parent and child must not coerce.
        for unrelated in ((U, V), (V, W)):
            self.assertCoerceRaises(*unrelated)

    def test_int(self):
        # int with itself and its subclasses, then int against each of the
        # other numeric types.
        self.check_type_coercions(int)
        for other in (float, Fraction, Decimal):
            self.check_coerce_to(int, other)

    def test_fraction(self):
        # Fraction with itself and its subclasses, then Fraction to float.
        self.check_type_coercions(Fraction)
        self.check_coerce_to(Fraction, float)

    def test_decimal(self):
        # Decimal with itself and its subclasses.
        self.check_type_coercions(Decimal)

    def test_float(self):
        # float with itself and its subclasses.
        self.check_type_coercions(float)

    def test_non_numeric_types(self):
        # Every numeric type paired with every non-numeric type must raise.
        for non_numeric in (str, list, type(None), tuple, dict):
            for numeric in (int, float, Fraction, Decimal):
                self.assertCoerceRaises(numeric, non_numeric)

    def test_incompatible_types(self):
        # float and Fraction, and subclasses of them, must not mix with
        # Decimal.
        for T in (float, Fraction):
            class MySubclass(T):
                pass

            for candidate in (T, MySubclass):
                self.assertCoerceRaises(candidate, Decimal)
950
951
class ConvertTest(unittest.TestCase):
    # Exercise the private statistics._convert function.

    def check_exact_equal(self, x, y):
        """Assert that x equals y and that both have exactly the same type."""
        self.assertEqual(x, y)
        self.assertIs(type(x), type(y))

    def test_int(self):
        # Whole-valued Fractions convert to int and to an int subclass.
        class MyInt(int):
            pass

        converted = statistics._convert(Fraction(71), int)
        self.check_exact_equal(converted, 71)
        converted = statistics._convert(Fraction(17), MyInt)
        self.check_exact_equal(converted, MyInt(17))

    def test_fraction(self):
        # Fractions convert to Fraction and to a Fraction subclass.
        class MyFraction(Fraction):
            def __truediv__(self, other):
                quotient = super().__truediv__(other)
                return self.__class__(quotient)

        converted = statistics._convert(Fraction(95, 99), Fraction)
        self.check_exact_equal(converted, Fraction(95, 99))
        converted = statistics._convert(Fraction(71, 13), MyFraction)
        self.check_exact_equal(converted, MyFraction(71, 13))

    def test_float(self):
        # Fractions convert to float and to a float subclass.
        class MyFloat(float):
            def __truediv__(self, other):
                quotient = super().__truediv__(other)
                return self.__class__(quotient)

        converted = statistics._convert(Fraction(-1, 2), float)
        self.check_exact_equal(converted, -0.5)
        converted = statistics._convert(Fraction(9, 8), MyFloat)
        self.check_exact_equal(converted, MyFloat(1.125))

    def test_decimal(self):
        # Fractions convert to Decimal and to a Decimal subclass.
        class MyDecimal(Decimal):
            def __truediv__(self, other):
                quotient = super().__truediv__(other)
                return self.__class__(quotient)

        converted = statistics._convert(Fraction(1, 40), Decimal)
        self.check_exact_equal(converted, Decimal("0.025"))
        converted = statistics._convert(Fraction(-15, 16), MyDecimal)
        self.check_exact_equal(converted, MyDecimal("-0.9375"))

    def test_inf(self):
        # Infinities of either sign convert to their own type unchanged.
        for positive in (float('inf'), Decimal('inf')):
            for value in (positive, -positive):
                converted = statistics._convert(value, type(value))
                self.check_exact_equal(converted, value)

    def test_nan(self):
        # NANs convert to their own type and stay the same kind of NAN.
        for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
            converted = statistics._convert(nan, type(nan))
            self.assertTrue(_nan_equal(converted, nan))

    def test_invalid_input_type(self):
        # Converting None to float must raise TypeError.
        with self.assertRaises(TypeError):
            statistics._convert(None, float)
1012
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001013
class FailNegTest(unittest.TestCase):
    """Exercise the private statistics._fail_neg function."""

    def test_pass_through(self):
        # Non-negative values come out exactly as they went in.
        values = [1, 2.0, Fraction(3), Decimal(4)]
        passed = list(statistics._fail_neg(values))
        self.assertEqual(values, passed)

    def test_negatives_raise(self):
        # Pulling a negative value out of the iterator raises StatisticsError.
        for magnitude in [1, 2.0, Fraction(3), Decimal(4)]:
            stream = statistics._fail_neg([-magnitude])
            self.assertRaises(statistics.StatisticsError, next, stream)

    def test_error_msg(self):
        # A message supplied by the caller becomes the exception's first
        # argument.  The message is randomised on every run.
        msg = "badness #%d" % random.randint(10000, 99999)
        caught = None
        try:
            next(statistics._fail_neg([-1], msg))
        except statistics.StatisticsError as exc:
            caught = exc
        if caught is None:
            self.fail("expected exception, but it didn't happen")
        self.assertEqual(caught.args[0], msg)
1040
1041
class FindLteqTest(unittest.TestCase):
    # Exercise the private statistics._find_lteq function.

    def test_invalid_input_values(self):
        # In each case x is absent from a, and ValueError is expected.
        bad_inputs = [
            ([], 1),
            ([1, 2], 3),
            ([1, 3], 2),
        ]
        for a, x in bad_inputs:
            with self.subTest(a=a, x=x), self.assertRaises(ValueError):
                statistics._find_lteq(a, x)

    def test_locate_successfully(self):
        # In each case the expected index is that of the first x in a.
        good_inputs = [
            ([1, 1, 1, 2, 3], 1, 0),
            ([0, 1, 1, 1, 2, 3], 1, 1),
            ([1, 2, 3, 3, 3], 3, 2),
        ]
        for a, x, expected_i in good_inputs:
            with self.subTest(a=a, x=x):
                found = statistics._find_lteq(a, x)
                self.assertEqual(expected_i, found)
1063
1064
class FindRteqTest(unittest.TestCase):
    # Exercise the private statistics._find_rteq function, which is called
    # as _find_rteq(a, l, x).

    def test_invalid_input_values(self):
        # Every (a, l, x) triple below is expected to raise ValueError.
        # Each case runs in its own subTest so that a failure names the
        # offending inputs.
        bad_inputs = [
            ([1], 2, 1),
            ([1, 3], 0, 2),
        ]
        for a, l, x in bad_inputs:
            with self.subTest(a=a, l=l, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_rteq(a, l, x)

    def test_locate_successfully(self):
        # In each case the expected index is that of the last x in a.
        good_inputs = [
            ([1, 1, 1, 2, 3], 0, 1, 2),
            ([0, 1, 1, 1, 2, 3], 0, 1, 3),
            ([1, 2, 3, 3, 3], 0, 3, 4),
        ]
        for a, l, x, expected_i in good_inputs:
            with self.subTest(a=a, l=l, x=x):
                self.assertEqual(expected_i, statistics._find_rteq(a, l, x))
1084
1085
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001086# === Tests for public functions ===
1087
class UnivariateCommonMixin:
    # Tests shared by most univariate functions that take a data argument.
    # The function under test is read from self.func.

    def test_no_args(self):
        # Calling the function with no arguments at all is a TypeError.
        self.assertRaises(TypeError, self.func)

    def test_empty_data(self):
        # An empty data argument (the first argument) is a StatisticsError,
        # whether it is a list, a tuple or an exhausted iterator.
        for nothing in ([], (), iter([])):
            self.assertRaises(statistics.StatisticsError, self.func, nothing)

    def prepare_data(self):
        """Return the ints 0 to 9 in an order that is not ascending."""
        ascending = list(range(10))
        data = ascending[:]
        while data == ascending:
            random.shuffle(data)
        return data

    def test_no_inplace_modifications(self):
        # The function must leave the list it is given untouched.
        data = self.prepare_data()
        assert len(data) != 1  # Necessary to avoid infinite loop.
        assert data != sorted(data)
        snapshot = data[:]
        assert data is not snapshot
        _ = self.func(data)
        self.assertListEqual(data, snapshot, "data has been modified")

    def test_order_doesnt_matter(self):
        # Shuffling the data points must not change the result.

        # CAUTION: due to floating point rounding errors, the result actually
        # may depend on the order. Consider this test representing an ideal.
        # To avoid this test failing, only test with exact values such as ints
        # or Fractions.
        data = [1, 2, 3, 3, 3, 4, 5, 6]*100
        before_shuffle = self.func(data)
        random.shuffle(data)
        after_shuffle = self.func(data)
        self.assertEqual(before_shuffle, after_shuffle)

    def test_type_of_data_collection(self):
        # The kind of iterable holding the data must not affect the result.
        class MyList(list):
            pass

        class MyTuple(tuple):
            pass

        def generator(data):
            yield from data

        data = self.prepare_data()
        expected = self.func(data)
        containers = (list, tuple, iter, MyList, MyTuple, generator)
        for wrap in containers:
            self.assertEqual(self.func(wrap(data)), expected)

    def test_range_data(self):
        # A range object gives the same result as the equivalent list.
        data = range(20, 50, 3)
        expected = self.func(list(data))
        self.assertEqual(self.func(data), expected)

    def test_bad_arg_types(self):
        # Data of the wrong type must raise TypeError.

        # Don't roll the following into a loop like this:
        #   for bad in list_of_bad:
        #       self.check_for_type_error(bad)
        #
        # Since assertRaises doesn't show the arguments that caused the test
        # failure, it is very difficult to debug these test failures when the
        # following are in a loop.
        self.check_for_type_error(None)
        self.check_for_type_error(23)
        self.check_for_type_error(42.0)
        self.check_for_type_error(object())

    def check_for_type_error(self, *args):
        # Helper: calling the function with these arguments is a TypeError.
        self.assertRaises(TypeError, self.func, *args)

    def test_type_of_data_element(self):
        # The type of the data elements must not affect the numeric result.
        # This is weaker than UnivariateTypeMixin.test_types_conserved: the
        # result is compared by equality only, after conversion to the type
        # of the result for the raw int data.
        class MyFloat(float):
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))

            def __add__(self, other):
                return type(self)(super().__add__(other))

            __radd__ = __add__

        raw = self.prepare_data()
        expected = self.func(raw)
        to_expected_type = type(expected)
        for kind in (float, MyFloat, Decimal, Fraction):
            data = list(map(kind, raw))
            result = to_expected_type(self.func(data))
            self.assertEqual(result, expected)
1185
1186
class UnivariateTypeMixin:
    """Mixin with tests for functions that conserve the data's type.

    Use this for functions whose result has the same type as the individual
    data points, e.g. the mean of a list of Fractions is itself a Fraction.

    Tests that merely involve types do not all belong here; only those that
    depend on the function returning the same type as its input data.
    """

    def prepare_types_for_conservation_test(self):
        """Return the types which are expected to be conserved."""
        class MyFloat(float):
            # Each arithmetic operation re-wraps the plain float result so
            # that it stays an instance of this subclass.
            def __add__(self, other):
                return type(self)(super().__add__(other))

            __radd__ = __add__

            def __sub__(self, other):
                return type(self)(super().__sub__(other))

            def __rsub__(self, other):
                return type(self)(super().__rsub__(other))

            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))

            def __rtruediv__(self, other):
                return type(self)(super().__rtruediv__(other))

            def __pow__(self, other):
                return type(self)(super().__pow__(other))

        return (float, Decimal, Fraction, MyFloat)

    def test_types_conserved(self):
        # The result must have the same type as the data points.  Mixed
        # data types are not covered, and only the type of the result is
        # checked, not its value.
        raw = self.prepare_data()
        for kind in self.prepare_types_for_conservation_test():
            typed_data = list(map(kind, raw))
            self.assertIs(type(self.func(typed_data)), kind)
1224
1225
class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
    # Common test cases for the statistics._sum() function.
    #
    # This test suite looks only at the numeric value returned by _sum,
    # after conversion to the appropriate type.
    #
    # NOTE(review): neither base class listed here is a unittest.TestCase,
    # so presumably a test runner does not pick this class up -- confirm
    # whether that is intended.

    def setUp(self):
        def simplified_sum(*args):
            # _sum hands back a (type, total, count) triple.  Convert the
            # total to the reported type with _convert; _coerce combines
            # two *types* and is not meant to be given a value.
            T, value, _count = statistics._sum(*args)
            return statistics._convert(value, T)
        self.func = simplified_sum
1236
1237
class TestSum(NumericTestCase):
    # Test cases for the statistics._sum() function.
    #
    # These tests look at the entire three value tuple returned by _sum:
    # (type, total, count).

    def setUp(self):
        self.func = statistics._sum

    def test_empty_data(self):
        # Empty data is not an error for _sum: the total is the start value
        # and the count is zero.
        expectations = (
            ((), (int, Fraction(0), 0)),
            ((23,), (int, Fraction(23), 0)),
            ((2.3,), (float, Fraction(2.3), 0)),
        )
        for data in ([], (), iter([])):
            for start, expected in expectations:
                self.assertEqual(self.func(data, *start), expected)

    def test_ints(self):
        # Sum of ints, without and with a start value.
        result = self.func([1, 5, 3, -4, -8, 20, 42, 1])
        self.assertEqual(result, (int, Fraction(60), 8))
        result = self.func([4, 2, 3, -8, 7], 1000)
        self.assertEqual(result, (int, Fraction(1008), 5))

    def test_floats(self):
        # Sum of floats, without and with a start value.
        result = self.func([0.25]*20)
        self.assertEqual(result, (float, Fraction(5.0), 20))
        result = self.func([0.125, 0.25, 0.5, 0.75], 1.5)
        self.assertEqual(result, (float, Fraction(3.125), 4))

    def test_fractions(self):
        # Sum of Fractions.
        result = self.func([Fraction(1, 1000)]*500)
        self.assertEqual(result, (Fraction, Fraction(1, 2), 500))

    def test_decimals(self):
        # Sum of Decimals.
        texts = ("0.001", "5.246", "1.702", "-0.025",
                 "3.974", "2.328", "4.617", "2.843")
        data = [Decimal(text) for text in texts]
        self.assertEqual(self.func(data),
                         (Decimal, Decimal("20.686"), 8))

    def test_compare_with_math_fsum(self):
        # Compare with the math.fsum function.
        # Ideally we ought to get the exact same result, but sometimes
        # we differ by a very slight amount :-(
        data = [random.uniform(-100, 1000) for _ in range(1000)]
        total = float(self.func(data)[1])
        self.assertApproxEqual(total, math.fsum(data), rel=2e-16)

    def test_start_argument(self):
        # The optional start argument is added on to the total.
        data = [random.uniform(1, 1000) for _ in range(100)]
        t = self.func(data)[1]
        cases = (
            (t+42, 42),
            (t-23, -23),
            (t+Fraction(1e20), 1e20),
        )
        for expected, start in cases:
            self.assertEqual(expected, self.func(data, start)[1])

    def test_strings_fail(self):
        # A string must be rejected both as the start value and as a
        # data point.
        self.assertRaises(TypeError, self.func, [1, 2, 3], '999')
        self.assertRaises(TypeError, self.func, [1, 2, 3, '999'])

    def test_bytes_fail(self):
        # Bytes must be rejected both as the start value and as a
        # data point.
        self.assertRaises(TypeError, self.func, [1, 2, 3], b'999')
        self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])

    def test_mixed_sum(self):
        # Mixed input types are not (currently) allowed.
        # Mixing float and Decimal within the data must fail ...
        self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)])
        # ... and so must a Decimal start value with float data.
        self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001308
1309
class SumTortureTest(NumericTestCase):
    # Sums whose huge terms cancel, leaving only the small ones.

    def test_torture(self):
        # Tim Peters' torture test for sum, and variants of same.
        exact = (float, Fraction(20000.0), 40000)
        for pattern in ([1, 1e100, 1, -1e100], [1e100, 1, 1, -1e100]):
            self.assertEqual(statistics._sum(pattern*10000), exact)
        # The last variant is only checked approximately.
        result = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
        T, num, count = result
        self.assertIs(T, float)
        self.assertEqual(count, 40000)
        self.assertApproxEqual(float(num), 2.0e-96, rel=5e-16)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001321
1322
class SumSpecialValues(NumericTestCase):
    # Test that sum works correctly with IEEE-754 special values.

    def test_nan(self):
        # A NAN among the data gives a NAN total of the same type.
        for type_ in (float, Decimal):
            nan = type_('nan')
            result = statistics._sum([1, nan, 2])[1]
            self.assertIs(type(result), type_)
            self.assertTrue(math.isnan(result))

    def check_infinity(self, x, inf):
        """Check x is an infinity of the same type and sign as inf."""
        self.assertTrue(math.isinf(x))
        self.assertIs(type(x), type(inf))
        self.assertEqual(x > 0, inf > 0)
        # A unittest assertion rather than a bare assert: it still runs
        # under -O and reports both values on failure.
        self.assertEqual(x, inf)

    def do_test_inf(self, inf):
        # Helper shared by the float and Decimal infinity tests.
        # Adding a single infinity gives infinity.
        result = statistics._sum([1, 2, inf, 3])[1]
        self.check_infinity(result, inf)
        # Adding two infinities of the same sign also gives infinity.
        result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
        self.check_infinity(result, inf)

    def test_float_inf(self):
        # Positive and negative float infinities.
        inf = float('inf')
        for direction in (+1, -1):
            self.do_test_inf(direction*inf)

    def test_decimal_inf(self):
        # Positive and negative Decimal infinities.
        inf = Decimal('inf')
        for direction in (+1, -1):
            self.do_test_inf(direction*inf)

    def test_float_mismatched_infs(self):
        # Test that adding two infinities of opposite sign gives a NAN.
        inf = float('inf')
        result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
        self.assertTrue(math.isnan(result))

    def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign returns NAN.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.ExtendedContext):
            self.assertTrue(math.isnan(statistics._sum(data)[1]))

    def test_decimal_basiccontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign raises InvalidOperation.
        # (Despite the "to_nan" in the test's name, under BasicContext an
        # exception is expected here rather than a NAN.)
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.BasicContext):
            self.assertRaises(decimal.InvalidOperation, statistics._sum, data)

    def test_decimal_snan_raises(self):
        # Adding sNAN should raise InvalidOperation.
        sNAN = Decimal('sNAN')
        data = [1, sNAN, 2]
        self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
1383
1384
1385# === Tests for averages ===
1386
class AverageMixin(UnivariateCommonMixin):
    # Tests common to averages, on top of those in UnivariateCommonMixin.

    def test_single_value(self):
        # The average of a single data point is that data point.
        lone_values = (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28'))
        for x in lone_values:
            self.assertEqual(self.func([x]), x)

    def prepare_values_for_repeated_single_test(self):
        # Values fed to test_repeated_single_value.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.9712'))

    def test_repeated_single_value(self):
        # The average of one value repeated several times is that value.
        counts = (2, 5, 10, 20)
        for x in self.prepare_values_for_repeated_single_test():
            for count in counts:
                with self.subTest(x=x, count=count):
                    self.assertEqual(self.func([x]*count), x)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001405
1406
class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.mean.

    def setUp(self):
        self.func = statistics.mean

    def test_torture_pep(self):
        # "Torture Test" from PEP-450.
        extreme = [1e100, 1, 3, -1e100]
        self.assertEqual(self.func(extreme), 1)

    def test_ints(self):
        # Mean of ints, presented in random order.
        data = [0, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9]
        random.shuffle(data)
        mean = self.func(data)
        self.assertEqual(mean, 4.8125)

    def test_floats(self):
        # Mean of floats, presented in random order.
        data = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5]
        random.shuffle(data)
        mean = self.func(data)
        self.assertEqual(mean, 22.015625)

    def test_decimals(self):
        # Mean of Decimals, presented in random order.
        texts = ("1.634", "2.517", "3.912", "4.072", "5.813")
        data = [Decimal(text) for text in texts]
        random.shuffle(data)
        self.assertEqual(self.func(data), Decimal("3.5896"))

    def test_fractions(self):
        # Mean of Fractions, presented in random order.
        pairs = ((1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8))
        data = [Fraction(num, den) for num, den in pairs]
        random.shuffle(data)
        self.assertEqual(self.func(data), Fraction(1479, 1960))

    def test_inf(self):
        # One infinity among the data makes the mean that same infinity.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            for direction in (1, -1):
                inf = kind("inf")*direction
                result = self.func(raw + [inf])
                self.assertTrue(math.isinf(result))
                self.assertEqual(result, inf)

    def test_mismatched_infs(self):
        # Infinities of opposite sign among the data give a NAN mean.
        plus, minus = float('inf'), float('-inf')
        data = [2, 4, 6, plus, 1, 3, 5, minus]
        self.assertTrue(math.isnan(self.func(data)))

    def test_nan(self):
        # One NAN among the data makes the mean a NAN.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            nan = kind("nan")
            result = self.func(raw + [nan])
            self.assertTrue(math.isnan(result))

    def test_big_data(self):
        # Adding a large constant to every data point shifts the mean by
        # that constant.
        c = 1e9
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data) + c
        assert expected != c
        shifted = [x+c for x in data]
        self.assertEqual(self.func(shifted), expected)

    def test_doubled_data(self):
        # Mean of [a,b,c...z] should be same as for [a,a,b,b,c,c...z,z].
        data = [random.uniform(-3, 5) for _ in range(1000)]
        once = self.func(data)
        twice = self.func(data*2)
        self.assertApproxEqual(twice, once)

    def test_regression_20561(self):
        # Regression test for issue 20561.
        # See http://bugs.python.org/issue20561
        d = Decimal('1e4')
        self.assertEqual(statistics.mean([d]), d)

    def test_regression_25177(self):
        # Regression test for issue 25177.
        # Ensure very big and very small floats don't overflow.
        # See http://bugs.python.org/issue25177.
        big = 8.98846567431158e+307
        tiny = 5e-324
        pair = [8.988465674311579e+307, 8.98846567431158e+307]
        self.assertEqual(statistics.mean(pair), 8.98846567431158e+307)
        for n in (2, 3, 5, 200):
            for extreme in (big, tiny):
                self.assertEqual(statistics.mean([extreme]*n), extreme)
1501
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001502
class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.harmonic_mean.

    def setUp(self):
        self.func = statistics.harmonic_mean

    def prepare_data(self):
        # Override mixin method: same data, with the zero taken out.
        data = super().prepare_data()
        data.remove(0)
        return data

    def prepare_values_for_repeated_single_test(self):
        # Override mixin method.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.125'))

    def test_zero(self):
        # A zero among the data makes the harmonic mean zero.
        self.assertEqual(self.func([1, 0, 2]), 0)

    def test_negative_error(self):
        # A negative value among the data raises StatisticsError.
        expected_error = statistics.StatisticsError
        for values in ([-1], [1, -2, 3]):
            with self.subTest(values=values):
                self.assertRaises(expected_error, self.func, values)

    def test_invalid_type_error(self):
        # Strings among the data raise TypeError.
        bad_inputs = [
            ['3.14'],               # single string
            ['1', '2', '3'],        # multiple strings
            [1, '2', 3, '4', 5],    # mixed strings and valid integers
            [2.3, 3.4, 4.5, '5.6'], # only one string and valid floats
        ]
        for data in bad_inputs:
            with self.subTest(data=data), self.assertRaises(TypeError):
                self.func(data)

    def test_ints(self):
        # Harmonic mean of ints, presented in random order.
        data = [2, 4, 4, 8, 16, 16]
        random.shuffle(data)
        self.assertEqual(self.func(data), 6*4/5)

    def test_floats_exact(self):
        # Harmonic mean of some carefully chosen floats.
        data = [1/8, 1/4, 1/4, 1/2, 1/2]
        random.shuffle(data)
        self.assertEqual(self.func(data), 1/4)
        self.assertEqual(self.func([0.25, 0.5, 1.0, 1.0]), 0.5)

    def test_singleton_lists(self):
        # The harmonic mean of [x] is x; checked for every int from 1 to
        # 100 with an exact comparison.
        for x in range(1, 101):
            self.assertEqual(self.func([x]), x)

    def test_decimals_exact(self):
        # Harmonic mean of some carefully chosen Decimals.
        D = Decimal
        whole = [D(15), D(30), D(60), D(60)]
        self.assertEqual(self.func(whole), D(30))
        shuffled_cases = (
            ([D("0.05"), D("0.10"), D("0.20"), D("0.20")], D("0.10")),
            ([D("1.68"), D("0.32"), D("5.94"), D("2.75")], D(66528)/70723),
        )
        for data, expected in shuffled_cases:
            random.shuffle(data)
            self.assertEqual(self.func(data), expected)

    def test_fractions(self):
        # Harmonic mean of Fractions, presented in random order.
        pairs = ((1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8))
        data = [Fraction(num, den) for num, den in pairs]
        random.shuffle(data)
        self.assertEqual(self.func(data), Fraction(7*420, 4029))

    def test_inf(self):
        # Harmonic mean with an infinity among the data.
        data = [2.0, float('inf'), 1.0]
        self.assertEqual(self.func(data), 2.0)

    def test_nan(self):
        # A NAN among the data makes the harmonic mean a NAN.
        data = [2.0, float('nan'), 1.0]
        self.assertTrue(math.isnan(self.func(data)))

    def test_multiply_data_points(self):
        # Multiplying every data point by a constant multiplies the
        # harmonic mean by that constant.
        c = 111
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data)*c
        scaled = [x*c for x in data]
        self.assertEqual(self.func(scaled), expected)

    def test_doubled_data(self):
        # Harmonic mean of [a,b...z] should be same as for [a,a,b,b...z,z].
        data = [random.uniform(1, 5) for _ in range(1000)]
        once = self.func(data)
        twice = self.func(data*2)
        self.assertApproxEqual(twice, once)
1601
1602
class TestMedian(NumericTestCase, AverageMixin):
    # Common tests for median and all median.* functions.

    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        data = super().prepare_data()
        # Pad with one more value when needed so the length is odd.
        if len(data)%2 == 0:
            data.append(2)
        return data

    def test_even_ints(self):
        # Median of an even number of ints.
        data = [1, 2, 3, 4, 5, 6]
        assert len(data)%2 == 0
        self.assertEqual(self.func(data), 3.5)

    def test_odd_ints(self):
        # Median of an odd number of ints.
        data = [1, 2, 3, 4, 5, 6, 9]
        assert len(data)%2 == 1
        self.assertEqual(self.func(data), 4)

    def test_odd_fractions(self):
        # Median of an odd number of Fractions, in random order.
        data = [Fraction(k, 7) for k in (1, 2, 3, 4, 5)]
        assert len(data)%2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), Fraction(3, 7))

    def test_even_fractions(self):
        # Median of an even number of Fractions, in random order.
        data = [Fraction(k, 7) for k in (1, 2, 3, 4, 5, 6)]
        assert len(data)%2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), Fraction(1, 2))

    def test_odd_decimals(self):
        # Median of an odd number of Decimals, in random order.
        texts = ('2.5', '3.1', '4.2', '5.7', '5.8')
        data = [Decimal(text) for text in texts]
        assert len(data)%2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), Decimal('4.2'))

    def test_even_decimals(self):
        # Median of an even number of Decimals, in random order.
        texts = ('1.2', '2.5', '3.1', '4.2', '5.7', '5.8')
        data = [Decimal(text) for text in texts]
        assert len(data)%2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), Decimal('3.65'))
1658
1659
class TestMedianDataType(NumericTestCase, UnivariateTypeMixin):
    # Checks that median() conserves the type of the data elements.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        values = list(range(15))
        assert len(values) % 2 == 1
        # Keep shuffling until the data is no longer in sorted order.
        while values == sorted(values):
            random.shuffle(values)
        return values
1671
1672
class TestMedianLow(TestMedian, UnivariateTypeMixin):
    # median_low: only the even-length cases differ from TestMedian, so
    # only those tests are overridden.
    def setUp(self):
        self.func = statistics.median_low

    def test_even_ints(self):
        # Even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 3)

    def test_even_fractions(self):
        # Even number of Fractions, presented in random order.
        frac = Fraction
        values = [frac(1, 7), frac(2, 7), frac(3, 7),
                  frac(4, 7), frac(5, 7), frac(6, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), frac(3, 7))

    def test_even_decimals(self):
        # Even number of Decimals, presented in random order.
        dec = Decimal
        values = [dec('1.1'), dec('2.2'), dec('3.3'),
                  dec('4.4'), dec('5.5'), dec('6.6')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), dec('3.3'))
1698
1699
class TestMedianHigh(TestMedian, UnivariateTypeMixin):
    # median_high: only the even-length cases differ from TestMedian, so
    # only those tests are overridden.
    def setUp(self):
        self.func = statistics.median_high

    def test_even_ints(self):
        # Even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 4)

    def test_even_fractions(self):
        # Even number of Fractions, presented in random order.
        frac = Fraction
        values = [frac(1, 7), frac(2, 7), frac(3, 7),
                  frac(4, 7), frac(5, 7), frac(6, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), frac(4, 7))

    def test_even_decimals(self):
        # Even number of Decimals, presented in random order.
        dec = Decimal
        values = [dec('1.1'), dec('2.2'), dec('3.3'),
                  dec('4.4'), dec('5.5'), dec('6.6')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), dec('4.4'))
1725
1726
class TestMedianGrouped(TestMedian):
    # Checks for median_grouped().
    # The function does not conserve the data element type, so the
    # type-conservation mixin is deliberately not used here.
    def setUp(self):
        self.func = statistics.median_grouped

    def test_odd_number_repeated(self):
        # Odd-length data in which the median value is repeated.
        values = [12, 13, 14, 14, 14, 15, 15]
        assert len(values) % 2 == 1
        self.assertEqual(self.func(values), 14)
        #---
        values = [12, 13, 14, 14, 14, 14, 15]
        assert len(values) % 2 == 1
        self.assertEqual(self.func(values), 13.875)
        #---
        values = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
        assert len(values) % 2 == 1
        self.assertEqual(self.func(values, 5), 19.375)
        #---
        values = [16, 18, 18, 18, 18, 20, 20, 20, 22, 22, 22, 24, 24, 26, 28]
        assert len(values) % 2 == 1
        self.assertApproxEqual(self.func(values, 2), 20.66666667, tol=1e-8)

    def test_even_number_repeated(self):
        # Even-length data in which the median value is repeated.
        values = [5, 10, 10, 15, 20, 20, 20, 25, 25, 30]
        assert len(values) % 2 == 0
        self.assertApproxEqual(self.func(values, 5), 19.16666667, tol=1e-8)
        #---
        values = [2, 3, 4, 4, 4, 5]
        assert len(values) % 2 == 0
        self.assertApproxEqual(self.func(values), 3.83333333, tol=1e-8)
        #---
        values = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 4.5)
        #---
        values = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 4.75)

    def test_repeated_single_value(self):
        # Replaces the AverageMixin version: because median_grouped does
        # not conserve the data type, the result is compared to float(x).
        for value in (5.3, 68, 4.3e17, Fraction(29, 101), Decimal('32.9714')):
            for repeats in (2, 5, 10, 20):
                values = [value]*repeats
                self.assertEqual(self.func(values), float(value))

    def test_odd_fractions(self):
        # Odd number of Fractions, presented in random order.
        frac = Fraction
        values = [frac(5, 4), frac(9, 4), frac(13, 4), frac(13, 4), frac(17, 4)]
        assert len(values) % 2 == 1
        random.shuffle(values)
        self.assertEqual(self.func(values), 3.0)

    def test_even_fractions(self):
        # Even number of Fractions, presented in random order.
        frac = Fraction
        values = [frac(5, 4), frac(9, 4), frac(13, 4),
                  frac(13, 4), frac(17, 4), frac(17, 4)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), 3.25)

    def test_odd_decimals(self):
        # Odd number of Decimals, presented in random order.
        dec = Decimal
        values = [dec('5.5'), dec('6.5'), dec('6.5'), dec('7.5'), dec('8.5')]
        assert len(values) % 2 == 1
        random.shuffle(values)
        self.assertEqual(self.func(values), 6.75)

    def test_even_decimals(self):
        # Even number of Decimals, presented in random order.
        dec = Decimal
        values = [dec('5.5'), dec('5.5'), dec('6.5'),
                  dec('6.5'), dec('7.5'), dec('8.5')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), 6.5)
        #---
        values = [dec('5.5'), dec('5.5'), dec('6.5'),
                  dec('7.5'), dec('7.5'), dec('8.5')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), 7.0)

    def test_interval(self):
        # Explicit class-interval argument.
        values = [2.25, 2.5, 2.5, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertEqual(self.func(values, 0.25), 2.875)
        values = [2.25, 2.5, 2.5, 2.75, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertApproxEqual(self.func(values, 0.25), 2.83333333, tol=1e-8)
        values = [220, 220, 240, 260, 260, 260, 260, 280, 280, 300, 320, 340]
        self.assertEqual(self.func(values, 20), 265.0)

    def test_data_type_error(self):
        # str and bytes are rejected with TypeError, whether they appear
        # as the data points or as the interval.
        for bad_data in (["", "", ""], [b"", b"", b""]):
            self.assertRaises(TypeError, self.func, bad_data)
        for bad_interval in ("", b""):
            self.assertRaises(TypeError, self.func, [1, 2, 3], bad_interval)
1839
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001840
class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Checks for mode(), the discrete most-common-value function.
    def setUp(self):
        self.func = statistics.mode

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        # The value 1 occurs four times and everything else once, so the
        # data has exactly one mode.
        return [1, 1, 1, 1, 3, 4, 7, 9, 0, 8, 2]

    def test_range_data(self):
        # Replaces the UnivariateCommonMixin version: all values are
        # unique, so the first one is expected.
        values = range(20, 50, 3)
        self.assertEqual(self.func(values), 20)

    def test_nominal_data(self):
        # Nominal (non-numeric) data: characters, then words.
        letters = 'abcbdb'
        self.assertEqual(self.func(letters), 'b')
        words = 'fe fi fo fum fi fi'.split()
        self.assertEqual(self.func(words), 'fi')

    def test_discrete_data(self):
        # Duplicate each digit in turn; the duplicated digit is the mode
        # regardless of ordering.
        digits = list(range(10))
        for extra in range(10):
            values = digits + [extra]
            random.shuffle(values)
            self.assertEqual(self.func(values), extra)

    def test_bimodal_data(self):
        # Two values tie for most common.
        values = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
        assert values.count(2) == values.count(6) == 4
        # The first mode encountered, 2, is the expected result.
        self.assertEqual(self.func(values), 2)

    def test_unique_data(self):
        # Every data point is unique, so all tie; the first mode
        # encountered, 0, is the expected result.
        values = list(range(10))
        self.assertEqual(self.func(values), 0)

    def test_none_data(self):
        # None as data must raise TypeError.
        #
        # Needed because mode() is built on collections.Counter, which
        # accepts None and returns an empty dict.
        self.assertRaises(TypeError, self.func, None)

    def test_counter_data(self):
        # A Counter is handled like any other iterable: its keys are the
        # data points and its counts are ignored, so the first key, 1, is
        # the expected result.
        tally = collections.Counter([1, 1, 1, 2])
        self.assertEqual(self.func(tally), 1)
1897
1898
class TestMultiMode(unittest.TestCase):

    def test_basics(self):
        # One mode, several tied modes (in first-seen order), and empty
        # input.
        for text, expected in [
                ('aabbbbbbbbcc', ['b']),
                ('aabbbbccddddeeffffgg', ['b', 'd', 'f']),
                ('', []),
            ]:
            self.assertEqual(statistics.multimode(text), expected)
1906
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001907
class TestFMean(unittest.TestCase):

    def test_basics(self):
        # fmean() accepts assorted numeric types and containers and always
        # hands back a float.
        fmean = statistics.fmean
        dec = Decimal
        frac = Fraction
        cases = [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([dec('3.5'), dec('4.0'), dec('5.25')], 4.25, 'decimals'),
            ([frac(7, 2), frac(4, 1), frac(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, frac(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]
        for values, wanted, label in cases:
            got = fmean(values)
            self.assertIs(type(got), float, label)
            self.assertEqual(got, wanted, label)

    def test_error_cases(self):
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        # Each entry: expected exception, positional arguments for fmean().
        failures = [
            (StatisticsError, ([],)),               # empty input
            (StatisticsError, (iter([]),)),         # empty iterator
            (TypeError, (None,)),                   # non-iterable input
            (TypeError, ([10, None, 20],)),         # non-numeric input
            (TypeError, ()),                        # missing data argument
            (TypeError, ([10, 20, 60], 70)),        # too many arguments
        ]
        for exc_type, args in failures:
            with self.assertRaises(exc_type):
                fmean(*args)

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        not_a_number = float('Nan')
        infinity = float('Inf')
        self.assertTrue(math.isnan(fmean([10, not_a_number])), 'nan')
        self.assertTrue(math.isnan(fmean([not_a_number, infinity])),
                        'nan and infinity')
        self.assertTrue(math.isinf(fmean([10, infinity])), 'infinity')
        with self.assertRaises(ValueError):
            fmean([infinity, -infinity])
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001953
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001954
1955# === Tests for variances and standard deviations ===
1956
class VarianceStdevMixin(UnivariateCommonMixin):
    # Tests common to the variance and standard deviation functions.

    # List this mixin ahead of NumericTestCase in a subclass's bases so
    # that the rel attribute below is the one that gets picked up; see
    # test_shift_data for why a tolerance is needed.

    rel = 1e-12

    def test_single_value(self):
        # A lone data point has zero deviation.
        for value in (11, 19.8, 4.6e14, Fraction(21, 34), Decimal('8.392')):
            self.assertEqual(self.func([value]), 0)

    def test_repeated_single_value(self):
        # Data made of one value repeated has zero deviation.
        for value in (7.2, 49, 8.1e15, Fraction(3, 7), Decimal('62.4802')):
            for repeats in (2, 3, 5, 15):
                samples = [value]*repeats
                self.assertEqual(self.func(samples), 0)

    def test_domain_error_regression(self):
        # Regression test for a domain error exception.
        # (Thanks to Geremy Condra.)
        samples = [0.123456789012345]*10000
        # Identical items mean the exact answer is zero; a little
        # round-off is tolerated, but the result must never go negative.
        outcome = self.func(samples)
        self.assertApproxEqual(outcome, 0.0, tol=5e-17)
        self.assertGreaterEqual(outcome, 0) # A negative result must fail.

    def test_shift_data(self):
        # Adding a constant to every data point should leave the variance
        # or stdev unchanged -- or very nearly so.

        # Rounding makes exact equality an ideal rather than a guarantee,
        # so the comparison honours the tol and/or rel attributes, which
        # subclasses may tighten or loosen.
        base = [1.03, 1.27, 1.94, 2.04, 2.58, 3.14, 4.75, 4.98, 5.42, 6.78]
        baseline = self.func(base)
        # Keep the offset modest: rounding error grows with its size.
        offset = 1e5
        shifted = [value + offset for value in base]
        self.assertApproxEqual(self.func(shifted), baseline)

    def test_shift_data_exact(self):
        # Same idea as test_shift_data, but with integer data so that the
        # result is always exact.
        base = [1, 3, 3, 4, 5, 7, 9, 10, 11, 16]
        assert all(value == int(value) for value in base)
        baseline = self.func(base)
        offset = 10**9
        shifted = [value + offset for value in base]
        self.assertEqual(self.func(shifted), baseline)

    def test_iter_list_same(self):
        # An iterator and a list over the same data must give the same
        # result.

        # This goes beyond the similar test in UnivariateCommonMixin
        # because an earlier design had variance and friends swap between
        # one- and two-pass algorithms, which would sometimes give
        # different results.
        samples = [random.uniform(-3, 8) for _ in range(1000)]
        from_list = self.func(samples)
        self.assertEqual(self.func(iter(samples)), from_list)
2021
2022
class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Checks for pvariance(), the population variance.
    def setUp(self):
        self.func = statistics.pvariance

    def test_exact_uniform(self):
        # For the integers 0..9999 the population variance has a known
        # closed form, so the result is compared exactly.
        samples = list(range(10000))
        random.shuffle(samples)
        closed_form = (10000**2 - 1)/12  # Exact value.
        self.assertEqual(self.func(samples), closed_form)

    def test_ints(self):
        # int data.
        samples = [4, 7, 13, 16]
        self.assertEqual(self.func(samples), 22.5)

    def test_fractions(self):
        # Fraction data: the value is exact and the type is kept.
        frac = Fraction
        samples = [frac(1, 4), frac(1, 4), frac(3, 4), frac(7, 4)]
        outcome = self.func(samples)
        self.assertEqual(outcome, frac(3, 8))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Decimal data: the value is exact and the type is kept.
        dec = Decimal
        samples = [dec("12.1"), dec("12.2"), dec("12.5"), dec("12.9")]
        outcome = self.func(samples)
        self.assertEqual(outcome, dec('0.096875'))
        self.assertIsInstance(outcome, Decimal)
2058
2059
class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Checks for variance(), the sample variance.
    def setUp(self):
        self.func = statistics.variance

    def test_single_value(self):
        # Replaces the VarianceStdevMixin version: a single data point
        # must raise StatisticsError.
        for value in (35, 24.7, 8.2e15, Fraction(19, 30), Decimal('4.2084')):
            self.assertRaises(statistics.StatisticsError, self.func, [value])

    def test_ints(self):
        # int data.
        samples = [4, 7, 13, 16]
        self.assertEqual(self.func(samples), 30)

    def test_fractions(self):
        # Fraction data: the value is exact and the type is kept.
        frac = Fraction
        samples = [frac(1, 4), frac(1, 4), frac(3, 4), frac(7, 4)]
        outcome = self.func(samples)
        self.assertEqual(outcome, frac(1, 2))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Decimal data: the value is exact and the type is kept.
        dec = Decimal
        samples = [dec(2), dec(2), dec(7), dec(9)]
        wanted = 4*dec('9.5')/dec(3)
        outcome = self.func(samples)
        self.assertEqual(outcome, wanted)
        self.assertIsInstance(outcome, Decimal)

    def test_center_not_at_mean(self):
        # An explicit xbar that differs from the true mean is used as the
        # center instead of the mean.
        samples = (1.0, 2.0)
        self.assertEqual(self.func(samples), 0.5)
        self.assertEqual(self.func(samples, xbar=2.0), 1.0)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002098
class TestPStdev(VarianceStdevMixin, NumericTestCase):
    # Checks for pstdev(), the population standard deviation.
    def setUp(self):
        self.func = statistics.pstdev

    def test_compare_to_variance(self):
        # pstdev() must equal the square root of pvariance() on the same
        # data.
        samples = [random.uniform(-17, 24) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.pvariance(samples))
        self.assertEqual(self.func(samples), root_of_variance)

    def test_center_not_at_mean(self):
        # See issue: 40855
        samples = (3, 6, 7, 10)
        self.assertEqual(self.func(samples), 2.5)
        self.assertEqual(self.func(samples, mu=0.5), 6.5)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002115
class TestStdev(VarianceStdevMixin, NumericTestCase):
    # Checks for stdev(), the sample standard deviation.
    def setUp(self):
        self.func = statistics.stdev

    def test_single_value(self):
        # Replaces the VarianceStdevMixin version: a single data point
        # must raise StatisticsError.
        for value in (81, 203.74, 3.9e14, Fraction(5, 21), Decimal('35.719')):
            self.assertRaises(statistics.StatisticsError, self.func, [value])

    def test_compare_to_variance(self):
        # stdev() must equal the square root of variance() on the same
        # data.
        samples = [random.uniform(-2, 9) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.variance(samples))
        self.assertEqual(self.func(samples), root_of_variance)

    def test_center_not_at_mean(self):
        # An explicit xbar that differs from the true mean is used as the
        # center instead of the mean.
        samples = (1.0, 2.0)
        self.assertEqual(self.func(samples, xbar=2.0), 1.0)
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002135
class TestGeometricMean(unittest.TestCase):

    def test_basics(self):
        geometric_mean = statistics.geometric_mean
        for values, wanted in [
                ([54, 24, 36], 36.0),
                ([4.0, 9.0], 6.0),
                ([17.625], 17.625),
            ]:
            self.assertAlmostEqual(geometric_mean(values), wanted)

        # Fixed seed so the randomly generated populations below are
        # reproducible.
        random.seed(86753095551212)
        populations = [
            range(1, 100),
            range(1, 1_000),
            range(1, 10_000),
            range(500, 10_000, 3),
            range(10_000, 500, -3),
            [12, 17, 13, 5, 120, 7],
            [random.expovariate(50.0) for i in range(1_000)],
            [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
            [random.triangular(2000, 3000, 2200) for i in range(3_000)],
        ]
        for population in populations:
            # Reference value: n-th root of the product, in Decimal
            # arithmetic.
            product = math.prod(map(Decimal, population))
            gm_decimal = product ** (Decimal(1) / len(population))
            gm_float = geometric_mean(population)
            self.assertTrue(math.isclose(gm_float, float(gm_decimal)))

    def test_various_input_types(self):
        geometric_mean = statistics.geometric_mean
        dec = Decimal
        frac = Fraction
        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
        wanted = 4.18886
        cases = [
            ([3.5, 4.0, 5.25], 'floats'),
            ([dec('3.5'), dec('4.0'), dec('5.25')], 'decimals'),
            ([frac(7, 2), frac(4, 1), frac(21, 4)], 'fractions'),
            ([3.5, 4, frac(21, 4)], 'mixed types'),
            ((3.5, 4.0, 5.25), 'tuple'),
            (iter([3.5, 4.0, 5.25]), 'iterator'),
        ]
        for values, label in cases:
            got = geometric_mean(values)
            self.assertIs(type(got), float, label)
            self.assertAlmostEqual(got, wanted, places=5)

    def test_big_and_small(self):
        geometric_mean = statistics.geometric_mean

        # Very large inputs must not overflow to infinity.
        huge = 2.0 ** 1000
        huge_gm = geometric_mean([54.0 * huge, 24.0 * huge, 36.0 * huge])
        self.assertTrue(math.isclose(huge_gm, 36.0 * huge))
        self.assertFalse(math.isinf(huge_gm))

        # Very small inputs must not underflow to zero.
        tiny = 2.0 ** -1000
        tiny_gm = geometric_mean([54.0 * tiny, 24.0 * tiny, 36.0 * tiny])
        self.assertTrue(math.isclose(tiny_gm, 36.0 * tiny))
        self.assertNotEqual(tiny_gm, 0.0)

    def test_error_cases(self):
        geometric_mean = statistics.geometric_mean
        StatisticsError = statistics.StatisticsError
        # Each entry: expected exception, positional arguments for
        # geometric_mean().
        failures = [
            (StatisticsError, ([],)),                   # empty input
            (StatisticsError, ([3.5, 0.0, 5.25],)),     # zero input
            (StatisticsError, ([3.5, -4.0, 5.25],)),    # negative input
            (StatisticsError, (iter([]),)),             # empty iterator
            (TypeError, (None,)),                       # non-iterable input
            (TypeError, ([10, None, 20],)),             # non-numeric input
            (TypeError, ()),                            # missing data argument
            (TypeError, ([10, 20, 60], 70)),            # too many arguments
        ]
        for exc_type, args in failures:
            with self.assertRaises(exc_type):
                geometric_mean(*args)

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        geometric_mean = statistics.geometric_mean
        not_a_number = float('Nan')
        infinity = float('Inf')
        self.assertTrue(math.isnan(geometric_mean([10, not_a_number])), 'nan')
        self.assertTrue(math.isnan(geometric_mean([not_a_number, infinity])),
                        'nan and infinity')
        self.assertTrue(math.isinf(geometric_mean([10, infinity])), 'infinity')
        with self.assertRaises(ValueError):
            geometric_mean([infinity, -infinity])
2223
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002224
class TestQuantiles(unittest.TestCase):
    # Checks for statistics.quantiles() in both its default (exclusive)
    # and 'inclusive' modes.

    def test_specific_cases(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.EXC function in MS Excel.
        quantiles = statistics.quantiles

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        data = [120, 200, 250, 320, 350]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [250.0]),
            (3, [200.0, 320.0]),
            (4, [160.0, 250.0, 335.0]),
            (5, [136.0, 220.0, 292.0, 344.0]),
            (6, [120.0, 200.0, 250.0, 320.0, 350.0]),
            (8, [100.0, 160.0, 212.5, 250.0, 302.5, 335.0, 357.5]),
            (10, [88.0, 136.0, 184.0, 220.0, 250.0, 292.0, 326.0, 344.0, 362.0]),
            (12, [80.0, 120.0, 160.0, 200.0, 225.0, 250.0, 285.0, 320.0, 335.0,
                  350.0, 365.0]),
            (15, [72.0, 104.0, 136.0, 168.0, 200.0, 220.0, 240.0, 264.0, 292.0,
                  320.0, 332.0, 344.0, 356.0, 368.0]),
                ]:
            self.assertEqual(expected, quantiles(data, n=n))
            self.assertEqual(len(quantiles(data, n=n)), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n)
                # The generator must be consumed by all(); handing the
                # generator itself to assertTrue() would always pass.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Quantiles should be idempotent
            if len(expected) >= 2:
                self.assertEqual(quantiles(expected, n=n), expected)
            # Cross-check against method='inclusive' which should give
            # the same result after adding in minimum and maximum values
            # extrapolated from the two lowest and two highest points.
            sdata = sorted(data)
            lo = 2 * sdata[0] - sdata[1]
            hi = 2 * sdata[-1] - sdata[-2]
            padded_data = data + [lo, hi]
            self.assertEqual(
                quantiles(data, n=n),
                quantiles(padded_data, n=n, method='inclusive'),
                (n, data),
            )
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n)
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data)
            self.assertEqual(q2, statistics.median(data))

    def test_specific_cases_inclusive(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.INC function in MS Excel
        # and against the quantile() function in SciPy.
        quantiles = statistics.quantiles

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        data = [100, 200, 400, 800]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [300.0]),
            (3, [200.0, 400.0]),
            (4, [175.0, 300.0, 500.0]),
            (5, [160.0, 240.0, 360.0, 560.0]),
            (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
            (8, [137.5, 175, 225.0, 300.0, 375.0, 500.0,650.0]),
            (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
            (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0,
                  500.0, 600.0, 700.0]),
            (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0,
                  400.0, 480.0, 560.0, 640.0, 720.0]),
                ]:
            self.assertEqual(expected, quantiles(data, n=n, method="inclusive"))
            self.assertEqual(len(quantiles(data, n=n, method="inclusive")), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n, method="inclusive")
                # The generator must be consumed by all(); handing the
                # generator itself to assertTrue() would always pass.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n, method="inclusive")
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Natural deciles
        self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        # Whenever n is smaller than the number of data points, running
        # method='inclusive' should give the same result as method='exclusive'
        # after the two included extreme points are removed.
        data = [random.randrange(10_000) for i in range(501)]
        actual = quantiles(data, n=32, method='inclusive')
        data.remove(min(data))
        data.remove(max(data))
        expected = quantiles(data, n=32)
        self.assertEqual(expected, actual)
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data, method='inclusive')
            self.assertEqual(q2, statistics.median(data))

    def test_equal_inputs(self):
        # When every data point is identical, every cut point equals that
        # value, in both modes.
        quantiles = statistics.quantiles
        for n in range(2, 10):
            data = [10.0] * n
            self.assertEqual(quantiles(data), [10.0, 10.0, 10.0])
            self.assertEqual(quantiles(data, method='inclusive'),
                             [10.0, 10.0, 10.0])

    def test_equal_sized_groups(self):
        # The cut points should split the sorted data into groups of equal
        # (or nearly equal) size.
        quantiles = statistics.quantiles
        total = 10_000
        data = [random.expovariate(0.2) for i in range(total)]
        while len(set(data)) != total:
            data.append(random.expovariate(0.2))
        data.sort()

        # Cases where the group size exactly divides the total
        for n in (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000):
            group_size = total // n
            self.assertEqual(
                [bisect.bisect(data, q) for q in quantiles(data, n=n)],
                list(range(group_size, total, group_size)))

        # When the group sizes can't be exactly equal, they should
        # differ by no more than one
        for n in (13, 19, 59, 109, 211, 571, 1019, 1907, 5261, 9769):
            group_sizes = {total // n, total // n + 1}
            pos = [bisect.bisect(data, q) for q in quantiles(data, n=n)]
            sizes = {q - p for p, q in zip(pos, pos[1:])}
            self.assertTrue(sizes <= group_sizes)

    def test_error_cases(self):
        # Bad call signatures and bad argument values must raise.
        quantiles = statistics.quantiles
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(TypeError):
            quantiles()                         # Missing arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 13, n=4)    # Too many arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 4)          # n is a positional argument
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=0)        # n is zero
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=-1)       # n is negative
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], n=1.5)      # n is not an integer
        with self.assertRaises(ValueError):
            quantiles([10, 20, 30], method='X') # method is unknown
        with self.assertRaises(StatisticsError):
            quantiles([10], n=4)                # not enough data points
        with self.assertRaises(TypeError):
            quantiles([10, None, 30], n=4)      # data is non-numeric
2387
2388
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002389class TestNormalDist:
Raymond Hettinger11c79532019-02-23 14:44:07 -08002390
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002391 # General note on precision: The pdf(), cdf(), and overlap() methods
2392 # depend on functions in the math libraries that do not make
2393 # explicit accuracy guarantees. Accordingly, some of the accuracy
2394 # tests below may fail if the underlying math functions are
2395 # inaccurate. There isn't much we can do about this short of
2396 # implementing our own implementations from scratch.
2397
Raymond Hettinger11c79532019-02-23 14:44:07 -08002398 def test_slots(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002399 nd = self.module.NormalDist(300, 23)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002400 with self.assertRaises(TypeError):
2401 vars(nd)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002402 self.assertEqual(tuple(nd.__slots__), ('_mu', '_sigma'))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002403
2404 def test_instantiation_and_attributes(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002405 nd = self.module.NormalDist(500, 17)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002406 self.assertEqual(nd.mean, 500)
2407 self.assertEqual(nd.stdev, 17)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002408 self.assertEqual(nd.variance, 17**2)
2409
2410 # default arguments
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002411 nd = self.module.NormalDist()
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002412 self.assertEqual(nd.mean, 0)
2413 self.assertEqual(nd.stdev, 1)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002414 self.assertEqual(nd.variance, 1**2)
2415
2416 # error case: negative sigma
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002417 with self.assertRaises(self.module.StatisticsError):
2418 self.module.NormalDist(500, -10)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002419
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002420 # verify that subclass type is honored
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002421 class NewNormalDist(self.module.NormalDist):
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002422 pass
2423 nnd = NewNormalDist(200, 5)
2424 self.assertEqual(type(nnd), NewNormalDist)
2425
Raymond Hettinger11c79532019-02-23 14:44:07 -08002426 def test_alternative_constructor(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002427 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002428 data = [96, 107, 90, 92, 110]
2429 # list input
2430 self.assertEqual(NormalDist.from_samples(data), NormalDist(99, 9))
2431 # tuple input
2432 self.assertEqual(NormalDist.from_samples(tuple(data)), NormalDist(99, 9))
2433 # iterator input
2434 self.assertEqual(NormalDist.from_samples(iter(data)), NormalDist(99, 9))
2435 # error cases
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002436 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002437 NormalDist.from_samples([]) # empty input
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002438 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002439 NormalDist.from_samples([10]) # only one input
2440
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002441 # verify that subclass type is honored
2442 class NewNormalDist(NormalDist):
2443 pass
2444 nnd = NewNormalDist.from_samples(data)
2445 self.assertEqual(type(nnd), NewNormalDist)
2446
Raymond Hettinger11c79532019-02-23 14:44:07 -08002447 def test_sample_generation(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002448 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002449 mu, sigma = 10_000, 3.0
2450 X = NormalDist(mu, sigma)
2451 n = 1_000
2452 data = X.samples(n)
2453 self.assertEqual(len(data), n)
2454 self.assertEqual(set(map(type, data)), {float})
2455 # mean(data) expected to fall within 8 standard deviations
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002456 xbar = self.module.mean(data)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002457 self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)
2458
2459 # verify that seeding makes reproducible sequences
2460 n = 100
2461 data1 = X.samples(n, seed='happiness and joy')
2462 data2 = X.samples(n, seed='trouble and despair')
2463 data3 = X.samples(n, seed='happiness and joy')
2464 data4 = X.samples(n, seed='trouble and despair')
2465 self.assertEqual(data1, data3)
2466 self.assertEqual(data2, data4)
2467 self.assertNotEqual(data1, data2)
2468
Raymond Hettinger11c79532019-02-23 14:44:07 -08002469 def test_pdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002470 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002471 X = NormalDist(100, 15)
2472 # Verify peak around center
2473 self.assertLess(X.pdf(99), X.pdf(100))
2474 self.assertLess(X.pdf(101), X.pdf(100))
2475 # Test symmetry
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002476 for i in range(50):
2477 self.assertAlmostEqual(X.pdf(100 - i), X.pdf(100 + i))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002478 # Test vs CDF
2479 dx = 2.0 ** -10
2480 for x in range(90, 111):
2481 est_pdf = (X.cdf(x + dx) - X.cdf(x)) / dx
2482 self.assertAlmostEqual(X.pdf(x), est_pdf, places=4)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002483 # Test vs table of known values -- CRC 26th Edition
2484 Z = NormalDist()
2485 for x, px in enumerate([
2486 0.3989, 0.3989, 0.3989, 0.3988, 0.3986,
2487 0.3984, 0.3982, 0.3980, 0.3977, 0.3973,
2488 0.3970, 0.3965, 0.3961, 0.3956, 0.3951,
2489 0.3945, 0.3939, 0.3932, 0.3925, 0.3918,
2490 0.3910, 0.3902, 0.3894, 0.3885, 0.3876,
2491 0.3867, 0.3857, 0.3847, 0.3836, 0.3825,
2492 0.3814, 0.3802, 0.3790, 0.3778, 0.3765,
2493 0.3752, 0.3739, 0.3725, 0.3712, 0.3697,
2494 0.3683, 0.3668, 0.3653, 0.3637, 0.3621,
2495 0.3605, 0.3589, 0.3572, 0.3555, 0.3538,
2496 ]):
2497 self.assertAlmostEqual(Z.pdf(x / 100.0), px, places=4)
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002498 self.assertAlmostEqual(Z.pdf(-x / 100.0), px, places=4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002499 # Error case: variance is zero
2500 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002501 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002502 Y.pdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002503 # Special values
2504 self.assertEqual(X.pdf(float('-Inf')), 0.0)
2505 self.assertEqual(X.pdf(float('Inf')), 0.0)
2506 self.assertTrue(math.isnan(X.pdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002507
2508 def test_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002509 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002510 X = NormalDist(100, 15)
2511 cdfs = [X.cdf(x) for x in range(1, 200)]
2512 self.assertEqual(set(map(type, cdfs)), {float})
2513 # Verify montonic
2514 self.assertEqual(cdfs, sorted(cdfs))
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002515 # Verify center (should be exact)
2516 self.assertEqual(X.cdf(100), 0.50)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002517 # Check against a table of known values
2518 # https://en.wikipedia.org/wiki/Standard_normal_table#Cumulative
2519 Z = NormalDist()
2520 for z, cum_prob in [
2521 (0.00, 0.50000), (0.01, 0.50399), (0.02, 0.50798),
2522 (0.14, 0.55567), (0.29, 0.61409), (0.33, 0.62930),
2523 (0.54, 0.70540), (0.60, 0.72575), (1.17, 0.87900),
2524 (1.60, 0.94520), (2.05, 0.97982), (2.89, 0.99807),
2525 (3.52, 0.99978), (3.98, 0.99997), (4.07, 0.99998),
2526 ]:
2527 self.assertAlmostEqual(Z.cdf(z), cum_prob, places=5)
2528 self.assertAlmostEqual(Z.cdf(-z), 1.0 - cum_prob, places=5)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002529 # Error case: variance is zero
2530 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002531 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002532 Y.cdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002533 # Special values
2534 self.assertEqual(X.cdf(float('-Inf')), 0.0)
2535 self.assertEqual(X.cdf(float('Inf')), 1.0)
2536 self.assertTrue(math.isnan(X.cdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002537
Neil Schemenauer52a48e62019-07-30 11:08:18 -07002538 @support.skip_if_pgo_task
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002539 def test_inv_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002540 NormalDist = self.module.NormalDist
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002541
2542 # Center case should be exact.
2543 iq = NormalDist(100, 15)
2544 self.assertEqual(iq.inv_cdf(0.50), iq.mean)
2545
2546 # Test versus a published table of known percentage points.
2547 # See the second table at the bottom of the page here:
2548 # http://people.bath.ac.uk/masss/tables/normaltable.pdf
2549 Z = NormalDist()
2550 pp = {5.0: (0.000, 1.645, 2.576, 3.291, 3.891,
2551 4.417, 4.892, 5.327, 5.731, 6.109),
2552 2.5: (0.674, 1.960, 2.807, 3.481, 4.056,
2553 4.565, 5.026, 5.451, 5.847, 6.219),
2554 1.0: (1.282, 2.326, 3.090, 3.719, 4.265,
2555 4.753, 5.199, 5.612, 5.998, 6.361)}
2556 for base, row in pp.items():
2557 for exp, x in enumerate(row, start=1):
2558 p = base * 10.0 ** (-exp)
2559 self.assertAlmostEqual(-Z.inv_cdf(p), x, places=3)
2560 p = 1.0 - p
2561 self.assertAlmostEqual(Z.inv_cdf(p), x, places=3)
2562
2563 # Match published example for MS Excel
2564 # https://support.office.com/en-us/article/norm-inv-function-54b30935-fee7-493c-bedb-2278a9db7e13
2565 self.assertAlmostEqual(NormalDist(40, 1.5).inv_cdf(0.908789), 42.000002)
2566
2567 # One million equally spaced probabilities
2568 n = 2**20
2569 for p in range(1, n):
2570 p /= n
2571 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2572
2573 # One hundred ever smaller probabilities to test tails out to
2574 # extreme probabilities: 1 / 2**50 and (2**50-1) / 2 ** 50
2575 for e in range(1, 51):
2576 p = 2.0 ** (-e)
2577 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2578 p = 1.0 - p
2579 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2580
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002581 # Now apply cdf() first. Near the tails, the round-trip loses
2582 # precision and is ill-conditioned (small changes in the inputs
2583 # give large changes in the output), so only check to 5 places.
2584 for x in range(200):
2585 self.assertAlmostEqual(iq.inv_cdf(iq.cdf(x)), x, places=5)
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002586
2587 # Error cases:
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002588 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002589 iq.inv_cdf(0.0) # p is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002590 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002591 iq.inv_cdf(-0.1) # p under zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002592 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002593 iq.inv_cdf(1.0) # p is one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002594 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002595 iq.inv_cdf(1.1) # p over one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002596 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002597 iq = NormalDist(100, 0) # sigma is zero
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002598 iq.inv_cdf(0.5)
2599
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002600 # Special values
2601 self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
2602
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002603 def test_quantiles(self):
2604 # Quartiles of a standard normal distribution
2605 Z = self.module.NormalDist()
2606 for n, expected in [
2607 (1, []),
2608 (2, [0.0]),
2609 (3, [-0.4307, 0.4307]),
2610 (4 ,[-0.6745, 0.0, 0.6745]),
2611 ]:
2612 actual = Z.quantiles(n=n)
2613 self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2614 for e, a in zip(expected, actual)))
2615
Raymond Hettinger318d5372019-03-06 22:59:40 -08002616 def test_overlap(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002617 NormalDist = self.module.NormalDist
Raymond Hettinger318d5372019-03-06 22:59:40 -08002618
2619 # Match examples from Imman and Bradley
2620 for X1, X2, published_result in [
2621 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0), 0.80258),
2622 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0), 0.60993),
2623 ]:
2624 self.assertAlmostEqual(X1.overlap(X2), published_result, places=4)
2625 self.assertAlmostEqual(X2.overlap(X1), published_result, places=4)
2626
2627 # Check against integration of the PDF
2628 def overlap_numeric(X, Y, *, steps=8_192, z=5):
2629 'Numerical integration cross-check for overlap() '
2630 fsum = math.fsum
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002631 center = (X.mean + Y.mean) / 2.0
2632 width = z * max(X.stdev, Y.stdev)
Raymond Hettinger318d5372019-03-06 22:59:40 -08002633 start = center - width
2634 dx = 2.0 * width / steps
2635 x_arr = [start + i*dx for i in range(steps)]
2636 xp = list(map(X.pdf, x_arr))
2637 yp = list(map(Y.pdf, x_arr))
2638 total = max(fsum(xp), fsum(yp))
2639 return fsum(map(min, xp, yp)) / total
2640
2641 for X1, X2 in [
2642 # Examples from Imman and Bradley
2643 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0)),
2644 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2645 # Example from https://www.rasch.org/rmt/rmt101r.htm
2646 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2647 # Gender heights from http://www.usablestats.com/lessons/normal
2648 (NormalDist(70, 4), NormalDist(65, 3.5)),
2649 # Misc cases with equal standard deviations
2650 (NormalDist(100, 15), NormalDist(110, 15)),
2651 (NormalDist(-100, 15), NormalDist(110, 15)),
2652 (NormalDist(-100, 15), NormalDist(-110, 15)),
2653 # Misc cases with unequal standard deviations
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002654 (NormalDist(100, 12), NormalDist(100, 15)),
Raymond Hettinger318d5372019-03-06 22:59:40 -08002655 (NormalDist(100, 12), NormalDist(110, 15)),
2656 (NormalDist(100, 12), NormalDist(150, 15)),
2657 (NormalDist(100, 12), NormalDist(150, 35)),
2658 # Misc cases with small values
2659 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.003)),
2660 (NormalDist(1.000, 0.002), NormalDist(1.006, 0.0003)),
2661 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.099)),
2662 ]:
2663 self.assertAlmostEqual(X1.overlap(X2), overlap_numeric(X1, X2), places=5)
2664 self.assertAlmostEqual(X2.overlap(X1), overlap_numeric(X1, X2), places=5)
2665
2666 # Error cases
2667 X = NormalDist()
2668 with self.assertRaises(TypeError):
2669 X.overlap() # too few arguments
2670 with self.assertRaises(TypeError):
2671 X.overlap(X, X) # too may arguments
2672 with self.assertRaises(TypeError):
2673 X.overlap(None) # right operand not a NormalDist
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002674 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002675 X.overlap(NormalDist(1, 0)) # right operand sigma is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002676 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002677 NormalDist(1, 0).overlap(X) # left operand sigma is zero
2678
Raymond Hettinger70f027d2020-04-16 10:25:14 -07002679 def test_zscore(self):
2680 NormalDist = self.module.NormalDist
2681 X = NormalDist(100, 15)
2682 self.assertEqual(X.zscore(142), 2.8)
2683 self.assertEqual(X.zscore(58), -2.8)
2684 self.assertEqual(X.zscore(100), 0.0)
2685 with self.assertRaises(TypeError):
2686 X.zscore() # too few arguments
2687 with self.assertRaises(TypeError):
2688 X.zscore(1, 1) # too may arguments
2689 with self.assertRaises(TypeError):
2690 X.zscore(None) # non-numeric type
2691 with self.assertRaises(self.module.StatisticsError):
2692 NormalDist(1, 0).zscore(100) # sigma is zero
2693
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002694 def test_properties(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002695 X = self.module.NormalDist(100, 15)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002696 self.assertEqual(X.mean, 100)
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002697 self.assertEqual(X.median, 100)
2698 self.assertEqual(X.mode, 100)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002699 self.assertEqual(X.stdev, 15)
2700 self.assertEqual(X.variance, 225)
2701
Raymond Hettinger11c79532019-02-23 14:44:07 -08002702 def test_same_type_addition_and_subtraction(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002703 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002704 X = NormalDist(100, 12)
2705 Y = NormalDist(40, 5)
2706 self.assertEqual(X + Y, NormalDist(140, 13)) # __add__
2707 self.assertEqual(X - Y, NormalDist(60, 13)) # __sub__
2708
2709 def test_translation_and_scaling(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002710 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002711 X = NormalDist(100, 15)
2712 y = 10
2713 self.assertEqual(+X, NormalDist(100, 15)) # __pos__
2714 self.assertEqual(-X, NormalDist(-100, 15)) # __neg__
2715 self.assertEqual(X + y, NormalDist(110, 15)) # __add__
2716 self.assertEqual(y + X, NormalDist(110, 15)) # __radd__
2717 self.assertEqual(X - y, NormalDist(90, 15)) # __sub__
2718 self.assertEqual(y - X, NormalDist(-90, 15)) # __rsub__
2719 self.assertEqual(X * y, NormalDist(1000, 150)) # __mul__
2720 self.assertEqual(y * X, NormalDist(1000, 150)) # __rmul__
2721 self.assertEqual(X / y, NormalDist(10, 1.5)) # __truediv__
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002722 with self.assertRaises(TypeError): # __rtruediv__
Raymond Hettinger11c79532019-02-23 14:44:07 -08002723 y / X
2724
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002725 def test_unary_operations(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002726 NormalDist = self.module.NormalDist
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002727 X = NormalDist(100, 12)
2728 Y = +X
2729 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002730 self.assertEqual(X.mean, Y.mean)
2731 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002732 Y = -X
2733 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002734 self.assertEqual(X.mean, -Y.mean)
2735 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002736
Raymond Hettinger11c79532019-02-23 14:44:07 -08002737 def test_equality(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002738 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002739 nd1 = NormalDist()
2740 nd2 = NormalDist(2, 4)
2741 nd3 = NormalDist()
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002742 nd4 = NormalDist(2, 4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002743 nd5 = NormalDist(2, 8)
2744 nd6 = NormalDist(8, 4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002745 self.assertNotEqual(nd1, nd2)
2746 self.assertEqual(nd1, nd3)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002747 self.assertEqual(nd2, nd4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002748 self.assertNotEqual(nd2, nd5)
2749 self.assertNotEqual(nd2, nd6)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002750
2751 # Test NotImplemented when types are different
2752 class A:
2753 def __eq__(self, other):
2754 return 10
2755 a = A()
2756 self.assertEqual(nd1.__eq__(a), NotImplemented)
2757 self.assertEqual(nd1 == a, 10)
2758 self.assertEqual(a == nd1, 10)
2759
2760 # All subclasses to compare equal giving the same behavior
2761 # as list, tuple, int, float, complex, str, dict, set, etc.
2762 class SizedNormalDist(NormalDist):
2763 def __init__(self, mu, sigma, n):
2764 super().__init__(mu, sigma)
2765 self.n = n
2766 s = SizedNormalDist(100, 15, 57)
2767 nd4 = NormalDist(100, 15)
2768 self.assertEqual(s, nd4)
2769
2770 # Don't allow duck type equality because we wouldn't
2771 # want a lognormal distribution to compare equal
2772 # to a normal distribution with the same parameters
2773 class LognormalDist:
2774 def __init__(self, mu, sigma):
2775 self.mu = mu
2776 self.sigma = sigma
2777 lnd = LognormalDist(100, 15)
2778 nd = NormalDist(100, 15)
2779 self.assertNotEqual(nd, lnd)
2780
2781 def test_pickle_and_copy(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002782 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002783 nd1 = copy.copy(nd)
2784 self.assertEqual(nd, nd1)
2785 nd2 = copy.deepcopy(nd)
2786 self.assertEqual(nd, nd2)
2787 nd3 = pickle.loads(pickle.dumps(nd))
2788 self.assertEqual(nd, nd3)
2789
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002790 def test_hashability(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002791 ND = self.module.NormalDist
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002792 s = {ND(100, 15), ND(100.0, 15.0), ND(100, 10), ND(95, 15), ND(100, 15)}
2793 self.assertEqual(len(s), 3)
2794
Raymond Hettinger11c79532019-02-23 14:44:07 -08002795 def test_repr(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002796 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002797 self.assertEqual(repr(nd), 'NormalDist(mu=37.5, sigma=5.625)')
2798
# Swapping sys.modules['statistics'] is needed to avoid this error:
#   _pickle.PicklingError:
#   Can't pickle <class 'statistics.NormalDist'>:
#   it's not the same object as statistics.NormalDist
class TestNormalDistPython(unittest.TestCase, TestNormalDist):
    """Run the shared NormalDist tests with ``module = py_statistics``."""

    # NOTE(review): py_statistics is defined elsewhere in this file;
    # presumably the pure-Python build of the module -- confirm there.
    module = py_statistics

    def setUp(self):
        # Register the implementation under test under the name
        # 'statistics' for the duration of each test.
        sys.modules.update(statistics=self.module)

    def tearDown(self):
        # Put the normally imported module back.
        sys.modules.update(statistics=statistics)
2810
2811
@unittest.skipUnless(c_statistics, 'requires _statistics')
class TestNormalDistC(unittest.TestCase, TestNormalDist):
    """Run the shared NormalDist tests with ``module = c_statistics``.

    The whole class is skipped when ``c_statistics`` is false.
    """

    # NOTE(review): c_statistics is defined elsewhere in this file;
    # presumably the build backed by the _statistics accelerator named
    # in the skip message -- confirm there.
    module = c_statistics

    def setUp(self):
        # Register the implementation under test under the name
        # 'statistics' for the duration of each test.
        sys.modules.update(statistics=self.module)

    def tearDown(self):
        # Put the normally imported module back.
        sys.modules.update(statistics=statistics)
2820
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002821
2822# === Run tests ===
2823
def load_tests(loader, tests, ignore):
    """Add this module's doctests to the suite and hand the suite back.

    Only *tests* is used; the other two parameters are accepted and
    ignored.
    """
    doctest_suite = doctest.DocTestSuite()
    tests.addTests(doctest_suite)
    return tests
2827 return tests
2828
2829
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()