blob: 4b8686b681822049443aa22877cea269b59faea9 [file] [log] [blame]
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001"""Test suite for statistics module, including helper NumericTestCase and
2approx_equal function.
3
4"""
5
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07006import bisect
Larry Hastingsf5e987b2013-10-19 11:50:09 -07007import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +03008import collections.abc
Raymond Hettinger11c79532019-02-23 14:44:07 -08009import copy
Larry Hastingsf5e987b2013-10-19 11:50:09 -070010import decimal
11import doctest
12import math
Raymond Hettinger11c79532019-02-23 14:44:07 -080013import pickle
Larry Hastingsf5e987b2013-10-19 11:50:09 -070014import random
Serhiy Storchakab12cb6a2013-12-08 18:16:18 +020015import sys
Larry Hastingsf5e987b2013-10-19 11:50:09 -070016import unittest
Neil Schemenauer52a48e62019-07-30 11:08:18 -070017from test import support
Hai Shi79bb2c92020-08-06 19:51:29 +080018from test.support import import_helper
Larry Hastingsf5e987b2013-10-19 11:50:09 -070019
20from decimal import Decimal
21from fractions import Fraction
22
23
24# Module to be tested.
25import statistics
26
27
28# === Helper functions and class ===
29
def sign(x):
    """Return -1.0 for negatives, including -0.0, otherwise +1.0."""
    # copysign transfers only the sign bit of x onto a magnitude of one,
    # which is what lets -0.0 be told apart from +0.0.
    unit = 1
    return math.copysign(unit, x)
33
Steven D'Apranob28c3272015-12-01 19:59:53 +110034def _nan_equal(a, b):
35 """Return True if a and b are both the same kind of NAN.
36
37 >>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
38 True
39 >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
40 True
41 >>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
42 False
43 >>> _nan_equal(Decimal(42), Decimal('NAN'))
44 False
45
46 >>> _nan_equal(float('NAN'), float('NAN'))
47 True
48 >>> _nan_equal(float('NAN'), 0.5)
49 False
50
51 >>> _nan_equal(float('NAN'), Decimal('NAN'))
52 False
53
54 NAN payloads are not compared.
55 """
56 if type(a) is not type(b):
57 return False
58 if isinstance(a, float):
59 return math.isnan(a) and math.isnan(b)
60 aexp = a.as_tuple()[2]
61 bexp = b.as_tuple()[2]
62 return (aexp == bexp) and (aexp in ('n', 'N')) # Both NAN or both sNAN.
63
64
Larry Hastingsf5e987b2013-10-19 11:50:09 -070065def _calc_errors(actual, expected):
66 """Return the absolute and relative errors between two numbers.
67
68 >>> _calc_errors(100, 75)
69 (25, 0.25)
70 >>> _calc_errors(100, 100)
71 (0, 0.0)
72
73 Returns the (absolute error, relative error) between the two arguments.
74 """
75 base = max(abs(actual), abs(expected))
76 abs_err = abs(actual - expected)
77 rel_err = abs_err/base if base else float('inf')
78 return (abs_err, rel_err)
79
80
def approx_equal(x, y, tol=1e-12, rel=1e-7):
    """approx_equal(x, y [, tol [, rel]]) => True|False

    Return True if the numbers x and y are equal to within some margin of
    error, otherwise return False. Numbers which compare equal always
    compare approximately equal too.

    The margin of error is the absolute error tol or the relative error
    rel, whichever allows the bigger difference between x and y.

    Both tol and rel must be non-negative; a negative value raises
    ValueError. The defaults are tol=1e-12 and rel=1e-7.

    >>> approx_equal(1.2589, 1.2587, tol=0.0003, rel=0)
    True
    >>> approx_equal(1.2589, 1.2587, tol=0.0001, rel=0)
    False

    The absolute error is abs(x-y); x and y are approximately equal if it
    is less than or equal to tol.

    The relative error is the smaller of abs((x-y)/x) and abs((x-y)/y),
    provided x or y are not zero; x and y are approximately equal if it is
    less than or equal to rel.

    Complex numbers are not directly supported. To compare complex
    numbers, extract their real and imaginary parts and compare them
    individually.

    NANs always compare unequal, even with themselves. Infinities compare
    approximately equal if they have the same sign (both positive or both
    negative). Infinities with different signs compare unequal; so do
    comparisons of infinities with finite numbers.
    """
    if tol < 0 or rel < 0:
        raise ValueError('error tolerances must be non-negative')
    # A NAN is never equal to anything, not even approximately.
    if math.isnan(x) or math.isnan(y):
        return False
    # Exact equality also covers two infinities of the same sign.
    if x == y:
        return True
    # Any infinity left at this point is paired either with an infinity
    # of the opposite sign or with a finite number.
    if math.isinf(x) or math.isinf(y):
        return False
    # Only pairs of finite, unequal numbers get this far.
    difference = abs(x - y)
    larger_magnitude = max(abs(x), abs(y))
    return difference <= max(tol, rel*larger_magnitude)
132
133
134# This class exists only as somewhere to stick a docstring containing
135# doctests. The following docstring and tests were originally in a separate
# module. Now that it has been merged in here, I need somewhere to hang the
137# docstring. Ultimately, this class will die, and the information below will
138# either become redundant, or be moved into more appropriate places.
139class _DoNothing:
140 """
141 When doing numeric work, especially with floats, exact equality is often
142 not what you want. Due to round-off error, it is often a bad idea to try
143 to compare floats with equality. Instead the usual procedure is to test
144 them with some (hopefully small!) allowance for error.
145
146 The ``approx_equal`` function allows you to specify either an absolute
147 error tolerance, or a relative error, or both.
148
149 Absolute error tolerances are simple, but you need to know the magnitude
150 of the quantities being compared:
151
152 >>> approx_equal(12.345, 12.346, tol=1e-3)
153 True
154 >>> approx_equal(12.345e6, 12.346e6, tol=1e-3) # tol is too small.
155 False
156
157 Relative errors are more suitable when the values you are comparing can
158 vary in magnitude:
159
160 >>> approx_equal(12.345, 12.346, rel=1e-4)
161 True
162 >>> approx_equal(12.345e6, 12.346e6, rel=1e-4)
163 True
164
165 but a naive implementation of relative error testing can run into trouble
166 around zero.
167
168 If you supply both an absolute tolerance and a relative error, the
169 comparison succeeds if either individual test succeeds:
170
171 >>> approx_equal(12.345e6, 12.346e6, tol=1e-3, rel=1e-4)
172 True
173
174 """
175 pass
176
177
178
# We prefer NumericTestCase.assertApproxEqual for testing numeric values that
# may not be exactly equal, and avoid using TestCase.assertAlmostEqual,
# because it sucks :-)
181
# Two fresh imports of the statistics module: one with the _statistics
# module blocked, and one with _statistics freshly imported alongside it.
py_statistics = import_helper.import_fresh_module(
    'statistics', blocked=['_statistics'])
c_statistics = import_helper.import_fresh_module(
    'statistics', fresh=['_statistics'])
Dong-hee Na8ad22a42019-08-25 02:51:20 +0900186
187
class TestModules(unittest.TestCase):
    # Check which module each listed function reports as its home, for
    # both imports of the statistics module.
    func_names = ['_normal_dist_inv_cdf']

    def test_py_functions(self):
        for fname in self.func_names:
            func = getattr(py_statistics, fname)
            self.assertEqual(func.__module__, 'statistics')

    @unittest.skipUnless(c_statistics, 'requires _statistics')
    def test_c_functions(self):
        for fname in self.func_names:
            func = getattr(c_statistics, fname)
            self.assertEqual(func.__module__, '_statistics')
199
200
class NumericTestCase(unittest.TestCase):
    """TestCase subclass with extra support for numeric comparisons.

    Everything from ``unittest.TestCase`` is available, including the
    standard ``TestCase.assertAlmostEqual``; on top of that this class
    provides ``assertApproxEqual``.
    """
    # Zero tolerances mean exact equality is expected unless overridden.
    tol = rel = 0

    def assertApproxEqual(
            self, first, second, tol=None, rel=None, msg=None
            ):
        """Test passes if ``first`` and ``second`` are approximately equal.

        The two values must agree to within ``tol``, an absolute error, or
        ``rel``, a relative error.

        A ``tol`` or ``rel`` that is None or not given falls back to the
        test attribute of the same name (by default, 0).

        ``first`` and ``second`` may be numbers, or sequences of numbers.
        Sequences are compared element-by-element.

        >>> class MyTest(NumericTestCase):
        ...     def test_number(self):
        ...         x = 1.0/6
        ...         y = sum([x]*6)
        ...         self.assertApproxEqual(y, 1.0, tol=1e-15)
        ...     def test_sequence(self):
        ...         a = [1.001, 1.001e-10, 1.001e10]
        ...         b = [1.0, 1e-10, 1e10]
        ...         self.assertApproxEqual(a, b, rel=1e-3)
        ...
        >>> import unittest
        >>> from io import StringIO  # Suppress test runner output.
        >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest)
        >>> unittest.TextTestRunner(stream=StringIO()).run(suite)
        <unittest.runner.TextTestResult run=2 errors=0 failures=0>

        """
        tol = self.tol if tol is None else tol
        rel = self.rel if rel is None else rel
        both_sequences = (
            isinstance(first, collections.abc.Sequence) and
            isinstance(second, collections.abc.Sequence)
        )
        if both_sequences:
            self._check_approx_seq(first, second, tol, rel, msg)
        else:
            self._check_approx_num(first, second, tol, rel, msg)

    def _check_approx_seq(self, first, second, tol, rel, msg):
        # Sequences of different lengths fail before any element is
        # compared.
        if len(first) != len(second):
            standardMsg = (
                "sequences differ in length: %d items != %d items"
                % (len(first), len(second))
            )
            raise self.failureException(self._formatMessage(msg, standardMsg))
        for index, (actual, expected) in enumerate(zip(first, second)):
            self._check_approx_num(actual, expected, tol, rel, msg, index)

    def _check_approx_num(self, first, second, tol, rel, msg, idx=None):
        # idx is the position within a sequence comparison, or None when
        # two plain numbers are being compared.
        if not approx_equal(first, second, tol, rel):
            standardMsg = self._make_std_err_msg(first, second, tol, rel, idx)
            raise self.failureException(self._formatMessage(msg, standardMsg))

    @staticmethod
    def _make_std_err_msg(first, second, tol, rel, idx):
        # Build the standard error message for approx_equal failures.
        assert first != second
        lines = [
            ' %r != %r',
            ' values differ by more than tol=%r and rel=%r',
            ' -> absolute error = %r',
            ' -> relative error = %r',
        ]
        if idx is not None:
            # Sequence comparisons get a header naming the first index at
            # which the sequences differ.
            lines.insert(0, 'numeric sequences first differ at index %d.' % idx)
        # Work out the actual errors to report.
        abs_err, rel_err = _calc_errors(first, second)
        return '\n'.join(lines) % (first, second, tol, rel, abs_err, rel_err)
290
291
292# ========================
293# === Test the helpers ===
294# ========================
295
class TestSign(unittest.TestCase):
    """Test that the helper function sign() works correctly."""
    def testZeroes(self):
        # Signed zeroes must report their own sign.
        positive_zero, negative_zero = 0.0, -0.0
        self.assertEqual(sign(positive_zero), +1)
        self.assertEqual(sign(negative_zero), -1)
302
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700303
304# --- Tests for approx_equal ---
305
class ApproxEqualSymmetryTest(unittest.TestCase):
    # Test symmetry of approx_equal: the result must not depend on the
    # order in which the two values are passed.

    def test_relative_symmetry(self):
        # Check that approx_equal treats relative error symmetrically.
        # (a-b)/a is usually not equal to (a-b)/b. Ensure that this
        # doesn't matter.
        #
        # Note: the reason for this test is that an early version
        # of approx_equal was not symmetric. A relative error test
        # would pass, or fail, depending on which value was passed
        # as the first argument.
        #
        args1 = [2456, 37.8, -12.45, Decimal('2.54'), Fraction(17, 54)]
        args2 = [2459, 37.2, -12.41, Decimal('2.59'), Fraction(15, 54)]
        assert len(args1) == len(args2)
        for a, b in zip(args1, args2):
            self.do_relative_symmetry(a, b)

    def do_relative_symmetry(self, a, b):
        # Helper: compare a and b in both orders using a relative error
        # that lies between the two possible relative errors.
        a, b = min(a, b), max(a, b)
        assert a < b
        delta = b - a  # The absolute difference between the values.
        rel_err1, rel_err2 = abs(delta/a), abs(delta/b)
        # Choose an error margin halfway between the two.
        rel = (rel_err1 + rel_err2)/2
        # Now see that values a and b compare approx equal regardless of
        # which is given first.
        self.assertTrue(approx_equal(a, b, tol=0, rel=rel))
        self.assertTrue(approx_equal(b, a, tol=0, rel=rel))

    def test_symmetry(self):
        # Test that approx_equal(a, b) == approx_equal(b, a)
        args = [-23, -2, 5, 107, 93568]
        delta = 2
        for a in args:
            for type_ in (int, float, Decimal, Fraction):
                x = type_(a)*100
                y = x + delta
                r = abs(delta/max(x, y))
                # There are five cases to check:
                # 1) actual error <= tol, <= rel
                self.do_symmetry_test(x, y, tol=delta, rel=r)
                self.do_symmetry_test(x, y, tol=delta+1, rel=2*r)
                # 2) actual error > tol, > rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r/2)
                # 3) actual error <= tol, > rel
                self.do_symmetry_test(x, y, tol=delta, rel=r/2)
                # 4) actual error > tol, <= rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r)
                self.do_symmetry_test(x, y, tol=delta-1, rel=2*r)
                # 5) exact equality test
                self.do_symmetry_test(x, x, tol=0, rel=0)
                self.do_symmetry_test(x, y, tol=0, rel=0)

    def do_symmetry_test(self, a, b, tol, rel):
        # Helper: fail if swapping a and b changes the result.
        #
        # The template uses a printf-style %r placeholder, so it has to be
        # filled in with the % operator; str.format() would leave the
        # placeholder untouched and drop the arguments from the failure
        # message. The arguments are wrapped in a 1-tuple so that the whole
        # (a, b, tol, rel) tuple is interpolated into the single %r.
        template = "approx_equal comparisons don't match for %r"
        flag1 = approx_equal(a, b, tol, rel)
        flag2 = approx_equal(b, a, tol, rel)
        self.assertEqual(flag1, flag2, template % ((a, b, tol, rel),))
366
367
class ApproxEqualExactTest(unittest.TestCase):
    # Exercise approx_equal with values that are exactly equal. Such values
    # must compare approximately equal whatever error tolerances are given.

    def do_exactly_equal_test(self, x, tol, rel):
        # Helper: both x and its negation must compare equal to themselves.
        for value in (x, -x):
            result = approx_equal(value, value, tol=tol, rel=rel)
            self.assertTrue(result, 'equality failure for x=%r' % value)

    def test_exactly_equal_ints(self):
        # Equal ints are exactly equal.
        ints = [42, 19740, 14974, 230, 1795, 700245, 36587]
        for n in ints:
            self.do_exactly_equal_test(n, 0, 0)

    def test_exactly_equal_floats(self):
        # Equal floats are exactly equal.
        floats = [0.42, 1.9740, 1497.4, 23.0, 179.5, 70.0245, 36.587]
        for x in floats:
            self.do_exactly_equal_test(x, 0, 0)

    def test_exactly_equal_fractions(self):
        # Equal Fractions are exactly equal.
        pairs = [(1, 2), (0, 1), (5, 3), (9, 7), (35, 36), (3, 7)]
        for numerator, denominator in pairs:
            self.do_exactly_equal_test(Fraction(numerator, denominator), 0, 0)

    def test_exactly_equal_decimals(self):
        # Equal Decimals are exactly equal.
        for text in "8.2 31.274 912.04 16.745 1.2047".split():
            self.do_exactly_equal_test(Decimal(text), 0, 0)

    def test_exactly_equal_absolute(self):
        # Equal values are exactly equal with an absolute error.
        for n in [16, 1013, 1372, 1198, 971, 4]:
            # Each number is tried as an int, a float and a Fraction.
            for value in (n, n/10, Fraction(n, 1234)):
                self.do_exactly_equal_test(value, 0.01, 0)

    def test_exactly_equal_absolute_decimals(self):
        # Equal Decimals are exactly equal with an absolute error.
        tolerance = Decimal("0.01")
        self.do_exactly_equal_test(Decimal("3.571"), tolerance, 0)
        self.do_exactly_equal_test(-Decimal("81.3971"), tolerance, 0)

    def test_exactly_equal_relative(self):
        # Equal values are exactly equal with a relative error.
        values = [8347, 101.3, -7910.28, Fraction(5, 21)]
        for x in values:
            self.do_exactly_equal_test(x, 0, 0.01)
        self.do_exactly_equal_test(Decimal("11.68"), 0, Decimal("0.01"))

    def test_exactly_equal_both(self):
        # Equal values are equal when both tol and rel are given.
        values = [41017, 16.742, -813.02, Fraction(3, 8)]
        for x in values:
            self.do_exactly_equal_test(x, 0.1, 0.01)
        self.do_exactly_equal_test(
            Decimal("7.2"), Decimal("0.1"), Decimal("0.01"))
430
431
class ApproxEqualUnequalTest(unittest.TestCase):
    # With both error tolerances set to zero, approx_equal is an exact
    # equality test, so unequal values must compare unequal.

    def do_exactly_unequal_test(self, x):
        # Helper: neither x nor -x may compare equal to itself plus one.
        for value in (x, -x):
            self.assertFalse(
                approx_equal(value, value+1, tol=0, rel=0),
                'inequality failure for x=%r' % value,
            )

    def test_exactly_unequal_ints(self):
        # Unequal ints are unequal with zero error tolerance.
        ints = [951, 572305, 478, 917, 17240]
        for n in ints:
            self.do_exactly_unequal_test(n)

    def test_exactly_unequal_floats(self):
        # Unequal floats are unequal with zero error tolerance.
        floats = [9.51, 5723.05, 47.8, 9.17, 17.24]
        for x in floats:
            self.do_exactly_unequal_test(x)

    def test_exactly_unequal_fractions(self):
        # Unequal Fractions are unequal with zero error tolerance.
        pairs = [(1, 5), (7, 9), (12, 11), (101, 99023)]
        for numerator, denominator in pairs:
            self.do_exactly_unequal_test(Fraction(numerator, denominator))

    def test_exactly_unequal_decimals(self):
        # Unequal Decimals are unequal with zero error tolerance.
        for text in "3.1415 298.12 3.47 18.996 0.00245".split():
            self.do_exactly_unequal_test(Decimal(text))
461
462
class ApproxEqualInexactTest(unittest.TestCase):
    # Inexact test cases for approx_equal: the two values being compared
    # are close to one another, but not exactly equal.

    # === Absolute error tests ===

    def do_approx_equal_abs_test(self, x, delta):
        # Helper: values delta away from x must match x with a tolerance
        # of twice delta, but not with a tolerance of half delta.
        generous, strict = 2*delta, delta/2
        for y in (x + delta, x - delta):
            msg = "Test failure for x={!r}, y={!r}".format(x, y)
            self.assertTrue(approx_equal(x, y, tol=generous, rel=0), msg)
            self.assertFalse(approx_equal(x, y, tol=strict, rel=0), msg)

    def test_approx_equal_absolute_ints(self):
        # Approximate equality of ints with an absolute error.
        ints = [-10737, -1975, -7, -2, 0, 1, 9, 37, 423, 9874, 23789110]
        for n in ints:
            for delta in (10, 2):
                self.do_approx_equal_abs_test(n, delta)

    def test_approx_equal_absolute_floats(self):
        # Approximate equality of floats with an absolute error.
        floats = [-284.126, -97.1, -3.4, -2.15, 0.5, 1.0, 7.8, 4.23, 3817.4]
        for x in floats:
            for delta in (1.5, 0.01, 0.0001):
                self.do_approx_equal_abs_test(x, delta)

    def test_approx_equal_absolute_fractions(self):
        # Approximate equality of Fractions with an absolute error.
        delta = Fraction(1, 29)
        numerators = [-84, -15, -2, -1, 0, 1, 5, 17, 23, 34, 71]
        for n in numerators:
            f = Fraction(n, 29)
            # The error is given first as a Fraction, then as a float.
            self.do_approx_equal_abs_test(f, delta)
            self.do_approx_equal_abs_test(f, float(delta))

    def test_approx_equal_absolute_decimals(self):
        # Approximate equality of Decimals with an absolute error.
        delta = Decimal("0.01")
        for text in "1.0 3.5 36.08 61.79 7912.3648".split():
            d = Decimal(text)
            self.do_approx_equal_abs_test(d, delta)
            self.do_approx_equal_abs_test(-d, delta)

    def test_cross_zero(self):
        # The two values may lie on opposite sides of zero.
        self.assertTrue(approx_equal(1e-5, -1e-5, tol=1e-4, rel=0))

    # === Relative error tests ===

    def do_approx_equal_rel_test(self, x, delta):
        # Helper: values a fraction delta away from x must match x with a
        # relative error of twice delta, but not with half delta.
        generous, strict = 2*delta, delta/2
        for y in (x*(1+delta), x*(1-delta)):
            msg = "Test failure for x={!r}, y={!r}".format(x, y)
            self.assertTrue(approx_equal(x, y, tol=0, rel=generous), msg)
            self.assertFalse(approx_equal(x, y, tol=0, rel=strict), msg)

    def test_approx_equal_relative_ints(self):
        # Approximate equality of ints with a relative error.
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.36))
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.37))
        # ---
        self.assertTrue(approx_equal(449, 512, tol=0, rel=0.125))
        self.assertTrue(approx_equal(448, 512, tol=0, rel=0.125))
        self.assertFalse(approx_equal(447, 512, tol=0, rel=0.125))

    def test_approx_equal_relative_floats(self):
        # Approximate equality of floats with a relative error.
        floats = [-178.34, -0.1, 0.1, 1.0, 36.97, 2847.136, 9145.074]
        for x in floats:
            for delta in (0.02, 0.0001):
                self.do_approx_equal_rel_test(x, delta)

    def test_approx_equal_relative_fractions(self):
        # Approximate equality of Fractions with a relative error.
        delta = Fraction(3, 8)
        pairs = [(3, 84), (17, 30), (49, 50), (92, 85)]
        for numerator, denominator in pairs:
            f = Fraction(numerator, denominator)
            for d in (delta, float(delta)):
                self.do_approx_equal_rel_test(f, d)
                self.do_approx_equal_rel_test(-f, d)

    def test_approx_equal_relative_decimals(self):
        # Approximate equality of Decimals with a relative error.
        for text in "0.02 1.0 5.7 13.67 94.138 91027.9321".split():
            d = Decimal(text)
            self.do_approx_equal_rel_test(d, Decimal("0.001"))
            self.do_approx_equal_rel_test(-d, Decimal("0.05"))

    # === Both absolute and relative error tests ===

    # There are four cases to consider:
    #   1) actual error <= both absolute and relative error
    #   2) actual error <= absolute error but > relative error
    #   3) actual error <= relative error but > absolute error
    #   4) actual error > both absolute and relative error

    def do_check_both(self, a, b, tol, rel, tol_flag, rel_flag):
        # Helper: tol_flag and rel_flag say whether the absolute and the
        # relative test are each expected to pass on their own; the
        # combined test is expected to pass if either of them does.
        def checker(flag):
            return self.assertTrue if flag else self.assertFalse

        checker(tol_flag)(approx_equal(a, b, tol=tol, rel=0))
        checker(rel_flag)(approx_equal(a, b, tol=0, rel=rel))
        checker(tol_flag or rel_flag)(approx_equal(a, b, tol=tol, rel=rel))

    def test_approx_equal_both1(self):
        # Actual error <= both absolute and relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.8e-4, True, True)
        self.do_check_both(-7.387, -7.386, 0.002, 0.0002, True, True)

    def test_approx_equal_both2(self):
        # Actual error <= absolute error but > relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.7e-4, True, False)

    def test_approx_equal_both3(self):
        # Actual error <= relative error but > absolute error.
        self.do_check_both(7.955, 7.952, 0.001, 3.8e-4, False, True)

    def test_approx_equal_both4(self):
        # Actual error > both absolute and relative error.
        self.do_check_both(2.78, 2.75, 0.01, 0.001, False, False)
        self.do_check_both(971.44, 971.47, 0.02, 3e-5, False, False)
580
581
class ApproxEqualSpecialsTest(unittest.TestCase):
    # Exercise approx_equal with the special values: NANs, INFs and
    # signed zeroes.

    def test_inf(self):
        for type_ in (float, Decimal):
            inf = type_('inf')
            # An infinity matches itself whatever the tolerances are...
            self.assertTrue(approx_equal(inf, inf))
            self.assertTrue(approx_equal(inf, inf, 0, 0))
            self.assertTrue(approx_equal(inf, inf, 1, 0.01))
            self.assertTrue(approx_equal(-inf, -inf))
            # ...but not its negation, nor a finite number.
            self.assertFalse(approx_equal(inf, -inf))
            self.assertFalse(approx_equal(inf, 1000))

    def test_nan(self):
        for type_ in (float, Decimal):
            nan = type_('nan')
            others = (nan, type_('inf'), 1000)
            for other in others:
                self.assertFalse(approx_equal(nan, other))

    def test_float_zeroes(self):
        negative_zero = math.copysign(0.0, -1)
        result = approx_equal(negative_zero, 0.0, tol=0.1, rel=0.1)
        self.assertTrue(result)

    def test_decimal_zeroes(self):
        negative_zero = Decimal("-0.0")
        result = approx_equal(negative_zero, Decimal(0), tol=0.1, rel=0.1)
        self.assertTrue(result)
608
609
class TestApproxEqualErrors(unittest.TestCase):
    # Error conditions of approx_equal.

    def test_bad_tol(self):
        # A negative tol raises.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, -1, 0.1)

    def test_bad_rel(self):
        # A negative rel raises.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, 1, -0.1)
620
621
622# --- Tests for NumericTestCase ---
623
624# The formatting routine that generates the error messages is complex enough
625# that it too needs testing.
626
class TestNumericTestCase(unittest.TestCase):
    # The exact wording of NumericTestCase error messages is *not*
    # guaranteed, yet they still need some kind of test to show that they
    # are generated correctly. As a compromise, these tests look for
    # specific substrings that should be present even if the overall
    # wording of the message changes.

    def do_test(self, args):
        # Helper: args is the full argument tuple for _make_std_err_msg.
        actual_msg = NumericTestCase._make_std_err_msg(*args)
        for substring in self.generate_substrings(*args):
            self.assertIn(substring, actual_msg)

    def test_numerictestcase_is_testcase(self):
        # NumericTestCase must actually be a TestCase.
        self.assertTrue(issubclass(NumericTestCase, unittest.TestCase))

    def test_error_msg_numeric(self):
        # The error message generated for numeric comparisons.
        self.do_test((2.5, 4.0, 0.5, 0.25, None))

    def test_error_msg_sequence(self):
        # The error message generated for sequence comparisons.
        self.do_test((3.75, 8.25, 1.25, 0.5, 7))

    def generate_substrings(self, first, second, tol, rel, idx):
        """Return substrings we expect to see in error messages."""
        abs_err, rel_err = _calc_errors(first, second)
        formats = [
            'tol=%r',
            'rel=%r',
            'absolute error = %r',
            'relative error = %r',
        ]
        values = [tol, rel, abs_err, rel_err]
        substrings = [fmt % value for fmt, value in zip(formats, values)]
        if idx is not None:
            # Only sequence comparisons mention an index.
            substrings.append('differ at index %d' % idx)
        return substrings
665
666
667# =======================================
668# === Tests for the statistics module ===
669# =======================================
670
671
class GlobalsTest(unittest.TestCase):
    # Checks on the module-level metadata of the module under test.
    module = statistics
    expected_metadata = ["__doc__", "__all__"]

    def test_meta(self):
        # Every expected piece of metadata must exist.
        for meta in self.expected_metadata:
            present = hasattr(self.module, meta)
            self.assertTrue(present, "%s not present" % meta)

    def test_check_all(self):
        # Everything in __all__ must exist and be public.
        module = self.module
        for name in module.__all__:
            # No private names in __all__:
            is_private = name.startswith("_")
            self.assertFalse(is_private,
                             'private name "%s" in __all__' % name)
            # And anything in __all__ must exist:
            exists = hasattr(module, name)
            self.assertTrue(exists,
                            'missing name "%s" in __all__' % name)
692
693
class DocTests(unittest.TestCase):
    # Run the doctests embedded in the statistics module itself.
    @unittest.skipIf(sys.flags.optimize >= 2,
                     "Docstrings are omitted with -OO and above")
    def test_doc_tests(self):
        results = doctest.testmod(statistics, optionflags=doctest.ELLIPSIS)
        # At least one example must have run, and none may have failed.
        self.assertGreater(results.attempted, 0)
        self.assertEqual(results.failed, 0)
701
class StatisticsErrorTest(unittest.TestCase):
    # StatisticsError must exist and must be a kind of ValueError.
    def test_has_exception(self):
        self.assertTrue(hasattr(statistics, 'StatisticsError'))
        errmsg = (
            "Expected StatisticsError to be a ValueError, but got a"
            " subclass of %r instead."
        )
        is_value_error = issubclass(statistics.StatisticsError, ValueError)
        self.assertTrue(
            is_value_error,
            errmsg % statistics.StatisticsError.__base__
        )
713
714
715# === Tests for private utility functions ===
716
class ExactRatioTest(unittest.TestCase):
    # Tests for the private _exact_ratio utility.

    def test_int(self):
        ints = (-20, -3, 0, 5, 99, 10**20)
        for i in ints:
            ratio = statistics._exact_ratio(i)
            self.assertEqual(ratio, (i, 1))

    def test_fraction(self):
        denominator = 37
        for n in (-5, 1, 12, 38):
            ratio = statistics._exact_ratio(Fraction(n, denominator))
            self.assertEqual(ratio, (n, denominator))

    def test_float(self):
        # Two fixed values first, then a batch of random ones.
        for x, expected in ((0.125, (1, 8)), (1.125, (9, 8))):
            self.assertEqual(statistics._exact_ratio(x), expected)
        data = [random.uniform(-100, 100) for _ in range(100)]
        for x in data:
            num, den = statistics._exact_ratio(x)
            self.assertEqual(x, num/den)

    def test_decimal(self):
        _exact_ratio = statistics._exact_ratio
        cases = [
            ("0.125", (1, 8)),
            ("12.345", (2469, 200)),
            ("-1.98", (-99, 50)),
        ]
        for text, expected in cases:
            self.assertEqual(_exact_ratio(Decimal(text)), expected)

    def test_inf(self):
        INF = float("INF")
        class MyFloat(float):
            pass
        class MyDecimal(Decimal):
            pass
        types = (float, MyFloat, Decimal, MyDecimal)
        for inf in (INF, -INF):
            for type_ in types:
                x = type_(inf)
                ratio = statistics._exact_ratio(x)
                # The infinity comes back with its type intact, paired
                # with None.
                self.assertEqual(ratio, (x, None))
                self.assertEqual(type(ratio[0]), type_)
                self.assertTrue(math.isinf(ratio[0]))

    def test_float_nan(self):
        NAN = float("NAN")
        class MyFloat(float):
            pass
        for nan in (NAN, MyFloat(NAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(math.isnan(ratio[0]))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))

    def test_decimal_nan(self):
        NAN = Decimal("NAN")
        sNAN = Decimal("sNAN")
        class MyDecimal(Decimal):
            pass
        for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(_nan_equal(ratio[0], nan))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))
779
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700780
class DecimalToRatioTest(unittest.TestCase):
    # Tests for the private _exact_ratio function, using Decimal arguments.

    def test_infinity(self):
        # INFs are handled correctly.
        inf = Decimal('INF')
        for value in (inf, -inf):
            self.assertEqual(statistics._exact_ratio(value), (value, None))

    def test_nan(self):
        # NANs are handled correctly.
        for text in ('NAN', 'sNAN'):
            nan = Decimal(text)
            num, den = statistics._exact_ratio(nan)
            # NANs always compare non-equal, which rules out assertEqual,
            # and nothing is guaranteed about object identity, which rules
            # out an identity test.
            self.assertTrue(_nan_equal(num, nan))
            self.assertIs(den, None)

    def test_sign(self):
        # The sign is calculated correctly.
        numbers = [Decimal("9.8765e12"), Decimal("9.8765e-12")]
        for d in numbers:
            # Positive decimals come first.
            assert d > 0
            num, den = statistics._exact_ratio(d)
            self.assertGreaterEqual(num, 0)
            self.assertGreater(den, 0)
            # Negative decimals follow.
            num, den = statistics._exact_ratio(-d)
            self.assertLessEqual(num, 0)
            self.assertGreater(den, 0)

    def test_negative_exponent(self):
        # Result when the exponent is negative.
        ratio = statistics._exact_ratio(Decimal("0.1234"))
        self.assertEqual(ratio, (617, 5000))

    def test_positive_exponent(self):
        # Result when the exponent is positive.
        ratio = statistics._exact_ratio(Decimal("1.234e7"))
        self.assertEqual(ratio, (12340000, 1))

    def test_regression_20536(self):
        # Regression test for issue 20536.
        # See http://bugs.python.org/issue20536
        cases = [("1e2", (100, 1)), ("1.47e5", (147000, 1))]
        for text, expected in cases:
            ratio = statistics._exact_ratio(Decimal(text))
            self.assertEqual(ratio, expected)
831
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700832
class IsFiniteTest(unittest.TestCase):
    # Tests for the private helper statistics._isfinite.

    def test_finite(self):
        # Ordinary int, Fraction, float and Decimal values are finite.
        samples = (5, Fraction(1, 3), 2.5, Decimal("5.5"))
        for value in samples:
            self.assertTrue(statistics._isfinite(value))

    def test_infinity(self):
        # Float and Decimal infinities must not be reported as finite.
        for kind in (float, Decimal):
            self.assertFalse(statistics._isfinite(kind("inf")))

    def test_nan(self):
        # Neither float NANs nor quiet/signalling Decimal NANs are finite.
        samples = (float("nan"), Decimal("NAN"), Decimal("sNAN"))
        for value in samples:
            self.assertFalse(statistics._isfinite(value))
850
851
class CoerceTest(unittest.TestCase):
    # Test that private function _coerce correctly deals with types.

    # The coercion rules are currently an implementation detail, although
    # that should change at some point.  The tests and comments here define
    # the correct implementation.

    # Pre-conditions of _coerce:
    #
    # - _coerce(T, S) will never be called with bool as the first argument;
    #   this is a pre-condition, guarded with an assertion.
    #
    # - _coerce(T, T) will always return T; we assume T is a valid numeric
    #   type.  Violate this assumption at your own risk.
    #
    # - Apart from as above, bool is treated as if it were actually int.
    #
    # - _coerce(int, X) and _coerce(X, int) return X.
    #
    # NOTE(review): the original list also held an unfinished item ("The
    # first time _sum calls _coerce, the ...") and a trailing empty bullet;
    # the intended wording could not be recovered from this file.

    def test_bool(self):
        # bool is somewhat special: by pre-condition it is never the first
        # argument to _coerce, and it cannot be subclassed, so it gets its
        # own test.
        for numeric in (int, float, Fraction, Decimal):
            self.assertIs(statistics._coerce(numeric, bool), numeric)
            class Derived(numeric): pass
            self.assertIs(statistics._coerce(Derived, bool), Derived)

    def assertCoerceTo(self, A, B):
        """Assert that type A coerces to B."""
        # The result must be B whichever way round the pair is given.
        for first, second in ((A, B), (B, A)):
            self.assertIs(statistics._coerce(first, second), B)

    def check_coerce_to(self, A, B):
        """Checks that type A coerces to B, including subclasses."""
        class ChildOfA(A): pass
        # A itself, and a subclass of A, are coerced to B.
        for source in (A, ChildOfA):
            self.assertCoerceTo(source, B)
        class ChildOfB(B): pass
        # A, and a subclass of A, are also coerced to a subclass of B.
        for source in (A, ChildOfA):
            self.assertCoerceTo(source, ChildOfB)

    def assertCoerceRaises(self, A, B):
        """Assert that coercing A to B, or vice versa, raises TypeError."""
        # NOTE(review): each pair is handed to _coerce as ONE tuple
        # argument, although every other call in this class passes two
        # types.  The TypeError observed here therefore presumably comes
        # from the call itself rather than from the coercion rules --
        # confirm before relying on these checks.
        for pair in ((A, B), (B, A)):
            with self.assertRaises(TypeError):
                statistics._coerce(pair)

    def check_type_coercions(self, T):
        """Check that type T coerces correctly with subclasses of itself."""
        assert T is not bool
        # A type coerced with itself is unchanged.
        self.assertIs(statistics._coerce(T, T), T)
        # A type coerced with one of its subclasses gives the subclass.
        class U(T): pass
        class V(T): pass
        class W(U): pass
        for subclass in (U, V, W):
            self.assertCoerceTo(T, subclass)
        self.assertCoerceTo(U, W)
        # Two subclasses that aren't parent/child cannot be coerced.
        for left, right in ((U, V), (V, W)):
            self.assertCoerceRaises(left, right)

    def test_int(self):
        # int coerces with itself, and gives way to every other type.
        self.check_type_coercions(int)
        for wider in (float, Fraction, Decimal):
            self.check_coerce_to(int, wider)

    def test_fraction(self):
        # Fraction coerces with itself, and gives way to float.
        self.check_type_coercions(Fraction)
        self.check_coerce_to(Fraction, float)

    def test_decimal(self):
        # Decimal coerces with itself and its subclasses.
        self.check_type_coercions(Decimal)

    def test_float(self):
        # float coerces with itself and its subclasses.
        self.check_type_coercions(float)

    def test_non_numeric_types(self):
        # Pairing a numeric type with a non-numeric one is an error.
        numeric_types = (int, float, Fraction, Decimal)
        for unsupported in (str, list, type(None), tuple, dict):
            for numeric in numeric_types:
                self.assertCoerceRaises(numeric, unsupported)

    def test_incompatible_types(self):
        # Decimal cannot be combined with float or Fraction, nor with
        # subclasses of them.
        for base in (float, Fraction):
            class MySubclass(base): pass
            for candidate in (base, MySubclass):
                self.assertCoerceRaises(candidate, Decimal)
950
951
class ConvertTest(unittest.TestCase):
    # Tests for the private function statistics._convert.

    def check_exact_equal(self, x, y):
        """Check that x equals y, and has the same type as well."""
        self.assertEqual(x, y)
        self.assertIs(type(x), type(y))

    def test_int(self):
        # Converting whole-valued Fractions to int and to an int subclass.
        self.check_exact_equal(statistics._convert(Fraction(71), int), 71)
        class MyInt(int): pass
        converted = statistics._convert(Fraction(17), MyInt)
        self.check_exact_equal(converted, MyInt(17))

    def test_fraction(self):
        # Converting to Fraction and to a Fraction subclass.
        converted = statistics._convert(Fraction(95, 99), Fraction)
        self.check_exact_equal(converted, Fraction(95, 99))
        class MyFraction(Fraction):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        converted = statistics._convert(Fraction(71, 13), MyFraction)
        self.check_exact_equal(converted, MyFraction(71, 13))

    def test_float(self):
        # Converting to float and to a float subclass.
        converted = statistics._convert(Fraction(-1, 2), float)
        self.check_exact_equal(converted, -0.5)
        class MyFloat(float):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        converted = statistics._convert(Fraction(9, 8), MyFloat)
        self.check_exact_equal(converted, MyFloat(1.125))

    def test_decimal(self):
        # Converting to Decimal and to a Decimal subclass.
        converted = statistics._convert(Fraction(1, 40), Decimal)
        self.check_exact_equal(converted, Decimal("0.025"))
        class MyDecimal(Decimal):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        converted = statistics._convert(Fraction(-15, 16), MyDecimal)
        self.check_exact_equal(converted, MyDecimal("-0.9375"))

    def test_inf(self):
        # Infinities of either sign convert to themselves.
        for kind in (float, Decimal):
            positive = kind('inf')
            for value in (positive, -positive):
                converted = statistics._convert(value, type(value))
                self.check_exact_equal(converted, value)

    def test_nan(self):
        # NANs convert to a NAN of the same kind.
        for value in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
            converted = statistics._convert(value, type(value))
            self.assertTrue(_nan_equal(converted, value))

    def test_invalid_input_type(self):
        # A non-numeric value cannot be converted.
        self.assertRaises(TypeError, statistics._convert, None, float)
1012
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001013
class FailNegTest(unittest.TestCase):
    """Test _fail_neg private function."""

    def test_pass_through(self):
        # Non-negative values come out of the iterator unchanged.
        original = [1, 2.0, Fraction(3), Decimal(4)]
        self.assertEqual(original, list(statistics._fail_neg(original)))

    def test_negatives_raise(self):
        # A negative value of any numeric type triggers StatisticsError
        # as soon as the iterator reaches it.
        for magnitude in [1, 2.0, Fraction(3), Decimal(4)]:
            checker = statistics._fail_neg([-magnitude])
            self.assertRaises(statistics.StatisticsError, next, checker)

    def test_error_msg(self):
        # A caller-supplied message must be the one carried by the error.
        msg = "badness #%d" % random.randint(10000, 99999)
        with self.assertRaises(statistics.StatisticsError) as caught:
            next(statistics._fail_neg([-1], msg))
        self.assertEqual(caught.exception.args[0], msg)
1040
1041
class FindLteqTest(unittest.TestCase):
    # Tests for the private function statistics._find_lteq.

    def test_invalid_input_values(self):
        # Searching for a value that is not in the list raises ValueError.
        bad_cases = (
            ([], 1),
            ([1, 2], 3),
            ([1, 3], 2),
        )
        for a, x in bad_cases:
            with self.subTest(a=a, x=x):
                self.assertRaises(ValueError, statistics._find_lteq, a, x)

    def test_locate_successfully(self):
        # The expected index is that of the first item equal to x.
        good_cases = (
            ([1, 1, 1, 2, 3], 1, 0),
            ([0, 1, 1, 1, 2, 3], 1, 1),
            ([1, 2, 3, 3, 3], 3, 2),
        )
        for a, x, expected_i in good_cases:
            with self.subTest(a=a, x=x):
                found = statistics._find_lteq(a, x)
                self.assertEqual(expected_i, found)
1063
1064
class FindRteqTest(unittest.TestCase):
    # Tests for the private function statistics._find_rteq.

    def test_invalid_input_values(self):
        # Searching for a value that cannot be located raises ValueError.
        # Each case runs in its own subTest so that a failure reports the
        # offending inputs, matching the other tests of the _find_* helpers.
        for a, lo, x in [
            ([1], 2, 1),
            ([1, 3], 0, 2),
        ]:
            with self.subTest(a=a, lo=lo, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_rteq(a, lo, x)

    def test_locate_successfully(self):
        # The expected index is that of the last item equal to x.
        for a, lo, x, expected_i in [
            ([1, 1, 1, 2, 3], 0, 1, 2),
            ([0, 1, 1, 1, 2, 3], 0, 1, 3),
            ([1, 2, 3, 3, 3], 0, 3, 4),
        ]:
            with self.subTest(a=a, lo=lo, x=x):
                self.assertEqual(expected_i, statistics._find_rteq(a, lo, x))
1084
1085
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001086# === Tests for public functions ===
1087
class UnivariateCommonMixin:
    # Shared tests for univariate functions taking a data argument.  The
    # class mixing this in must set self.func to the function under test.

    def test_no_args(self):
        # Calling with no arguments at all is a TypeError.
        with self.assertRaises(TypeError):
            self.func()

    def test_empty_data(self):
        # An empty data argument (the first argument) is rejected.
        for nothing in ([], (), iter([])):
            self.assertRaises(statistics.StatisticsError, self.func, nothing)

    def prepare_data(self):
        """Return int data for various tests."""
        # Keep shuffling until the data is definitely not in sorted order.
        ordered = list(range(10))
        shuffled = ordered[:]
        while shuffled == ordered:
            random.shuffle(shuffled)
        return shuffled

    def test_no_inplace_modifications(self):
        # The function must leave its input data untouched.
        data = self.prepare_data()
        assert len(data) != 1  # Necessary to avoid infinite loop.
        assert data != sorted(data)
        snapshot = data[:]
        assert data is not snapshot
        self.func(data)
        self.assertListEqual(data, snapshot, "data has been modified")

    def test_order_doesnt_matter(self):
        # The result should not depend on the order of the data points.

        # CAUTION: because of floating point rounding the result really can
        # depend on the order, so treat this test as describing an ideal.
        # To keep it from failing, only exact values such as ints or
        # Fractions are used.
        data = [1, 2, 3, 3, 3, 4, 5, 6]*100
        before = self.func(data)
        random.shuffle(data)
        after = self.func(data)
        self.assertEqual(before, after)

    def test_type_of_data_collection(self):
        # The kind of iterable holding the data doesn't affect the result.
        class MyList(list):
            pass
        class MyTuple(tuple):
            pass
        def generator(data):
            return (obj for obj in data)
        data = self.prepare_data()
        expected = self.func(data)
        containers = (list, tuple, iter, MyList, MyTuple, generator)
        for make in containers:
            self.assertEqual(self.func(make(data)), expected)

    def test_range_data(self):
        # A range object works just like the equivalent list.
        data = range(20, 50, 3)
        from_list = self.func(list(data))
        self.assertEqual(self.func(data), from_list)

    def test_bad_arg_types(self):
        # Data of the wrong type must raise.

        # Don't roll the following into a loop like this:
        #   for bad in list_of_bad:
        #       self.check_for_type_error(bad)
        #
        # Since assertRaises doesn't show the arguments that caused the test
        # failure, it is very difficult to debug these test failures when the
        # following are in a loop.
        self.check_for_type_error(None)
        self.check_for_type_error(23)
        self.check_for_type_error(42.0)
        self.check_for_type_error(object())

    def check_for_type_error(self, *args):
        # Calling the function with the given arguments must be a TypeError.
        with self.assertRaises(TypeError):
            self.func(*args)

    def test_type_of_data_element(self):
        # The type of the data elements doesn't affect the numeric result.
        # This is weaker than UnivariateTypeMixin.test_types_conserved: the
        # result is compared by value only, not by type.
        class MyFloat(float):
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))

        raw = self.prepare_data()
        expected = self.func(raw)
        result_type = type(expected)
        for kind in (float, MyFloat, Decimal, Fraction):
            converted = [kind(item) for item in raw]
            self.assertEqual(result_type(self.func(converted)), expected)
1185
1186
class UnivariateTypeMixin:
    """Mixin class for type-conserving functions.

    Holds the test(s) for functions that conserve the type of individual
    data points, e.g. the mean of a list of Fractions should itself be a
    Fraction.

    Not every type-related test belongs here; only those relying on the
    function returning the same type as its input data.
    """

    def prepare_types_for_conservation_test(self):
        """Return the types which are expected to be conserved."""
        # A float subclass whose arithmetic results stay in the subclass.
        class MyFloat(float):
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__
            def __sub__(self, other):
                return type(self)(super().__sub__(other))
            def __rsub__(self, other):
                return type(self)(super().__rsub__(other))
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))
            def __rtruediv__(self, other):
                return type(self)(super().__rtruediv__(other))
            def __pow__(self, other):
                return type(self)(super().__pow__(other))
        return (float, Decimal, Fraction, MyFloat)

    def test_types_conserved(self):
        # The result has the same type as the data points (mixed data
        # types are excluded).  Only the type of the result is checked,
        # not its value.
        raw = self.prepare_data()
        for kind in self.prepare_types_for_conservation_test():
            converted = list(map(kind, raw))
            self.assertIs(type(self.func(converted)), kind)
1224
1225
class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
    # Common test cases for statistics._sum() function.

    # This test suite looks only at the numeric value returned by _sum,
    # after conversion to the appropriate type.

    # NOTE(review): neither base class visible in this file derives from
    # unittest.TestCase, so these tests are presumably not collected by
    # test discovery -- confirm whether that is intentional.

    def setUp(self):
        def simplified_sum(*args):
            # _sum returns a (type, total, count) triple; hand back only
            # the total, converted to the reported type.  The conversion
            # must use _convert(value, type): _coerce combines two *types*
            # and cannot be applied to a value.
            T, value, n = statistics._sum(*args)
            return statistics._convert(value, T)
        self.func = simplified_sum
1236
1237
class TestSum(NumericTestCase):
    # Test cases for statistics._sum() function.

    # These tests inspect the whole three-value tuple that _sum returns.

    def setUp(self):
        self.func = statistics._sum

    def test_empty_data(self):
        # Override test for empty data: the total is zero (or the start
        # value) and the count is 0.
        for empty in ([], (), iter([])):
            self.assertEqual(self.func(empty), (int, Fraction(0), 0))
            self.assertEqual(self.func(empty, 23), (int, Fraction(23), 0))
            self.assertEqual(self.func(empty, 2.3), (float, Fraction(2.3), 0))

    def test_ints(self):
        # Sums of ints, without and with a start value.
        outcome = self.func([1, 5, 3, -4, -8, 20, 42, 1])
        self.assertEqual(outcome, (int, Fraction(60), 8))
        outcome = self.func([4, 2, 3, -8, 7], 1000)
        self.assertEqual(outcome, (int, Fraction(1008), 5))

    def test_floats(self):
        # Sums of exactly representable floats, without and with a start.
        outcome = self.func([0.25]*20)
        self.assertEqual(outcome, (float, Fraction(5.0), 20))
        outcome = self.func([0.125, 0.25, 0.5, 0.75], 1.5)
        self.assertEqual(outcome, (float, Fraction(3.125), 4))

    def test_fractions(self):
        # Sum of Fractions.
        outcome = self.func([Fraction(1, 1000)]*500)
        self.assertEqual(outcome, (Fraction, Fraction(1, 2), 500))

    def test_decimals(self):
        # Sum of Decimals.
        texts = ("0.001", "5.246", "1.702", "-0.025",
                 "3.974", "2.328", "4.617", "2.843")
        data = [Decimal(text) for text in texts]
        self.assertEqual(self.func(data),
                         (Decimal, Decimal("20.686"), 8))

    def test_compare_with_math_fsum(self):
        # Compare with the math.fsum function.
        # Ideally the results would be identical, but sometimes they
        # differ by a very slight amount :-(
        data = [random.uniform(-100, 1000) for _ in range(1000)]
        total = float(self.func(data)[1])
        self.assertApproxEqual(total, math.fsum(data), rel=2e-16)

    def test_start_argument(self):
        # The optional start argument is added to the total.
        data = [random.uniform(1, 1000) for _ in range(100)]
        base = self.func(data)[1]
        self.assertEqual(base+42, self.func(data, 42)[1])
        self.assertEqual(base-23, self.func(data, -23)[1])
        self.assertEqual(base+Fraction(1e20), self.func(data, 1e20)[1])

    def test_strings_fail(self):
        # Strings are rejected, as the start value or as a data point.
        with self.assertRaises(TypeError):
            self.func([1, 2, 3], '999')
        with self.assertRaises(TypeError):
            self.func([1, 2, 3, '999'])

    def test_bytes_fail(self):
        # Bytes are rejected, as the start value or as a data point.
        with self.assertRaises(TypeError):
            self.func([1, 2, 3], b'999')
        with self.assertRaises(TypeError):
            self.func([1, 2, 3, b'999'])

    def test_mixed_sum(self):
        # Mixed input types are not (currently) allowed.
        # Mixed data types fail ...
        with self.assertRaises(TypeError):
            self.func([1, 2.0, Decimal(1)])
        # ... and so does a mixed start argument.
        with self.assertRaises(TypeError):
            self.func([1, 2.0], Decimal(1))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001308
1309
class SumTortureTest(NumericTestCase):
    def test_torture(self):
        # Tim Peters' torture test for sum, and variants of same.
        expected = (float, Fraction(20000.0), 40000)
        for pattern in ([1, 1e100, 1, -1e100], [1e100, 1, 1, -1e100]):
            self.assertEqual(statistics._sum(pattern*10000), expected)
        kind, total, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
        self.assertIs(kind, float)
        self.assertEqual(count, 40000)
        self.assertApproxEqual(float(total), 2.0e-96, rel=5e-16)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001321
1322
class SumSpecialValues(NumericTestCase):
    # Test that sum works correctly with IEEE-754 special values.

    def test_nan(self):
        # A NAN among the data gives a NAN total of the same type.
        for type_ in (float, Decimal):
            nan = type_('nan')
            result = statistics._sum([1, nan, 2])[1]
            self.assertIs(type(result), type_)
            self.assertTrue(math.isnan(result))

    def check_infinity(self, x, inf):
        """Check x is an infinity of the same type and sign as inf."""
        self.assertTrue(math.isinf(x))
        self.assertIs(type(x), type(inf))
        self.assertEqual(x > 0, inf > 0)
        # A unittest assertion rather than a bare assert, so the check
        # still runs under -O and reports both values when it fails.
        self.assertEqual(x, inf)

    def do_test_inf(self, inf):
        # Adding a single infinity gives infinity.
        result = statistics._sum([1, 2, inf, 3])[1]
        self.check_infinity(result, inf)
        # Adding two infinities of the same sign also gives infinity.
        result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
        self.check_infinity(result, inf)

    def test_float_inf(self):
        # Float infinities of both signs.
        inf = float('inf')
        for factor in (+1, -1):
            self.do_test_inf(factor*inf)

    def test_decimal_inf(self):
        # Decimal infinities of both signs.
        inf = Decimal('inf')
        for factor in (+1, -1):
            self.do_test_inf(factor*inf)

    def test_float_mismatched_infs(self):
        # Test that adding two infinities of opposite sign gives a NAN.
        inf = float('inf')
        result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
        self.assertTrue(math.isnan(result))

    def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign returns NAN.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.ExtendedContext):
            self.assertTrue(math.isnan(statistics._sum(data)[1]))

    def test_decimal_basiccontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign raises InvalidOperation.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.BasicContext):
            self.assertRaises(decimal.InvalidOperation, statistics._sum, data)

    def test_decimal_snan_raises(self):
        # Adding sNAN should raise InvalidOperation.
        sNAN = Decimal('sNAN')
        data = [1, sNAN, 2]
        self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
1383
1384
1385# === Tests for averages ===
1386
class AverageMixin(UnivariateCommonMixin):
    # Shared tests for functions that compute some kind of average.

    def test_single_value(self):
        # The average of a one-item data set is that item.
        samples = (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28'))
        for value in samples:
            self.assertEqual(self.func([value]), value)

    def prepare_values_for_repeated_single_test(self):
        # Values used by test_repeated_single_value.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.9712'))

    def test_repeated_single_value(self):
        # The average of one value repeated many times is that value.
        repeat_counts = (2, 5, 10, 20)
        for x in self.prepare_values_for_repeated_single_test():
            for count in repeat_counts:
                with self.subTest(x=x, count=count):
                    self.assertEqual(self.func([x]*count), x)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001405
1406
class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    def setUp(self):
        self.func = statistics.mean

    def test_torture_pep(self):
        # "Torture Test" from PEP-450.
        values = [1e100, 1, 3, -1e100]
        self.assertEqual(self.func(values), 1)

    def test_ints(self):
        # Mean of ints.
        values = [0, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9]
        random.shuffle(values)
        self.assertEqual(self.func(values), 4.8125)

    def test_floats(self):
        # Mean of floats.
        values = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5]
        random.shuffle(values)
        self.assertEqual(self.func(values), 22.015625)

    def test_decimals(self):
        # Mean of Decimals.
        texts = ("1.634", "2.517", "3.912", "4.072", "5.813")
        values = [Decimal(text) for text in texts]
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal("3.5896"))

    def test_fractions(self):
        # Mean of Fractions.
        values = [Fraction(n, n + 1) for n in range(1, 8)]
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(1479, 1960))

    def test_inf(self):
        # A single infinity in the data makes the mean that infinity.
        finite = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            for factor in (1, -1):
                infinity = kind("inf")*factor
                outcome = self.func(finite + [infinity])
                self.assertTrue(math.isinf(outcome))
                self.assertEqual(outcome, infinity)

    def test_mismatched_infs(self):
        # Infinities of opposite sign in the data give a NAN.
        values = [2, 4, 6, float('inf'), 1, 3, 5, float('-inf')]
        self.assertTrue(math.isnan(self.func(values)))

    def test_nan(self):
        # A NAN in the data gives a NAN mean.
        finite = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            not_a_number = kind("nan")
            outcome = self.func(finite + [not_a_number])
            self.assertTrue(math.isnan(outcome))

    def test_big_data(self):
        # Adding a large constant to every data point shifts the mean by
        # that constant.
        offset = 1e9
        values = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(values) + offset
        assert expected != offset
        shifted = [value+offset for value in values]
        self.assertEqual(self.func(shifted), expected)

    def test_doubled_data(self):
        # Mean of [a,b,c...z] should be same as for [a,a,b,b,c,c...z,z].
        values = [random.uniform(-3, 5) for _ in range(1000)]
        single = self.func(values)
        doubled = self.func(values*2)
        self.assertApproxEqual(doubled, single)

    def test_regression_20561(self):
        # Regression test for issue 20561.
        # See http://bugs.python.org/issue20561
        value = Decimal('1e4')
        self.assertEqual(statistics.mean([value]), value)

    def test_regression_25177(self):
        # Regression test for issue 25177.
        # Ensure very big and very small floats don't overflow.
        # See http://bugs.python.org/issue25177.
        pair = [8.988465674311579e+307, 8.98846567431158e+307]
        self.assertEqual(statistics.mean(pair), 8.98846567431158e+307)
        big = 8.98846567431158e+307
        tiny = 5e-324
        for repeats in (2, 3, 5, 200):
            self.assertEqual(statistics.mean([big]*repeats), big)
            self.assertEqual(statistics.mean([tiny]*repeats), tiny)
1501
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001502
class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    def setUp(self):
        self.func = statistics.harmonic_mean

    def prepare_data(self):
        # Override mixin method: same data, minus the zero.
        data = super().prepare_data()
        data.remove(0)
        return data

    def prepare_values_for_repeated_single_test(self):
        # Override mixin method.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.125'))

    def test_zero(self):
        # A zero among the data makes the harmonic mean zero.
        self.assertEqual(self.func([1, 0, 2]), 0)

    def test_negative_error(self):
        # A negative value among the data raises StatisticsError.
        for values in ([-1], [1, -2, 3]):
            with self.subTest(values=values):
                with self.assertRaises(statistics.StatisticsError):
                    self.func(values)

    def test_invalid_type_error(self):
        # Input containing invalid type(s) raises TypeError.
        bad_inputs = (
            ['3.14'],               # single string
            ['1', '2', '3'],        # multiple strings
            [1, '2', 3, '4', 5],    # mixed strings and valid integers
            [2.3, 3.4, 4.5, '5.6'], # only one string and valid floats
        )
        for data in bad_inputs:
            with self.subTest(data=data):
                self.assertRaises(TypeError, self.func, data)

    def test_ints(self):
        # Harmonic mean of ints.
        values = [2, 4, 4, 8, 16, 16]
        random.shuffle(values)
        self.assertEqual(self.func(values), 6*4/5)

    def test_floats_exact(self):
        # Harmonic mean of some carefully chosen floats.
        values = [1/8, 1/4, 1/4, 1/2, 1/2]
        random.shuffle(values)
        self.assertEqual(self.func(values), 1/4)
        self.assertEqual(self.func([0.25, 0.5, 1.0, 1.0]), 0.5)

    def test_singleton_lists(self):
        # The harmonic mean of [x] is x.
        for value in range(1, 101):
            self.assertEqual(self.func([value]), value)

    def test_decimals_exact(self):
        # Harmonic mean of some carefully chosen Decimals.
        whole = [Decimal(n) for n in (15, 30, 60, 60)]
        self.assertEqual(self.func(whole), Decimal(30))
        values = [Decimal(t) for t in ("0.05", "0.10", "0.20", "0.20")]
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal("0.10"))
        values = [Decimal(t) for t in ("1.68", "0.32", "5.94", "2.75")]
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal(66528)/70723)

    def test_fractions(self):
        # Harmonic mean of Fractions.
        values = [Fraction(n, n + 1) for n in range(1, 8)]
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(7*420, 4029))

    def test_inf(self):
        # Harmonic mean of data containing an infinity.
        self.assertEqual(self.func([2.0, float('inf'), 1.0]), 2.0)

    def test_nan(self):
        # A NAN among the data gives a NAN result.
        outcome = self.func([2.0, float('nan'), 1.0])
        self.assertTrue(math.isnan(outcome))

    def test_multiply_data_points(self):
        # Multiplying every data point by a constant scales the result
        # by that constant.
        factor = 111
        values = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(values)*factor
        scaled = [value*factor for value in values]
        self.assertEqual(self.func(scaled), expected)

    def test_doubled_data(self):
        # Harmonic mean of [a,b...z] should be same as for [a,a,b,b...z,z].
        values = [random.uniform(1, 5) for _ in range(1000)]
        single = self.func(values)
        doubled = self.func(values*2)
        self.assertApproxEqual(doubled, single)

    def test_with_weights(self):
        # Weights given positionally, by keyword, and as iterators.
        self.assertEqual(self.func([40, 60], [5, 30]), 56.0)
        self.assertEqual(self.func([40, 60], weights=[5, 30]), 56.0)
        self.assertEqual(self.func(iter([40, 60]), iter([5, 30])), 56.0)
        # Integer weights give the same result as repeating the data.
        third, fifth, half = Fraction(10, 3), Fraction(23, 5), Fraction(7, 2)
        self.assertEqual(
            self.func([third, fifth, half], [5, 2, 10]),
            self.func([third] * 5 + [fifth] * 2 + [half] * 10))
        # Single data point (n=1 fast path).
        self.assertEqual(self.func([10], [7]), 10)
        # Non-numeric weight.
        with self.assertRaises(TypeError):
            self.func([1, 2, 3], [1, (), 3])
        # Wrong number of weights.
        with self.assertRaises(statistics.StatisticsError):
            self.func([1, 2, 3], [1, 2])
        # No non-zero weights.
        with self.assertRaises(statistics.StatisticsError):
            self.func([10], [0])
        with self.assertRaises(statistics.StatisticsError):
            self.func([10, 20], [0, 0])
1622
Steven D'Apranoa474afd2016-08-09 12:49:01 +10001623
class TestMedian(NumericTestCase, AverageMixin):
    # Common tests for median and all median.* functions.
    # Subclasses rebind self.func in setUp and override the tests whose
    # expected values differ for their median variant.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        """Override method from UnivariateCommonMixin."""
        # Pad the inherited data set so that it always has an odd length.
        data = super().prepare_data()
        if len(data) % 2 != 1:
            data.append(2)
        return data

    def test_even_ints(self):
        # Test median with an even number of int data points.
        data = [1, 2, 3, 4, 5, 6]
        assert len(data) % 2 == 0
        self.assertEqual(self.func(data), 3.5)

    def test_odd_ints(self):
        # Test median with an odd number of int data points.
        data = [1, 2, 3, 4, 5, 6, 9]
        assert len(data) % 2 == 1
        self.assertEqual(self.func(data), 4)

    def test_odd_fractions(self):
        # Test median works with an odd number of Fractions.
        F = Fraction
        data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7)]
        assert len(data) % 2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), F(3, 7))

    def test_even_fractions(self):
        # Test median works with an even number of Fractions.
        F = Fraction
        data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), F(1, 2))

    def test_odd_decimals(self):
        # Test median works with an odd number of Decimals.
        D = Decimal
        data = [D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')]
        assert len(data) % 2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), D('4.2'))

    def test_even_decimals(self):
        # Test median works with an even number of Decimals.
        D = Decimal
        data = [D('1.2'), D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), D('3.65'))
1679
1680
class TestMedianDataType(NumericTestCase, UnivariateTypeMixin):
    # Test conservation of data element type for median.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        # Odd-length data, reshuffled until it is no longer in sorted order.
        data = list(range(15))
        assert len(data) % 2 == 1
        while data == sorted(data):
            random.shuffle(data)
        return data
1692
1693
class TestMedianLow(TestMedian, UnivariateTypeMixin):
    # Tests for median_low. Only the even-length cases are overridden:
    # the expected value is the lower of the two middle data points.
    def setUp(self):
        self.func = statistics.median_low

    def test_even_ints(self):
        # Test median_low with an even number of ints.
        data = [1, 2, 3, 4, 5, 6]
        assert len(data) % 2 == 0
        self.assertEqual(self.func(data), 3)

    def test_even_fractions(self):
        # Test median_low works with an even number of Fractions.
        F = Fraction
        data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), F(3, 7))

    def test_even_decimals(self):
        # Test median_low works with an even number of Decimals.
        D = Decimal
        data = [D('1.1'), D('2.2'), D('3.3'), D('4.4'), D('5.5'), D('6.6')]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), D('3.3'))
1719
1720
class TestMedianHigh(TestMedian, UnivariateTypeMixin):
    # Tests for median_high. Only the even-length cases are overridden:
    # the expected value is the higher of the two middle data points.
    def setUp(self):
        self.func = statistics.median_high

    def test_even_ints(self):
        # Test median_high with an even number of ints.
        data = [1, 2, 3, 4, 5, 6]
        assert len(data) % 2 == 0
        self.assertEqual(self.func(data), 4)

    def test_even_fractions(self):
        # Test median_high works with an even number of Fractions.
        F = Fraction
        data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), F(4, 7))

    def test_even_decimals(self):
        # Test median_high works with an even number of Decimals.
        D = Decimal
        data = [D('1.1'), D('2.2'), D('3.3'), D('4.4'), D('5.5'), D('6.6')]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), D('4.4'))
1746
1747
class TestMedianGrouped(TestMedian):
    # Test median_grouped.
    # Doesn't conserve data element types, so the data-type tests
    # (UnivariateTypeMixin) are not mixed in here.
    def setUp(self):
        self.func = statistics.median_grouped

    def test_odd_number_repeated(self):
        # Test median_grouped with repeated median values.
        data = [12, 13, 14, 14, 14, 15, 15]
        assert len(data) % 2 == 1
        self.assertEqual(self.func(data), 14)
        #---
        data = [12, 13, 14, 14, 14, 14, 15]
        assert len(data) % 2 == 1
        self.assertEqual(self.func(data), 13.875)
        #---
        data = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
        assert len(data) % 2 == 1
        self.assertEqual(self.func(data, 5), 19.375)
        #---
        data = [16, 18, 18, 18, 18, 20, 20, 20, 22, 22, 22, 24, 24, 26, 28]
        assert len(data) % 2 == 1
        self.assertApproxEqual(self.func(data, 2), 20.66666667, tol=1e-8)

    def test_even_number_repeated(self):
        # Test median_grouped with repeated median values.
        data = [5, 10, 10, 15, 20, 20, 20, 25, 25, 30]
        assert len(data) % 2 == 0
        self.assertApproxEqual(self.func(data, 5), 19.16666667, tol=1e-8)
        #---
        data = [2, 3, 4, 4, 4, 5]
        assert len(data) % 2 == 0
        self.assertApproxEqual(self.func(data), 3.83333333, tol=1e-8)
        #---
        data = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(data) % 2 == 0
        self.assertEqual(self.func(data), 4.5)
        #---
        data = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(data) % 2 == 0
        self.assertEqual(self.func(data), 4.75)

    def test_repeated_single_value(self):
        # Override method from AverageMixin.
        # Yet again, failure of median_grouped to conserve the data type
        # causes me headaches :-(
        # The result is therefore compared against float(x), not x itself.
        for x in (5.3, 68, 4.3e17, Fraction(29, 101), Decimal('32.9714')):
            for count in (2, 5, 10, 20):
                data = [x]*count
                self.assertEqual(self.func(data), float(x))

    def test_odd_fractions(self):
        # Test median_grouped works with an odd number of Fractions.
        F = Fraction
        data = [F(5, 4), F(9, 4), F(13, 4), F(13, 4), F(17, 4)]
        assert len(data) % 2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), 3.0)

    def test_even_fractions(self):
        # Test median_grouped works with an even number of Fractions.
        F = Fraction
        data = [F(5, 4), F(9, 4), F(13, 4), F(13, 4), F(17, 4), F(17, 4)]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), 3.25)

    def test_odd_decimals(self):
        # Test median_grouped works with an odd number of Decimals.
        D = Decimal
        data = [D('5.5'), D('6.5'), D('6.5'), D('7.5'), D('8.5')]
        assert len(data) % 2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), 6.75)

    def test_even_decimals(self):
        # Test median_grouped works with an even number of Decimals.
        D = Decimal
        data = [D('5.5'), D('5.5'), D('6.5'), D('6.5'), D('7.5'), D('8.5')]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), 6.5)
        #---
        data = [D('5.5'), D('5.5'), D('6.5'), D('7.5'), D('7.5'), D('8.5')]
        assert len(data) % 2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), 7.0)

    def test_interval(self):
        # Test median_grouped with interval argument.
        data = [2.25, 2.5, 2.5, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertEqual(self.func(data, 0.25), 2.875)
        data = [2.25, 2.5, 2.5, 2.75, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertApproxEqual(self.func(data, 0.25), 2.83333333, tol=1e-8)
        data = [220, 220, 240, 260, 260, 260, 260, 280, 280, 300, 320, 340]
        self.assertEqual(self.func(data, 20), 265.0)

    def test_data_type_error(self):
        # Test median_grouped with str, bytes data types for data and interval
        data = ["", "", ""]
        self.assertRaises(TypeError, self.func, data)
        #---
        data = [b"", b"", b""]
        self.assertRaises(TypeError, self.func, data)
        #---
        data = [1, 2, 3]
        interval = ""
        self.assertRaises(TypeError, self.func, data, interval)
        #---
        data = [1, 2, 3]
        interval = b""
        self.assertRaises(TypeError, self.func, data, interval)
1860
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001861
class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Test cases for the discrete version of mode.
    # Several tests pin the tie-breaking rule: when more than one value
    # shares the highest count, the first one encountered is expected.
    def setUp(self):
        self.func = statistics.mode

    def prepare_data(self):
        """Override method from UnivariateCommonMixin."""
        # Make sure test data has exactly one mode.
        return [1, 1, 1, 1, 3, 4, 7, 9, 0, 8, 2]

    def test_range_data(self):
        # Override test from UnivariateCommonMixin.
        data = range(20, 50, 3)
        self.assertEqual(self.func(data), 20)

    def test_nominal_data(self):
        # Test mode with nominal data.
        data = 'abcbdb'
        self.assertEqual(self.func(data), 'b')
        data = 'fe fi fo fum fi fi'.split()
        self.assertEqual(self.func(data), 'fi')

    def test_discrete_data(self):
        # Test mode with discrete numeric data.
        # Each value in turn is duplicated, making it the single mode.
        data = list(range(10))
        for i in range(10):
            d = data + [i]
            random.shuffle(d)
            self.assertEqual(self.func(d), i)

    def test_bimodal_data(self):
        # Test mode with bimodal data.
        data = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
        assert data.count(2) == data.count(6) == 4
        # mode() should return 2, the first encountered mode
        self.assertEqual(self.func(data), 2)

    def test_unique_data(self):
        # Test mode when data points are all unique.
        data = list(range(10))
        # mode() should return 0, the first encountered mode
        self.assertEqual(self.func(data), 0)

    def test_none_data(self):
        # Test that mode raises TypeError if given None as data.

        # This test is necessary because the implementation of mode uses
        # collections.Counter, which accepts None and returns an empty dict.
        self.assertRaises(TypeError, self.func, None)

    def test_counter_data(self):
        # Test that a Counter is treated like any other iterable.
        data = collections.Counter([1, 1, 1, 2])
        # Since the keys of the counter are treated as data points, not the
        # counts, this should return the first mode encountered, 1
        self.assertEqual(self.func(data), 1)
1918
1919
class TestMultiMode(unittest.TestCase):
    # Tests for multimode(), which returns a list of the most common values.

    def test_basics(self):
        multimode = statistics.multimode
        # A single most common value.
        self.assertEqual(multimode('aabbbbbbbbcc'), ['b'])
        # Ties are all reported, in order of first appearance.
        self.assertEqual(multimode('aabbbbccddddeeffffgg'), ['b', 'd', 'f'])
        # Empty input gives an empty list rather than an error.
        self.assertEqual(multimode(''), [])
1927
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001928
class TestFMean(unittest.TestCase):
    # Tests for fmean(), the float-returning arithmetic mean.

    def test_basics(self):
        # Whatever the input element or container type, the result
        # must be a float with the expected value.
        fmean = statistics.fmean
        D = Decimal
        F = Fraction
        for data, expected_mean, kind in [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
            ]:
            actual_mean = fmean(data)
            self.assertIs(type(actual_mean), float, kind)
            self.assertEqual(actual_mean, expected_mean, kind)

    def test_error_cases(self):
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(StatisticsError):
            fmean([])                               # empty input
        with self.assertRaises(StatisticsError):
            fmean(iter([]))                         # empty iterator
        with self.assertRaises(TypeError):
            fmean(None)                             # non-iterable input
        with self.assertRaises(TypeError):
            fmean([10, None, 20])                   # non-numeric input
        with self.assertRaises(TypeError):
            fmean()                                 # missing data argument
        with self.assertRaises(TypeError):
            fmean([10, 20, 60], 70)                 # too many arguments

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        NaN = float('Nan')
        Inf = float('Inf')
        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
        with self.assertRaises(ValueError):
            fmean([Inf, -Inf])
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001974
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001975
# === Tests for variances and standard deviations ===
1977
class VarianceStdevMixin(UnivariateCommonMixin):
    # Mixin class holding common tests for variance and std dev.

    # Subclasses should inherit from this before NumericTestCase, in order
    # to see the rel attribute below. See test_shift_data for an explanation.

    rel = 1e-12

    def test_single_value(self):
        # Deviation of a single value is zero.
        for x in (11, 19.8, 4.6e14, Fraction(21, 34), Decimal('8.392')):
            self.assertEqual(self.func([x]), 0)

    def test_repeated_single_value(self):
        # The deviation of a single repeated value is zero.
        for x in (7.2, 49, 8.1e15, Fraction(3, 7), Decimal('62.4802')):
            for count in (2, 3, 5, 15):
                data = [x]*count
                self.assertEqual(self.func(data), 0)

    def test_domain_error_regression(self):
        # Regression test for a domain error exception.
        # (Thanks to Geremy Condra.)
        data = [0.123456789012345]*10000
        # All the items are identical, so variance should be exactly zero.
        # We allow some small round-off error, but not much.
        result = self.func(data)
        self.assertApproxEqual(result, 0.0, tol=5e-17)
        self.assertGreaterEqual(result, 0)  # A negative result must fail.

    def test_shift_data(self):
        # Test that shifting the data by a constant amount does not affect
        # the variance or stdev. Or at least not much.

        # Due to rounding, this test should be considered an ideal. We allow
        # some tolerance away from "no change at all" by setting tol and/or rel
        # attributes. Subclasses may set tighter or looser error tolerances.
        raw = [1.03, 1.27, 1.94, 2.04, 2.58, 3.14, 4.75, 4.98, 5.42, 6.78]
        expected = self.func(raw)
        # Don't set shift too high, the bigger it is, the more rounding error.
        shift = 1e5
        data = [x + shift for x in raw]
        self.assertApproxEqual(self.func(data), expected)

    def test_shift_data_exact(self):
        # Like test_shift_data, but result is always exact.
        raw = [1, 3, 3, 4, 5, 7, 9, 10, 11, 16]
        assert all(x == int(x) for x in raw)
        expected = self.func(raw)
        shift = 10**9
        data = [x + shift for x in raw]
        self.assertEqual(self.func(data), expected)

    def test_iter_list_same(self):
        # Test that iter data and list data give the same result.

        # This is an explicit test that iterators and lists are treated the
        # same; justification for this test over and above the similar test
        # in UnivariateCommonMixin is that an earlier design had variance and
        # friends swap between one- and two-pass algorithms, which would
        # sometimes give different results.
        data = [random.uniform(-3, 8) for _ in range(1000)]
        expected = self.func(data)
        self.assertEqual(self.func(iter(data)), expected)
2042
2043
class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Tests for population variance.
    def setUp(self):
        self.func = statistics.pvariance

    def test_exact_uniform(self):
        # Test the variance against an exact result for uniform data.
        data = list(range(10000))
        random.shuffle(data)
        expected = (10000**2 - 1)/12  # Exact value.
        self.assertEqual(self.func(data), expected)

    def test_ints(self):
        # Test population variance with int data.
        data = [4, 7, 13, 16]
        exact = 22.5
        self.assertEqual(self.func(data), exact)

    def test_fractions(self):
        # Test population variance with Fraction data.
        # The result must also come back as a Fraction.
        F = Fraction
        data = [F(1, 4), F(1, 4), F(3, 4), F(7, 4)]
        exact = F(3, 8)
        result = self.func(data)
        self.assertEqual(result, exact)
        self.assertIsInstance(result, Fraction)

    def test_decimals(self):
        # Test population variance with Decimal data.
        # The result must also come back as a Decimal.
        D = Decimal
        data = [D("12.1"), D("12.2"), D("12.5"), D("12.9")]
        exact = D('0.096875')
        result = self.func(data)
        self.assertEqual(result, exact)
        self.assertIsInstance(result, Decimal)
2079
2080
class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Tests for sample variance.
    def setUp(self):
        self.func = statistics.variance

    def test_single_value(self):
        # Override method from VarianceStdevMixin.
        # A single data point must raise StatisticsError instead of giving 0.
        for x in (35, 24.7, 8.2e15, Fraction(19, 30), Decimal('4.2084')):
            self.assertRaises(statistics.StatisticsError, self.func, [x])

    def test_ints(self):
        # Test sample variance with int data.
        data = [4, 7, 13, 16]
        exact = 30
        self.assertEqual(self.func(data), exact)

    def test_fractions(self):
        # Test sample variance with Fraction data.
        # The result must also come back as a Fraction.
        F = Fraction
        data = [F(1, 4), F(1, 4), F(3, 4), F(7, 4)]
        exact = F(1, 2)
        result = self.func(data)
        self.assertEqual(result, exact)
        self.assertIsInstance(result, Fraction)

    def test_decimals(self):
        # Test sample variance with Decimal data.
        # The result must also come back as a Decimal.
        D = Decimal
        data = [D(2), D(2), D(7), D(9)]
        exact = 4*D('9.5')/D(3)
        result = self.func(data)
        self.assertEqual(result, exact)
        self.assertIsInstance(result, Decimal)

    def test_center_not_at_mean(self):
        # Test passing an explicit xbar that differs from the actual mean.
        data = (1.0, 2.0)
        self.assertEqual(self.func(data), 0.5)
        self.assertEqual(self.func(data, xbar=2.0), 1.0)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002119
class TestPStdev(VarianceStdevMixin, NumericTestCase):
    # Tests for population standard deviation.
    def setUp(self):
        self.func = statistics.pstdev

    def test_compare_to_variance(self):
        # Test that stdev is, in fact, the square root of variance.
        data = [random.uniform(-17, 24) for _ in range(1000)]
        expected = math.sqrt(statistics.pvariance(data))
        self.assertEqual(self.func(data), expected)

    def test_center_not_at_mean(self):
        # See issue: 40855
        # Test passing an explicit mu that differs from the actual mean.
        data = (3, 6, 7, 10)
        self.assertEqual(self.func(data), 2.5)
        self.assertEqual(self.func(data, mu=0.5), 6.5)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002136
class TestStdev(VarianceStdevMixin, NumericTestCase):
    # Tests for sample standard deviation.
    def setUp(self):
        self.func = statistics.stdev

    def test_single_value(self):
        # Override method from VarianceStdevMixin.
        # A single data point must raise StatisticsError instead of giving 0.
        for x in (81, 203.74, 3.9e14, Fraction(5, 21), Decimal('35.719')):
            self.assertRaises(statistics.StatisticsError, self.func, [x])

    def test_compare_to_variance(self):
        # Test that stdev is, in fact, the square root of variance.
        data = [random.uniform(-2, 9) for _ in range(1000)]
        expected = math.sqrt(statistics.variance(data))
        self.assertEqual(self.func(data), expected)

    def test_center_not_at_mean(self):
        # Test passing an explicit xbar that differs from the actual mean.
        data = (1.0, 2.0)
        self.assertEqual(self.func(data, xbar=2.0), 1.0)
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002156
class TestGeometricMean(unittest.TestCase):
    # Tests for geometric_mean().

    def test_basics(self):
        geometric_mean = statistics.geometric_mean
        self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0)
        self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0)
        self.assertAlmostEqual(geometric_mean([17.625]), 17.625)

        # Cross-check against a reference value computed with Decimal
        # arithmetic: the n-th root of the product of the data.
        random.seed(86753095551212)
        for rng in [
                range(1, 100),
                range(1, 1_000),
                range(1, 10_000),
                range(500, 10_000, 3),
                range(10_000, 500, -3),
                [12, 17, 13, 5, 120, 7],
                [random.expovariate(50.0) for i in range(1_000)],
                [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
                [random.triangular(2000, 3000, 2200) for i in range(3_000)],
            ]:
            gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng))
            gm_float = geometric_mean(rng)
            self.assertTrue(math.isclose(gm_float, float(gm_decimal)))

    def test_various_input_types(self):
        # Whatever the input element or container type, the result
        # must be a float close to the expected value.
        geometric_mean = statistics.geometric_mean
        D = Decimal
        F = Fraction
        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
        expected_mean = 4.18886
        for data, kind in [
            ([3.5, 4.0, 5.25], 'floats'),
            ([D('3.5'), D('4.0'), D('5.25')], 'decimals'),
            ([F(7, 2), F(4, 1), F(21, 4)], 'fractions'),
            ([3.5, 4, F(21, 4)], 'mixed types'),
            ((3.5, 4.0, 5.25), 'tuple'),
            (iter([3.5, 4.0, 5.25]), 'iterator'),
            ]:
            actual_mean = geometric_mean(data)
            self.assertIs(type(actual_mean), float, kind)
            self.assertAlmostEqual(actual_mean, expected_mean, places=5)

    def test_big_and_small(self):
        geometric_mean = statistics.geometric_mean

        # Avoid overflow to infinity
        large = 2.0 ** 1000
        big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large])
        self.assertTrue(math.isclose(big_gm, 36.0 * large))
        self.assertFalse(math.isinf(big_gm))

        # Avoid underflow to zero
        small = 2.0 ** -1000
        small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small])
        self.assertTrue(math.isclose(small_gm, 36.0 * small))
        self.assertNotEqual(small_gm, 0.0)

    def test_error_cases(self):
        geometric_mean = statistics.geometric_mean
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(StatisticsError):
            geometric_mean([])                      # empty input
        with self.assertRaises(StatisticsError):
            geometric_mean([3.5, 0.0, 5.25])        # zero input
        with self.assertRaises(StatisticsError):
            geometric_mean([3.5, -4.0, 5.25])       # negative input
        with self.assertRaises(StatisticsError):
            geometric_mean(iter([]))                # empty iterator
        with self.assertRaises(TypeError):
            geometric_mean(None)                    # non-iterable input
        with self.assertRaises(TypeError):
            geometric_mean([10, None, 20])          # non-numeric input
        with self.assertRaises(TypeError):
            geometric_mean()                        # missing data argument
        with self.assertRaises(TypeError):
            geometric_mean([10, 20, 60], 70)        # too many arguments

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        geometric_mean = statistics.geometric_mean
        NaN = float('Nan')
        Inf = float('Inf')
        self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan')
        self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity')
        self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity')
        with self.assertRaises(ValueError):
            geometric_mean([Inf, -Inf])
2244
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002245
class TestQuantiles(unittest.TestCase):
    # Tests for quantiles() with both the default (exclusive) method
    # and method='inclusive'.

    def test_specific_cases(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.EXC function in MS Excel.
        quantiles = statistics.quantiles
        data = [120, 200, 250, 320, 350]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [250.0]),
            (3, [200.0, 320.0]),
            (4, [160.0, 250.0, 335.0]),
            (5, [136.0, 220.0, 292.0, 344.0]),
            (6, [120.0, 200.0, 250.0, 320.0, 350.0]),
            (8, [100.0, 160.0, 212.5, 250.0, 302.5, 335.0, 357.5]),
            (10, [88.0, 136.0, 184.0, 220.0, 250.0, 292.0, 326.0, 344.0, 362.0]),
            (12, [80.0, 120.0, 160.0, 200.0, 225.0, 250.0, 285.0, 320.0, 335.0,
                  350.0, 365.0]),
            (15, [72.0, 104.0, 136.0, 168.0, 200.0, 220.0, 240.0, 264.0, 292.0,
                  320.0, 332.0, 344.0, 356.0, 368.0]),
            ]:
            self.assertEqual(expected, quantiles(data, n=n))
            self.assertEqual(len(quantiles(data, n=n)), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n)
                # The generator must be *inside* all(); passing a bare
                # generator to assertTrue is always truthy and checks nothing.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Quantiles should be idempotent
            if len(expected) >= 2:
                self.assertEqual(quantiles(expected, n=n), expected)
            # Cross-check against method='inclusive' which should give
            # the same result after adding in minimum and maximum values
            # extrapolated from the two lowest and two highest points.
            sdata = sorted(data)
            lo = 2 * sdata[0] - sdata[1]
            hi = 2 * sdata[-1] - sdata[-2]
            padded_data = data + [lo, hi]
            self.assertEqual(
                quantiles(data, n=n),
                quantiles(padded_data, n=n, method='inclusive'),
                (n, data),
            )
            # Invariant under translation and scaling
            def f(x):
                return 3.5 * x - 1234.675
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n)
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data)
            self.assertEqual(q2, statistics.median(data))

    def test_specific_cases_inclusive(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.INC function in MS Excel
        # and against the quantile() function in SciPy.
        quantiles = statistics.quantiles
        data = [100, 200, 400, 800]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [300.0]),
            (3, [200.0, 400.0]),
            (4, [175.0, 300.0, 500.0]),
            (5, [160.0, 240.0, 360.0, 560.0]),
            (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
            (8, [137.5, 175, 225.0, 300.0, 375.0, 500.0, 650.0]),
            (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
            (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0,
                  500.0, 600.0, 700.0]),
            (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0,
                  400.0, 480.0, 560.0, 640.0, 720.0]),
            ]:
            self.assertEqual(expected, quantiles(data, n=n, method="inclusive"))
            self.assertEqual(len(quantiles(data, n=n, method="inclusive")), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n, method="inclusive")
                # The generator must be *inside* all(); passing a bare
                # generator to assertTrue is always truthy and checks nothing.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Invariant under translation and scaling
            def f(x):
                return 3.5 * x - 1234.675
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n, method="inclusive")
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Natural deciles
        self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        # Whenever n is smaller than the number of data points, running
        # method='inclusive' should give the same result as method='exclusive'
        # after the two included extreme points are removed.
        data = [random.randrange(10_000) for i in range(501)]
        actual = quantiles(data, n=32, method='inclusive')
        data.remove(min(data))
        data.remove(max(data))
        expected = quantiles(data, n=32)
        self.assertEqual(expected, actual)
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data, method='inclusive')
            self.assertEqual(q2, statistics.median(data))

    def test_equal_inputs(self):
        # When every data point is identical, so is every cut point.
        quantiles = statistics.quantiles
        for n in range(2, 10):
            data = [10.0] * n
            self.assertEqual(quantiles(data), [10.0, 10.0, 10.0])
            self.assertEqual(quantiles(data, method='inclusive'),
                             [10.0, 10.0, 10.0])

    def test_equal_sized_groups(self):
        quantiles = statistics.quantiles
        total = 10_000
        # Build exactly `total` distinct data points.  Collecting them in a
        # set drops any duplicate draw, so the final length is always
        # `total`, which the group-size arithmetic below relies on.
        unique = {random.expovariate(0.2) for i in range(total)}
        while len(unique) < total:
            unique.add(random.expovariate(0.2))
        data = sorted(unique)

        # Cases where the group size exactly divides the total
        for n in (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000):
            group_size = total // n
            self.assertEqual(
                [bisect.bisect(data, q) for q in quantiles(data, n=n)],
                list(range(group_size, total, group_size)))

        # When the group sizes can't be exactly equal, they should
        # differ by no more than one
        for n in (13, 19, 59, 109, 211, 571, 1019, 1907, 5261, 9769):
            group_sizes = {total // n, total // n + 1}
            pos = [bisect.bisect(data, q) for q in quantiles(data, n=n)]
            sizes = {q - p for p, q in zip(pos, pos[1:])}
            self.assertTrue(sizes <= group_sizes)

    def test_error_cases(self):
        quantiles = statistics.quantiles
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(TypeError):
            quantiles()                         # Missing arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 13, n=4)    # Too many arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 4)          # n is a positional argument
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=0)        # n is zero
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=-1)       # n is negative
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], n=1.5)      # n is not an integer
        with self.assertRaises(ValueError):
            quantiles([10, 20, 30], method='X') # method is unknown
        with self.assertRaises(StatisticsError):
            quantiles([10], n=4)                # not enough data points
        with self.assertRaises(TypeError):
            quantiles([10, None, 30], n=4)      # data is non-numeric
2408
2409
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002410class TestNormalDist:
Raymond Hettinger11c79532019-02-23 14:44:07 -08002411
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002412 # General note on precision: The pdf(), cdf(), and overlap() methods
2413 # depend on functions in the math libraries that do not make
2414 # explicit accuracy guarantees. Accordingly, some of the accuracy
2415 # tests below may fail if the underlying math functions are
2416 # inaccurate. There isn't much we can do about this short of
2417 # implementing our own implementations from scratch.
2418
Raymond Hettinger11c79532019-02-23 14:44:07 -08002419 def test_slots(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002420 nd = self.module.NormalDist(300, 23)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002421 with self.assertRaises(TypeError):
2422 vars(nd)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002423 self.assertEqual(tuple(nd.__slots__), ('_mu', '_sigma'))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002424
2425 def test_instantiation_and_attributes(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002426 nd = self.module.NormalDist(500, 17)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002427 self.assertEqual(nd.mean, 500)
2428 self.assertEqual(nd.stdev, 17)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002429 self.assertEqual(nd.variance, 17**2)
2430
2431 # default arguments
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002432 nd = self.module.NormalDist()
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002433 self.assertEqual(nd.mean, 0)
2434 self.assertEqual(nd.stdev, 1)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002435 self.assertEqual(nd.variance, 1**2)
2436
2437 # error case: negative sigma
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002438 with self.assertRaises(self.module.StatisticsError):
2439 self.module.NormalDist(500, -10)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002440
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002441 # verify that subclass type is honored
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002442 class NewNormalDist(self.module.NormalDist):
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002443 pass
2444 nnd = NewNormalDist(200, 5)
2445 self.assertEqual(type(nnd), NewNormalDist)
2446
Raymond Hettinger11c79532019-02-23 14:44:07 -08002447 def test_alternative_constructor(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002448 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002449 data = [96, 107, 90, 92, 110]
2450 # list input
2451 self.assertEqual(NormalDist.from_samples(data), NormalDist(99, 9))
2452 # tuple input
2453 self.assertEqual(NormalDist.from_samples(tuple(data)), NormalDist(99, 9))
2454 # iterator input
2455 self.assertEqual(NormalDist.from_samples(iter(data)), NormalDist(99, 9))
2456 # error cases
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002457 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002458 NormalDist.from_samples([]) # empty input
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002459 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002460 NormalDist.from_samples([10]) # only one input
2461
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002462 # verify that subclass type is honored
2463 class NewNormalDist(NormalDist):
2464 pass
2465 nnd = NewNormalDist.from_samples(data)
2466 self.assertEqual(type(nnd), NewNormalDist)
2467
Raymond Hettinger11c79532019-02-23 14:44:07 -08002468 def test_sample_generation(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002469 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002470 mu, sigma = 10_000, 3.0
2471 X = NormalDist(mu, sigma)
2472 n = 1_000
2473 data = X.samples(n)
2474 self.assertEqual(len(data), n)
2475 self.assertEqual(set(map(type, data)), {float})
2476 # mean(data) expected to fall within 8 standard deviations
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002477 xbar = self.module.mean(data)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002478 self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)
2479
2480 # verify that seeding makes reproducible sequences
2481 n = 100
2482 data1 = X.samples(n, seed='happiness and joy')
2483 data2 = X.samples(n, seed='trouble and despair')
2484 data3 = X.samples(n, seed='happiness and joy')
2485 data4 = X.samples(n, seed='trouble and despair')
2486 self.assertEqual(data1, data3)
2487 self.assertEqual(data2, data4)
2488 self.assertNotEqual(data1, data2)
2489
Raymond Hettinger11c79532019-02-23 14:44:07 -08002490 def test_pdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002491 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002492 X = NormalDist(100, 15)
2493 # Verify peak around center
2494 self.assertLess(X.pdf(99), X.pdf(100))
2495 self.assertLess(X.pdf(101), X.pdf(100))
2496 # Test symmetry
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002497 for i in range(50):
2498 self.assertAlmostEqual(X.pdf(100 - i), X.pdf(100 + i))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002499 # Test vs CDF
2500 dx = 2.0 ** -10
2501 for x in range(90, 111):
2502 est_pdf = (X.cdf(x + dx) - X.cdf(x)) / dx
2503 self.assertAlmostEqual(X.pdf(x), est_pdf, places=4)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002504 # Test vs table of known values -- CRC 26th Edition
2505 Z = NormalDist()
2506 for x, px in enumerate([
2507 0.3989, 0.3989, 0.3989, 0.3988, 0.3986,
2508 0.3984, 0.3982, 0.3980, 0.3977, 0.3973,
2509 0.3970, 0.3965, 0.3961, 0.3956, 0.3951,
2510 0.3945, 0.3939, 0.3932, 0.3925, 0.3918,
2511 0.3910, 0.3902, 0.3894, 0.3885, 0.3876,
2512 0.3867, 0.3857, 0.3847, 0.3836, 0.3825,
2513 0.3814, 0.3802, 0.3790, 0.3778, 0.3765,
2514 0.3752, 0.3739, 0.3725, 0.3712, 0.3697,
2515 0.3683, 0.3668, 0.3653, 0.3637, 0.3621,
2516 0.3605, 0.3589, 0.3572, 0.3555, 0.3538,
2517 ]):
2518 self.assertAlmostEqual(Z.pdf(x / 100.0), px, places=4)
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002519 self.assertAlmostEqual(Z.pdf(-x / 100.0), px, places=4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002520 # Error case: variance is zero
2521 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002522 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002523 Y.pdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002524 # Special values
2525 self.assertEqual(X.pdf(float('-Inf')), 0.0)
2526 self.assertEqual(X.pdf(float('Inf')), 0.0)
2527 self.assertTrue(math.isnan(X.pdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002528
2529 def test_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002530 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002531 X = NormalDist(100, 15)
2532 cdfs = [X.cdf(x) for x in range(1, 200)]
2533 self.assertEqual(set(map(type, cdfs)), {float})
2534 # Verify montonic
2535 self.assertEqual(cdfs, sorted(cdfs))
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002536 # Verify center (should be exact)
2537 self.assertEqual(X.cdf(100), 0.50)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002538 # Check against a table of known values
2539 # https://en.wikipedia.org/wiki/Standard_normal_table#Cumulative
2540 Z = NormalDist()
2541 for z, cum_prob in [
2542 (0.00, 0.50000), (0.01, 0.50399), (0.02, 0.50798),
2543 (0.14, 0.55567), (0.29, 0.61409), (0.33, 0.62930),
2544 (0.54, 0.70540), (0.60, 0.72575), (1.17, 0.87900),
2545 (1.60, 0.94520), (2.05, 0.97982), (2.89, 0.99807),
2546 (3.52, 0.99978), (3.98, 0.99997), (4.07, 0.99998),
2547 ]:
2548 self.assertAlmostEqual(Z.cdf(z), cum_prob, places=5)
2549 self.assertAlmostEqual(Z.cdf(-z), 1.0 - cum_prob, places=5)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002550 # Error case: variance is zero
2551 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002552 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002553 Y.cdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002554 # Special values
2555 self.assertEqual(X.cdf(float('-Inf')), 0.0)
2556 self.assertEqual(X.cdf(float('Inf')), 1.0)
2557 self.assertTrue(math.isnan(X.cdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002558
Neil Schemenauer52a48e62019-07-30 11:08:18 -07002559 @support.skip_if_pgo_task
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002560 def test_inv_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002561 NormalDist = self.module.NormalDist
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002562
2563 # Center case should be exact.
2564 iq = NormalDist(100, 15)
2565 self.assertEqual(iq.inv_cdf(0.50), iq.mean)
2566
2567 # Test versus a published table of known percentage points.
2568 # See the second table at the bottom of the page here:
2569 # http://people.bath.ac.uk/masss/tables/normaltable.pdf
2570 Z = NormalDist()
2571 pp = {5.0: (0.000, 1.645, 2.576, 3.291, 3.891,
2572 4.417, 4.892, 5.327, 5.731, 6.109),
2573 2.5: (0.674, 1.960, 2.807, 3.481, 4.056,
2574 4.565, 5.026, 5.451, 5.847, 6.219),
2575 1.0: (1.282, 2.326, 3.090, 3.719, 4.265,
2576 4.753, 5.199, 5.612, 5.998, 6.361)}
2577 for base, row in pp.items():
2578 for exp, x in enumerate(row, start=1):
2579 p = base * 10.0 ** (-exp)
2580 self.assertAlmostEqual(-Z.inv_cdf(p), x, places=3)
2581 p = 1.0 - p
2582 self.assertAlmostEqual(Z.inv_cdf(p), x, places=3)
2583
2584 # Match published example for MS Excel
2585 # https://support.office.com/en-us/article/norm-inv-function-54b30935-fee7-493c-bedb-2278a9db7e13
2586 self.assertAlmostEqual(NormalDist(40, 1.5).inv_cdf(0.908789), 42.000002)
2587
2588 # One million equally spaced probabilities
2589 n = 2**20
2590 for p in range(1, n):
2591 p /= n
2592 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2593
2594 # One hundred ever smaller probabilities to test tails out to
2595 # extreme probabilities: 1 / 2**50 and (2**50-1) / 2 ** 50
2596 for e in range(1, 51):
2597 p = 2.0 ** (-e)
2598 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2599 p = 1.0 - p
2600 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2601
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002602 # Now apply cdf() first. Near the tails, the round-trip loses
2603 # precision and is ill-conditioned (small changes in the inputs
2604 # give large changes in the output), so only check to 5 places.
2605 for x in range(200):
2606 self.assertAlmostEqual(iq.inv_cdf(iq.cdf(x)), x, places=5)
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002607
2608 # Error cases:
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002609 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002610 iq.inv_cdf(0.0) # p is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002611 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002612 iq.inv_cdf(-0.1) # p under zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002613 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002614 iq.inv_cdf(1.0) # p is one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002615 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002616 iq.inv_cdf(1.1) # p over one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002617 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002618 iq = NormalDist(100, 0) # sigma is zero
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002619 iq.inv_cdf(0.5)
2620
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002621 # Special values
2622 self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
2623
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002624 def test_quantiles(self):
2625 # Quartiles of a standard normal distribution
2626 Z = self.module.NormalDist()
2627 for n, expected in [
2628 (1, []),
2629 (2, [0.0]),
2630 (3, [-0.4307, 0.4307]),
2631 (4 ,[-0.6745, 0.0, 0.6745]),
2632 ]:
2633 actual = Z.quantiles(n=n)
2634 self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2635 for e, a in zip(expected, actual)))
2636
Raymond Hettinger318d5372019-03-06 22:59:40 -08002637 def test_overlap(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002638 NormalDist = self.module.NormalDist
Raymond Hettinger318d5372019-03-06 22:59:40 -08002639
2640 # Match examples from Imman and Bradley
2641 for X1, X2, published_result in [
2642 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0), 0.80258),
2643 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0), 0.60993),
2644 ]:
2645 self.assertAlmostEqual(X1.overlap(X2), published_result, places=4)
2646 self.assertAlmostEqual(X2.overlap(X1), published_result, places=4)
2647
2648 # Check against integration of the PDF
2649 def overlap_numeric(X, Y, *, steps=8_192, z=5):
2650 'Numerical integration cross-check for overlap() '
2651 fsum = math.fsum
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002652 center = (X.mean + Y.mean) / 2.0
2653 width = z * max(X.stdev, Y.stdev)
Raymond Hettinger318d5372019-03-06 22:59:40 -08002654 start = center - width
2655 dx = 2.0 * width / steps
2656 x_arr = [start + i*dx for i in range(steps)]
2657 xp = list(map(X.pdf, x_arr))
2658 yp = list(map(Y.pdf, x_arr))
2659 total = max(fsum(xp), fsum(yp))
2660 return fsum(map(min, xp, yp)) / total
2661
2662 for X1, X2 in [
2663 # Examples from Imman and Bradley
2664 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0)),
2665 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2666 # Example from https://www.rasch.org/rmt/rmt101r.htm
2667 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2668 # Gender heights from http://www.usablestats.com/lessons/normal
2669 (NormalDist(70, 4), NormalDist(65, 3.5)),
2670 # Misc cases with equal standard deviations
2671 (NormalDist(100, 15), NormalDist(110, 15)),
2672 (NormalDist(-100, 15), NormalDist(110, 15)),
2673 (NormalDist(-100, 15), NormalDist(-110, 15)),
2674 # Misc cases with unequal standard deviations
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002675 (NormalDist(100, 12), NormalDist(100, 15)),
Raymond Hettinger318d5372019-03-06 22:59:40 -08002676 (NormalDist(100, 12), NormalDist(110, 15)),
2677 (NormalDist(100, 12), NormalDist(150, 15)),
2678 (NormalDist(100, 12), NormalDist(150, 35)),
2679 # Misc cases with small values
2680 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.003)),
2681 (NormalDist(1.000, 0.002), NormalDist(1.006, 0.0003)),
2682 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.099)),
2683 ]:
2684 self.assertAlmostEqual(X1.overlap(X2), overlap_numeric(X1, X2), places=5)
2685 self.assertAlmostEqual(X2.overlap(X1), overlap_numeric(X1, X2), places=5)
2686
2687 # Error cases
2688 X = NormalDist()
2689 with self.assertRaises(TypeError):
2690 X.overlap() # too few arguments
2691 with self.assertRaises(TypeError):
2692 X.overlap(X, X) # too may arguments
2693 with self.assertRaises(TypeError):
2694 X.overlap(None) # right operand not a NormalDist
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002695 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002696 X.overlap(NormalDist(1, 0)) # right operand sigma is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002697 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002698 NormalDist(1, 0).overlap(X) # left operand sigma is zero
2699
Raymond Hettinger70f027d2020-04-16 10:25:14 -07002700 def test_zscore(self):
2701 NormalDist = self.module.NormalDist
2702 X = NormalDist(100, 15)
2703 self.assertEqual(X.zscore(142), 2.8)
2704 self.assertEqual(X.zscore(58), -2.8)
2705 self.assertEqual(X.zscore(100), 0.0)
2706 with self.assertRaises(TypeError):
2707 X.zscore() # too few arguments
2708 with self.assertRaises(TypeError):
2709 X.zscore(1, 1) # too may arguments
2710 with self.assertRaises(TypeError):
2711 X.zscore(None) # non-numeric type
2712 with self.assertRaises(self.module.StatisticsError):
2713 NormalDist(1, 0).zscore(100) # sigma is zero
2714
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002715 def test_properties(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002716 X = self.module.NormalDist(100, 15)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002717 self.assertEqual(X.mean, 100)
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002718 self.assertEqual(X.median, 100)
2719 self.assertEqual(X.mode, 100)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002720 self.assertEqual(X.stdev, 15)
2721 self.assertEqual(X.variance, 225)
2722
Raymond Hettinger11c79532019-02-23 14:44:07 -08002723 def test_same_type_addition_and_subtraction(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002724 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002725 X = NormalDist(100, 12)
2726 Y = NormalDist(40, 5)
2727 self.assertEqual(X + Y, NormalDist(140, 13)) # __add__
2728 self.assertEqual(X - Y, NormalDist(60, 13)) # __sub__
2729
2730 def test_translation_and_scaling(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002731 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002732 X = NormalDist(100, 15)
2733 y = 10
2734 self.assertEqual(+X, NormalDist(100, 15)) # __pos__
2735 self.assertEqual(-X, NormalDist(-100, 15)) # __neg__
2736 self.assertEqual(X + y, NormalDist(110, 15)) # __add__
2737 self.assertEqual(y + X, NormalDist(110, 15)) # __radd__
2738 self.assertEqual(X - y, NormalDist(90, 15)) # __sub__
2739 self.assertEqual(y - X, NormalDist(-90, 15)) # __rsub__
2740 self.assertEqual(X * y, NormalDist(1000, 150)) # __mul__
2741 self.assertEqual(y * X, NormalDist(1000, 150)) # __rmul__
2742 self.assertEqual(X / y, NormalDist(10, 1.5)) # __truediv__
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002743 with self.assertRaises(TypeError): # __rtruediv__
Raymond Hettinger11c79532019-02-23 14:44:07 -08002744 y / X
2745
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002746 def test_unary_operations(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002747 NormalDist = self.module.NormalDist
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002748 X = NormalDist(100, 12)
2749 Y = +X
2750 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002751 self.assertEqual(X.mean, Y.mean)
2752 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002753 Y = -X
2754 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002755 self.assertEqual(X.mean, -Y.mean)
2756 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002757
Raymond Hettinger11c79532019-02-23 14:44:07 -08002758 def test_equality(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002759 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002760 nd1 = NormalDist()
2761 nd2 = NormalDist(2, 4)
2762 nd3 = NormalDist()
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002763 nd4 = NormalDist(2, 4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002764 nd5 = NormalDist(2, 8)
2765 nd6 = NormalDist(8, 4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002766 self.assertNotEqual(nd1, nd2)
2767 self.assertEqual(nd1, nd3)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002768 self.assertEqual(nd2, nd4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002769 self.assertNotEqual(nd2, nd5)
2770 self.assertNotEqual(nd2, nd6)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002771
2772 # Test NotImplemented when types are different
2773 class A:
2774 def __eq__(self, other):
2775 return 10
2776 a = A()
2777 self.assertEqual(nd1.__eq__(a), NotImplemented)
2778 self.assertEqual(nd1 == a, 10)
2779 self.assertEqual(a == nd1, 10)
2780
2781 # All subclasses to compare equal giving the same behavior
2782 # as list, tuple, int, float, complex, str, dict, set, etc.
2783 class SizedNormalDist(NormalDist):
2784 def __init__(self, mu, sigma, n):
2785 super().__init__(mu, sigma)
2786 self.n = n
2787 s = SizedNormalDist(100, 15, 57)
2788 nd4 = NormalDist(100, 15)
2789 self.assertEqual(s, nd4)
2790
2791 # Don't allow duck type equality because we wouldn't
2792 # want a lognormal distribution to compare equal
2793 # to a normal distribution with the same parameters
2794 class LognormalDist:
2795 def __init__(self, mu, sigma):
2796 self.mu = mu
2797 self.sigma = sigma
2798 lnd = LognormalDist(100, 15)
2799 nd = NormalDist(100, 15)
2800 self.assertNotEqual(nd, lnd)
2801
2802 def test_pickle_and_copy(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002803 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002804 nd1 = copy.copy(nd)
2805 self.assertEqual(nd, nd1)
2806 nd2 = copy.deepcopy(nd)
2807 self.assertEqual(nd, nd2)
2808 nd3 = pickle.loads(pickle.dumps(nd))
2809 self.assertEqual(nd, nd3)
2810
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002811 def test_hashability(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002812 ND = self.module.NormalDist
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002813 s = {ND(100, 15), ND(100.0, 15.0), ND(100, 10), ND(95, 15), ND(100, 15)}
2814 self.assertEqual(len(s), 3)
2815
Raymond Hettinger11c79532019-02-23 14:44:07 -08002816 def test_repr(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002817 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002818 self.assertEqual(repr(nd), 'NormalDist(mu=37.5, sigma=5.625)')
2819
# Swapping sys.modules['statistics'] works around this pickling error:
#   _pickle.PicklingError:
#     Can't pickle <class 'statistics.NormalDist'>:
#     it's not the same object as statistics.NormalDist
class TestNormalDistPython(unittest.TestCase, TestNormalDist):
    # Runs the shared NormalDist tests against py_statistics.
    module = py_statistics

    def setUp(self):
        # Register the module under test as 'statistics' for the
        # duration of each test.
        registry = sys.modules
        registry['statistics'] = self.module

    def tearDown(self):
        # Put the regularly imported statistics module back.
        registry = sys.modules
        registry['statistics'] = statistics
2831
2832
@unittest.skipUnless(c_statistics, 'requires _statistics')
class TestNormalDistC(unittest.TestCase, TestNormalDist):
    # Runs the shared NormalDist tests against c_statistics; the whole
    # class is skipped when c_statistics is falsy.
    module = c_statistics

    def setUp(self):
        # Register the module under test as 'statistics' for the
        # duration of each test.
        registry = sys.modules
        registry['statistics'] = self.module

    def tearDown(self):
        # Put the regularly imported statistics module back.
        registry = sys.modules
        registry['statistics'] = statistics
2840 sys.modules['statistics'] = statistics
2841
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002842
2843# === Run tests ===
2844
def load_tests(loader, tests, ignore):
    """Add this module's doctests to *tests* and return the suite.

    Used for doctest/unittest integration; *loader* and *ignore* are
    accepted but not used.
    """
    # DocTestSuite() is called without arguments and directly from this
    # function, so it picks up the doctests of the calling module.
    doctest_suite = doctest.DocTestSuite()
    tests.addTests(doctest_suite)
    return tests
2849
2850
# Run the tests through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()