blob: adccfad7b8ed1a4a8270a16acf1ba49fe683a793 [file] [log] [blame]
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001"""Test suite for statistics module, including helper NumericTestCase and
2approx_equal function.
3
4"""
5
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07006import bisect
Larry Hastingsf5e987b2013-10-19 11:50:09 -07007import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +03008import collections.abc
Raymond Hettinger11c79532019-02-23 14:44:07 -08009import copy
Larry Hastingsf5e987b2013-10-19 11:50:09 -070010import decimal
11import doctest
12import math
Raymond Hettinger11c79532019-02-23 14:44:07 -080013import pickle
Larry Hastingsf5e987b2013-10-19 11:50:09 -070014import random
Serhiy Storchakab12cb6a2013-12-08 18:16:18 +020015import sys
Larry Hastingsf5e987b2013-10-19 11:50:09 -070016import unittest
Neil Schemenauer52a48e62019-07-30 11:08:18 -070017from test import support
Hai Shi79bb2c92020-08-06 19:51:29 +080018from test.support import import_helper
Larry Hastingsf5e987b2013-10-19 11:50:09 -070019
20from decimal import Decimal
21from fractions import Fraction
22
23
24# Module to be tested.
25import statistics
26
27
28# === Helper functions and class ===
29
def sign(x):
    """Return the sign of x as a float, either -1.0 or +1.0.

    Only the sign bit matters: -0.0 yields -1.0, while 0.0 yields +1.0.
    """
    unit = 1
    return math.copysign(unit, x)
33
Steven D'Apranob28c3272015-12-01 19:59:53 +110034def _nan_equal(a, b):
35 """Return True if a and b are both the same kind of NAN.
36
37 >>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
38 True
39 >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
40 True
41 >>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
42 False
43 >>> _nan_equal(Decimal(42), Decimal('NAN'))
44 False
45
46 >>> _nan_equal(float('NAN'), float('NAN'))
47 True
48 >>> _nan_equal(float('NAN'), 0.5)
49 False
50
51 >>> _nan_equal(float('NAN'), Decimal('NAN'))
52 False
53
54 NAN payloads are not compared.
55 """
56 if type(a) is not type(b):
57 return False
58 if isinstance(a, float):
59 return math.isnan(a) and math.isnan(b)
60 aexp = a.as_tuple()[2]
61 bexp = b.as_tuple()[2]
62 return (aexp == bexp) and (aexp in ('n', 'N')) # Both NAN or both sNAN.
63
64
Larry Hastingsf5e987b2013-10-19 11:50:09 -070065def _calc_errors(actual, expected):
66 """Return the absolute and relative errors between two numbers.
67
68 >>> _calc_errors(100, 75)
69 (25, 0.25)
70 >>> _calc_errors(100, 100)
71 (0, 0.0)
72
73 Returns the (absolute error, relative error) between the two arguments.
74 """
75 base = max(abs(actual), abs(expected))
76 abs_err = abs(actual - expected)
77 rel_err = abs_err/base if base else float('inf')
78 return (abs_err, rel_err)
79
80
def approx_equal(x, y, tol=1e-12, rel=1e-7):
    """approx_equal(x, y [, tol [, rel]]) => True|False

    Return True when the numbers x and y are approximately equal, that is
    equal to within some margin of error, and False otherwise.  Numbers
    which compare equal are always approximately equal as well.

    x and y are approximately equal when their difference is no greater
    than the absolute error tol or the relative error rel, whichever allows
    the bigger difference.

    If given, both tol and rel must be finite, non-negative numbers.  When
    omitted they default to tol=1e-12 and rel=1e-7.

    >>> approx_equal(1.2589, 1.2587, tol=0.0003, rel=0)
    True
    >>> approx_equal(1.2589, 1.2587, tol=0.0001, rel=0)
    False

    The absolute error is abs(x-y); when it is less than or equal to tol,
    x and y are considered approximately equal.

    The relative error is the smaller of abs((x-y)/x) and abs((x-y)/y),
    provided x or y are not zero.  When it is less than or equal to rel,
    x and y are considered approximately equal.

    Complex numbers are not directly supported.  To compare complex
    numbers, compare their real and imaginary parts individually.

    NANs always compare unequal, even with themselves.  Infinities compare
    approximately equal only when they have the same sign; infinities of
    opposite sign compare unequal, as does an infinity compared with a
    finite number.
    """
    if tol < 0 or rel < 0:
        raise ValueError('error tolerances must be non-negative')
    # A NAN is never equal to anything, approximately or otherwise.
    has_nan = math.isnan(x) or math.isnan(y)
    if has_nan:
        return False
    # Exact equality implies approximate equality; this branch also
    # accepts two infinities of the same sign.
    if x == y:
        return True
    # Any infinity left over is paired with an infinity of the opposite
    # sign or with a finite number, so the values cannot be close.
    if math.isinf(x) or math.isinf(y):
        return False
    # Both values are finite from here on.
    difference = abs(x - y)
    magnitude = max(abs(x), abs(y))
    return difference <= max(tol, rel*magnitude)
132
133
# This class exists only as somewhere to stick a docstring containing
# doctests. The following docstring and tests were originally in a separate
# module. Now that it has been merged in here, I need somewhere to hang the
# docstring. Ultimately, this class will die, and the information below will
# either become redundant, or be moved into more appropriate places.
139class _DoNothing:
140 """
141 When doing numeric work, especially with floats, exact equality is often
142 not what you want. Due to round-off error, it is often a bad idea to try
143 to compare floats with equality. Instead the usual procedure is to test
144 them with some (hopefully small!) allowance for error.
145
146 The ``approx_equal`` function allows you to specify either an absolute
147 error tolerance, or a relative error, or both.
148
149 Absolute error tolerances are simple, but you need to know the magnitude
150 of the quantities being compared:
151
152 >>> approx_equal(12.345, 12.346, tol=1e-3)
153 True
154 >>> approx_equal(12.345e6, 12.346e6, tol=1e-3) # tol is too small.
155 False
156
157 Relative errors are more suitable when the values you are comparing can
158 vary in magnitude:
159
160 >>> approx_equal(12.345, 12.346, rel=1e-4)
161 True
162 >>> approx_equal(12.345e6, 12.346e6, rel=1e-4)
163 True
164
165 but a naive implementation of relative error testing can run into trouble
166 around zero.
167
168 If you supply both an absolute tolerance and a relative error, the
169 comparison succeeds if either individual test succeeds:
170
171 >>> approx_equal(12.345e6, 12.346e6, tol=1e-3, rel=1e-4)
172 True
173
174 """
175 pass
176
177
178
179# We prefer this for testing numeric values that may not be exactly equal,
180# and avoid using TestCase.assertAlmostEqual, because it sucks :-)
181
# Two separately imported copies of the module under test, used by TestModules
# to check where the accelerated helper functions come from.
#
# py_statistics: a fresh import of statistics with the _statistics
# module blocked.
py_statistics = import_helper.import_fresh_module('statistics',
                                                  blocked=['_statistics'])
# c_statistics: a fresh import of statistics together with a fresh
# _statistics module.
# NOTE(review): presumably falsy (None) when _statistics is unavailable; the
# skipUnless(c_statistics, ...) guard on test_c_functions relies on that.
c_statistics = import_helper.import_fresh_module('statistics',
                                                 fresh=['_statistics'])
Dong-hee Na8ad22a42019-08-25 02:51:20 +0900186
187
class TestModules(unittest.TestCase):
    """Check which module supplies each helper listed in func_names."""

    func_names = ['_normal_dist_inv_cdf']

    def test_py_functions(self):
        # In py_statistics every helper must be defined by statistics itself.
        for name in self.func_names:
            helper = getattr(py_statistics, name)
            self.assertEqual(helper.__module__, 'statistics')

    @unittest.skipUnless(c_statistics, 'requires _statistics')
    def test_c_functions(self):
        # In c_statistics every helper must be defined by _statistics.
        for name in self.func_names:
            helper = getattr(c_statistics, name)
            self.assertEqual(helper.__module__, '_statistics')
199
200
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700201class NumericTestCase(unittest.TestCase):
202 """Unit test class for numeric work.
203
204 This subclasses TestCase. In addition to the standard method
205 ``TestCase.assertAlmostEqual``, ``assertApproxEqual`` is provided.
206 """
207 # By default, we expect exact equality, unless overridden.
208 tol = rel = 0
209
210 def assertApproxEqual(
211 self, first, second, tol=None, rel=None, msg=None
212 ):
213 """Test passes if ``first`` and ``second`` are approximately equal.
214
215 This test passes if ``first`` and ``second`` are equal to
216 within ``tol``, an absolute error, or ``rel``, a relative error.
217
218 If either ``tol`` or ``rel`` are None or not given, they default to
219 test attributes of the same name (by default, 0).
220
221 The objects may be either numbers, or sequences of numbers. Sequences
222 are tested element-by-element.
223
224 >>> class MyTest(NumericTestCase):
225 ... def test_number(self):
226 ... x = 1.0/6
227 ... y = sum([x]*6)
228 ... self.assertApproxEqual(y, 1.0, tol=1e-15)
229 ... def test_sequence(self):
230 ... a = [1.001, 1.001e-10, 1.001e10]
231 ... b = [1.0, 1e-10, 1e10]
232 ... self.assertApproxEqual(a, b, rel=1e-3)
233 ...
234 >>> import unittest
235 >>> from io import StringIO # Suppress test runner output.
236 >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest)
237 >>> unittest.TextTestRunner(stream=StringIO()).run(suite)
238 <unittest.runner.TextTestResult run=2 errors=0 failures=0>
239
240 """
241 if tol is None:
242 tol = self.tol
243 if rel is None:
244 rel = self.rel
245 if (
Serhiy Storchaka2e576f52017-04-24 09:05:00 +0300246 isinstance(first, collections.abc.Sequence) and
247 isinstance(second, collections.abc.Sequence)
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700248 ):
249 check = self._check_approx_seq
250 else:
251 check = self._check_approx_num
252 check(first, second, tol, rel, msg)
253
254 def _check_approx_seq(self, first, second, tol, rel, msg):
255 if len(first) != len(second):
256 standardMsg = (
257 "sequences differ in length: %d items != %d items"
258 % (len(first), len(second))
259 )
260 msg = self._formatMessage(msg, standardMsg)
261 raise self.failureException(msg)
262 for i, (a,e) in enumerate(zip(first, second)):
263 self._check_approx_num(a, e, tol, rel, msg, i)
264
265 def _check_approx_num(self, first, second, tol, rel, msg, idx=None):
266 if approx_equal(first, second, tol, rel):
267 # Test passes. Return early, we are done.
268 return None
269 # Otherwise we failed.
270 standardMsg = self._make_std_err_msg(first, second, tol, rel, idx)
271 msg = self._formatMessage(msg, standardMsg)
272 raise self.failureException(msg)
273
274 @staticmethod
275 def _make_std_err_msg(first, second, tol, rel, idx):
276 # Create the standard error message for approx_equal failures.
277 assert first != second
278 template = (
279 ' %r != %r\n'
280 ' values differ by more than tol=%r and rel=%r\n'
281 ' -> absolute error = %r\n'
282 ' -> relative error = %r'
283 )
284 if idx is not None:
285 header = 'numeric sequences first differ at index %d.\n' % idx
286 template = header + template
287 # Calculate actual errors:
288 abs_err, rel_err = _calc_errors(first, second)
289 return template % (first, second, tol, rel, abs_err, rel_err)
290
291
292# ========================
293# === Test the helpers ===
294# ========================
295
class TestSign(unittest.TestCase):
    """Check that the sign() helper behaves correctly."""

    def testZeroes(self):
        # Signed zeroes must report their own sign.
        for zero, expected in ((0.0, +1), (-0.0, -1)):
            self.assertEqual(sign(zero), expected)
302
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700303
304# --- Tests for approx_equal ---
305
class ApproxEqualSymmetryTest(unittest.TestCase):
    """Test that approx_equal does not depend on argument order."""

    def test_relative_symmetry(self):
        # Check that approx_equal treats relative error symmetrically.
        # (a-b)/a is usually not equal to (a-b)/b. Ensure that this
        # doesn't matter.
        #
        # Note: the reason for this test is that an early version
        # of approx_equal was not symmetric. A relative error test
        # would pass, or fail, depending on which value was passed
        # as the first argument.
        #
        args1 = [2456, 37.8, -12.45, Decimal('2.54'), Fraction(17, 54)]
        args2 = [2459, 37.2, -12.41, Decimal('2.59'), Fraction(15, 54)]
        assert len(args1) == len(args2)
        for a, b in zip(args1, args2):
            self.do_relative_symmetry(a, b)

    def do_relative_symmetry(self, a, b):
        """Check a and b compare approx equal in both argument orders.

        The relative tolerance is chosen halfway between the two possible
        relative errors, abs(delta/a) and abs(delta/b).
        """
        a, b = min(a, b), max(a, b)
        assert a < b
        delta = b - a  # The absolute difference between the values.
        rel_err1, rel_err2 = abs(delta/a), abs(delta/b)
        # Choose an error margin halfway between the two.
        rel = (rel_err1 + rel_err2)/2
        # Now see that values a and b compare approx equal regardless of
        # which is given first.
        self.assertTrue(approx_equal(a, b, tol=0, rel=rel))
        self.assertTrue(approx_equal(b, a, tol=0, rel=rel))

    def test_symmetry(self):
        # Test that approx_equal(a, b) == approx_equal(b, a)
        args = [-23, -2, 5, 107, 93568]
        delta = 2
        for a in args:
            for type_ in (int, float, Decimal, Fraction):
                x = type_(a)*100
                y = x + delta
                r = abs(delta/max(x, y))
                # There are five cases to check:
                # 1) actual error <= tol, <= rel
                self.do_symmetry_test(x, y, tol=delta, rel=r)
                self.do_symmetry_test(x, y, tol=delta+1, rel=2*r)
                # 2) actual error > tol, > rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r/2)
                # 3) actual error <= tol, > rel
                self.do_symmetry_test(x, y, tol=delta, rel=r/2)
                # 4) actual error > tol, <= rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r)
                self.do_symmetry_test(x, y, tol=delta-1, rel=2*r)
                # 5) exact equality test
                self.do_symmetry_test(x, x, tol=0, rel=0)
                self.do_symmetry_test(x, y, tol=0, rel=0)

    def do_symmetry_test(self, a, b, tol, rel):
        """Check approx_equal(a, b) and approx_equal(b, a) agree.

        On failure the message shows the (a, b, tol, rel) tuple that was
        being compared.
        """
        # The placeholder must use str.format syntax: the message is built
        # with .format(), which leaves a "%r" placeholder untouched.
        template = "approx_equal comparisons don't match for {!r}"
        flag1 = approx_equal(a, b, tol, rel)
        flag2 = approx_equal(b, a, tol, rel)
        self.assertEqual(flag1, flag2, template.format((a, b, tol, rel)))
366
367
class ApproxEqualExactTest(unittest.TestCase):
    """Test approx_equal with values that are exactly equal.

    Exactly equal values must compare approximately equal whatever error
    tolerances are given.
    """

    def do_exactly_equal_test(self, x, tol, rel):
        # Check the value against itself, then its negation against itself.
        for value in (x, -x):
            outcome = approx_equal(value, value, tol=tol, rel=rel)
            self.assertTrue(outcome, 'equality failure for x=%r' % value)

    def test_exactly_equal_ints(self):
        # Equal int values are exactly equal.
        values = [42, 19740, 14974, 230, 1795, 700245, 36587]
        for value in values:
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_floats(self):
        # Equal float values are exactly equal.
        values = [0.42, 1.9740, 1497.4, 23.0, 179.5, 70.0245, 36.587]
        for value in values:
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_fractions(self):
        # Equal Fraction values are exactly equal.
        values = [
            Fraction(1, 2), Fraction(0), Fraction(5, 3),
            Fraction(9, 7), Fraction(35, 36), Fraction(3, 7),
        ]
        for value in values:
            self.do_exactly_equal_test(value, 0, 0)

    def test_exactly_equal_decimals(self):
        # Equal Decimal values are exactly equal.
        for text in "8.2 31.274 912.04 16.745 1.2047".split():
            self.do_exactly_equal_test(Decimal(text), 0, 0)

    def test_exactly_equal_absolute(self):
        # Equal values are exactly equal when an absolute error is given.
        for n in [16, 1013, 1372, 1198, 971, 4]:
            as_int = n
            as_float = n/10
            as_fraction = Fraction(n, 1234)
            self.do_exactly_equal_test(as_int, 0.01, 0)
            self.do_exactly_equal_test(as_float, 0.01, 0)
            self.do_exactly_equal_test(as_fraction, 0.01, 0)

    def test_exactly_equal_absolute_decimals(self):
        # Equal Decimal values are exactly equal with an absolute error.
        tol = Decimal("0.01")
        self.do_exactly_equal_test(Decimal("3.571"), tol, 0)
        self.do_exactly_equal_test(-Decimal("81.3971"), tol, 0)

    def test_exactly_equal_relative(self):
        # Equal values are exactly equal when a relative error is given.
        for value in [8347, 101.3, -7910.28, Fraction(5, 21)]:
            self.do_exactly_equal_test(value, 0, 0.01)
        self.do_exactly_equal_test(Decimal("11.68"), 0, Decimal("0.01"))

    def test_exactly_equal_both(self):
        # Equal values are equal when both tol and rel are given.
        for value in [41017, 16.742, -813.02, Fraction(3, 8)]:
            self.do_exactly_equal_test(value, 0.1, 0.01)
        self.do_exactly_equal_test(
            Decimal("7.2"), Decimal("0.1"), Decimal("0.01"))
429 self.do_exactly_equal_test(D("7.2"), D("0.1"), D("0.01"))
430
431
class ApproxEqualUnequalTest(unittest.TestCase):
    """Test approx_equal with unequal values and zero error tolerances.

    With tol=0 and rel=0 the comparison is an exact equality test, so
    unequal values must compare unequal.
    """

    def do_exactly_unequal_test(self, x):
        # Compare both x and -x against the value one greater.
        for start in (x, -x):
            outcome = approx_equal(start, start+1, tol=0, rel=0)
            self.assertFalse(outcome, 'inequality failure for x=%r' % start)

    def test_exactly_unequal_ints(self):
        # Unequal int values are unequal with zero error tolerance.
        values = [951, 572305, 478, 917, 17240]
        for value in values:
            self.do_exactly_unequal_test(value)

    def test_exactly_unequal_floats(self):
        # Unequal float values are unequal with zero error tolerance.
        values = [9.51, 5723.05, 47.8, 9.17, 17.24]
        for value in values:
            self.do_exactly_unequal_test(value)

    def test_exactly_unequal_fractions(self):
        # Unequal Fractions are unequal with zero error tolerance.
        values = [
            Fraction(1, 5), Fraction(7, 9),
            Fraction(12, 11), Fraction(101, 99023),
        ]
        for value in values:
            self.do_exactly_unequal_test(value)

    def test_exactly_unequal_decimals(self):
        # Unequal Decimals are unequal with zero error tolerance.
        for text in "3.1415 298.12 3.47 18.996 0.00245".split():
            self.do_exactly_unequal_test(Decimal(text))
461
462
class ApproxEqualInexactTest(unittest.TestCase):
    """Inexact test cases for approx_equal.

    These compare pairs of values that are not exactly equal.
    """

    # === Absolute error tests ===

    def do_approx_equal_abs_test(self, x, delta):
        # A value delta away from x must pass with twice that tolerance
        # and fail with half of it.
        for y in (x + delta, x - delta):
            msg = "Test failure for x={!r}, y={!r}".format(x, y)
            self.assertTrue(approx_equal(x, y, tol=2*delta, rel=0), msg)
            self.assertFalse(approx_equal(x, y, tol=delta/2, rel=0), msg)

    def test_approx_equal_absolute_ints(self):
        # Approximate equality of ints with an absolute error.
        values = [-10737, -1975, -7, -2, 0, 1, 9, 37, 423, 9874, 23789110]
        for value in values:
            self.do_approx_equal_abs_test(value, 10)
            self.do_approx_equal_abs_test(value, 2)

    def test_approx_equal_absolute_floats(self):
        # Approximate equality of floats with an absolute error.
        values = [-284.126, -97.1, -3.4, -2.15, 0.5, 1.0, 7.8, 4.23, 3817.4]
        for value in values:
            self.do_approx_equal_abs_test(value, 1.5)
            self.do_approx_equal_abs_test(value, 0.01)
            self.do_approx_equal_abs_test(value, 0.0001)

    def test_approx_equal_absolute_fractions(self):
        # Approximate equality of Fractions with an absolute error.
        delta = Fraction(1, 29)
        numerators = [-84, -15, -2, -1, 0, 1, 5, 17, 23, 34, 71]
        for numerator in numerators:
            value = Fraction(numerator, 29)
            self.do_approx_equal_abs_test(value, delta)
            self.do_approx_equal_abs_test(value, float(delta))

    def test_approx_equal_absolute_decimals(self):
        # Approximate equality of Decimals with an absolute error.
        delta = Decimal("0.01")
        for text in "1.0 3.5 36.08 61.79 7912.3648".split():
            value = Decimal(text)
            self.do_approx_equal_abs_test(value, delta)
            self.do_approx_equal_abs_test(-value, delta)

    def test_cross_zero(self):
        # The two values have opposite signs.
        self.assertTrue(approx_equal(1e-5, -1e-5, tol=1e-4, rel=0))

    # === Relative error tests ===

    def do_approx_equal_rel_test(self, x, delta):
        # A value scaled by (1 +/- delta) must pass with twice that
        # relative error and fail with half of it.
        for y in (x*(1+delta), x*(1-delta)):
            msg = "Test failure for x={!r}, y={!r}".format(x, y)
            self.assertTrue(approx_equal(x, y, tol=0, rel=2*delta), msg)
            self.assertFalse(approx_equal(x, y, tol=0, rel=delta/2), msg)

    def test_approx_equal_relative_ints(self):
        # Approximate equality of ints with a relative error.
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.36))
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.37))
        # ---
        self.assertTrue(approx_equal(449, 512, tol=0, rel=0.125))
        self.assertTrue(approx_equal(448, 512, tol=0, rel=0.125))
        self.assertFalse(approx_equal(447, 512, tol=0, rel=0.125))

    def test_approx_equal_relative_floats(self):
        # Approximate equality of floats with a relative error.
        values = [-178.34, -0.1, 0.1, 1.0, 36.97, 2847.136, 9145.074]
        for value in values:
            self.do_approx_equal_rel_test(value, 0.02)
            self.do_approx_equal_rel_test(value, 0.0001)

    def test_approx_equal_relative_fractions(self):
        # Approximate equality of Fractions with a relative error.
        delta = Fraction(3, 8)
        values = [
            Fraction(3, 84), Fraction(17, 30),
            Fraction(49, 50), Fraction(92, 85),
        ]
        for value in values:
            for d in (delta, float(delta)):
                self.do_approx_equal_rel_test(value, d)
                self.do_approx_equal_rel_test(-value, d)

    def test_approx_equal_relative_decimals(self):
        # Approximate equality of Decimals with a relative error.
        for text in "0.02 1.0 5.7 13.67 94.138 91027.9321".split():
            value = Decimal(text)
            self.do_approx_equal_rel_test(value, Decimal("0.001"))
            self.do_approx_equal_rel_test(-value, Decimal("0.05"))

    # === Both absolute and relative error tests ===

    # There are four cases to consider:
    #   1) actual error <= both absolute and relative error
    #   2) actual error <= absolute error but > relative error
    #   3) actual error <= relative error but > absolute error
    #   4) actual error > both absolute and relative error

    def do_check_both(self, a, b, tol, rel, tol_flag, rel_flag):
        # tol_flag and rel_flag say whether the absolute-only and the
        # relative-only comparisons are expected to succeed; the combined
        # comparison must succeed if either one does.
        def expect(should_pass, outcome):
            if should_pass:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

        expect(tol_flag, approx_equal(a, b, tol=tol, rel=0))
        expect(rel_flag, approx_equal(a, b, tol=0, rel=rel))
        expect(tol_flag or rel_flag, approx_equal(a, b, tol=tol, rel=rel))

    def test_approx_equal_both1(self):
        # Actual error <= both absolute and relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.8e-4, True, True)
        self.do_check_both(-7.387, -7.386, 0.002, 0.0002, True, True)

    def test_approx_equal_both2(self):
        # Actual error <= absolute error but > relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.7e-4, True, False)

    def test_approx_equal_both3(self):
        # Actual error <= relative error but > absolute error.
        self.do_check_both(7.955, 7.952, 0.001, 3.8e-4, False, True)

    def test_approx_equal_both4(self):
        # Actual error > both absolute and relative error.
        self.do_check_both(2.78, 2.75, 0.01, 0.001, False, False)
        self.do_check_both(971.44, 971.47, 0.02, 3e-5, False, False)
580
581
class ApproxEqualSpecialsTest(unittest.TestCase):
    """Test approx_equal with NANs, INFs and zeroes."""

    def test_inf(self):
        for numeric_type in (float, Decimal):
            inf = numeric_type('inf')
            # Infinities of the same sign are equal at any tolerance.
            self.assertTrue(approx_equal(inf, inf))
            self.assertTrue(approx_equal(inf, inf, 0, 0))
            self.assertTrue(approx_equal(inf, inf, 1, 0.01))
            self.assertTrue(approx_equal(-inf, -inf))
            # Opposite signs, or an infinity and a finite number, are not.
            self.assertFalse(approx_equal(inf, -inf))
            self.assertFalse(approx_equal(inf, 1000))

    def test_nan(self):
        for numeric_type in (float, Decimal):
            nan = numeric_type('nan')
            others = (nan, numeric_type('inf'), 1000)
            for other in others:
                self.assertFalse(approx_equal(nan, other))

    def test_float_zeroes(self):
        negative_zero = math.copysign(0.0, -1)
        self.assertTrue(approx_equal(negative_zero, 0.0, tol=0.1, rel=0.1))

    def test_decimal_zeroes(self):
        negative_zero = Decimal("-0.0")
        positive_zero = Decimal(0)
        self.assertTrue(
            approx_equal(negative_zero, positive_zero, tol=0.1, rel=0.1))
608
609
class TestApproxEqualErrors(unittest.TestCase):
    """Test the error conditions of approx_equal."""

    def test_bad_tol(self):
        # A negative tol must raise.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, -1, 0.1)

    def test_bad_rel(self):
        # A negative rel must raise.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, 1, -0.1)
620
621
622# --- Tests for NumericTestCase ---
623
624# The formatting routine that generates the error messages is complex enough
625# that it too needs testing.
626
class TestNumericTestCase(unittest.TestCase):
    """Test the error messages produced by NumericTestCase.

    The exact wording of those messages is *not* guaranteed, but they need
    some sort of test to ensure that they are generated correctly.  As a
    compromise, these tests look for specific substrings that are expected
    to be present even if the overall message changes.
    """

    def do_test(self, args):
        actual_msg = NumericTestCase._make_std_err_msg(*args)
        for fragment in self.generate_substrings(*args):
            self.assertIn(fragment, actual_msg)

    def test_numerictestcase_is_testcase(self):
        # NumericTestCase must actually be a TestCase.
        self.assertTrue(issubclass(NumericTestCase, unittest.TestCase))

    def test_error_msg_numeric(self):
        # Error message generated for numeric comparisons (no index).
        self.do_test((2.5, 4.0, 0.5, 0.25, None))

    def test_error_msg_sequence(self):
        # Error message generated for sequence comparisons (with index).
        self.do_test((3.75, 8.25, 1.25, 0.5, 7))

    def generate_substrings(self, first, second, tol, rel, idx):
        """Return the substrings expected to appear in the error message."""
        abs_err, rel_err = _calc_errors(first, second)
        expected = [
            'tol=%r' % tol,
            'rel=%r' % rel,
            'absolute error = %r' % abs_err,
            'relative error = %r' % rel_err,
        ]
        if idx is not None:
            expected += ['differ at index %d' % idx]
        return expected
665
666
667# =======================================
668# === Tests for the statistics module ===
669# =======================================
670
671
class GlobalsTest(unittest.TestCase):
    """Check the module-level metadata of the statistics module."""

    module = statistics
    expected_metadata = ["__doc__", "__all__"]

    def test_meta(self):
        # Every expected piece of metadata must exist.
        target = self.module
        for attribute in self.expected_metadata:
            present = hasattr(target, attribute)
            self.assertTrue(present, "%s not present" % attribute)

    def test_check_all(self):
        # Everything in __all__ must be public and must exist.
        target = self.module
        for public_name in target.__all__:
            is_private = public_name.startswith("_")
            self.assertFalse(
                is_private, 'private name "%s" in __all__' % public_name)
            exists = hasattr(target, public_name)
            self.assertTrue(
                exists, 'missing name "%s" in __all__' % public_name)
692
693
class DocTests(unittest.TestCase):
    """Run the doctests embedded in the statistics module."""

    @unittest.skipIf(sys.flags.optimize >= 2,
                     "Docstrings are omitted with -OO and above")
    def test_doc_tests(self):
        outcome = doctest.testmod(statistics, optionflags=doctest.ELLIPSIS)
        # At least one example must have run, and none may have failed.
        self.assertGreater(outcome.attempted, 0)
        self.assertEqual(outcome.failed, 0)
701
class StatisticsErrorTest(unittest.TestCase):
    """Check that statistics.StatisticsError exists and is a ValueError."""

    def test_has_exception(self):
        self.assertTrue(hasattr(statistics, 'StatisticsError'))
        error_class = statistics.StatisticsError
        errmsg = (
            "Expected StatisticsError to be a ValueError, but got a"
            " subclass of %r instead."
        ) % error_class.__base__
        self.assertTrue(issubclass(error_class, ValueError), errmsg)
713
714
715# === Tests for private utility functions ===
716
class ExactRatioTest(unittest.TestCase):
    """Test the _exact_ratio utility."""

    def test_int(self):
        for value in (-20, -3, 0, 5, 99, 10**20):
            self.assertEqual(statistics._exact_ratio(value), (value, 1))

    def test_fraction(self):
        for numerator in (-5, 1, 12, 38):
            value = Fraction(numerator, 37)
            self.assertEqual(statistics._exact_ratio(value), (numerator, 37))

    def test_float(self):
        self.assertEqual(statistics._exact_ratio(0.125), (1, 8))
        self.assertEqual(statistics._exact_ratio(1.125), (9, 8))
        # The ratio of a random float must divide back to the same float.
        samples = [random.uniform(-100, 100) for _ in range(100)]
        for sample in samples:
            num, den = statistics._exact_ratio(sample)
            self.assertEqual(sample, num/den)

    def test_decimal(self):
        exact_ratio = statistics._exact_ratio
        self.assertEqual(exact_ratio(Decimal("0.125")), (1, 8))
        self.assertEqual(exact_ratio(Decimal("12.345")), (2469, 200))
        self.assertEqual(exact_ratio(Decimal("-1.98")), (-99, 50))

    def test_inf(self):
        class MyFloat(float):
            pass

        class MyDecimal(Decimal):
            pass

        INF = float("INF")
        for inf in (INF, -INF):
            for type_ in (float, MyFloat, Decimal, MyDecimal):
                value = type_(inf)
                ratio = statistics._exact_ratio(value)
                # The infinity comes back unchanged, with its type kept
                # and no denominator.
                self.assertEqual(ratio, (value, None))
                self.assertEqual(type(ratio[0]), type_)
                self.assertTrue(math.isinf(ratio[0]))

    def test_float_nan(self):
        class MyFloat(float):
            pass

        NAN = float("NAN")
        for nan in (NAN, MyFloat(NAN)):
            numerator, denominator = statistics._exact_ratio(nan)
            self.assertTrue(math.isnan(numerator))
            self.assertIs(denominator, None)
            self.assertEqual(type(numerator), type(nan))

    def test_decimal_nan(self):
        class MyDecimal(Decimal):
            pass

        NAN = Decimal("NAN")
        sNAN = Decimal("sNAN")
        for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
            numerator, denominator = statistics._exact_ratio(nan)
            self.assertTrue(_nan_equal(numerator, nan))
            self.assertIs(denominator, None)
            self.assertEqual(type(numerator), type(nan))
779
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700780
class DecimalToRatioTest(unittest.TestCase):
    """Test the _exact_ratio private function with Decimal arguments."""

    def test_infinity(self):
        # INFs are handled correctly.
        inf = Decimal('INF')
        for value in (inf, -inf):
            self.assertEqual(statistics._exact_ratio(value), (value, None))

    def test_nan(self):
        # NANs are handled correctly.
        for nan in (Decimal('NAN'), Decimal('sNAN')):
            num, den = statistics._exact_ratio(nan)
            # NANs always compare non-equal, so assertEqual cannot be used.
            # An identity test is no good either, as nothing is guaranteed
            # about the object identity.
            self.assertTrue(_nan_equal(num, nan))
            self.assertIs(den, None)

    def test_sign(self):
        # The sign is calculated correctly.
        for d in [Decimal("9.8765e12"), Decimal("9.8765e-12")]:
            # Positive decimals first.
            assert d > 0
            num, den = statistics._exact_ratio(d)
            self.assertGreaterEqual(num, 0)
            self.assertGreater(den, 0)
            # Then negative decimals.
            num, den = statistics._exact_ratio(-d)
            self.assertLessEqual(num, 0)
            self.assertGreater(den, 0)

    def test_negative_exponent(self):
        # Result when the exponent is negative.
        ratio = statistics._exact_ratio(Decimal("0.1234"))
        self.assertEqual(ratio, (617, 5000))

    def test_positive_exponent(self):
        # Result when the exponent is positive.
        ratio = statistics._exact_ratio(Decimal("1.234e7"))
        self.assertEqual(ratio, (12340000, 1))

    def test_regression_20536(self):
        # Regression test for issue 20536.
        # See http://bugs.python.org/issue20536
        ratio = statistics._exact_ratio(Decimal("1e2"))
        self.assertEqual(ratio, (100, 1))
        ratio = statistics._exact_ratio(Decimal("1.47e5"))
        self.assertEqual(ratio, (147000, 1))
831
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700832
class IsFiniteTest(unittest.TestCase):
    # Test _isfinite private function.

    def test_finite(self):
        # Test that finite numbers are recognised as finite.
        for x in (5, Fraction(1, 3), 2.5, Decimal("5.5")):
            with self.subTest(x=x):
                self.assertTrue(statistics._isfinite(x))

    def test_infinity(self):
        # Test that INFs are not recognised as finite.
        for x in (float("inf"), Decimal("inf")):
            with self.subTest(x=x):
                self.assertFalse(statistics._isfinite(x))

    def test_nan(self):
        # Test that NANs are not recognised as finite.
        for x in (float("nan"), Decimal("NAN"), Decimal("sNAN")):
            with self.subTest(x=x):
                self.assertFalse(statistics._isfinite(x))
850
851
class CoerceTest(unittest.TestCase):
    # Test that private function _coerce correctly deals with types.

    # The coercion rules are currently an implementation detail, although at
    # some point that should change. The tests and comments here define the
    # correct implementation.

    # Pre-conditions of _coerce:
    #
    #   - coerce(T, S) will never be called with bool as the first argument;
    #     this is a pre-condition, guarded with an assertion.
    #
    #   - coerce(T, T) will always return T; we assume T is a valid numeric
    #     type. Violate this assumption at your own risk.
    #
    #   - Apart from as above, bool is treated as if it were actually int.
    #
    #   - coerce(int, X) and coerce(X, int) return X.

    def test_bool(self):
        # bool is somewhat special, due to the pre-condition that it is
        # never given as the first argument to _coerce, and that it cannot
        # be subclassed. So we test it specially.
        for T in (int, float, Fraction, Decimal):
            with self.subTest(T=T):
                self.assertIs(statistics._coerce(T, bool), T)
                class MyClass(T): pass
                self.assertIs(statistics._coerce(MyClass, bool), MyClass)

    def assertCoerceTo(self, A, B):
        """Assert that type A coerces to B, whichever order they are given."""
        self.assertIs(statistics._coerce(A, B), B)
        self.assertIs(statistics._coerce(B, A), B)

    def check_coerce_to(self, A, B):
        """Checks that type A coerces to B, including subclasses."""
        # Assert that type A is coerced to B.
        self.assertCoerceTo(A, B)
        # Subclasses of A are also coerced to B.
        class SubclassOfA(A): pass
        self.assertCoerceTo(SubclassOfA, B)
        # A, and subclasses of A, are coerced to subclasses of B.
        class SubclassOfB(B): pass
        self.assertCoerceTo(A, SubclassOfB)
        self.assertCoerceTo(SubclassOfA, SubclassOfB)

    def assertCoerceRaises(self, A, B):
        """Assert that coercing A to B, or vice versa, raises TypeError."""
        # NOTE(review): each call hands _coerce ONE argument (a tuple), so
        # the TypeError observed here comes from the missing second
        # parameter rather than from the coercion rules.  The call shape is
        # deliberately left as-is: unpacking the tuple changes which of the
        # callers below pass, so that needs a decision on the intended
        # rules first -- confirm before changing.
        self.assertRaises(TypeError, statistics._coerce, (A, B))
        self.assertRaises(TypeError, statistics._coerce, (B, A))

    def check_type_coercions(self, T):
        """Check that type T coerces correctly with subclasses of itself."""
        assert T is not bool
        # Coercing a type with itself returns the same type.
        self.assertIs(statistics._coerce(T, T), T)
        # Coercing a type with a subclass of itself returns the subclass.
        class U(T): pass
        class V(T): pass
        class W(U): pass
        for typ in (U, V, W):
            self.assertCoerceTo(T, typ)
        self.assertCoerceTo(U, W)
        # Coercing two subclasses that aren't parent/child is an error.
        self.assertCoerceRaises(U, V)
        self.assertCoerceRaises(V, W)

    def test_int(self):
        # Check that int coerces correctly.
        self.check_type_coercions(int)
        for typ in (float, Fraction, Decimal):
            self.check_coerce_to(int, typ)

    def test_fraction(self):
        # Check that Fraction coerces correctly.
        self.check_type_coercions(Fraction)
        self.check_coerce_to(Fraction, float)

    def test_decimal(self):
        # Check that Decimal coerces correctly.
        self.check_type_coercions(Decimal)

    def test_float(self):
        # Check that float coerces correctly.
        self.check_type_coercions(float)

    def test_non_numeric_types(self):
        # Pair every supported numeric type with every non-numeric type.
        for bad_type in (str, list, type(None), tuple, dict):
            for good_type in (int, float, Fraction, Decimal):
                self.assertCoerceRaises(good_type, bad_type)

    def test_incompatible_types(self):
        # Test that incompatible types raise.
        for T in (float, Fraction):
            class MySubclass(T): pass
            self.assertCoerceRaises(T, Decimal)
            self.assertCoerceRaises(MySubclass, Decimal)
950
951
class ConvertTest(unittest.TestCase):
    # Test private _convert function.

    def check_exact_equal(self, x, y):
        """Check that x equals y, and has the same type as well."""
        self.assertEqual(x, y)
        self.assertIs(type(x), type(y))

    def test_int(self):
        # Test conversions to int.
        x = statistics._convert(Fraction(71), int)
        self.check_exact_equal(x, 71)
        class MyInt(int): pass
        x = statistics._convert(Fraction(17), MyInt)
        self.check_exact_equal(x, MyInt(17))

    def test_fraction(self):
        # Test conversions to Fraction.
        x = statistics._convert(Fraction(95, 99), Fraction)
        self.check_exact_equal(x, Fraction(95, 99))
        # The subclass re-wraps division results so they keep its type.
        class MyFraction(Fraction):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        x = statistics._convert(Fraction(71, 13), MyFraction)
        self.check_exact_equal(x, MyFraction(71, 13))

    def test_float(self):
        # Test conversions to float.
        x = statistics._convert(Fraction(-1, 2), float)
        self.check_exact_equal(x, -0.5)
        class MyFloat(float):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        x = statistics._convert(Fraction(9, 8), MyFloat)
        self.check_exact_equal(x, MyFloat(1.125))

    def test_decimal(self):
        # Test conversions to Decimal.
        x = statistics._convert(Fraction(1, 40), Decimal)
        self.check_exact_equal(x, Decimal("0.025"))
        class MyDecimal(Decimal):
            def __truediv__(self, other):
                return self.__class__(super().__truediv__(other))
        x = statistics._convert(Fraction(-15, 16), MyDecimal)
        self.check_exact_equal(x, MyDecimal("-0.9375"))

    def test_inf(self):
        # Infinities of either sign convert to themselves.
        for INF in (float('inf'), Decimal('inf')):
            for inf in (INF, -INF):
                with self.subTest(inf=inf):
                    x = statistics._convert(inf, type(inf))
                    self.check_exact_equal(x, inf)

    def test_nan(self):
        # NANs never compare equal, so compare with the _nan_equal helper.
        for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
            with self.subTest(nan=nan):
                x = statistics._convert(nan, type(nan))
                self.assertTrue(_nan_equal(x, nan))

    def test_invalid_input_type(self):
        # A value that is not a number must raise TypeError.
        with self.assertRaises(TypeError):
            statistics._convert(None, float)
1012
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001013
class FailNegTest(unittest.TestCase):
    """Test _fail_neg private function."""

    def test_pass_through(self):
        # Test that values are passed through unchanged.
        values = [1, 2.0, Fraction(3), Decimal(4)]
        new = list(statistics._fail_neg(values))
        self.assertEqual(values, new)

    def test_negatives_raise(self):
        # Test that negatives raise an exception.
        for x in [1, 2.0, Fraction(3), Decimal(4)]:
            with self.subTest(x=x):
                it = statistics._fail_neg([-x])
                # _fail_neg is consumed with next(); the error surfaces when
                # the negative item is pulled, not when the iterator is made.
                self.assertRaises(statistics.StatisticsError, next, it)

    def test_error_msg(self):
        # Test that a given error message is used.  The message is
        # randomised so it cannot match by coincidence.
        msg = "badness #%d" % random.randint(10000, 99999)
        with self.assertRaises(statistics.StatisticsError) as cm:
            next(statistics._fail_neg([-1], msg))
        self.assertEqual(cm.exception.args[0], msg)
1040
1041
class FindLteqTest(unittest.TestCase):
    # Test _find_lteq private function.

    def test_invalid_input_values(self):
        # x is absent from a in every case, so ValueError is expected.
        for a, x in [
            ([], 1),
            ([1, 2], 3),
            ([1, 3], 2),
        ]:
            with self.subTest(a=a, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_lteq(a, x)

    def test_locate_successfully(self):
        # expected_i is the index of the leftmost occurrence of x in a.
        for a, x, expected_i in [
            ([1, 1, 1, 2, 3], 1, 0),
            ([0, 1, 1, 1, 2, 3], 1, 1),
            ([1, 2, 3, 3, 3], 3, 2),
        ]:
            with self.subTest(a=a, x=x):
                self.assertEqual(expected_i, statistics._find_lteq(a, x))
1063
1064
class FindRteqTest(unittest.TestCase):
    # Test _find_rteq private function.

    def test_invalid_input_values(self):
        # Each case is expected to raise ValueError.
        for a, l, x in [
            ([1], 2, 1),
            ([1, 3], 0, 2),
        ]:
            # subTest (as in the sibling tests) so a failure names the case.
            with self.subTest(a=a, l=l, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_rteq(a, l, x)

    def test_locate_successfully(self):
        # expected_i is the index of the rightmost occurrence of x in a.
        for a, l, x, expected_i in [
            ([1, 1, 1, 2, 3], 0, 1, 2),
            ([0, 1, 1, 1, 2, 3], 0, 1, 3),
            ([1, 2, 3, 3, 3], 0, 3, 4),
        ]:
            with self.subTest(a=a, l=l, x=x):
                self.assertEqual(expected_i, statistics._find_rteq(a, l, x))
1084
1085
# === Tests for public functions ===
1087
class UnivariateCommonMixin:
    # Common tests for most univariate functions that take a data argument.
    #
    # Classes using this mixin must provide self.func (the function under
    # test) and the unittest assertion methods.

    def test_no_args(self):
        # Fail if given no arguments.
        self.assertRaises(TypeError, self.func)

    def test_empty_data(self):
        # Fail when the data argument (first argument) is empty.
        for empty in ([], (), iter([])):
            self.assertRaises(statistics.StatisticsError, self.func, empty)

    def prepare_data(self):
        """Return int data for various tests."""
        data = list(range(10))
        # Keep shuffling until the data is definitely not in sorted order.
        while data == sorted(data):
            random.shuffle(data)
        return data

    def test_no_inplace_modifications(self):
        # Test that the function does not modify its input data.
        data = self.prepare_data()
        assert len(data) != 1  # Necessary to avoid infinite loop.
        assert data != sorted(data)
        saved = data[:]
        assert data is not saved
        _ = self.func(data)
        self.assertListEqual(data, saved, "data has been modified")

    def test_order_doesnt_matter(self):
        # Test that the order of data points doesn't change the result.

        # CAUTION: due to floating point rounding errors, the result actually
        # may depend on the order. Consider this test representing an ideal.
        # To avoid this test failing, only test with exact values such as ints
        # or Fractions.
        data = [1, 2, 3, 3, 3, 4, 5, 6]*100
        expected = self.func(data)
        random.shuffle(data)
        actual = self.func(data)
        self.assertEqual(expected, actual)

    def test_type_of_data_collection(self):
        # Test that the type of iterable data doesn't affect the result.
        class MyList(list):
            pass
        class MyTuple(tuple):
            pass
        def generator(data):
            return (obj for obj in data)
        data = self.prepare_data()
        expected = self.func(data)
        for kind in (list, tuple, iter, MyList, MyTuple, generator):
            with self.subTest(kind=kind):
                result = self.func(kind(data))
                self.assertEqual(result, expected)

    def test_range_data(self):
        # Test that functions work with range objects.
        data = range(20, 50, 3)
        expected = self.func(list(data))
        self.assertEqual(self.func(data), expected)

    def test_bad_arg_types(self):
        # Test that function raises when given data of the wrong type.

        # Don't roll the following into a loop like this:
        #   for bad in list_of_bad:
        #       self.check_for_type_error(bad)
        #
        # Since assertRaises doesn't show the arguments that caused the test
        # failure, it is very difficult to debug these test failures when the
        # following are in a loop.
        self.check_for_type_error(None)
        self.check_for_type_error(23)
        self.check_for_type_error(42.0)
        self.check_for_type_error(object())

    def check_for_type_error(self, *args):
        """Assert that calling self.func with *args raises TypeError."""
        self.assertRaises(TypeError, self.func, *args)

    def test_type_of_data_element(self):
        # Check the type of data elements doesn't affect the numeric result.
        # This is a weaker test than UnivariateTypeMixin.test_types_conserved,
        # because it checks the numeric result by equality, but not by type.
        class MyFloat(float):
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__

        raw = self.prepare_data()
        expected = self.func(raw)
        for kind in (float, MyFloat, Decimal, Fraction):
            with self.subTest(kind=kind):
                data = [kind(x) for x in raw]
                # Cast back to the type of the reference result, so only
                # the numeric value is compared.
                result = type(expected)(self.func(data))
                self.assertEqual(result, expected)
1185
1186
class UnivariateTypeMixin:
    """Mixin class for type-conserving functions.

    This mixin class holds test(s) for functions which conserve the type of
    individual data points. E.g. the mean of a list of Fractions should itself
    be a Fraction.

    Not all tests to do with types need go in this class. Only those that
    rely on the function returning the same type as its input data.
    """
    def prepare_types_for_conservation_test(self):
        """Return the types which are expected to be conserved."""
        # Plain float arithmetic returns plain floats, so the subclass
        # re-wraps every arithmetic result in its own type.
        class MyFloat(float):
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))
            def __rtruediv__(self, other):
                return type(self)(super().__rtruediv__(other))
            def __sub__(self, other):
                return type(self)(super().__sub__(other))
            def __rsub__(self, other):
                return type(self)(super().__rsub__(other))
            def __pow__(self, other):
                return type(self)(super().__pow__(other))
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__
        return (float, Decimal, Fraction, MyFloat)

    def test_types_conserved(self):
        # Test that functions keeps the same type as their data points.
        # (Excludes mixed data types.) This only tests the type of the return
        # result, not the value.
        data = self.prepare_data()
        for kind in self.prepare_types_for_conservation_test():
            with self.subTest(kind=kind):
                d = [kind(x) for x in data]
                result = self.func(d)
                self.assertIs(type(result), kind)
1224
1225
class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
    # Common test cases for statistics._sum() function.

    # This test suite looks only at the numeric value returned by _sum,
    # after conversion to the appropriate type.
    def setUp(self):
        def simplified_sum(*args):
            # _sum returns a (type, total, count) triple; reduce it to the
            # total converted to the data type.
            T, value, n = statistics._sum(*args)
            # _convert turns a value into type T.  _coerce, which was
            # called here before, only chooses between two *types* and
            # cannot convert a value.
            return statistics._convert(value, T)
        self.func = simplified_sum
1236
1237
class TestSum(NumericTestCase):
    # Test cases for statistics._sum() function.

    # These tests look at the entire three value tuple returned by _sum:
    # (data type, total, number of items).

    def setUp(self):
        self.func = statistics._sum

    def test_empty_data(self):
        # Override test for empty data.
        for data in ([], (), iter([])):
            self.assertEqual(self.func(data), (int, Fraction(0), 0))

    def test_ints(self):
        self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]),
                         (int, Fraction(60), 8))

    def test_floats(self):
        self.assertEqual(self.func([0.25]*20),
                         (float, Fraction(5.0), 20))

    def test_fractions(self):
        self.assertEqual(self.func([Fraction(1, 1000)]*500),
                         (Fraction, Fraction(1, 2), 500))

    def test_decimals(self):
        D = Decimal
        data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"),
                D("3.974"), D("2.328"), D("4.617"), D("2.843"),
                ]
        self.assertEqual(self.func(data),
                         (Decimal, Decimal("20.686"), 8))

    def test_compare_with_math_fsum(self):
        # Compare with the math.fsum function.
        # Ideally we ought to get the exact same result, but sometimes
        # we differ by a very slight amount :-(
        data = [random.uniform(-100, 1000) for _ in range(1000)]
        total = float(self.func(data)[1])
        self.assertApproxEqual(total, math.fsum(data), rel=2e-16)

    def test_strings_fail(self):
        # Sum of strings should fail.
        self.assertRaises(TypeError, self.func, [1, 2, 3], '999')
        self.assertRaises(TypeError, self.func, [1, 2, 3, '999'])

    def test_bytes_fail(self):
        # Sum of bytes should fail.
        self.assertRaises(TypeError, self.func, [1, 2, 3], b'999')
        self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])

    def test_mixed_sum(self):
        # Mixed input types are not (currently) allowed.
        # Check that mixed data types fail.
        self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)])
        # And so does mixed start argument.
        self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001294
1295
class SumTortureTest(NumericTestCase):
    def test_torture(self):
        # Tim Peters' torture test for sum, and variants of same.
        # The huge terms cancel, so only the small ones should survive.
        self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000),
                         (float, Fraction(20000.0), 40000))
        self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000),
                         (float, Fraction(20000.0), 40000))
        T, num, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
        self.assertIs(T, float)
        self.assertEqual(count, 40000)
        self.assertApproxEqual(float(num), 2.0e-96, rel=5e-16)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001307
1308
class SumSpecialValues(NumericTestCase):
    # Test that sum works correctly with IEEE-754 special values.

    def test_nan(self):
        # A NAN in the data makes the total a NAN of the same type.
        for type_ in (float, Decimal):
            with self.subTest(type_=type_):
                nan = type_('nan')
                result = statistics._sum([1, nan, 2])[1]
                self.assertIs(type(result), type_)
                self.assertTrue(math.isnan(result))

    def check_infinity(self, x, inf):
        """Check x is an infinity of the same type and sign as inf."""
        self.assertTrue(math.isinf(x))
        self.assertIs(type(x), type(inf))
        self.assertEqual(x > 0, inf > 0)
        # A real assertion method, so the check is not stripped under -O.
        self.assertEqual(x, inf)

    def do_test_inf(self, inf):
        """Check that sums containing inf come out as that same infinity."""
        # Adding a single infinity gives infinity.
        result = statistics._sum([1, 2, inf, 3])[1]
        self.check_infinity(result, inf)
        # Adding two infinities of the same sign also gives infinity.
        result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
        self.check_infinity(result, inf)

    def test_float_inf(self):
        inf = float('inf')
        for sgn in (+1, -1):
            self.do_test_inf(sgn*inf)

    def test_decimal_inf(self):
        inf = Decimal('inf')
        for sgn in (+1, -1):
            self.do_test_inf(sgn*inf)

    def test_float_mismatched_infs(self):
        # Test that adding two infinities of opposite sign gives a NAN.
        inf = float('inf')
        result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
        self.assertTrue(math.isnan(result))

    def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign returns NAN.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.ExtendedContext):
            self.assertTrue(math.isnan(statistics._sum(data)[1]))

    def test_decimal_basiccontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign raises
        # InvalidOperation under BasicContext (no NAN is returned, despite
        # the test's name).
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.BasicContext):
            self.assertRaises(decimal.InvalidOperation, statistics._sum, data)

    def test_decimal_snan_raises(self):
        # Adding sNAN should raise InvalidOperation.
        sNAN = Decimal('sNAN')
        data = [1, sNAN, 2]
        self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
1369
1370
# === Tests for averages ===
1372
class AverageMixin(UnivariateCommonMixin):
    # Mixin class holding common tests for averages.

    def test_single_value(self):
        # Average of a single value is the value itself.
        for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
            with self.subTest(x=x):
                self.assertEqual(self.func([x]), x)

    def prepare_values_for_repeated_single_test(self):
        """Return the values used by test_repeated_single_value."""
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.9712'))

    def test_repeated_single_value(self):
        # The average of a single repeated value is the value itself.
        for x in self.prepare_values_for_repeated_single_test():
            for count in (2, 5, 10, 20):
                with self.subTest(x=x, count=count):
                    data = [x]*count
                    self.assertEqual(self.func(data), x)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001391
1392
class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.mean.

    def setUp(self):
        self.func = statistics.mean

    def test_torture_pep(self):
        # "Torture Test" from PEP-450.
        self.assertEqual(self.func([1e100, 1, 3, -1e100]), 1)

    def test_ints(self):
        # Test mean with ints.
        data = [0, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9]
        random.shuffle(data)
        self.assertEqual(self.func(data), 4.8125)

    def test_floats(self):
        # Test mean with floats.
        data = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5]
        random.shuffle(data)
        self.assertEqual(self.func(data), 22.015625)

    def test_decimals(self):
        # Test mean with Decimals.
        D = Decimal
        data = [D("1.634"), D("2.517"), D("3.912"), D("4.072"), D("5.813")]
        random.shuffle(data)
        self.assertEqual(self.func(data), D("3.5896"))

    def test_fractions(self):
        # Test mean with Fractions.
        F = Fraction
        data = [F(1, 2), F(2, 3), F(3, 4), F(4, 5), F(5, 6), F(6, 7), F(7, 8)]
        random.shuffle(data)
        self.assertEqual(self.func(data), F(1479, 1960))

    def test_inf(self):
        # Test mean with infinities.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            for sgn in (1, -1):
                with self.subTest(kind=kind, sgn=sgn):
                    inf = kind("inf")*sgn
                    data = raw + [inf]
                    result = self.func(data)
                    self.assertTrue(math.isinf(result))
                    self.assertEqual(result, inf)

    def test_mismatched_infs(self):
        # Test mean with infinities of opposite sign.
        data = [2, 4, 6, float('inf'), 1, 3, 5, float('-inf')]
        result = self.func(data)
        self.assertTrue(math.isnan(result))

    def test_nan(self):
        # Test mean with NANs.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            with self.subTest(kind=kind):
                nan = kind("nan")
                data = raw + [nan]
                result = self.func(data)
                self.assertTrue(math.isnan(result))

    def test_big_data(self):
        # Test adding a large constant to every data point.
        c = 1e9
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data) + c
        assert expected != c
        result = self.func([x+c for x in data])
        self.assertEqual(result, expected)

    def test_doubled_data(self):
        # Mean of [a,b,c...z] should be same as for [a,a,b,b,c,c...z,z].
        data = [random.uniform(-3, 5) for _ in range(1000)]
        expected = self.func(data)
        actual = self.func(data*2)
        self.assertApproxEqual(actual, expected)

    def test_regression_20561(self):
        # Regression test for issue 20561.
        # See https://bugs.python.org/issue20561
        d = Decimal('1e4')
        self.assertEqual(statistics.mean([d]), d)

    def test_regression_25177(self):
        # Regression test for issue 25177.
        # Ensure very big and very small floats don't overflow.
        # See https://bugs.python.org/issue25177.
        self.assertEqual(statistics.mean(
            [8.988465674311579e+307, 8.98846567431158e+307]),
            8.98846567431158e+307)
        big = 8.98846567431158e+307
        tiny = 5e-324
        for n in (2, 3, 5, 200):
            with self.subTest(n=n):
                self.assertEqual(statistics.mean([big]*n), big)
                self.assertEqual(statistics.mean([tiny]*n), tiny)
1487
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001488
class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.harmonic_mean.

    def setUp(self):
        self.func = statistics.harmonic_mean

    def prepare_data(self):
        # Override mixin method: drop the zero from the inherited data.
        values = super().prepare_data()
        values.remove(0)
        return values

    def prepare_values_for_repeated_single_test(self):
        # Override mixin method.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.125'))

    def test_zero(self):
        # Test that harmonic mean returns zero when given zero.
        values = [1, 0, 2]
        self.assertEqual(self.func(values), 0)

    def test_negative_error(self):
        # Test that harmonic mean raises when given a negative value.
        exc = statistics.StatisticsError
        for values in ([-1], [1, -2, 3]):
            with self.subTest(values=values):
                self.assertRaises(exc, self.func, values)

    def test_invalid_type_error(self):
        # Test error is raised when input contains invalid type(s)
        for data in [
            ['3.14'],               # single string
            ['1', '2', '3'],        # multiple strings
            [1, '2', 3, '4', 5],    # mixed strings and valid integers
            [2.3, 3.4, 4.5, '5.6']  # only one string and valid floats
        ]:
            with self.subTest(data=data):
                with self.assertRaises(TypeError):
                    self.func(data)

    def test_ints(self):
        # Test harmonic mean with ints.
        data = [2, 4, 4, 8, 16, 16]
        random.shuffle(data)
        self.assertEqual(self.func(data), 6*4/5)

    def test_floats_exact(self):
        # Test harmonic mean with some carefully chosen floats.
        data = [1/8, 1/4, 1/4, 1/2, 1/2]
        random.shuffle(data)
        self.assertEqual(self.func(data), 1/4)
        self.assertEqual(self.func([0.25, 0.5, 1.0, 1.0]), 0.5)

    def test_singleton_lists(self):
        # Test that harmonic mean([x]) returns exactly x.
        for x in range(1, 101):
            self.assertEqual(self.func([x]), x)

    def test_decimals_exact(self):
        # Test harmonic mean with some carefully chosen Decimals.
        D = Decimal
        self.assertEqual(self.func([D(15), D(30), D(60), D(60)]), D(30))
        data = [D("0.05"), D("0.10"), D("0.20"), D("0.20")]
        random.shuffle(data)
        self.assertEqual(self.func(data), D("0.10"))
        data = [D("1.68"), D("0.32"), D("5.94"), D("2.75")]
        random.shuffle(data)
        self.assertEqual(self.func(data), D(66528)/70723)

    def test_fractions(self):
        # Test harmonic mean with Fractions.
        F = Fraction
        data = [F(1, 2), F(2, 3), F(3, 4), F(4, 5), F(5, 6), F(6, 7), F(7, 8)]
        random.shuffle(data)
        self.assertEqual(self.func(data), F(7*420, 4029))

    def test_inf(self):
        # Test harmonic mean with infinity.
        values = [2.0, float('inf'), 1.0]
        self.assertEqual(self.func(values), 2.0)

    def test_nan(self):
        # Test harmonic mean with NANs.
        values = [2.0, float('nan'), 1.0]
        self.assertTrue(math.isnan(self.func(values)))

    def test_multiply_data_points(self):
        # Test multiplying every data point by a constant.
        c = 111
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data)*c
        result = self.func([x*c for x in data])
        self.assertEqual(result, expected)

    def test_doubled_data(self):
        # Harmonic mean of [a,b...z] should be same as for [a,a,b,b...z,z].
        data = [random.uniform(1, 5) for _ in range(1000)]
        expected = self.func(data)
        actual = self.func(data*2)
        self.assertApproxEqual(actual, expected)

    def test_with_weights(self):
        # Test the optional weights argument.
        self.assertEqual(self.func([40, 60], [5, 30]), 56.0)  # common case
        self.assertEqual(self.func([40, 60],
                                   weights=[5, 30]), 56.0)  # keyword argument
        self.assertEqual(self.func(iter([40, 60]),
                                   iter([5, 30])), 56.0)    # iterator inputs
        # Integer weights must match repeating each value that many times.
        self.assertEqual(
            self.func([Fraction(10, 3), Fraction(23, 5), Fraction(7, 2)],
                      [5, 2, 10]),
            self.func([Fraction(10, 3)] * 5 +
                      [Fraction(23, 5)] * 2 +
                      [Fraction(7, 2)] * 10))
        self.assertEqual(self.func([10], [7]), 10)    # n=1 fast path
        with self.assertRaises(TypeError):
            self.func([1, 2, 3], [1, (), 3])          # non-numeric weight
        with self.assertRaises(statistics.StatisticsError):
            self.func([1, 2, 3], [1, 2])              # wrong number of weights
        with self.assertRaises(statistics.StatisticsError):
            self.func([10], [0])                      # no non-zero weights
        with self.assertRaises(statistics.StatisticsError):
            self.func([10, 20], [0, 0])               # no non-zero weights
1608
Steven D'Apranoa474afd2016-08-09 12:49:01 +10001609
class TestMedian(NumericTestCase, AverageMixin):
    # Tests common to median() and, through subclassing, the median_*
    # variants.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        # Guarantee an odd number of data points.
        values = super().prepare_data()
        if len(values) % 2 == 0:
            values.append(2)
        return values

    def test_even_ints(self):
        # Even number of int data points.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 3.5)

    def test_odd_ints(self):
        # Odd number of int data points.
        values = [1, 2, 3, 4, 5, 6, 9]
        assert len(values) % 2 == 1
        self.assertEqual(self.func(values), 4)

    def test_odd_fractions(self):
        # Odd number of Fraction data points.
        values = [Fraction(k, 7) for k in range(1, 6)]
        assert len(values) % 2 == 1
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(3, 7))

    def test_even_fractions(self):
        # Even number of Fraction data points.
        values = [Fraction(k, 7) for k in range(1, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(1, 2))

    def test_odd_decimals(self):
        # Odd number of Decimal data points.
        values = [Decimal(s) for s in ('2.5', '3.1', '4.2', '5.7', '5.8')]
        assert len(values) % 2 == 1
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('4.2'))

    def test_even_decimals(self):
        # Even number of Decimal data points.
        values = [Decimal(s)
                  for s in ('1.2', '2.5', '3.1', '4.2', '5.7', '5.8')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('3.65'))
1665
1666
class TestMedianDataType(NumericTestCase, UnivariateTypeMixin):
    # Check that median() conserves the type of the data elements.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        values = list(range(15))
        assert len(values) % 2 == 1
        # range() output is already sorted, so keep shuffling until the
        # order actually differs from the sorted order.
        in_order = sorted(values)
        while values == in_order:
            random.shuffle(values)
        return values
1678
1679
class TestMedianLow(TestMedian, UnivariateTypeMixin):
    # Re-run the median tests against median_low, overriding the
    # even-length cases whose expected values differ.
    def setUp(self):
        self.func = statistics.median_low

    def test_even_ints(self):
        # Even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 3)

    def test_even_fractions(self):
        # Even number of Fractions.
        values = [Fraction(k, 7) for k in range(1, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(3, 7))

    def test_even_decimals(self):
        # Even number of Decimals.
        values = [Decimal(s)
                  for s in ('1.1', '2.2', '3.3', '4.4', '5.5', '6.6')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('3.3'))
1705
1706
class TestMedianHigh(TestMedian, UnivariateTypeMixin):
    # Re-run the median tests against median_high, overriding the
    # even-length cases whose expected values differ.
    def setUp(self):
        self.func = statistics.median_high

    def test_even_ints(self):
        # Even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 4)

    def test_even_fractions(self):
        # Even number of Fractions.
        values = [Fraction(k, 7) for k in range(1, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(4, 7))

    def test_even_decimals(self):
        # Even number of Decimals.
        values = [Decimal(s)
                  for s in ('1.1', '2.2', '3.3', '4.4', '5.5', '6.6')]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('4.4'))
1732
1733
class TestMedianGrouped(TestMedian):
    # Tests for median_grouped.
    # It doesn't conserve data element types, so the type-conservation
    # tests of TestMedianDataType are not applied here.
    def setUp(self):
        self.func = statistics.median_grouped

    def test_odd_number_repeated(self):
        # median_grouped with repeated median values, odd-length data.
        sample = [12, 13, 14, 14, 14, 15, 15]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample), 14)

        sample = [12, 13, 14, 14, 14, 14, 15]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample), 13.875)

        sample = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample, 5), 19.375)

        sample = [16, 18, 18, 18, 18, 20, 20, 20, 22, 22, 22, 24, 24, 26, 28]
        assert len(sample) % 2 == 1
        self.assertApproxEqual(self.func(sample, 2), 20.66666667, tol=1e-8)

    def test_even_number_repeated(self):
        # median_grouped with repeated median values, even-length data.
        sample = [5, 10, 10, 15, 20, 20, 20, 25, 25, 30]
        assert len(sample) % 2 == 0
        self.assertApproxEqual(self.func(sample, 5), 19.16666667, tol=1e-8)

        sample = [2, 3, 4, 4, 4, 5]
        assert len(sample) % 2 == 0
        self.assertApproxEqual(self.func(sample), 3.83333333, tol=1e-8)

        sample = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(sample) % 2 == 0
        self.assertEqual(self.func(sample), 4.5)

        sample = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(sample) % 2 == 0
        self.assertEqual(self.func(sample), 4.75)

    def test_repeated_single_value(self):
        # Override method from AverageMixin.
        # median_grouped does not conserve the data type, so the expected
        # value is the float conversion of the repeated value.
        candidates = (5.3, 68, 4.3e17, Fraction(29, 101), Decimal('32.9714'))
        for value in candidates:
            for repeat in (2, 5, 10, 20):
                sample = [value] * repeat
                self.assertEqual(self.func(sample), float(value))

    def test_odd_fractions(self):
        # Odd number of Fractions.
        sample = [Fraction(k, 4) for k in (5, 9, 13, 13, 17)]
        assert len(sample) % 2 == 1
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 3.0)

    def test_even_fractions(self):
        # Even number of Fractions.
        sample = [Fraction(k, 4) for k in (5, 9, 13, 13, 17, 17)]
        assert len(sample) % 2 == 0
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 3.25)

    def test_odd_decimals(self):
        # Odd number of Decimals.
        sample = [Decimal(s) for s in ('5.5', '6.5', '6.5', '7.5', '8.5')]
        assert len(sample) % 2 == 1
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 6.75)

    def test_even_decimals(self):
        # Even number of Decimals.
        sample = [Decimal(s)
                  for s in ('5.5', '5.5', '6.5', '6.5', '7.5', '8.5')]
        assert len(sample) % 2 == 0
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 6.5)

        sample = [Decimal(s)
                  for s in ('5.5', '5.5', '6.5', '7.5', '7.5', '8.5')]
        assert len(sample) % 2 == 0
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 7.0)

    def test_interval(self):
        # median_grouped with an explicit interval argument.
        sample = [2.25, 2.5, 2.5, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertEqual(self.func(sample, 0.25), 2.875)

        sample = [2.25, 2.5, 2.5, 2.75, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertApproxEqual(self.func(sample, 0.25), 2.83333333, tol=1e-8)

        sample = [220, 220, 240, 260, 260, 260, 260, 280, 280, 300, 320, 340]
        self.assertEqual(self.func(sample, 20), 265.0)

    def test_data_type_error(self):
        # str and bytes are rejected, both as data and as interval.
        with self.assertRaises(TypeError):
            self.func(["", "", ""])

        with self.assertRaises(TypeError):
            self.func([b"", b"", b""])

        with self.assertRaises(TypeError):
            self.func([1, 2, 3], "")

        with self.assertRaises(TypeError):
            self.func([1, 2, 3], b"")
1846
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001847
class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for the discrete version of mode.
    def setUp(self):
        self.func = statistics.mode

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        # The value 1 occurs four times and every other value once, so
        # the data has exactly one mode.
        return [1, 1, 1, 1, 3, 4, 7, 9, 0, 8, 2]

    def test_range_data(self):
        # Override test from UnivariateCommonMixin.
        values = range(20, 50, 3)
        self.assertEqual(self.func(values), 20)

    def test_nominal_data(self):
        # mode with nominal (non-numeric) data.
        letters = 'abcbdb'
        self.assertEqual(self.func(letters), 'b')
        words = 'fe fi fo fum fi fi'.split()
        self.assertEqual(self.func(words), 'fi')

    def test_discrete_data(self):
        # mode with discrete numeric data: duplicating one value in turn
        # makes that value the mode.
        base = list(range(10))
        for extra in range(10):
            sample = base + [extra]
            random.shuffle(sample)
            self.assertEqual(self.func(sample), extra)

    def test_bimodal_data(self):
        # mode with bimodal data.
        sample = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
        assert sample.count(2) == sample.count(6) == 4
        # mode() should return 2, the first encountered mode
        self.assertEqual(self.func(sample), 2)

    def test_unique_data(self):
        # mode when all data points are unique.
        sample = list(range(10))
        # mode() should return 0, the first encountered mode
        self.assertEqual(self.func(sample), 0)

    def test_none_data(self):
        # mode must raise TypeError if given None as data.

        # This test is necessary because the implementation of mode uses
        # collections.Counter, which accepts None and returns an empty dict.
        with self.assertRaises(TypeError):
            self.func(None)

    def test_counter_data(self):
        # A Counter is treated like any other iterable.
        counter = collections.Counter([1, 1, 1, 2])
        # Since the keys of the counter are treated as data points, not the
        # counts, this should return the first mode encountered, 1
        self.assertEqual(self.func(counter), 1)
1904
1905
class TestMultiMode(unittest.TestCase):

    def test_basics(self):
        # One mode, several tied modes, and empty input.
        expectations = [
            ('aabbbbbbbbcc', ['b']),
            ('aabbbbccddddeeffffgg', ['b', 'd', 'f']),
            ('', []),
        ]
        for text, modes in expectations:
            self.assertEqual(statistics.multimode(text), modes)
1913
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001914
class TestFMean(unittest.TestCase):

    def test_basics(self):
        # fmean() returns a float equal to the expected mean for every
        # supported kind of input.
        cases = [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([Decimal('3.5'), Decimal('4.0'), Decimal('5.25')], 4.25,
             'decimals'),
            ([Fraction(7, 2), Fraction(4, 1), Fraction(21, 4)], 4.25,
             'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, Fraction(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]
        for data, expected_mean, kind in cases:
            actual_mean = statistics.fmean(data)
            self.assertIs(type(actual_mean), float, kind)
            self.assertEqual(actual_mean, expected_mean, kind)

    def test_error_cases(self):
        fmean = statistics.fmean
        # Empty input and empty iterator.
        for empty in ([], iter([])):
            with self.assertRaises(statistics.StatisticsError):
                fmean(empty)
        # Non-iterable input.
        self.assertRaises(TypeError, fmean, None)
        # Non-numeric input.
        self.assertRaises(TypeError, fmean, [10, None, 20])
        # Missing data argument.
        self.assertRaises(TypeError, fmean)
        # Too many arguments.
        self.assertRaises(TypeError, fmean, [10, 20, 60], 70)

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        NaN = float('Nan')
        Inf = float('Inf')
        nan_result = fmean([10, NaN])
        self.assertTrue(math.isnan(nan_result), 'nan')
        nan_inf_result = fmean([NaN, Inf])
        self.assertTrue(math.isnan(nan_inf_result), 'nan and infinity')
        inf_result = fmean([10, Inf])
        self.assertTrue(math.isinf(inf_result), 'infinity')
        self.assertRaises(ValueError, fmean, [Inf, -Inf])
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001960
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001961
1962# === Tests for variances and standard deviations ===
1963
class VarianceStdevMixin(UnivariateCommonMixin):
    # Mixin class holding the tests common to variance and std dev.

    # Subclasses should inherit from this before NumericTestCase, in order
    # to see the rel attribute below.  See test_shift_data for an
    # explanation.

    rel = 1e-12

    def test_single_value(self):
        # Deviation of a single value is zero.
        singles = (11, 19.8, 4.6e14, Fraction(21, 34), Decimal('8.392'))
        for value in singles:
            self.assertEqual(self.func([value]), 0)

    def test_repeated_single_value(self):
        # The deviation of a single repeated value is zero.
        candidates = (7.2, 49, 8.1e15, Fraction(3, 7), Decimal('62.4802'))
        for value in candidates:
            for repeat in (2, 3, 5, 15):
                sample = [value] * repeat
                self.assertEqual(self.func(sample), 0)

    def test_domain_error_regression(self):
        # Regression test for a domain error exception.
        # (Thanks to Geremy Condra.)
        sample = [0.123456789012345] * 10000
        # Every item is identical, so the variance should be exactly zero.
        # A small amount of round-off error is tolerated, but not much.
        outcome = self.func(sample)
        self.assertApproxEqual(outcome, 0.0, tol=5e-17)
        # A negative result must fail.
        self.assertGreaterEqual(outcome, 0)

    def test_shift_data(self):
        # Shifting the data by a constant amount should not affect the
        # variance or stdev.  Or at least not much.

        # Due to rounding, this test should be considered an ideal.  Some
        # tolerance away from "no change at all" is allowed through the tol
        # and/or rel attributes.  Subclasses may set tighter or looser error
        # tolerances.
        original = [1.03, 1.27, 1.94, 2.04, 2.58, 3.14, 4.75, 4.98, 5.42, 6.78]
        expected = self.func(original)
        # Don't set the offset too high: the bigger it is, the more
        # rounding error.
        offset = 1e5
        shifted = [value + offset for value in original]
        self.assertApproxEqual(self.func(shifted), expected)

    def test_shift_data_exact(self):
        # Like test_shift_data, but the result is always exact.
        original = [1, 3, 3, 4, 5, 7, 9, 10, 11, 16]
        assert all(value == int(value) for value in original)
        expected = self.func(original)
        offset = 10**9
        shifted = [value + offset for value in original]
        self.assertEqual(self.func(shifted), expected)

    def test_iter_list_same(self):
        # Iterator data and list data must give the same result.

        # This is an explicit test that iterators and lists are treated the
        # same; justification for this test over and above the similar test
        # in UnivariateCommonMixin is that an earlier design had variance and
        # friends swap between one- and two-pass algorithms, which would
        # sometimes give different results.
        sample = [random.uniform(-3, 8) for _ in range(1000)]
        from_list = self.func(sample)
        from_iterator = self.func(iter(sample))
        self.assertEqual(from_iterator, from_list)
2028
2029
class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Tests for population variance.
    def setUp(self):
        self.func = statistics.pvariance

    def test_exact_uniform(self):
        # Compare against the exact variance of the integers 0..size-1.
        size = 10000
        sample = list(range(size))
        random.shuffle(sample)
        exact = (size**2 - 1)/12  # Exact value.
        self.assertEqual(self.func(sample), exact)

    def test_ints(self):
        # Population variance with int data.
        sample = [4, 7, 13, 16]
        self.assertEqual(self.func(sample), 22.5)

    def test_fractions(self):
        # Population variance with Fraction data; the type is conserved.
        sample = [Fraction(k, 4) for k in (1, 1, 3, 7)]
        outcome = self.func(sample)
        self.assertEqual(outcome, Fraction(3, 8))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Population variance with Decimal data; the type is conserved.
        sample = [Decimal(s) for s in ("12.1", "12.2", "12.5", "12.9")]
        outcome = self.func(sample)
        self.assertEqual(outcome, Decimal('0.096875'))
        self.assertIsInstance(outcome, Decimal)

    def test_accuracy_bug_20499(self):
        # The int data must produce exactly the float 2 / 9.
        sample = [0, 0, 1]
        outcome = self.func(sample)
        self.assertEqual(outcome, 2 / 9)
        self.assertIsInstance(outcome, float)
2072
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002073
class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Tests for sample variance.
    def setUp(self):
        self.func = statistics.variance

    def test_single_value(self):
        # Override method from VarianceStdevMixin: a single data point
        # is an error for the sample variance.
        singles = (35, 24.7, 8.2e15, Fraction(19, 30), Decimal('4.2084'))
        for value in singles:
            with self.assertRaises(statistics.StatisticsError):
                self.func([value])

    def test_ints(self):
        # Sample variance with int data.
        sample = [4, 7, 13, 16]
        self.assertEqual(self.func(sample), 30)

    def test_fractions(self):
        # Sample variance with Fraction data; the type is conserved.
        sample = [Fraction(k, 4) for k in (1, 1, 3, 7)]
        outcome = self.func(sample)
        self.assertEqual(outcome, Fraction(1, 2))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Sample variance with Decimal data; the type is conserved.
        D = Decimal
        sample = [D(2), D(2), D(7), D(9)]
        exact = 4*D('9.5')/D(3)
        outcome = self.func(sample)
        self.assertEqual(outcome, exact)
        self.assertIsInstance(outcome, Decimal)

    def test_center_not_at_mean(self):
        # Passing xbar overrides the centre used for the computation.
        pair = (1.0, 2.0)
        self.assertEqual(self.func(pair), 0.5)
        self.assertEqual(self.func(pair, xbar=2.0), 1.0)

    def test_accuracy_bug_20499(self):
        # The int data must produce exactly the float 4 / 3.
        sample = [0, 0, 2]
        outcome = self.func(sample)
        self.assertEqual(outcome, 4 / 3)
        self.assertIsInstance(outcome, float)
2119
class TestPStdev(VarianceStdevMixin, NumericTestCase):
    # Tests for population standard deviation.
    def setUp(self):
        self.func = statistics.pstdev

    def test_compare_to_variance(self):
        # pstdev must equal the square root of pvariance.
        sample = [random.uniform(-17, 24) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.pvariance(sample))
        self.assertEqual(self.func(sample), root_of_variance)

    def test_center_not_at_mean(self):
        # See issue: 40855
        sample = (3, 6, 7, 10)
        self.assertEqual(self.func(sample), 2.5)
        self.assertEqual(self.func(sample, mu=0.5), 6.5)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002136
class TestStdev(VarianceStdevMixin, NumericTestCase):
    # Tests for sample standard deviation.
    def setUp(self):
        self.func = statistics.stdev

    def test_single_value(self):
        # Override method from VarianceStdevMixin: a single data point
        # is an error for the sample standard deviation.
        singles = (81, 203.74, 3.9e14, Fraction(5, 21), Decimal('35.719'))
        for value in singles:
            with self.assertRaises(statistics.StatisticsError):
                self.func([value])

    def test_compare_to_variance(self):
        # stdev must equal the square root of variance.
        sample = [random.uniform(-2, 9) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.variance(sample))
        self.assertEqual(self.func(sample), root_of_variance)

    def test_center_not_at_mean(self):
        # Passing xbar overrides the centre used for the computation.
        pair = (1.0, 2.0)
        self.assertEqual(self.func(pair, xbar=2.0), 1.0)
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002156
class TestGeometricMean(unittest.TestCase):

    def test_basics(self):
        gmean = statistics.geometric_mean
        self.assertAlmostEqual(gmean([54, 24, 36]), 36.0)
        self.assertAlmostEqual(gmean([4.0, 9.0]), 6.0)
        self.assertAlmostEqual(gmean([17.625]), 17.625)

        # Cross-check the float result against a Decimal computation of
        # the n-th root of the product, on fixed and seeded-random data.
        random.seed(86753095551212)
        datasets = [
            range(1, 100),
            range(1, 1_000),
            range(1, 10_000),
            range(500, 10_000, 3),
            range(10_000, 500, -3),
            [12, 17, 13, 5, 120, 7],
            [random.expovariate(50.0) for i in range(1_000)],
            [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
            [random.triangular(2000, 3000, 2200) for i in range(3_000)],
        ]
        for dataset in datasets:
            product = math.prod(map(Decimal, dataset))
            gm_decimal = product ** (Decimal(1) / len(dataset))
            gm_float = gmean(dataset)
            self.assertTrue(math.isclose(gm_float, float(gm_decimal)))

    def test_various_input_types(self):
        gmean = statistics.geometric_mean
        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
        expected_mean = 4.18886
        cases = [
            ([3.5, 4.0, 5.25], 'floats'),
            ([Decimal('3.5'), Decimal('4.0'), Decimal('5.25')], 'decimals'),
            ([Fraction(7, 2), Fraction(4, 1), Fraction(21, 4)], 'fractions'),
            ([3.5, 4, Fraction(21, 4)], 'mixed types'),
            ((3.5, 4.0, 5.25), 'tuple'),
            (iter([3.5, 4.0, 5.25]), 'iterator'),
        ]
        for data, kind in cases:
            actual_mean = gmean(data)
            self.assertIs(type(actual_mean), float, kind)
            self.assertAlmostEqual(actual_mean, expected_mean, places=5)

    def test_big_and_small(self):
        gmean = statistics.geometric_mean
        multipliers = (54.0, 24.0, 36.0)

        # Avoid overflow to infinity
        large = 2.0 ** 1000
        big_gm = gmean([m * large for m in multipliers])
        self.assertTrue(math.isclose(big_gm, 36.0 * large))
        self.assertFalse(math.isinf(big_gm))

        # Avoid underflow to zero
        small = 2.0 ** -1000
        small_gm = gmean([m * small for m in multipliers])
        self.assertTrue(math.isclose(small_gm, 36.0 * small))
        self.assertNotEqual(small_gm, 0.0)

    def test_error_cases(self):
        gmean = statistics.geometric_mean
        # In order: empty input, zero input, negative input, empty iterator.
        rejected = ([], [3.5, 0.0, 5.25], [3.5, -4.0, 5.25], iter([]))
        for data in rejected:
            with self.assertRaises(statistics.StatisticsError):
                gmean(data)
        # Non-iterable input.
        self.assertRaises(TypeError, gmean, None)
        # Non-numeric input.
        self.assertRaises(TypeError, gmean, [10, None, 20])
        # Missing data argument.
        self.assertRaises(TypeError, gmean)
        # Too many arguments.
        self.assertRaises(TypeError, gmean, [10, 20, 60], 70)

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        gmean = statistics.geometric_mean
        NaN = float('Nan')
        Inf = float('Inf')
        nan_result = gmean([10, NaN])
        self.assertTrue(math.isnan(nan_result), 'nan')
        nan_inf_result = gmean([NaN, Inf])
        self.assertTrue(math.isnan(nan_inf_result), 'nan and infinity')
        inf_result = gmean([10, Inf])
        self.assertTrue(math.isinf(inf_result), 'infinity')
        self.assertRaises(ValueError, gmean, [Inf, -Inf])
2244
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002245
class TestQuantiles(unittest.TestCase):
    # Tests for statistics.quantiles() with both the default method and
    # method='inclusive'.

    def _check_exact_type(self, values, datatype):
        # Assert that every element of *values* has exactly the type
        # *datatype* (subclasses do not count).  Passes trivially for an
        # empty sequence.
        for value in values:
            self.assertIs(type(value), datatype)

    def test_specific_cases(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.EXC function in MS Excel.
        quantiles = statistics.quantiles

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        data = [120, 200, 250, 320, 350]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [250.0]),
            (3, [200.0, 320.0]),
            (4, [160.0, 250.0, 335.0]),
            (5, [136.0, 220.0, 292.0, 344.0]),
            (6, [120.0, 200.0, 250.0, 320.0, 350.0]),
            (8, [100.0, 160.0, 212.5, 250.0, 302.5, 335.0, 357.5]),
            (10, [88.0, 136.0, 184.0, 220.0, 250.0, 292.0, 326.0, 344.0, 362.0]),
            (12, [80.0, 120.0, 160.0, 200.0, 225.0, 250.0, 285.0, 320.0, 335.0,
                  350.0, 365.0]),
            (15, [72.0, 104.0, 136.0, 168.0, 200.0, 220.0, 240.0, 264.0, 292.0,
                  320.0, 332.0, 344.0, 356.0, 368.0]),
        ]:
            self.assertEqual(expected, quantiles(data, n=n))
            self.assertEqual(len(quantiles(data, n=n)), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n)
                # Check each element; asserting on an unconsumed generator
                # object would always pass.
                self._check_exact_type(result, datatype)
                self.assertEqual(result, list(map(datatype, expected)))
            # Quantiles should be idempotent
            if len(expected) >= 2:
                self.assertEqual(quantiles(expected, n=n), expected)
            # Cross-check against method='inclusive' which should give
            # the same result after adding in minimum and maximum values
            # extrapolated from the two lowest and two highest points.
            sdata = sorted(data)
            lo = 2 * sdata[0] - sdata[1]
            hi = 2 * sdata[-1] - sdata[-2]
            padded_data = data + [lo, hi]
            self.assertEqual(
                quantiles(data, n=n),
                quantiles(padded_data, n=n, method='inclusive'),
                (n, data),
            )
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n)
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data)
            self.assertEqual(q2, statistics.median(data))

    def test_specific_cases_inclusive(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.INC function in MS Excel
        # and against the quantile() function in SciPy.
        quantiles = statistics.quantiles

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        data = [100, 200, 400, 800]
        random.shuffle(data)
        for n, expected in [
            (1, []),
            (2, [300.0]),
            (3, [200.0, 400.0]),
            (4, [175.0, 300.0, 500.0]),
            (5, [160.0, 240.0, 360.0, 560.0]),
            (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
            (8, [137.5, 175.0, 225.0, 300.0, 375.0, 500.0, 650.0]),
            (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
            (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0,
                  500.0, 600.0, 700.0]),
            (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0,
                  400.0, 480.0, 560.0, 640.0, 720.0]),
        ]:
            self.assertEqual(expected, quantiles(data, n=n, method="inclusive"))
            self.assertEqual(len(quantiles(data, n=n, method="inclusive")), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n, method="inclusive")
                # Check each element; asserting on an unconsumed generator
                # object would always pass.
                self._check_exact_type(result, datatype)
                self.assertEqual(result, list(map(datatype, expected)))
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n, method="inclusive")
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Natural deciles
        self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        # Whenever n is smaller than the number of data points, running
        # method='inclusive' should give the same result as method='exclusive'
        # after the two included extreme points are removed.
        data = [random.randrange(10_000) for i in range(501)]
        actual = quantiles(data, n=32, method='inclusive')
        data.remove(min(data))
        data.remove(max(data))
        expected = quantiles(data, n=32)
        self.assertEqual(expected, actual)
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data, method='inclusive')
            self.assertEqual(q2, statistics.median(data))

    def test_equal_inputs(self):
        # With all data points equal, every cut point equals that value.
        quantiles = statistics.quantiles
        for n in range(2, 10):
            data = [10.0] * n
            self.assertEqual(quantiles(data), [10.0, 10.0, 10.0])
            self.assertEqual(quantiles(data, method='inclusive'),
                             [10.0, 10.0, 10.0])

    def test_equal_sized_groups(self):
        quantiles = statistics.quantiles
        total = 10_000
        # Collect exactly `total` distinct values.  Building a set keeps
        # the data set at `total` points even if a draw repeats an earlier
        # value, so the group-size arithmetic below stays valid.
        distinct = set()
        while len(distinct) < total:
            distinct.add(random.expovariate(0.2))
        data = sorted(distinct)

        # Cases where the group size exactly divides the total
        for n in (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000):
            group_size = total // n
            self.assertEqual(
                [bisect.bisect(data, q) for q in quantiles(data, n=n)],
                list(range(group_size, total, group_size)))

        # When the group sizes can't be exactly equal, they should
        # differ by no more than one
        for n in (13, 19, 59, 109, 211, 571, 1019, 1907, 5261, 9769):
            group_sizes = {total // n, total // n + 1}
            pos = [bisect.bisect(data, q) for q in quantiles(data, n=n)]
            sizes = {q - p for p, q in zip(pos, pos[1:])}
            self.assertTrue(sizes <= group_sizes)

    def test_error_cases(self):
        quantiles = statistics.quantiles
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(TypeError):
            quantiles()                         # Missing arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 13, n=4)    # Too many arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 4)          # n passed positionally
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=0)        # n is zero
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=-1)       # n is negative
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], n=1.5)      # n is not an integer
        with self.assertRaises(ValueError):
            quantiles([10, 20, 30], method='X') # method is unknown
        with self.assertRaises(StatisticsError):
            quantiles([10], n=4)                # not enough data points
        with self.assertRaises(TypeError):
            quantiles([10, None, 30], n=4)      # data is non-numeric
2408
2409
Tymoteusz Wołodźko09aa6f92021-04-25 13:45:09 +02002410class TestBivariateStatistics(unittest.TestCase):
2411
2412 def test_unequal_size_error(self):
2413 for x, y in [
2414 ([1, 2, 3], [1, 2]),
2415 ([1, 2], [1, 2, 3]),
2416 ]:
2417 with self.assertRaises(statistics.StatisticsError):
2418 statistics.covariance(x, y)
2419 with self.assertRaises(statistics.StatisticsError):
2420 statistics.correlation(x, y)
2421 with self.assertRaises(statistics.StatisticsError):
2422 statistics.linear_regression(x, y)
2423
2424 def test_small_sample_error(self):
2425 for x, y in [
2426 ([], []),
2427 ([], [1, 2,]),
2428 ([1, 2,], []),
2429 ([1,], [1,]),
2430 ([1,], [1, 2,]),
2431 ([1, 2,], [1,]),
2432 ]:
2433 with self.assertRaises(statistics.StatisticsError):
2434 statistics.covariance(x, y)
2435 with self.assertRaises(statistics.StatisticsError):
2436 statistics.correlation(x, y)
2437 with self.assertRaises(statistics.StatisticsError):
2438 statistics.linear_regression(x, y)
2439
2440
2441class TestCorrelationAndCovariance(unittest.TestCase):
2442
2443 def test_results(self):
2444 for x, y, result in [
2445 ([1, 2, 3], [1, 2, 3], 1),
2446 ([1, 2, 3], [-1, -2, -3], -1),
2447 ([1, 2, 3], [3, 2, 1], -1),
2448 ([1, 2, 3], [1, 2, 1], 0),
2449 ([1, 2, 3], [1, 3, 2], 0.5),
2450 ]:
2451 self.assertAlmostEqual(statistics.correlation(x, y), result)
2452 self.assertAlmostEqual(statistics.covariance(x, y), result)
2453
2454 def test_different_scales(self):
2455 x = [1, 2, 3]
2456 y = [10, 30, 20]
2457 self.assertAlmostEqual(statistics.correlation(x, y), 0.5)
2458 self.assertAlmostEqual(statistics.covariance(x, y), 5)
2459
2460 y = [.1, .2, .3]
2461 self.assertAlmostEqual(statistics.correlation(x, y), 1)
2462 self.assertAlmostEqual(statistics.covariance(x, y), 0.1)
2463
2464
2465class TestLinearRegression(unittest.TestCase):
2466
2467 def test_constant_input_error(self):
2468 x = [1, 1, 1,]
2469 y = [1, 2, 3,]
2470 with self.assertRaises(statistics.StatisticsError):
2471 statistics.linear_regression(x, y)
2472
2473 def test_results(self):
2474 for x, y, true_intercept, true_slope in [
2475 ([1, 2, 3], [0, 0, 0], 0, 0),
2476 ([1, 2, 3], [1, 2, 3], 0, 1),
2477 ([1, 2, 3], [100, 100, 100], 100, 0),
2478 ([1, 2, 3], [12, 14, 16], 10, 2),
2479 ([1, 2, 3], [-1, -2, -3], 0, -1),
2480 ([1, 2, 3], [21, 22, 23], 20, 1),
2481 ([1, 2, 3], [5.1, 5.2, 5.3], 5, 0.1),
2482 ]:
Miss Islington (bot)86779872021-05-24 18:11:12 -07002483 slope, intercept = statistics.linear_regression(x, y)
Tymoteusz Wołodźko09aa6f92021-04-25 13:45:09 +02002484 self.assertAlmostEqual(intercept, true_intercept)
2485 self.assertAlmostEqual(slope, true_slope)
2486
2487
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002488class TestNormalDist:
Raymond Hettinger11c79532019-02-23 14:44:07 -08002489
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002490 # General note on precision: The pdf(), cdf(), and overlap() methods
2491 # depend on functions in the math libraries that do not make
2492 # explicit accuracy guarantees. Accordingly, some of the accuracy
2493 # tests below may fail if the underlying math functions are
2494 # inaccurate. There isn't much we can do about this short of
2495 # implementing our own implementations from scratch.
2496
Raymond Hettinger11c79532019-02-23 14:44:07 -08002497 def test_slots(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002498 nd = self.module.NormalDist(300, 23)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002499 with self.assertRaises(TypeError):
2500 vars(nd)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002501 self.assertEqual(tuple(nd.__slots__), ('_mu', '_sigma'))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002502
2503 def test_instantiation_and_attributes(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002504 nd = self.module.NormalDist(500, 17)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002505 self.assertEqual(nd.mean, 500)
2506 self.assertEqual(nd.stdev, 17)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002507 self.assertEqual(nd.variance, 17**2)
2508
2509 # default arguments
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002510 nd = self.module.NormalDist()
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002511 self.assertEqual(nd.mean, 0)
2512 self.assertEqual(nd.stdev, 1)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002513 self.assertEqual(nd.variance, 1**2)
2514
2515 # error case: negative sigma
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002516 with self.assertRaises(self.module.StatisticsError):
2517 self.module.NormalDist(500, -10)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002518
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002519 # verify that subclass type is honored
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002520 class NewNormalDist(self.module.NormalDist):
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002521 pass
2522 nnd = NewNormalDist(200, 5)
2523 self.assertEqual(type(nnd), NewNormalDist)
2524
Raymond Hettinger11c79532019-02-23 14:44:07 -08002525 def test_alternative_constructor(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002526 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002527 data = [96, 107, 90, 92, 110]
2528 # list input
2529 self.assertEqual(NormalDist.from_samples(data), NormalDist(99, 9))
2530 # tuple input
2531 self.assertEqual(NormalDist.from_samples(tuple(data)), NormalDist(99, 9))
2532 # iterator input
2533 self.assertEqual(NormalDist.from_samples(iter(data)), NormalDist(99, 9))
2534 # error cases
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002535 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002536 NormalDist.from_samples([]) # empty input
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002537 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002538 NormalDist.from_samples([10]) # only one input
2539
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002540 # verify that subclass type is honored
2541 class NewNormalDist(NormalDist):
2542 pass
2543 nnd = NewNormalDist.from_samples(data)
2544 self.assertEqual(type(nnd), NewNormalDist)
2545
Raymond Hettinger11c79532019-02-23 14:44:07 -08002546 def test_sample_generation(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002547 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002548 mu, sigma = 10_000, 3.0
2549 X = NormalDist(mu, sigma)
2550 n = 1_000
2551 data = X.samples(n)
2552 self.assertEqual(len(data), n)
2553 self.assertEqual(set(map(type, data)), {float})
2554 # mean(data) expected to fall within 8 standard deviations
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002555 xbar = self.module.mean(data)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002556 self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)
2557
2558 # verify that seeding makes reproducible sequences
2559 n = 100
2560 data1 = X.samples(n, seed='happiness and joy')
2561 data2 = X.samples(n, seed='trouble and despair')
2562 data3 = X.samples(n, seed='happiness and joy')
2563 data4 = X.samples(n, seed='trouble and despair')
2564 self.assertEqual(data1, data3)
2565 self.assertEqual(data2, data4)
2566 self.assertNotEqual(data1, data2)
2567
Raymond Hettinger11c79532019-02-23 14:44:07 -08002568 def test_pdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002569 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002570 X = NormalDist(100, 15)
2571 # Verify peak around center
2572 self.assertLess(X.pdf(99), X.pdf(100))
2573 self.assertLess(X.pdf(101), X.pdf(100))
2574 # Test symmetry
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002575 for i in range(50):
2576 self.assertAlmostEqual(X.pdf(100 - i), X.pdf(100 + i))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002577 # Test vs CDF
2578 dx = 2.0 ** -10
2579 for x in range(90, 111):
2580 est_pdf = (X.cdf(x + dx) - X.cdf(x)) / dx
2581 self.assertAlmostEqual(X.pdf(x), est_pdf, places=4)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002582 # Test vs table of known values -- CRC 26th Edition
2583 Z = NormalDist()
2584 for x, px in enumerate([
2585 0.3989, 0.3989, 0.3989, 0.3988, 0.3986,
2586 0.3984, 0.3982, 0.3980, 0.3977, 0.3973,
2587 0.3970, 0.3965, 0.3961, 0.3956, 0.3951,
2588 0.3945, 0.3939, 0.3932, 0.3925, 0.3918,
2589 0.3910, 0.3902, 0.3894, 0.3885, 0.3876,
2590 0.3867, 0.3857, 0.3847, 0.3836, 0.3825,
2591 0.3814, 0.3802, 0.3790, 0.3778, 0.3765,
2592 0.3752, 0.3739, 0.3725, 0.3712, 0.3697,
2593 0.3683, 0.3668, 0.3653, 0.3637, 0.3621,
2594 0.3605, 0.3589, 0.3572, 0.3555, 0.3538,
2595 ]):
2596 self.assertAlmostEqual(Z.pdf(x / 100.0), px, places=4)
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002597 self.assertAlmostEqual(Z.pdf(-x / 100.0), px, places=4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002598 # Error case: variance is zero
2599 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002600 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002601 Y.pdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002602 # Special values
2603 self.assertEqual(X.pdf(float('-Inf')), 0.0)
2604 self.assertEqual(X.pdf(float('Inf')), 0.0)
2605 self.assertTrue(math.isnan(X.pdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002606
2607 def test_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002608 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002609 X = NormalDist(100, 15)
2610 cdfs = [X.cdf(x) for x in range(1, 200)]
2611 self.assertEqual(set(map(type, cdfs)), {float})
2612 # Verify montonic
2613 self.assertEqual(cdfs, sorted(cdfs))
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002614 # Verify center (should be exact)
2615 self.assertEqual(X.cdf(100), 0.50)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002616 # Check against a table of known values
2617 # https://en.wikipedia.org/wiki/Standard_normal_table#Cumulative
2618 Z = NormalDist()
2619 for z, cum_prob in [
2620 (0.00, 0.50000), (0.01, 0.50399), (0.02, 0.50798),
2621 (0.14, 0.55567), (0.29, 0.61409), (0.33, 0.62930),
2622 (0.54, 0.70540), (0.60, 0.72575), (1.17, 0.87900),
2623 (1.60, 0.94520), (2.05, 0.97982), (2.89, 0.99807),
2624 (3.52, 0.99978), (3.98, 0.99997), (4.07, 0.99998),
2625 ]:
2626 self.assertAlmostEqual(Z.cdf(z), cum_prob, places=5)
2627 self.assertAlmostEqual(Z.cdf(-z), 1.0 - cum_prob, places=5)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002628 # Error case: variance is zero
2629 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002630 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002631 Y.cdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002632 # Special values
2633 self.assertEqual(X.cdf(float('-Inf')), 0.0)
2634 self.assertEqual(X.cdf(float('Inf')), 1.0)
2635 self.assertTrue(math.isnan(X.cdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002636
Neil Schemenauer52a48e62019-07-30 11:08:18 -07002637 @support.skip_if_pgo_task
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002638 def test_inv_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002639 NormalDist = self.module.NormalDist
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002640
2641 # Center case should be exact.
2642 iq = NormalDist(100, 15)
2643 self.assertEqual(iq.inv_cdf(0.50), iq.mean)
2644
2645 # Test versus a published table of known percentage points.
2646 # See the second table at the bottom of the page here:
2647 # http://people.bath.ac.uk/masss/tables/normaltable.pdf
2648 Z = NormalDist()
2649 pp = {5.0: (0.000, 1.645, 2.576, 3.291, 3.891,
2650 4.417, 4.892, 5.327, 5.731, 6.109),
2651 2.5: (0.674, 1.960, 2.807, 3.481, 4.056,
2652 4.565, 5.026, 5.451, 5.847, 6.219),
2653 1.0: (1.282, 2.326, 3.090, 3.719, 4.265,
2654 4.753, 5.199, 5.612, 5.998, 6.361)}
2655 for base, row in pp.items():
2656 for exp, x in enumerate(row, start=1):
2657 p = base * 10.0 ** (-exp)
2658 self.assertAlmostEqual(-Z.inv_cdf(p), x, places=3)
2659 p = 1.0 - p
2660 self.assertAlmostEqual(Z.inv_cdf(p), x, places=3)
2661
2662 # Match published example for MS Excel
2663 # https://support.office.com/en-us/article/norm-inv-function-54b30935-fee7-493c-bedb-2278a9db7e13
2664 self.assertAlmostEqual(NormalDist(40, 1.5).inv_cdf(0.908789), 42.000002)
2665
2666 # One million equally spaced probabilities
2667 n = 2**20
2668 for p in range(1, n):
2669 p /= n
2670 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2671
2672 # One hundred ever smaller probabilities to test tails out to
2673 # extreme probabilities: 1 / 2**50 and (2**50-1) / 2 ** 50
2674 for e in range(1, 51):
2675 p = 2.0 ** (-e)
2676 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2677 p = 1.0 - p
2678 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2679
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002680 # Now apply cdf() first. Near the tails, the round-trip loses
2681 # precision and is ill-conditioned (small changes in the inputs
2682 # give large changes in the output), so only check to 5 places.
2683 for x in range(200):
2684 self.assertAlmostEqual(iq.inv_cdf(iq.cdf(x)), x, places=5)
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002685
2686 # Error cases:
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002687 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002688 iq.inv_cdf(0.0) # p is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002689 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002690 iq.inv_cdf(-0.1) # p under zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002691 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002692 iq.inv_cdf(1.0) # p is one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002693 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002694 iq.inv_cdf(1.1) # p over one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002695 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002696 iq = NormalDist(100, 0) # sigma is zero
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002697 iq.inv_cdf(0.5)
2698
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002699 # Special values
2700 self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
2701
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002702 def test_quantiles(self):
2703 # Quartiles of a standard normal distribution
2704 Z = self.module.NormalDist()
2705 for n, expected in [
2706 (1, []),
2707 (2, [0.0]),
2708 (3, [-0.4307, 0.4307]),
2709 (4 ,[-0.6745, 0.0, 0.6745]),
2710 ]:
2711 actual = Z.quantiles(n=n)
2712 self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2713 for e, a in zip(expected, actual)))
2714
Raymond Hettinger318d5372019-03-06 22:59:40 -08002715 def test_overlap(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002716 NormalDist = self.module.NormalDist
Raymond Hettinger318d5372019-03-06 22:59:40 -08002717
2718 # Match examples from Imman and Bradley
2719 for X1, X2, published_result in [
2720 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0), 0.80258),
2721 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0), 0.60993),
2722 ]:
2723 self.assertAlmostEqual(X1.overlap(X2), published_result, places=4)
2724 self.assertAlmostEqual(X2.overlap(X1), published_result, places=4)
2725
2726 # Check against integration of the PDF
2727 def overlap_numeric(X, Y, *, steps=8_192, z=5):
2728 'Numerical integration cross-check for overlap() '
2729 fsum = math.fsum
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002730 center = (X.mean + Y.mean) / 2.0
2731 width = z * max(X.stdev, Y.stdev)
Raymond Hettinger318d5372019-03-06 22:59:40 -08002732 start = center - width
2733 dx = 2.0 * width / steps
2734 x_arr = [start + i*dx for i in range(steps)]
2735 xp = list(map(X.pdf, x_arr))
2736 yp = list(map(Y.pdf, x_arr))
2737 total = max(fsum(xp), fsum(yp))
2738 return fsum(map(min, xp, yp)) / total
2739
2740 for X1, X2 in [
2741 # Examples from Imman and Bradley
2742 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0)),
2743 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2744 # Example from https://www.rasch.org/rmt/rmt101r.htm
2745 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2746 # Gender heights from http://www.usablestats.com/lessons/normal
2747 (NormalDist(70, 4), NormalDist(65, 3.5)),
2748 # Misc cases with equal standard deviations
2749 (NormalDist(100, 15), NormalDist(110, 15)),
2750 (NormalDist(-100, 15), NormalDist(110, 15)),
2751 (NormalDist(-100, 15), NormalDist(-110, 15)),
2752 # Misc cases with unequal standard deviations
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002753 (NormalDist(100, 12), NormalDist(100, 15)),
Raymond Hettinger318d5372019-03-06 22:59:40 -08002754 (NormalDist(100, 12), NormalDist(110, 15)),
2755 (NormalDist(100, 12), NormalDist(150, 15)),
2756 (NormalDist(100, 12), NormalDist(150, 35)),
2757 # Misc cases with small values
2758 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.003)),
2759 (NormalDist(1.000, 0.002), NormalDist(1.006, 0.0003)),
2760 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.099)),
2761 ]:
2762 self.assertAlmostEqual(X1.overlap(X2), overlap_numeric(X1, X2), places=5)
2763 self.assertAlmostEqual(X2.overlap(X1), overlap_numeric(X1, X2), places=5)
2764
2765 # Error cases
2766 X = NormalDist()
2767 with self.assertRaises(TypeError):
2768 X.overlap() # too few arguments
2769 with self.assertRaises(TypeError):
2770 X.overlap(X, X) # too may arguments
2771 with self.assertRaises(TypeError):
2772 X.overlap(None) # right operand not a NormalDist
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002773 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002774 X.overlap(NormalDist(1, 0)) # right operand sigma is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002775 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002776 NormalDist(1, 0).overlap(X) # left operand sigma is zero
2777
Raymond Hettinger70f027d2020-04-16 10:25:14 -07002778 def test_zscore(self):
2779 NormalDist = self.module.NormalDist
2780 X = NormalDist(100, 15)
2781 self.assertEqual(X.zscore(142), 2.8)
2782 self.assertEqual(X.zscore(58), -2.8)
2783 self.assertEqual(X.zscore(100), 0.0)
2784 with self.assertRaises(TypeError):
2785 X.zscore() # too few arguments
2786 with self.assertRaises(TypeError):
2787 X.zscore(1, 1) # too may arguments
2788 with self.assertRaises(TypeError):
2789 X.zscore(None) # non-numeric type
2790 with self.assertRaises(self.module.StatisticsError):
2791 NormalDist(1, 0).zscore(100) # sigma is zero
2792
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002793 def test_properties(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002794 X = self.module.NormalDist(100, 15)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002795 self.assertEqual(X.mean, 100)
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002796 self.assertEqual(X.median, 100)
2797 self.assertEqual(X.mode, 100)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002798 self.assertEqual(X.stdev, 15)
2799 self.assertEqual(X.variance, 225)
2800
Raymond Hettinger11c79532019-02-23 14:44:07 -08002801 def test_same_type_addition_and_subtraction(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002802 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002803 X = NormalDist(100, 12)
2804 Y = NormalDist(40, 5)
2805 self.assertEqual(X + Y, NormalDist(140, 13)) # __add__
2806 self.assertEqual(X - Y, NormalDist(60, 13)) # __sub__
2807
2808 def test_translation_and_scaling(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002809 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002810 X = NormalDist(100, 15)
2811 y = 10
2812 self.assertEqual(+X, NormalDist(100, 15)) # __pos__
2813 self.assertEqual(-X, NormalDist(-100, 15)) # __neg__
2814 self.assertEqual(X + y, NormalDist(110, 15)) # __add__
2815 self.assertEqual(y + X, NormalDist(110, 15)) # __radd__
2816 self.assertEqual(X - y, NormalDist(90, 15)) # __sub__
2817 self.assertEqual(y - X, NormalDist(-90, 15)) # __rsub__
2818 self.assertEqual(X * y, NormalDist(1000, 150)) # __mul__
2819 self.assertEqual(y * X, NormalDist(1000, 150)) # __rmul__
2820 self.assertEqual(X / y, NormalDist(10, 1.5)) # __truediv__
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002821 with self.assertRaises(TypeError): # __rtruediv__
Raymond Hettinger11c79532019-02-23 14:44:07 -08002822 y / X
2823
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002824 def test_unary_operations(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002825 NormalDist = self.module.NormalDist
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002826 X = NormalDist(100, 12)
2827 Y = +X
2828 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002829 self.assertEqual(X.mean, Y.mean)
2830 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002831 Y = -X
2832 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002833 self.assertEqual(X.mean, -Y.mean)
2834 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002835
Raymond Hettinger11c79532019-02-23 14:44:07 -08002836 def test_equality(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002837 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002838 nd1 = NormalDist()
2839 nd2 = NormalDist(2, 4)
2840 nd3 = NormalDist()
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002841 nd4 = NormalDist(2, 4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002842 nd5 = NormalDist(2, 8)
2843 nd6 = NormalDist(8, 4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002844 self.assertNotEqual(nd1, nd2)
2845 self.assertEqual(nd1, nd3)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002846 self.assertEqual(nd2, nd4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002847 self.assertNotEqual(nd2, nd5)
2848 self.assertNotEqual(nd2, nd6)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002849
2850 # Test NotImplemented when types are different
2851 class A:
2852 def __eq__(self, other):
2853 return 10
2854 a = A()
2855 self.assertEqual(nd1.__eq__(a), NotImplemented)
2856 self.assertEqual(nd1 == a, 10)
2857 self.assertEqual(a == nd1, 10)
2858
2859 # All subclasses to compare equal giving the same behavior
2860 # as list, tuple, int, float, complex, str, dict, set, etc.
2861 class SizedNormalDist(NormalDist):
2862 def __init__(self, mu, sigma, n):
2863 super().__init__(mu, sigma)
2864 self.n = n
2865 s = SizedNormalDist(100, 15, 57)
2866 nd4 = NormalDist(100, 15)
2867 self.assertEqual(s, nd4)
2868
2869 # Don't allow duck type equality because we wouldn't
2870 # want a lognormal distribution to compare equal
2871 # to a normal distribution with the same parameters
2872 class LognormalDist:
2873 def __init__(self, mu, sigma):
2874 self.mu = mu
2875 self.sigma = sigma
2876 lnd = LognormalDist(100, 15)
2877 nd = NormalDist(100, 15)
2878 self.assertNotEqual(nd, lnd)
2879
2880 def test_pickle_and_copy(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002881 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002882 nd1 = copy.copy(nd)
2883 self.assertEqual(nd, nd1)
2884 nd2 = copy.deepcopy(nd)
2885 self.assertEqual(nd, nd2)
2886 nd3 = pickle.loads(pickle.dumps(nd))
2887 self.assertEqual(nd, nd3)
2888
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002889 def test_hashability(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002890 ND = self.module.NormalDist
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002891 s = {ND(100, 15), ND(100.0, 15.0), ND(100, 10), ND(95, 15), ND(100, 15)}
2892 self.assertEqual(len(s), 3)
2893
Raymond Hettinger11c79532019-02-23 14:44:07 -08002894 def test_repr(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002895 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002896 self.assertEqual(repr(nd), 'NormalDist(mu=37.5, sigma=5.625)')
2897
# Swapping sys.modules['statistics'] for the module under test avoids
# this failure when pickling a NormalDist instance:
#     _pickle.PicklingError:
#     Can't pickle <class 'statistics.NormalDist'>:
#     it's not the same object as statistics.NormalDist
class TestNormalDistPython(unittest.TestCase, TestNormalDist):
    # Runs the shared TestNormalDist tests with py_statistics as the
    # module under test.
    module = py_statistics

    def setUp(self):
        # Register the module under test as 'statistics' for the
        # duration of each test.
        sys.modules['statistics'] = self.module

    def tearDown(self):
        # Put back the statistics module imported at the top of the file.
        sys.modules['statistics'] = statistics
2909
2910
@unittest.skipUnless(c_statistics, 'requires _statistics')
class TestNormalDistC(unittest.TestCase, TestNormalDist):
    # Runs the shared TestNormalDist tests with c_statistics as the
    # module under test; skipped when c_statistics is falsy.
    module = c_statistics

    def setUp(self):
        # Register the module under test as 'statistics' for the
        # duration of each test.
        sys.modules['statistics'] = self.module

    def tearDown(self):
        # Put back the statistics module imported at the top of the file.
        sys.modules['statistics'] = statistics
2919
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002920
2921# === Run tests ===
2922
def load_tests(loader, tests, ignore):
    """Hook for unittest's load_tests protocol: add this module's doctests.

    The doctests are appended to *tests*, which is then returned;
    *loader* and *ignore* are not used.
    """
    doctest_suite = doctest.DocTestSuite()
    tests.addTests(doctest_suite)
    return tests
2927
2928
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()