blob: 5c3b1fdd8b110d2ec22b5b1906cba9d7469bd124 [file] [log] [blame]
"""Test suite for statistics module, including helper NumericTestCase and
approx_equal function.

"""
5
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07006import bisect
Larry Hastingsf5e987b2013-10-19 11:50:09 -07007import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +03008import collections.abc
Raymond Hettinger11c79532019-02-23 14:44:07 -08009import copy
Larry Hastingsf5e987b2013-10-19 11:50:09 -070010import decimal
11import doctest
12import math
Raymond Hettinger11c79532019-02-23 14:44:07 -080013import pickle
Larry Hastingsf5e987b2013-10-19 11:50:09 -070014import random
Serhiy Storchakab12cb6a2013-12-08 18:16:18 +020015import sys
Larry Hastingsf5e987b2013-10-19 11:50:09 -070016import unittest
Neil Schemenauer52a48e62019-07-30 11:08:18 -070017from test import support
Larry Hastingsf5e987b2013-10-19 11:50:09 -070018
19from decimal import Decimal
20from fractions import Fraction
Dong-hee Na8ad22a42019-08-25 02:51:20 +090021from test import support
Larry Hastingsf5e987b2013-10-19 11:50:09 -070022
23
# Module to be tested.
25import statistics
26
27
# === Helper functions and class ===
29
def sign(x):
    """Return -1.0 for negatives, including -0.0, otherwise +1.0.

    The sign is read with ``math.copysign``, so a negative zero reports
    -1.0 even though it compares equal to 0.0.
    """
    return math.copysign(1, x)
33
def _nan_equal(a, b):
    """Return True if a and b are both the same kind of NAN.

    >>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
    True
    >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
    True
    >>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
    False
    >>> _nan_equal(Decimal(42), Decimal('NAN'))
    False

    >>> _nan_equal(float('NAN'), float('NAN'))
    True
    >>> _nan_equal(float('NAN'), 0.5)
    False

    >>> _nan_equal(float('NAN'), Decimal('NAN'))
    False

    NAN payloads are not compared.
    """
    # Values of different types are never "the same kind" of NAN.
    if type(a) is not type(b):
        return False
    if isinstance(a, float):
        return math.isnan(a) and math.isnan(b)
    # Anything else is expected to offer Decimal's as_tuple(); the third
    # field (the exponent) is 'n' for a quiet NAN and 'N' for a signalling
    # NAN.
    aexp = a.as_tuple()[2]
    bexp = b.as_tuple()[2]
    return (aexp == bexp) and (aexp in ('n', 'N'))  # Both NAN or both sNAN.
63
64
def _calc_errors(actual, expected):
    """Return the absolute and relative errors between two numbers.

    >>> _calc_errors(100, 75)
    (25, 0.25)
    >>> _calc_errors(100, 100)
    (0, 0.0)

    Returns the (absolute error, relative error) between the two arguments.
    The relative error is measured against the larger of the two
    magnitudes; when both arguments are zero it is reported as infinity
    rather than dividing by zero.
    """
    base = max(abs(actual), abs(expected))
    abs_err = abs(actual - expected)
    rel_err = abs_err/base if base else float('inf')
    return (abs_err, rel_err)
79
80
def approx_equal(x, y, tol=1e-12, rel=1e-7):
    """approx_equal(x, y [, tol [, rel]]) => True|False

    Return True if numbers x and y are approximately equal, to within some
    margin of error, otherwise return False. Numbers which compare equal
    will also compare approximately equal.

    x is approximately equal to y if the difference between them is less than
    an absolute error tol or a relative error rel, whichever is bigger.

    If given, both tol and rel must be non-negative numbers; a negative
    value raises ValueError. If not given, default values are tol=1e-12 and
    rel=1e-7.

    >>> approx_equal(1.2589, 1.2587, tol=0.0003, rel=0)
    True
    >>> approx_equal(1.2589, 1.2587, tol=0.0001, rel=0)
    False

    Absolute error is defined as abs(x-y); if that is less than or equal to
    tol, x and y are considered approximately equal.

    Relative error is defined as abs((x-y)/x) or abs((x-y)/y), whichever is
    smaller, provided x or y are not zero. If that figure is less than or
    equal to rel, x and y are considered approximately equal.

    Complex numbers are not directly supported. If you wish to compare to
    complex numbers, extract their real and imaginary parts and compare them
    individually.

    NANs always compare unequal, even with themselves. Infinities compare
    approximately equal if they have the same sign (both positive or both
    negative). Infinities with different signs compare unequal; so do
    comparisons of infinities with finite numbers.
    """
    if tol < 0 or rel < 0:
        raise ValueError('error tolerances must be non-negative')
    # NANs are never equal to anything, approximately or otherwise.
    if math.isnan(x) or math.isnan(y):
        return False
    # Numbers which compare equal also compare approximately equal.
    if x == y:
        # This includes the case of two infinities with the same sign.
        return True
    if math.isinf(x) or math.isinf(y):
        # This includes the case of two infinities of opposite sign, or
        # one infinity and one finite number.
        return False
    # Two finite numbers.
    actual_error = abs(x - y)
    # Scaling rel by the larger magnitude keeps the test symmetric in x, y.
    allowed_error = max(tol, rel*max(abs(x), abs(y)))
    return actual_error <= allowed_error
132
133
# This class exists only as somewhere to stick a docstring containing
# doctests. The following docstring and tests were originally in a separate
# module. Now that it has been merged in here, I need somewhere to hang the
# docstring. Ultimately, this class will die, and the information below will
# either become redundant, or be moved into more appropriate places.
class _DoNothing:
    """
    When doing numeric work, especially with floats, exact equality is often
    not what you want. Due to round-off error, it is often a bad idea to try
    to compare floats with equality. Instead the usual procedure is to test
    them with some (hopefully small!) allowance for error.

    The ``approx_equal`` function allows you to specify either an absolute
    error tolerance, or a relative error, or both.

    Absolute error tolerances are simple, but you need to know the magnitude
    of the quantities being compared:

    >>> approx_equal(12.345, 12.346, tol=1e-3)
    True
    >>> approx_equal(12.345e6, 12.346e6, tol=1e-3)  # tol is too small.
    False

    Relative errors are more suitable when the values you are comparing can
    vary in magnitude:

    >>> approx_equal(12.345, 12.346, rel=1e-4)
    True
    >>> approx_equal(12.345e6, 12.346e6, rel=1e-4)
    True

    but a naive implementation of relative error testing can run into trouble
    around zero.

    If you supply both an absolute tolerance and a relative error, the
    comparison succeeds if either individual test succeeds:

    >>> approx_equal(12.345e6, 12.346e6, tol=1e-3, rel=1e-4)
    True

    """
    # The class has no behaviour of its own; it only carries the docstring.
    pass
176
177
178
# We prefer NumericTestCase.assertApproxEqual for testing numeric values that
# may not be exactly equal, and avoid using TestCase.assertAlmostEqual,
# because it sucks :-)
181
# Import two independent copies of the module under test: one with the
# ``_statistics`` accelerator blocked (pure-Python code paths only) and one
# with the accelerator freshly imported.
try:
    # Newer Pythons keep this helper in test.support.import_helper.
    from test.support.import_helper import (
        import_fresh_module as _import_fresh_module,
    )
except ImportError:
    # Older Pythons expose it directly on test.support.
    _import_fresh_module = support.import_fresh_module

py_statistics = _import_fresh_module('statistics', blocked=['_statistics'])
# NOTE(review): presumably falsy when the accelerator cannot be imported;
# the skipUnless(c_statistics, ...) guard in TestModules relies on that.
c_statistics = _import_fresh_module('statistics', fresh=['_statistics'])
184
185
class TestModules(unittest.TestCase):
    # Check that the names listed in func_names resolve to the expected
    # implementation in each freshly imported copy of the module:
    # 'statistics' for the pure-Python copy, '_statistics' for the copy
    # imported with the accelerator.
    func_names = ['_normal_dist_inv_cdf']

    def test_py_functions(self):
        for fname in self.func_names:
            self.assertEqual(getattr(py_statistics, fname).__module__, 'statistics')

    @unittest.skipUnless(c_statistics, 'requires _statistics')
    def test_c_functions(self):
        for fname in self.func_names:
            self.assertEqual(getattr(c_statistics, fname).__module__, '_statistics')
197
198
class NumericTestCase(unittest.TestCase):
    """Unit test class for numeric work.

    This subclasses TestCase. In addition to the standard method
    ``TestCase.assertAlmostEqual``, ``assertApproxEqual`` is provided.
    """
    # By default, we expect exact equality, unless overridden.
    tol = rel = 0

    def assertApproxEqual(
            self, first, second, tol=None, rel=None, msg=None
            ):
        """Test passes if ``first`` and ``second`` are approximately equal.

        This test passes if ``first`` and ``second`` are equal to
        within ``tol``, an absolute error, or ``rel``, a relative error.

        If either ``tol`` or ``rel`` are None or not given, they default to
        test attributes of the same name (by default, 0).

        The objects may be either numbers, or sequences of numbers. Sequences
        are tested element-by-element.

        >>> class MyTest(NumericTestCase):
        ...     def test_number(self):
        ...         x = 1.0/6
        ...         y = sum([x]*6)
        ...         self.assertApproxEqual(y, 1.0, tol=1e-15)
        ...     def test_sequence(self):
        ...         a = [1.001, 1.001e-10, 1.001e10]
        ...         b = [1.0, 1e-10, 1e10]
        ...         self.assertApproxEqual(a, b, rel=1e-3)
        ...
        >>> import unittest
        >>> from io import StringIO  # Suppress test runner output.
        >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest)
        >>> unittest.TextTestRunner(stream=StringIO()).run(suite)
        <unittest.runner.TextTestResult run=2 errors=0 failures=0>

        """
        if tol is None:
            tol = self.tol
        if rel is None:
            rel = self.rel
        # Only when *both* arguments are sequences do we compare item by
        # item; otherwise they are treated as plain numbers.
        if (
                isinstance(first, collections.abc.Sequence) and
                isinstance(second, collections.abc.Sequence)
            ):
            check = self._check_approx_seq
        else:
            check = self._check_approx_num
        check(first, second, tol, rel, msg)

    def _check_approx_seq(self, first, second, tol, rel, msg):
        # Compare two sequences element-by-element, failing at once if
        # their lengths differ.
        if len(first) != len(second):
            standardMsg = (
                "sequences differ in length: %d items != %d items"
                % (len(first), len(second))
                )
            msg = self._formatMessage(msg, standardMsg)
            raise self.failureException(msg)
        for i, (a,e) in enumerate(zip(first, second)):
            self._check_approx_num(a, e, tol, rel, msg, i)

    def _check_approx_num(self, first, second, tol, rel, msg, idx=None):
        # Compare two numbers; idx is the position within a sequence
        # comparison, or None for a plain number comparison.
        if approx_equal(first, second, tol, rel):
            # Test passes. Return early, we are done.
            return None
        # Otherwise we failed.
        standardMsg = self._make_std_err_msg(first, second, tol, rel, idx)
        msg = self._formatMessage(msg, standardMsg)
        raise self.failureException(msg)

    @staticmethod
    def _make_std_err_msg(first, second, tol, rel, idx):
        # Create the standard error message for approx_equal failures.
        assert first != second
        template = (
            ' %r != %r\n'
            ' values differ by more than tol=%r and rel=%r\n'
            ' -> absolute error = %r\n'
            ' -> relative error = %r'
            )
        if idx is not None:
            header = 'numeric sequences first differ at index %d.\n' % idx
            template = header + template
        # Calculate actual errors:
        abs_err, rel_err = _calc_errors(first, second)
        return template % (first, second, tol, rel, abs_err, rel_err)
288
289
# ========================
# === Test the helpers ===
# ========================
293
class TestSign(unittest.TestCase):
    """Test that the helper function sign() works correctly."""
    def testZeroes(self):
        # Test that signed zeroes report their sign correctly.
        self.assertEqual(sign(0.0), +1)
        self.assertEqual(sign(-0.0), -1)
300
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700301
# --- Tests for approx_equal ---
303
class ApproxEqualSymmetryTest(unittest.TestCase):
    # Test symmetry of approx_equal.

    def test_relative_symmetry(self):
        # Check that approx_equal treats relative error symmetrically.
        # (a-b)/a is usually not equal to (a-b)/b. Ensure that this
        # doesn't matter.
        #
        # Note: the reason for this test is that an early version
        # of approx_equal was not symmetric. A relative error test
        # would pass, or fail, depending on which value was passed
        # as the first argument.
        #
        args1 = [2456, 37.8, -12.45, Decimal('2.54'), Fraction(17, 54)]
        args2 = [2459, 37.2, -12.41, Decimal('2.59'), Fraction(15, 54)]
        assert len(args1) == len(args2)
        for a, b in zip(args1, args2):
            self.do_relative_symmetry(a, b)

    def do_relative_symmetry(self, a, b):
        # Helper: a and b must differ; they are ordered so that a < b.
        a, b = min(a, b), max(a, b)
        assert a < b
        delta = b - a  # The absolute difference between the values.
        rel_err1, rel_err2 = abs(delta/a), abs(delta/b)
        # Choose an error margin halfway between the two.
        rel = (rel_err1 + rel_err2)/2
        # Now see that values a and b compare approx equal regardless of
        # which is given first.
        self.assertTrue(approx_equal(a, b, tol=0, rel=rel))
        self.assertTrue(approx_equal(b, a, tol=0, rel=rel))

    def test_symmetry(self):
        # Test that approx_equal(a, b) == approx_equal(b, a)
        args = [-23, -2, 5, 107, 93568]
        delta = 2
        for a in args:
            for type_ in (int, float, Decimal, Fraction):
                x = type_(a)*100
                y = x + delta
                r = abs(delta/max(x, y))
                # There are five cases to check:
                # 1) actual error <= tol, <= rel
                self.do_symmetry_test(x, y, tol=delta, rel=r)
                self.do_symmetry_test(x, y, tol=delta+1, rel=2*r)
                # 2) actual error > tol, > rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r/2)
                # 3) actual error <= tol, > rel
                self.do_symmetry_test(x, y, tol=delta, rel=r/2)
                # 4) actual error > tol, <= rel
                self.do_symmetry_test(x, y, tol=delta-1, rel=r)
                self.do_symmetry_test(x, y, tol=delta-1, rel=2*r)
                # 5) exact equality test
                self.do_symmetry_test(x, x, tol=0, rel=0)
                self.do_symmetry_test(x, y, tol=0, rel=0)

    def do_symmetry_test(self, a, b, tol, rel):
        # Helper: the result must not depend on the argument order.
        # The template uses a str.format placeholder; a %-style "%r" here
        # would be left uninterpolated by .format() and hide the values.
        template = "approx_equal comparisons don't match for {!r}"
        flag1 = approx_equal(a, b, tol, rel)
        flag2 = approx_equal(b, a, tol, rel)
        self.assertEqual(flag1, flag2, template.format((a, b, tol, rel)))
364
365
class ApproxEqualExactTest(unittest.TestCase):
    # Test the approx_equal function with exactly equal values.
    # Equal values should compare as approximately equal.
    # Test cases for exactly equal values, which should compare approx
    # equal regardless of the error tolerances given.

    def do_exactly_equal_test(self, x, tol, rel):
        # Helper: x must compare approx equal to itself, and so must -x.
        result = approx_equal(x, x, tol=tol, rel=rel)
        self.assertTrue(result, 'equality failure for x=%r' % x)
        result = approx_equal(-x, -x, tol=tol, rel=rel)
        self.assertTrue(result, 'equality failure for x=%r' % -x)

    def test_exactly_equal_ints(self):
        # Test that equal int values are exactly equal.
        for n in [42, 19740, 14974, 230, 1795, 700245, 36587]:
            self.do_exactly_equal_test(n, 0, 0)

    def test_exactly_equal_floats(self):
        # Test that equal float values are exactly equal.
        for x in [0.42, 1.9740, 1497.4, 23.0, 179.5, 70.0245, 36.587]:
            self.do_exactly_equal_test(x, 0, 0)

    def test_exactly_equal_fractions(self):
        # Test that equal Fraction values are exactly equal.
        F = Fraction
        for f in [F(1, 2), F(0), F(5, 3), F(9, 7), F(35, 36), F(3, 7)]:
            self.do_exactly_equal_test(f, 0, 0)

    def test_exactly_equal_decimals(self):
        # Test that equal Decimal values are exactly equal.
        D = Decimal
        for d in map(D, "8.2 31.274 912.04 16.745 1.2047".split()):
            self.do_exactly_equal_test(d, 0, 0)

    def test_exactly_equal_absolute(self):
        # Test that equal values are exactly equal with an absolute error.
        for n in [16, 1013, 1372, 1198, 971, 4]:
            # Test as ints.
            self.do_exactly_equal_test(n, 0.01, 0)
            # Test as floats.
            self.do_exactly_equal_test(n/10, 0.01, 0)
            # Test as Fractions.
            f = Fraction(n, 1234)
            self.do_exactly_equal_test(f, 0.01, 0)

    def test_exactly_equal_absolute_decimals(self):
        # Test equal Decimal values are exactly equal with an absolute error.
        self.do_exactly_equal_test(Decimal("3.571"), Decimal("0.01"), 0)
        self.do_exactly_equal_test(-Decimal("81.3971"), Decimal("0.01"), 0)

    def test_exactly_equal_relative(self):
        # Test that equal values are exactly equal with a relative error.
        for x in [8347, 101.3, -7910.28, Fraction(5, 21)]:
            self.do_exactly_equal_test(x, 0, 0.01)
        self.do_exactly_equal_test(Decimal("11.68"), 0, Decimal("0.01"))

    def test_exactly_equal_both(self):
        # Test that equal values are equal when both tol and rel are given.
        for x in [41017, 16.742, -813.02, Fraction(3, 8)]:
            self.do_exactly_equal_test(x, 0.1, 0.01)
        D = Decimal
        self.do_exactly_equal_test(D("7.2"), D("0.1"), D("0.01"))
428
429
class ApproxEqualUnequalTest(unittest.TestCase):
    # Unequal values should compare unequal with zero error tolerances.
    # Test cases for unequal values, with exact equality test.

    def do_exactly_unequal_test(self, x):
        # Helper: with tol=0 and rel=0, a value never matches value+1;
        # checked for both x and -x.
        for a in (x, -x):
            result = approx_equal(a, a+1, tol=0, rel=0)
            self.assertFalse(result, 'inequality failure for x=%r' % a)

    def test_exactly_unequal_ints(self):
        # Test unequal int values are unequal with zero error tolerance.
        for n in [951, 572305, 478, 917, 17240]:
            self.do_exactly_unequal_test(n)

    def test_exactly_unequal_floats(self):
        # Test unequal float values are unequal with zero error tolerance.
        for x in [9.51, 5723.05, 47.8, 9.17, 17.24]:
            self.do_exactly_unequal_test(x)

    def test_exactly_unequal_fractions(self):
        # Test that unequal Fractions are unequal with zero error tolerance.
        F = Fraction
        for f in [F(1, 5), F(7, 9), F(12, 11), F(101, 99023)]:
            self.do_exactly_unequal_test(f)

    def test_exactly_unequal_decimals(self):
        # Test that unequal Decimals are unequal with zero error tolerance.
        for d in map(Decimal, "3.1415 298.12 3.47 18.996 0.00245".split()):
            self.do_exactly_unequal_test(d)
459
460
class ApproxEqualInexactTest(unittest.TestCase):
    # Inexact test cases for approx_error.
    # Test cases when comparing two values that are not exactly equal.

    # === Absolute error tests ===

    def do_approx_equal_abs_test(self, x, delta):
        # Helper: values delta apart must match with tol=2*delta and
        # must not match with tol=delta/2.
        template = "Test failure for x={!r}, y={!r}"
        for y in (x + delta, x - delta):
            msg = template.format(x, y)
            self.assertTrue(approx_equal(x, y, tol=2*delta, rel=0), msg)
            self.assertFalse(approx_equal(x, y, tol=delta/2, rel=0), msg)

    def test_approx_equal_absolute_ints(self):
        # Test approximate equality of ints with an absolute error.
        for n in [-10737, -1975, -7, -2, 0, 1, 9, 37, 423, 9874, 23789110]:
            self.do_approx_equal_abs_test(n, 10)
            self.do_approx_equal_abs_test(n, 2)

    def test_approx_equal_absolute_floats(self):
        # Test approximate equality of floats with an absolute error.
        for x in [-284.126, -97.1, -3.4, -2.15, 0.5, 1.0, 7.8, 4.23, 3817.4]:
            self.do_approx_equal_abs_test(x, 1.5)
            self.do_approx_equal_abs_test(x, 0.01)
            self.do_approx_equal_abs_test(x, 0.0001)

    def test_approx_equal_absolute_fractions(self):
        # Test approximate equality of Fractions with an absolute error.
        delta = Fraction(1, 29)
        numerators = [-84, -15, -2, -1, 0, 1, 5, 17, 23, 34, 71]
        for f in (Fraction(n, 29) for n in numerators):
            self.do_approx_equal_abs_test(f, delta)
            self.do_approx_equal_abs_test(f, float(delta))

    def test_approx_equal_absolute_decimals(self):
        # Test approximate equality of Decimals with an absolute error.
        delta = Decimal("0.01")
        for d in map(Decimal, "1.0 3.5 36.08 61.79 7912.3648".split()):
            self.do_approx_equal_abs_test(d, delta)
            self.do_approx_equal_abs_test(-d, delta)

    def test_cross_zero(self):
        # Test for the case of the two values having opposite signs.
        self.assertTrue(approx_equal(1e-5, -1e-5, tol=1e-4, rel=0))

    # === Relative error tests ===

    def do_approx_equal_rel_test(self, x, delta):
        # Helper: values a relative delta apart must match with
        # rel=2*delta and must not match with rel=delta/2.
        template = "Test failure for x={!r}, y={!r}"
        for y in (x*(1+delta), x*(1-delta)):
            msg = template.format(x, y)
            self.assertTrue(approx_equal(x, y, tol=0, rel=2*delta), msg)
            self.assertFalse(approx_equal(x, y, tol=0, rel=delta/2), msg)

    def test_approx_equal_relative_ints(self):
        # Test approximate equality of ints with a relative error.
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.36))
        self.assertTrue(approx_equal(64, 47, tol=0, rel=0.37))
        # ---
        self.assertTrue(approx_equal(449, 512, tol=0, rel=0.125))
        self.assertTrue(approx_equal(448, 512, tol=0, rel=0.125))
        self.assertFalse(approx_equal(447, 512, tol=0, rel=0.125))

    def test_approx_equal_relative_floats(self):
        # Test approximate equality of floats with a relative error.
        for x in [-178.34, -0.1, 0.1, 1.0, 36.97, 2847.136, 9145.074]:
            self.do_approx_equal_rel_test(x, 0.02)
            self.do_approx_equal_rel_test(x, 0.0001)

    def test_approx_equal_relative_fractions(self):
        # Test approximate equality of Fractions with a relative error.
        F = Fraction
        delta = Fraction(3, 8)
        for f in [F(3, 84), F(17, 30), F(49, 50), F(92, 85)]:
            for d in (delta, float(delta)):
                self.do_approx_equal_rel_test(f, d)
                self.do_approx_equal_rel_test(-f, d)

    def test_approx_equal_relative_decimals(self):
        # Test approximate equality of Decimals with a relative error.
        for d in map(Decimal, "0.02 1.0 5.7 13.67 94.138 91027.9321".split()):
            self.do_approx_equal_rel_test(d, Decimal("0.001"))
            self.do_approx_equal_rel_test(-d, Decimal("0.05"))

    # === Both absolute and relative error tests ===

    # There are four cases to consider:
    #   1) actual error <= both absolute and relative error
    #   2) actual error <= absolute error but > relative error
    #   3) actual error <= relative error but > absolute error
    #   4) actual error > both absolute and relative error

    def do_check_both(self, a, b, tol, rel, tol_flag, rel_flag):
        # Helper: tol_flag / rel_flag say whether the absolute-only and
        # relative-only comparisons are expected to succeed; the combined
        # comparison must succeed if either one does.
        check = self.assertTrue if tol_flag else self.assertFalse
        check(approx_equal(a, b, tol=tol, rel=0))
        check = self.assertTrue if rel_flag else self.assertFalse
        check(approx_equal(a, b, tol=0, rel=rel))
        check = self.assertTrue if (tol_flag or rel_flag) else self.assertFalse
        check(approx_equal(a, b, tol=tol, rel=rel))

    def test_approx_equal_both1(self):
        # Test actual error <= both absolute and relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.8e-4, True, True)
        self.do_check_both(-7.387, -7.386, 0.002, 0.0002, True, True)

    def test_approx_equal_both2(self):
        # Test actual error <= absolute error but > relative error.
        self.do_check_both(7.955, 7.952, 0.004, 3.7e-4, True, False)

    def test_approx_equal_both3(self):
        # Test actual error <= relative error but > absolute error.
        self.do_check_both(7.955, 7.952, 0.001, 3.8e-4, False, True)

    def test_approx_equal_both4(self):
        # Test actual error > both absolute and relative error.
        self.do_check_both(2.78, 2.75, 0.01, 0.001, False, False)
        self.do_check_both(971.44, 971.47, 0.02, 3e-5, False, False)
578
579
class ApproxEqualSpecialsTest(unittest.TestCase):
    # Test approx_equal with NANs and INFs and zeroes.

    def test_inf(self):
        # Same-signed infinities match whatever the tolerances; an
        # infinity never matches its negation or a finite number.
        for type_ in (float, Decimal):
            inf = type_('inf')
            self.assertTrue(approx_equal(inf, inf))
            self.assertTrue(approx_equal(inf, inf, 0, 0))
            self.assertTrue(approx_equal(inf, inf, 1, 0.01))
            self.assertTrue(approx_equal(-inf, -inf))
            self.assertFalse(approx_equal(inf, -inf))
            self.assertFalse(approx_equal(inf, 1000))

    def test_nan(self):
        # A NAN never matches anything, itself included.
        for type_ in (float, Decimal):
            nan = type_('nan')
            for other in (nan, type_('inf'), 1000):
                self.assertFalse(approx_equal(nan, other))

    def test_float_zeroes(self):
        # Negative and positive float zero match.
        nzero = math.copysign(0.0, -1)
        self.assertTrue(approx_equal(nzero, 0.0, tol=0.1, rel=0.1))

    def test_decimal_zeroes(self):
        # Negative and positive Decimal zero match.
        nzero = Decimal("-0.0")
        self.assertTrue(approx_equal(nzero, Decimal(0), tol=0.1, rel=0.1))
606
607
class TestApproxEqualErrors(unittest.TestCase):
    # Test error conditions of approx_equal.

    def test_bad_tol(self):
        # Test negative tol raises.
        self.assertRaises(ValueError, approx_equal, 100, 100, -1, 0.1)

    def test_bad_rel(self):
        # Test negative rel raises.
        self.assertRaises(ValueError, approx_equal, 100, 100, 1, -0.1)
618
619
# --- Tests for NumericTestCase ---

# The formatting routine that generates the error messages is complex enough
# that it too needs testing.
624
class TestNumericTestCase(unittest.TestCase):
    # The exact wording of NumericTestCase error messages is *not* guaranteed,
    # but we need to give them some sort of test to ensure that they are
    # generated correctly. As a compromise, we look for specific substrings
    # that are expected to be found even if the overall error message changes.

    def do_test(self, args):
        # Helper: args is the (first, second, tol, rel, idx) tuple passed
        # to NumericTestCase._make_std_err_msg.
        actual_msg = NumericTestCase._make_std_err_msg(*args)
        expected = self.generate_substrings(*args)
        for substring in expected:
            self.assertIn(substring, actual_msg)

    def test_numerictestcase_is_testcase(self):
        # Ensure that NumericTestCase actually is a TestCase.
        self.assertTrue(issubclass(NumericTestCase, unittest.TestCase))

    def test_error_msg_numeric(self):
        # Test the error message generated for numeric comparisons.
        args = (2.5, 4.0, 0.5, 0.25, None)
        self.do_test(args)

    def test_error_msg_sequence(self):
        # Test the error message generated for sequence comparisons.
        args = (3.75, 8.25, 1.25, 0.5, 7)
        self.do_test(args)

    def generate_substrings(self, first, second, tol, rel, idx):
        """Return substrings we expect to see in error messages."""
        abs_err, rel_err = _calc_errors(first, second)
        substrings = [
            'tol=%r' % tol,
            'rel=%r' % rel,
            'absolute error = %r' % abs_err,
            'relative error = %r' % rel_err,
            ]
        # The index is only mentioned for sequence comparisons.
        if idx is not None:
            substrings.append('differ at index %d' % idx)
        return substrings
663
664
# =======================================
# === Tests for the statistics module ===
# =======================================
668
669
class GlobalsTest(unittest.TestCase):
    # Check the module-level metadata of the module under test.
    module = statistics
    expected_metadata = ["__doc__", "__all__"]

    def test_meta(self):
        # Test for the existence of metadata.
        for meta in self.expected_metadata:
            self.assertTrue(hasattr(self.module, meta),
                            "%s not present" % meta)

    def test_check_all(self):
        # Check everything in __all__ exists and is public.
        module = self.module
        for name in module.__all__:
            # No private names in __all__:
            self.assertFalse(name.startswith("_"),
                             'private name "%s" in __all__' % name)
            # And anything in __all__ must exist:
            self.assertTrue(hasattr(module, name),
                            'missing name "%s" in __all__' % name)
690
691
class DocTests(unittest.TestCase):
    # Run the doctests embedded in the statistics module's docstrings.
    @unittest.skipIf(sys.flags.optimize >= 2,
                     "Docstrings are omitted with -OO and above")
    def test_doc_tests(self):
        failed, tried = doctest.testmod(statistics, optionflags=doctest.ELLIPSIS)
        # Guard against silently running no doctests at all.
        self.assertGreater(tried, 0)
        self.assertEqual(failed, 0)
699
class StatisticsErrorTest(unittest.TestCase):
    # StatisticsError must exist and be a subclass of ValueError.
    def test_has_exception(self):
        errmsg = (
                "Expected StatisticsError to be a ValueError, but got a"
                " subclass of %r instead."
                )
        self.assertTrue(hasattr(statistics, 'StatisticsError'))
        self.assertTrue(
                issubclass(statistics.StatisticsError, ValueError),
                errmsg % statistics.StatisticsError.__base__
                )
711
712
# === Tests for private utility functions ===
714
class ExactRatioTest(unittest.TestCase):
    # Test _exact_ratio utility.

    def test_int(self):
        # An int i is reported as the ratio (i, 1).
        for i in (-20, -3, 0, 5, 99, 10**20):
            self.assertEqual(statistics._exact_ratio(i), (i, 1))

    def test_fraction(self):
        # None of these numerators shares a factor with 37, so the
        # fraction is reported unchanged.
        numerators = (-5, 1, 12, 38)
        for n in numerators:
            f = Fraction(n, 37)
            self.assertEqual(statistics._exact_ratio(f), (n, 37))

    def test_float(self):
        self.assertEqual(statistics._exact_ratio(0.125), (1, 8))
        self.assertEqual(statistics._exact_ratio(1.125), (9, 8))
        # For arbitrary floats, dividing the ratio must give back the
        # original value exactly.
        data = [random.uniform(-100, 100) for _ in range(100)]
        for x in data:
            num, den = statistics._exact_ratio(x)
            self.assertEqual(x, num/den)

    def test_decimal(self):
        D = Decimal
        _exact_ratio = statistics._exact_ratio
        self.assertEqual(_exact_ratio(D("0.125")), (1, 8))
        self.assertEqual(_exact_ratio(D("12.345")), (2469, 200))
        self.assertEqual(_exact_ratio(D("-1.98")), (-99, 50))

    def test_inf(self):
        # An infinity comes back as (x, None), keeping its exact type,
        # subclasses included.
        INF = float("INF")
        class MyFloat(float):
            pass
        class MyDecimal(Decimal):
            pass
        for inf in (INF, -INF):
            for type_ in (float, MyFloat, Decimal, MyDecimal):
                x = type_(inf)
                ratio = statistics._exact_ratio(x)
                self.assertEqual(ratio, (x, None))
                self.assertEqual(type(ratio[0]), type_)
                self.assertTrue(math.isinf(ratio[0]))

    def test_float_nan(self):
        # A float NAN comes back as (nan, None), keeping its exact type.
        NAN = float("NAN")
        class MyFloat(float):
            pass
        for nan in (NAN, MyFloat(NAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(math.isnan(ratio[0]))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))

    def test_decimal_nan(self):
        # Quiet and signalling Decimal NANs keep their kind and type.
        NAN = Decimal("NAN")
        sNAN = Decimal("sNAN")
        class MyDecimal(Decimal):
            pass
        for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
            ratio = statistics._exact_ratio(nan)
            self.assertTrue(_nan_equal(ratio[0], nan))
            self.assertIs(ratio[1], None)
            self.assertEqual(type(ratio[0]), type(nan))
777
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700778
class DecimalToRatioTest(unittest.TestCase):
    # Test _exact_ratio private function.

    def test_infinity(self):
        # Test that INFs are handled correctly.
        inf = Decimal('INF')
        self.assertEqual(statistics._exact_ratio(inf), (inf, None))
        self.assertEqual(statistics._exact_ratio(-inf), (-inf, None))

    def test_nan(self):
        # Test that NANs are handled correctly.
        for nan in (Decimal('NAN'), Decimal('sNAN')):
            num, den = statistics._exact_ratio(nan)
            # Because NANs always compare non-equal, we cannot use assertEqual.
            # Nor can we use an identity test, as we don't guarantee anything
            # about the object identity.
            self.assertTrue(_nan_equal(num, nan))
            self.assertIs(den, None)

    def test_sign(self):
        # Test sign is calculated correctly.
        numbers = [Decimal("9.8765e12"), Decimal("9.8765e-12")]
        for d in numbers:
            # First test positive decimals.
            assert d > 0
            num, den = statistics._exact_ratio(d)
            self.assertGreaterEqual(num, 0)
            self.assertGreater(den, 0)
            # Then test negative decimals.
            num, den = statistics._exact_ratio(-d)
            self.assertLessEqual(num, 0)
            self.assertGreater(den, 0)

    def test_negative_exponent(self):
        # Test result when the exponent is negative.
        t = statistics._exact_ratio(Decimal("0.1234"))
        self.assertEqual(t, (617, 5000))

    def test_positive_exponent(self):
        # Test results when the exponent is positive.
        t = statistics._exact_ratio(Decimal("1.234e7"))
        self.assertEqual(t, (12340000, 1))

    def test_regression_20536(self):
        # Regression test for issue 20536.
        # See http://bugs.python.org/issue20536
        t = statistics._exact_ratio(Decimal("1e2"))
        self.assertEqual(t, (100, 1))
        t = statistics._exact_ratio(Decimal("1.47e5"))
        self.assertEqual(t, (147000, 1))
829
Larry Hastingsf5e987b2013-10-19 11:50:09 -0700830
class IsFiniteTest(unittest.TestCase):
    # Exercise the private _isfinite function with int, Fraction, float and
    # Decimal arguments.

    def test_finite(self):
        # Ordinary numbers of every supported type are finite.
        finite_values = (5, Fraction(1, 3), 2.5, Decimal("5.5"))
        for value in finite_values:
            self.assertTrue(statistics._isfinite(value))

    def test_infinity(self):
        # float and Decimal infinities are not finite.
        infinities = (float("inf"), Decimal("inf"))
        for value in infinities:
            self.assertFalse(statistics._isfinite(value))

    def test_nan(self):
        # Neither quiet nor signalling NANs are finite.
        nans = (float("nan"), Decimal("NAN"), Decimal("sNAN"))
        for value in nans:
            self.assertFalse(statistics._isfinite(value))
848
849
class CoerceTest(unittest.TestCase):
    # Test that private function _coerce correctly deals with types.

    # The coercion rules are currently an implementation detail, although at
    # some point that should change. The tests and comments here define the
    # correct implementation.

    # Pre-condition of _coerce:
    #
    # - coerce(T, S) will never be called with bool as the first argument;
    #   this is a pre-condition, guarded with an assertion.
    #
    # Coercion rules:
    #
    # - coerce(T, T) will always return T; we assume T is a valid numeric
    #   type. Violate this assumption at your own risk.
    #
    # - Apart from as above, bool is treated as if it were actually int.
    #
    # - coerce(int, X) and coerce(X, int) return X.

    def test_bool(self):
        # bool is somewhat special, due to the pre-condition that it is
        # never given as the first argument to _coerce, and that it cannot
        # be subclassed. So we test it specially.
        for T in (int, float, Fraction, Decimal):
            with self.subTest(T=T):
                self.assertIs(statistics._coerce(T, bool), T)
                class MyClass(T): pass
                self.assertIs(statistics._coerce(MyClass, bool), MyClass)

    def assertCoerceTo(self, A, B):
        """Assert that type A coerces to B, whichever argument comes first."""
        self.assertIs(statistics._coerce(A, B), B)
        self.assertIs(statistics._coerce(B, A), B)

    def check_coerce_to(self, A, B):
        """Checks that type A coerces to B, including subclasses."""
        # Assert that type A is coerced to B.
        self.assertCoerceTo(A, B)
        # Subclasses of A are also coerced to B.
        class SubclassOfA(A): pass
        self.assertCoerceTo(SubclassOfA, B)
        # A, and subclasses of A, are coerced to subclasses of B.
        class SubclassOfB(B): pass
        self.assertCoerceTo(A, SubclassOfB)
        self.assertCoerceTo(SubclassOfA, SubclassOfB)

    def assertCoerceRaises(self, A, B):
        """Assert that coercing A to B, or vice versa, raises TypeError."""
        # A and B must be passed as two separate positional arguments.
        # Handing assertRaises a single (A, B) tuple would call _coerce with
        # one argument, which raises TypeError for the missing parameter and
        # makes the assertion pass no matter what _coerce actually does.
        self.assertRaises(TypeError, statistics._coerce, A, B)
        self.assertRaises(TypeError, statistics._coerce, B, A)

    def check_type_coercions(self, T):
        """Check that type T coerces correctly with subclasses of itself."""
        assert T is not bool
        # Coercing a type with itself returns the same type.
        self.assertIs(statistics._coerce(T, T), T)
        # Coercing a type with a subclass of itself returns the subclass.
        class U(T): pass
        class V(T): pass
        class W(U): pass
        for typ in (U, V, W):
            self.assertCoerceTo(T, typ)
        self.assertCoerceTo(U, W)
        # Coercing two subclasses that aren't parent/child is an error.
        # NOTE(review): int is skipped. Presumably _coerce treats subclasses
        # of int like int itself and returns the other type instead of
        # raising -- confirm against statistics._coerce that this is the
        # intended rule before asserting either outcome here.
        if T is not int:
            self.assertCoerceRaises(U, V)
            self.assertCoerceRaises(V, W)

    def test_int(self):
        # Check that int coerces correctly.
        self.check_type_coercions(int)
        for typ in (float, Fraction, Decimal):
            self.check_coerce_to(int, typ)

    def test_fraction(self):
        # Check that Fraction coerces correctly.
        self.check_type_coercions(Fraction)
        self.check_coerce_to(Fraction, float)

    def test_decimal(self):
        # Check that Decimal coerces correctly.
        self.check_type_coercions(Decimal)

    def test_float(self):
        # Check that float coerces correctly.
        self.check_type_coercions(float)

    def test_non_numeric_types(self):
        # Non-numeric types cannot be coerced with float, Fraction or
        # Decimal. int is deliberately not in the list of good types: by the
        # rule documented at the top of this class, coerce(int, X) and
        # coerce(X, int) return X rather than raise.
        for bad_type in (str, list, type(None), tuple, dict):
            for good_type in (float, Fraction, Decimal):
                with self.subTest(good_type=good_type, bad_type=bad_type):
                    self.assertCoerceRaises(good_type, bad_type)

    def test_incompatible_types(self):
        # Test that incompatible types raise.
        for T in (float, Fraction):
            class MySubclass(T): pass
            self.assertCoerceRaises(T, Decimal)
            self.assertCoerceRaises(MySubclass, Decimal)
948
949
950class ConvertTest(unittest.TestCase):
951 # Test private _convert function.
952
953 def check_exact_equal(self, x, y):
954 """Check that x equals y, and has the same type as well."""
955 self.assertEqual(x, y)
956 self.assertIs(type(x), type(y))
957
958 def test_int(self):
959 # Test conversions to int.
960 x = statistics._convert(Fraction(71), int)
961 self.check_exact_equal(x, 71)
962 class MyInt(int): pass
963 x = statistics._convert(Fraction(17), MyInt)
964 self.check_exact_equal(x, MyInt(17))
965
966 def test_fraction(self):
967 # Test conversions to Fraction.
968 x = statistics._convert(Fraction(95, 99), Fraction)
969 self.check_exact_equal(x, Fraction(95, 99))
970 class MyFraction(Fraction):
971 def __truediv__(self, other):
972 return self.__class__(super().__truediv__(other))
973 x = statistics._convert(Fraction(71, 13), MyFraction)
974 self.check_exact_equal(x, MyFraction(71, 13))
975
976 def test_float(self):
977 # Test conversions to float.
978 x = statistics._convert(Fraction(-1, 2), float)
979 self.check_exact_equal(x, -0.5)
980 class MyFloat(float):
981 def __truediv__(self, other):
982 return self.__class__(super().__truediv__(other))
983 x = statistics._convert(Fraction(9, 8), MyFloat)
984 self.check_exact_equal(x, MyFloat(1.125))
985
986 def test_decimal(self):
987 # Test conversions to Decimal.
988 x = statistics._convert(Fraction(1, 40), Decimal)
989 self.check_exact_equal(x, Decimal("0.025"))
990 class MyDecimal(Decimal):
991 def __truediv__(self, other):
992 return self.__class__(super().__truediv__(other))
993 x = statistics._convert(Fraction(-15, 16), MyDecimal)
994 self.check_exact_equal(x, MyDecimal("-0.9375"))
995
996 def test_inf(self):
997 for INF in (float('inf'), Decimal('inf')):
998 for inf in (INF, -INF):
999 x = statistics._convert(inf, type(inf))
1000 self.check_exact_equal(x, inf)
1001
1002 def test_nan(self):
1003 for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
1004 x = statistics._convert(nan, type(nan))
1005 self.assertTrue(_nan_equal(x, nan))
Nick Coghlan73afe2a2014-02-08 19:58:04 +10001006
Tzanetos Balitsarisb8097172020-05-13 13:29:31 +03001007 def test_invalid_input_type(self):
1008 with self.assertRaises(TypeError):
1009 statistics._convert(None, float)
1010
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001011
class FailNegTest(unittest.TestCase):
    """Tests for the private _fail_neg function."""

    def test_pass_through(self):
        # Non-negative values of every numeric type are yielded unchanged.
        original = [1, 2.0, Fraction(3), Decimal(4)]
        passed = list(statistics._fail_neg(original))
        self.assertEqual(original, passed)

    def test_negatives_raise(self):
        # Pulling a negative value out of the iterator raises.
        for positive in [1, 2.0, Fraction(3), Decimal(4)]:
            stream = statistics._fail_neg([-positive])
            self.assertRaises(statistics.StatisticsError, next, stream)

    def test_error_msg(self):
        # The message handed to _fail_neg is the one carried by the error.
        # A random number in the message guards against a hard-coded match.
        expected = "badness #%d" % random.randint(10000, 99999)
        try:
            next(statistics._fail_neg([-1], expected))
        except statistics.StatisticsError as exc:
            reported = exc.args[0]
        else:
            self.fail("expected exception, but it didn't happen")
        self.assertEqual(reported, expected)
1038
1039
class FindLteqTest(unittest.TestCase):
    # Exercise the private _find_lteq function, called as
    # _find_lteq(a, x) with a list a and a value x.

    def test_invalid_input_values(self):
        # An empty list, or a value that is absent from the list, raises
        # ValueError.
        bad_inputs = [
            ([], 1),
            ([1, 2], 3),
            ([1, 3], 2),
        ]
        for a, x in bad_inputs:
            with self.subTest(a=a, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_lteq(a, x)

    def test_locate_successfully(self):
        # The expected index is that of the first occurrence of x.
        good_inputs = [
            ([1, 1, 1, 2, 3], 1, 0),
            ([0, 1, 1, 1, 2, 3], 1, 1),
            ([1, 2, 3, 3, 3], 3, 2),
        ]
        for a, x, expected_i in good_inputs:
            with self.subTest(a=a, x=x):
                found = statistics._find_lteq(a, x)
                self.assertEqual(expected_i, found)
1061
1062
class FindRteqTest(unittest.TestCase):
    # Test _find_rteq private function, called as _find_rteq(a, l, x) with a
    # list a, a starting index l and a value x.

    def test_invalid_input_values(self):
        # A starting index past the end of the list, or a value that is
        # absent from the list, raises ValueError.
        for a, l, x in [
            ([1], 2, 1),
            ([1, 3], 0, 2)
        ]:
            # subTest labels a failure with the offending inputs, matching
            # the other parametrised tests for _find_lteq and _find_rteq.
            with self.subTest(a=a, l=l, x=x):
                with self.assertRaises(ValueError):
                    statistics._find_rteq(a, l, x)

    def test_locate_successfully(self):
        # The expected index is that of the last occurrence of x.
        for a, l, x, expected_i in [
            ([1, 1, 1, 2, 3], 0, 1, 2),
            ([0, 1, 1, 1, 2, 3], 0, 1, 3),
            ([1, 2, 3, 3, 3], 0, 3, 4)
        ]:
            with self.subTest(a=a, l=l, x=x):
                self.assertEqual(expected_i, statistics._find_rteq(a, l, x))
1082
1083
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001084# === Tests for public functions ===
1085
class UnivariateCommonMixin:
    # Tests shared by most univariate functions that take a data argument.
    # The concrete test class is expected to set self.func to the function
    # under test.

    def test_no_args(self):
        # Calling with no arguments at all is a TypeError.
        self.assertRaises(TypeError, self.func)

    def test_empty_data(self):
        # An empty data argument (first argument) is a StatisticsError,
        # whether it is a list, a tuple or an exhausted iterator.
        for empty in ([], (), iter([])):
            self.assertRaises(statistics.StatisticsError, self.func, empty)

    def prepare_data(self):
        """Return a shuffled list of the ints 0 to 9 for various tests."""
        values = list(range(10))
        ordered = sorted(values)
        # Keep shuffling until the result is definitely not in sorted order.
        while values == ordered:
            random.shuffle(values)
        return values

    def test_no_inplace_modifications(self):
        # The function must leave the list it is given untouched.
        data = self.prepare_data()
        assert len(data) != 1  # Necessary to avoid infinite loop.
        assert data != sorted(data)
        snapshot = data[:]
        assert data is not snapshot
        _ = self.func(data)
        self.assertListEqual(data, snapshot, "data has been modified")

    def test_order_doesnt_matter(self):
        # Shuffling the data points must not change the result.

        # CAUTION: due to floating point rounding errors, the result actually
        # may depend on the order. Consider this test representing an ideal.
        # To avoid this test failing, only test with exact values such as ints
        # or Fractions.
        data = [1, 2, 3, 3, 3, 4, 5, 6]*100
        before = self.func(data)
        random.shuffle(data)
        after = self.func(data)
        self.assertEqual(before, after)

    def test_type_of_data_collection(self):
        # The kind of iterable holding the data doesn't affect the result.
        class MyList(list):
            pass
        class MyTuple(tuple):
            pass
        def generator(data):
            return (obj for obj in data)
        data = self.prepare_data()
        expected = self.func(data)
        containers = (list, tuple, iter, MyList, MyTuple, generator)
        for kind in containers:
            self.assertEqual(self.func(kind(data)), expected)

    def test_range_data(self):
        # A range object gives the same result as the equivalent list.
        data = range(20, 50, 3)
        expected = self.func(list(data))
        self.assertEqual(self.func(data), expected)

    def test_bad_arg_types(self):
        # Data of the wrong type raises TypeError.

        # Keep these as separate calls rather than a loop such as
        #   for bad in list_of_bad:
        #       self.check_for_type_error(bad)
        #
        # assertRaises doesn't show the arguments that caused the test
        # failure, so a failure inside a loop is very difficult to debug.
        self.check_for_type_error(None)
        self.check_for_type_error(23)
        self.check_for_type_error(42.0)
        self.check_for_type_error(object())

    def check_for_type_error(self, *args):
        # Helper: calling the function with *args must raise TypeError.
        self.assertRaises(TypeError, self.func, *args)

    def test_type_of_data_element(self):
        # The type of the data elements doesn't affect the numeric result.
        # This is a weaker test than UnivariateTypeMixin.test_types_conserved,
        # because it checks the numeric result by equality, but not by type.
        class MyFloat(float):
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__

        raw = self.prepare_data()
        expected = self.func(raw)
        for kind in (float, MyFloat, Decimal, Fraction):
            converted = [kind(x) for x in raw]
            # Cast back to the type of the int-based result before comparing.
            result = type(expected)(self.func(converted))
            self.assertEqual(result, expected)
1183
1184
class UnivariateTypeMixin:
    """Mixin class for type-conserving functions.

    The test(s) here apply to functions whose result has the same type as
    the individual data points. E.g. the mean of a list of Fractions should
    itself be a Fraction.

    Only tests that rely on the function returning the same type as its
    input data belong in this class; other type-related tests need not go
    here.
    """
    def prepare_types_for_conservation_test(self):
        """Return the types which are expected to be conserved."""
        class MyFloat(float):
            # Every arithmetic operator defined here re-wraps the plain
            # float result in the subclass.
            def __truediv__(self, other):
                return type(self)(super().__truediv__(other))
            def __rtruediv__(self, other):
                return type(self)(super().__rtruediv__(other))
            def __sub__(self, other):
                return type(self)(super().__sub__(other))
            def __rsub__(self, other):
                return type(self)(super().__rsub__(other))
            def __pow__(self, other):
                return type(self)(super().__pow__(other))
            def __add__(self, other):
                return type(self)(super().__add__(other))
            __radd__ = __add__
        conserved = (float, Decimal, Fraction, MyFloat)
        return conserved

    def test_types_conserved(self):
        # The result has the same type as the data points.
        # (Excludes mixed data types.) Only the type of the return value
        # is checked here, not the value itself.
        raw = self.prepare_data()
        for kind in self.prepare_types_for_conservation_test():
            typed = [kind(x) for x in raw]
            self.assertIs(type(self.func(typed)), kind)
1222
1223
class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
    # Common test cases for statistics._sum() function.

    # This test suite looks only at the numeric value returned by _sum,
    # after conversion to the appropriate type.

    # NOTE(review): neither base class derives from unittest.TestCase, so
    # the test loader presumably never collects this class -- confirm
    # whether that is intentional before adding a TestCase base.
    def setUp(self):
        def simplified_sum(*args):
            # _sum returns a (type, value, count) triple; reduce it to the
            # value converted to that type. This needs _convert, which takes
            # a value and a type; _coerce takes two types and cannot be
            # handed a value.
            T, value, n = statistics._sum(*args)
            return statistics._convert(value, T)
        self.func = simplified_sum
1234
1235
1236class TestSum(NumericTestCase):
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001237 # Test cases for statistics._sum() function.
1238
Steven D'Apranob28c3272015-12-01 19:59:53 +11001239 # These tests look at the entire three value tuple returned by _sum.
1240
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001241 def setUp(self):
1242 self.func = statistics._sum
1243
1244 def test_empty_data(self):
1245 # Override test for empty data.
1246 for data in ([], (), iter([])):
Steven D'Apranob28c3272015-12-01 19:59:53 +11001247 self.assertEqual(self.func(data), (int, Fraction(0), 0))
1248 self.assertEqual(self.func(data, 23), (int, Fraction(23), 0))
1249 self.assertEqual(self.func(data, 2.3), (float, Fraction(2.3), 0))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001250
1251 def test_ints(self):
Steven D'Apranob28c3272015-12-01 19:59:53 +11001252 self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]),
1253 (int, Fraction(60), 8))
1254 self.assertEqual(self.func([4, 2, 3, -8, 7], 1000),
1255 (int, Fraction(1008), 5))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001256
1257 def test_floats(self):
Steven D'Apranob28c3272015-12-01 19:59:53 +11001258 self.assertEqual(self.func([0.25]*20),
1259 (float, Fraction(5.0), 20))
1260 self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5),
1261 (float, Fraction(3.125), 4))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001262
1263 def test_fractions(self):
Steven D'Apranob28c3272015-12-01 19:59:53 +11001264 self.assertEqual(self.func([Fraction(1, 1000)]*500),
1265 (Fraction, Fraction(1, 2), 500))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001266
1267 def test_decimals(self):
1268 D = Decimal
1269 data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"),
1270 D("3.974"), D("2.328"), D("4.617"), D("2.843"),
1271 ]
Steven D'Apranob28c3272015-12-01 19:59:53 +11001272 self.assertEqual(self.func(data),
1273 (Decimal, Decimal("20.686"), 8))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001274
1275 def test_compare_with_math_fsum(self):
1276 # Compare with the math.fsum function.
1277 # Ideally we ought to get the exact same result, but sometimes
1278 # we differ by a very slight amount :-(
1279 data = [random.uniform(-100, 1000) for _ in range(1000)]
Steven D'Apranob28c3272015-12-01 19:59:53 +11001280 self.assertApproxEqual(float(self.func(data)[1]), math.fsum(data), rel=2e-16)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001281
1282 def test_start_argument(self):
1283 # Test that the optional start argument works correctly.
1284 data = [random.uniform(1, 1000) for _ in range(100)]
Steven D'Apranob28c3272015-12-01 19:59:53 +11001285 t = self.func(data)[1]
1286 self.assertEqual(t+42, self.func(data, 42)[1])
1287 self.assertEqual(t-23, self.func(data, -23)[1])
1288 self.assertEqual(t+Fraction(1e20), self.func(data, 1e20)[1])
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001289
1290 def test_strings_fail(self):
1291 # Sum of strings should fail.
1292 self.assertRaises(TypeError, self.func, [1, 2, 3], '999')
1293 self.assertRaises(TypeError, self.func, [1, 2, 3, '999'])
1294
1295 def test_bytes_fail(self):
1296 # Sum of bytes should fail.
1297 self.assertRaises(TypeError, self.func, [1, 2, 3], b'999')
1298 self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])
1299
1300 def test_mixed_sum(self):
Nick Coghlan73afe2a2014-02-08 19:58:04 +10001301 # Mixed input types are not (currently) allowed.
1302 # Check that mixed data types fail.
Steven D'Apranob28c3272015-12-01 19:59:53 +11001303 self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)])
Nick Coghlan73afe2a2014-02-08 19:58:04 +10001304 # And so does mixed start argument.
1305 self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001306
1307
class SumTortureTest(NumericTestCase):
    def test_torture(self):
        # Tim Peters' torture test for sum, and variants of same.
        exact = (float, Fraction(20000.0), 40000)
        for pattern in ([1, 1e100, 1, -1e100], [1e100, 1, 1, -1e100]):
            self.assertEqual(statistics._sum(pattern*10000), exact)
        # The third variant is only checked approximately.
        kind, total, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
        self.assertIs(kind, float)
        self.assertEqual(count, 40000)
        self.assertApproxEqual(float(total), 2.0e-96, rel=5e-16)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001319
1320
class SumSpecialValues(NumericTestCase):
    # Test that sum works correctly with IEEE-754 special values.

    def test_nan(self):
        # A NAN among the data gives a NAN of the same type.
        for type_ in (float, Decimal):
            nan = type_('nan')
            result = statistics._sum([1, nan, 2])[1]
            self.assertIs(type(result), type_)
            self.assertTrue(math.isnan(result))

    def check_infinity(self, x, inf):
        """Check x is an infinity of the same type and sign as inf."""
        self.assertTrue(math.isinf(x))
        self.assertIs(type(x), type(inf))
        self.assertEqual(x > 0, inf > 0)
        # Use a unittest assertion rather than a bare assert statement, so
        # the check still runs when Python is started with -O and a failure
        # is reported like the other checks in this helper.
        self.assertEqual(x, inf)

    def do_test_inf(self, inf):
        # Adding a single infinity gives infinity.
        result = statistics._sum([1, 2, inf, 3])[1]
        self.check_infinity(result, inf)
        # Adding two infinities of the same sign also gives infinity.
        result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
        self.check_infinity(result, inf)

    def test_float_inf(self):
        # Positive and negative float infinities.
        inf = float('inf')
        # The loop variable is not called "sign", to avoid shadowing the
        # module-level sign() helper.
        for direction in (+1, -1):
            self.do_test_inf(direction*inf)

    def test_decimal_inf(self):
        # Positive and negative Decimal infinities.
        inf = Decimal('inf')
        for direction in (+1, -1):
            self.do_test_inf(direction*inf)

    def test_float_mismatched_infs(self):
        # Test that adding two infinities of opposite sign gives a NAN.
        inf = float('inf')
        result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
        self.assertTrue(math.isnan(result))

    def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign returns NAN.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.ExtendedContext):
            self.assertTrue(math.isnan(statistics._sum(data)[1]))

    def test_decimal_basiccontext_mismatched_infs_to_nan(self):
        # Test adding Decimal INFs with opposite sign raises InvalidOperation.
        inf = Decimal('inf')
        data = [1, 2, inf, 3, -inf, 4]
        with decimal.localcontext(decimal.BasicContext):
            self.assertRaises(decimal.InvalidOperation, statistics._sum, data)

    def test_decimal_snan_raises(self):
        # Adding sNAN should raise InvalidOperation.
        sNAN = Decimal('sNAN')
        data = [1, sNAN, 2]
        self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
1381
1382
1383# === Tests for averages ===
1384
class AverageMixin(UnivariateCommonMixin):
    # Tests shared by all the averages, on top of the common univariate
    # tests.

    def test_single_value(self):
        # Average of a single value is the value itself.
        singles = (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28'))
        for value in singles:
            self.assertEqual(self.func([value]), value)

    def prepare_values_for_repeated_single_test(self):
        # Values used by test_repeated_single_value; a hook that test
        # classes can override.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.9712'))

    def test_repeated_single_value(self):
        # The average of a single repeated value is the value itself.
        for x in self.prepare_values_for_repeated_single_test():
            for count in (2, 5, 10, 20):
                with self.subTest(x=x, count=count):
                    repeated = [x]*count
                    self.assertEqual(self.func(repeated), x)
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001403
1404
class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.mean.

    def setUp(self):
        self.func = statistics.mean

    def test_torture_pep(self):
        # "Torture Test" from PEP-450.
        self.assertEqual(self.func([1e100, 1, 3, -1e100]), 1)

    def test_ints(self):
        # Mean of ints, in random order.
        values = [0, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9]
        random.shuffle(values)
        self.assertEqual(self.func(values), 4.8125)

    def test_floats(self):
        # Mean of floats, in random order.
        values = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5]
        random.shuffle(values)
        self.assertEqual(self.func(values), 22.015625)

    def test_decimals(self):
        # Mean of Decimals, in random order.
        D = Decimal
        values = [D("1.634"), D("2.517"), D("3.912"), D("4.072"), D("5.813")]
        random.shuffle(values)
        self.assertEqual(self.func(values), D("3.5896"))

    def test_fractions(self):
        # Mean of Fractions, in random order.
        F = Fraction
        values = [F(1, 2), F(2, 3), F(3, 4), F(4, 5), F(5, 6), F(6, 7),
                  F(7, 8)]
        random.shuffle(values)
        self.assertEqual(self.func(values), F(1479, 1960))

    def test_inf(self):
        # A single infinity among the data makes the mean that infinity.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            for direction in (1, -1):
                inf = kind("inf")*direction
                result = self.func(raw + [inf])
                self.assertTrue(math.isinf(result))
                self.assertEqual(result, inf)

    def test_mismatched_infs(self):
        # Infinities of opposite sign among the data give a NAN.
        data = [2, 4, 6, float('inf'), 1, 3, 5, float('-inf')]
        self.assertTrue(math.isnan(self.func(data)))

    def test_nan(self):
        # A NAN among the data gives a NAN.
        raw = [1, 3, 5, 7, 9]  # Use only ints, to avoid TypeError later.
        for kind in (float, Decimal):
            nan = kind("nan")
            result = self.func(raw + [nan])
            self.assertTrue(math.isnan(result))

    def test_big_data(self):
        # Adding a large constant to every data point shifts the mean by
        # that constant.
        c = 1e9
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data) + c
        assert expected != c
        shifted = [x+c for x in data]
        self.assertEqual(self.func(shifted), expected)

    def test_doubled_data(self):
        # Mean of [a,b,c...z] should be same as for [a,a,b,b,c,c...z,z].
        data = [random.uniform(-3, 5) for _ in range(1000)]
        once = self.func(data)
        twice = self.func(data*2)
        self.assertApproxEqual(twice, once)

    def test_regression_20561(self):
        # Regression test for issue 20561.
        # See http://bugs.python.org/issue20561
        d = Decimal('1e4')
        self.assertEqual(statistics.mean([d]), d)

    def test_regression_25177(self):
        # Regression test for issue 25177.
        # Ensure very big and very small floats don't overflow.
        # See http://bugs.python.org/issue25177.
        pair = [8.988465674311579e+307, 8.98846567431158e+307]
        self.assertEqual(statistics.mean(pair), 8.98846567431158e+307)
        big = 8.98846567431158e+307
        tiny = 5e-324
        for n in (2, 3, 5, 200):
            self.assertEqual(statistics.mean([big]*n), big)
            self.assertEqual(statistics.mean([tiny]*n), tiny)
1499
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001500
class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for statistics.harmonic_mean.

    def setUp(self):
        self.func = statistics.harmonic_mean

    def prepare_data(self):
        # Override mixin method: same data, with the zero taken out.
        data = super().prepare_data()
        data.remove(0)
        return data

    def prepare_values_for_repeated_single_test(self):
        # Override mixin method.
        return (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.125'))

    def test_zero(self):
        # A zero among the data makes the harmonic mean zero.
        data = [1, 0, 2]
        self.assertEqual(self.func(data), 0)

    def test_negative_error(self):
        # A negative value among the data raises StatisticsError.
        error = statistics.StatisticsError
        for values in ([-1], [1, -2, 3]):
            with self.subTest(values=values):
                self.assertRaises(error, self.func, values)

    def test_invalid_type_error(self):
        # Test error is raised when input contains invalid type(s)
        bad_inputs = [
            ['3.14'],               # single string
            ['1', '2', '3'],        # multiple strings
            [1, '2', 3, '4', 5],    # mixed strings and valid integers
            [2.3, 3.4, 4.5, '5.6']  # only one string and valid floats
        ]
        for data in bad_inputs:
            with self.subTest(data=data):
                with self.assertRaises(TypeError):
                    self.func(data)

    def test_ints(self):
        # Harmonic mean of ints, in random order.
        values = [2, 4, 4, 8, 16, 16]
        random.shuffle(values)
        self.assertEqual(self.func(values), 6*4/5)

    def test_floats_exact(self):
        # Harmonic mean of some carefully chosen floats.
        values = [1/8, 1/4, 1/4, 1/2, 1/2]
        random.shuffle(values)
        self.assertEqual(self.func(values), 1/4)
        self.assertEqual(self.func([0.25, 0.5, 1.0, 1.0]), 0.5)

    def test_singleton_lists(self):
        # harmonic mean([x]) is checked to equal x for x from 1 to 100.
        for x in range(1, 101):
            self.assertEqual(self.func([x]), x)

    def test_decimals_exact(self):
        # Harmonic mean of some carefully chosen Decimals.
        D = Decimal
        self.assertEqual(self.func([D(15), D(30), D(60), D(60)]), D(30))
        values = [D("0.05"), D("0.10"), D("0.20"), D("0.20")]
        random.shuffle(values)
        self.assertEqual(self.func(values), D("0.10"))
        values = [D("1.68"), D("0.32"), D("5.94"), D("2.75")]
        random.shuffle(values)
        self.assertEqual(self.func(values), D(66528)/70723)

    def test_fractions(self):
        # Harmonic mean of Fractions, in random order.
        F = Fraction
        values = [F(1, 2), F(2, 3), F(3, 4), F(4, 5), F(5, 6), F(6, 7),
                  F(7, 8)]
        random.shuffle(values)
        self.assertEqual(self.func(values), F(7*420, 4029))

    def test_inf(self):
        # Harmonic mean with an infinity among the data.
        data = [2.0, float('inf'), 1.0]
        self.assertEqual(self.func(data), 2.0)

    def test_nan(self):
        # A NAN among the data gives a NAN.
        data = [2.0, float('nan'), 1.0]
        self.assertTrue(math.isnan(self.func(data)))

    def test_multiply_data_points(self):
        # Multiplying every data point by a constant multiplies the
        # harmonic mean by that constant.
        c = 111
        data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4]
        expected = self.func(data)*c
        scaled = [x*c for x in data]
        self.assertEqual(self.func(scaled), expected)

    def test_doubled_data(self):
        # Harmonic mean of [a,b...z] should be same as for [a,a,b,b...z,z].
        data = [random.uniform(1, 5) for _ in range(1000)]
        once = self.func(data)
        twice = self.func(data*2)
        self.assertApproxEqual(twice, once)
1599
1600
class TestMedian(NumericTestCase, AverageMixin):
    # Common tests for median and all median.* functions.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        values = super().prepare_data()
        # Pad with one extra point so the number of data points is odd.
        if len(values)%2 != 1:
            values.append(2)
        return values

    def test_even_ints(self):
        # Median of an even number of int data points.
        data = [1, 2, 3, 4, 5, 6]
        assert len(data)%2 == 0
        self.assertEqual(self.func(data), 3.5)

    def test_odd_ints(self):
        # Median of an odd number of int data points.
        data = [1, 2, 3, 4, 5, 6, 9]
        assert len(data)%2 == 1
        self.assertEqual(self.func(data), 4)

    def test_odd_fractions(self):
        # Median of an odd number of Fractions, in random order.
        F = Fraction
        data = [F(n, 7) for n in (1, 2, 3, 4, 5)]
        assert len(data)%2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), F(3, 7))

    def test_even_fractions(self):
        # Median of an even number of Fractions, in random order.
        F = Fraction
        data = [F(n, 7) for n in (1, 2, 3, 4, 5, 6)]
        assert len(data)%2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), F(1, 2))

    def test_odd_decimals(self):
        # Median of an odd number of Decimals, in random order.
        D = Decimal
        data = [D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')]
        assert len(data)%2 == 1
        random.shuffle(data)
        self.assertEqual(self.func(data), D('4.2'))

    def test_even_decimals(self):
        # Median of an even number of Decimals, in random order.
        D = Decimal
        data = [D('1.2'), D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')]
        assert len(data)%2 == 0
        random.shuffle(data)
        self.assertEqual(self.func(data), D('3.65'))
1656
1657
class TestMedianDataType(NumericTestCase, UnivariateTypeMixin):
    # Checks that median conserves the type of the data elements.
    def setUp(self):
        self.func = statistics.median

    def prepare_data(self):
        values = list(range(15))
        assert len(values) % 2 == 1
        # Keep shuffling until the data is no longer in sorted order.
        in_order = sorted(values)
        while values == in_order:
            random.shuffle(values)
        return values
1669
1670
class TestMedianLow(TestMedian, UnivariateTypeMixin):
    # Re-runs the TestMedian suite against median_low; the even-sized
    # cases are overridden with their own expected values.
    def setUp(self):
        self.func = statistics.median_low

    def test_even_ints(self):
        # median_low with an even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 3)

    def test_even_fractions(self):
        # median_low with an even number of Fractions, shuffled.
        values = [Fraction(k, 7) for k in range(1, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(3, 7))

    def test_even_decimals(self):
        # median_low with an even number of Decimals, shuffled.
        literals = ('1.1', '2.2', '3.3', '4.4', '5.5', '6.6')
        values = [Decimal(text) for text in literals]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('3.3'))
1696
1697
class TestMedianHigh(TestMedian, UnivariateTypeMixin):
    # Re-runs the TestMedian suite against median_high; the even-sized
    # cases are overridden with their own expected values.
    def setUp(self):
        self.func = statistics.median_high

    def test_even_ints(self):
        # median_high with an even number of ints.
        values = [1, 2, 3, 4, 5, 6]
        assert len(values) % 2 == 0
        self.assertEqual(self.func(values), 4)

    def test_even_fractions(self):
        # median_high with an even number of Fractions, shuffled.
        values = [Fraction(k, 7) for k in range(1, 7)]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Fraction(4, 7))

    def test_even_decimals(self):
        # median_high with an even number of Decimals, shuffled.
        literals = ('1.1', '2.2', '3.3', '4.4', '5.5', '6.6')
        values = [Decimal(text) for text in literals]
        assert len(values) % 2 == 0
        random.shuffle(values)
        self.assertEqual(self.func(values), Decimal('4.4'))
1723
1724
class TestMedianGrouped(TestMedian):
    # Tests for median_grouped.
    # It does not conserve the data element type, so the type-conservation
    # tests are not mixed in here.
    def setUp(self):
        self.func = statistics.median_grouped

    def test_odd_number_repeated(self):
        # median_grouped with repeated median values, odd-sized data.
        sample = [12, 13, 14, 14, 14, 15, 15]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample), 14)

        sample = [12, 13, 14, 14, 14, 14, 15]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample), 13.875)

        # Explicit class interval of 5.
        sample = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
        assert len(sample) % 2 == 1
        self.assertEqual(self.func(sample, 5), 19.375)

        # Explicit class interval of 2; only approximately representable.
        sample = [16, 18, 18, 18, 18, 20, 20, 20, 22, 22, 22, 24, 24, 26, 28]
        assert len(sample) % 2 == 1
        grouped = self.func(sample, 2)
        self.assertApproxEqual(grouped, 20.66666667, tol=1e-8)

    def test_even_number_repeated(self):
        # median_grouped with repeated median values, even-sized data.
        sample = [5, 10, 10, 15, 20, 20, 20, 25, 25, 30]
        assert len(sample) % 2 == 0
        grouped = self.func(sample, 5)
        self.assertApproxEqual(grouped, 19.16666667, tol=1e-8)

        sample = [2, 3, 4, 4, 4, 5]
        assert len(sample) % 2 == 0
        grouped = self.func(sample)
        self.assertApproxEqual(grouped, 3.83333333, tol=1e-8)

        sample = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(sample) % 2 == 0
        self.assertEqual(self.func(sample), 4.5)

        sample = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
        assert len(sample) % 2 == 0
        self.assertEqual(self.func(sample), 4.75)

    def test_repeated_single_value(self):
        # Override method from AverageMixin.
        # median_grouped does not conserve the data type, so the expected
        # value is the float equivalent of the repeated element.
        candidates = (5.3, 68, 4.3e17, Fraction(29, 101), Decimal('32.9714'))
        for value in candidates:
            expected = float(value)
            for repeats in (2, 5, 10, 20):
                self.assertEqual(self.func([value] * repeats), expected)

    def test_odd_fractions(self):
        # median_grouped with an odd number of Fractions, shuffled.
        sample = [Fraction(top, 4) for top in (5, 9, 13, 13, 17)]
        assert len(sample) % 2 == 1
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 3.0)

    def test_even_fractions(self):
        # median_grouped with an even number of Fractions, shuffled.
        sample = [Fraction(top, 4) for top in (5, 9, 13, 13, 17, 17)]
        assert len(sample) % 2 == 0
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 3.25)

    def test_odd_decimals(self):
        # median_grouped with an odd number of Decimals, shuffled.
        literals = ('5.5', '6.5', '6.5', '7.5', '8.5')
        sample = [Decimal(text) for text in literals]
        assert len(sample) % 2 == 1
        random.shuffle(sample)
        self.assertEqual(self.func(sample), 6.75)

    def test_even_decimals(self):
        # median_grouped with an even number of Decimals, shuffled.
        cases = [
            (('5.5', '5.5', '6.5', '6.5', '7.5', '8.5'), 6.5),
            (('5.5', '5.5', '6.5', '7.5', '7.5', '8.5'), 7.0),
        ]
        for literals, expected in cases:
            sample = [Decimal(text) for text in literals]
            assert len(sample) % 2 == 0
            random.shuffle(sample)
            self.assertEqual(self.func(sample), expected)

    def test_interval(self):
        # median_grouped called with an explicit interval argument.
        sample = [2.25, 2.5, 2.5, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        self.assertEqual(self.func(sample, 0.25), 2.875)

        sample = [2.25, 2.5, 2.5, 2.75, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75]
        grouped = self.func(sample, 0.25)
        self.assertApproxEqual(grouped, 2.83333333, tol=1e-8)

        sample = [220, 220, 240, 260, 260, 260, 260, 280, 280, 300, 320, 340]
        self.assertEqual(self.func(sample, 20), 265.0)

    def test_data_type_error(self):
        # str and bytes are rejected, both as data and as the interval.
        for bad_data in (["", "", ""], [b"", b"", b""]):
            with self.assertRaises(TypeError):
                self.func(bad_data)
        for bad_interval in ("", b""):
            with self.assertRaises(TypeError):
                self.func([1, 2, 3], bad_interval)
1837
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001838
class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
    # Tests for mode, the discrete version.
    def setUp(self):
        self.func = statistics.mode

    def prepare_data(self):
        """Overload method from UnivariateCommonMixin."""
        # The value 1 appears four times and every other value once, so
        # this data set has exactly one mode.
        return [1, 1, 1, 1, 3, 4, 7, 9, 0, 8, 2]

    def test_range_data(self):
        # Override test from UnivariateCommonMixin.
        values = range(20, 50, 3)
        self.assertEqual(self.func(values), 20)

    def test_nominal_data(self):
        # mode with nominal (non-numeric) data.
        self.assertEqual(self.func('abcbdb'), 'b')
        words = 'fe fi fo fum fi fi'.split()
        self.assertEqual(self.func(words), 'fi')

    def test_discrete_data(self):
        # mode with discrete numeric data: duplicate one value at a time.
        base = list(range(10))
        for repeated in range(10):
            sample = base + [repeated]
            random.shuffle(sample)
            self.assertEqual(self.func(sample), repeated)

    def test_bimodal_data(self):
        # mode with bimodal data.
        sample = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
        assert sample.count(2) == sample.count(6) == 4
        # Of the two modes, 2 is the one encountered first.
        self.assertEqual(self.func(sample), 2)

    def test_unique_data(self):
        # mode when every data point is unique.
        sample = list(range(10))
        # Every value ties, so the first one encountered (0) is expected.
        self.assertEqual(self.func(sample), 0)

    def test_none_data(self):
        # mode must raise TypeError when handed None as the data.

        # This needs an explicit test because the implementation of mode
        # uses collections.Counter, which accepts None and returns an
        # empty dict.
        with self.assertRaises(TypeError):
            self.func(None)

    def test_counter_data(self):
        # A Counter is treated like any other iterable.
        tally = collections.Counter([1, 1, 1, 2])
        # The keys of the counter are the data points, not the counts,
        # so the first mode encountered is 1.
        self.assertEqual(self.func(tally), 1)
1895
1896
class TestMultiMode(unittest.TestCase):

    def test_basics(self):
        # One mode, several tied modes, and empty input.
        multimode = statistics.multimode
        cases = [
            ('aabbbbbbbbcc', ['b']),
            ('aabbbbccddddeeffffgg', ['b', 'd', 'f']),
            ('', []),
        ]
        for text, modes in cases:
            self.assertEqual(multimode(text), modes)
1904
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001905
class TestFMean(unittest.TestCase):

    def test_basics(self):
        # fmean returns a float for every supported input kind.
        fmean = statistics.fmean
        cases = [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([Decimal('3.5'), Decimal('4.0'), Decimal('5.25')], 4.25, 'decimals'),
            ([Fraction(7, 2), Fraction(4, 1), Fraction(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, Fraction(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]
        for sample, expected, label in cases:
            outcome = fmean(sample)
            self.assertIs(type(outcome), float, label)
            self.assertEqual(outcome, expected, label)

    def test_error_cases(self):
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        self.assertRaises(StatisticsError, fmean, [])             # empty input
        self.assertRaises(StatisticsError, fmean, iter([]))       # empty iterator
        self.assertRaises(TypeError, fmean, None)                 # non-iterable input
        self.assertRaises(TypeError, fmean, [10, None, 20])       # non-numeric input
        self.assertRaises(TypeError, fmean)                       # missing data argument
        self.assertRaises(TypeError, fmean, [10, 20, 60], 70)     # too many arguments

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        nan = float('Nan')
        inf = float('Inf')
        nan_cases = [
            ([10, nan], 'nan'),
            ([nan, inf], 'nan and infinity'),
        ]
        for sample, label in nan_cases:
            self.assertTrue(math.isnan(fmean(sample)), label)
        self.assertTrue(math.isinf(fmean([10, inf])), 'infinity')
        # Opposite infinities cannot be summed.
        self.assertRaises(ValueError, fmean, [inf, -inf])
Nick Coghlanbfd68bf2014-02-08 19:44:16 +10001951
Larry Hastingsf5e987b2013-10-19 11:50:09 -07001952
1953# === Tests for variances and standard deviations ===
1954
class VarianceStdevMixin(UnivariateCommonMixin):
    # Mixin with the tests common to variance and standard deviation.

    # Subclasses should list this mixin before NumericTestCase in their
    # bases so that the rel attribute below is the one they see.  See
    # test_shift_data for why a tolerance is needed.

    rel = 1e-12

    def test_single_value(self):
        # A single value has zero deviation.
        singles = (11, 19.8, 4.6e14, Fraction(21, 34), Decimal('8.392'))
        for value in singles:
            self.assertEqual(self.func([value]), 0)

    def test_repeated_single_value(self):
        # One value repeated any number of times has zero deviation.
        singles = (7.2, 49, 8.1e15, Fraction(3, 7), Decimal('62.4802'))
        for value in singles:
            for repeats in (2, 3, 5, 15):
                self.assertEqual(self.func([value] * repeats), 0)

    def test_domain_error_regression(self):
        # Regression test for a domain error exception.
        # (Thanks to Geremy Condra.)
        sample = [0.123456789012345] * 10000
        # Every item is identical, so the variance should be exactly zero;
        # only a tiny amount of round-off error is tolerated.
        outcome = self.func(sample)
        self.assertApproxEqual(outcome, 0.0, tol=5e-17)
        self.assertGreaterEqual(outcome, 0)  # A negative result must fail.

    def test_shift_data(self):
        # Shifting all data by a constant should not change the variance
        # or stdev -- or at least not by much.

        # Because of rounding this is an ideal rather than a guarantee.
        # Some deviation from "no change at all" is allowed through the
        # tol and/or rel attributes, which subclasses may tighten or loosen.
        raw = [1.03, 1.27, 1.94, 2.04, 2.58, 3.14, 4.75, 4.98, 5.42, 6.78]
        baseline = self.func(raw)
        # Keep the offset modest: the larger it is, the more rounding error.
        offset = 1e5
        shifted = [value + offset for value in raw]
        self.assertApproxEqual(self.func(shifted), baseline)

    def test_shift_data_exact(self):
        # Like test_shift_data, but with integer data the result is exact.
        raw = [1, 3, 3, 4, 5, 7, 9, 10, 11, 16]
        assert all(x == int(x) for x in raw)
        baseline = self.func(raw)
        offset = 10**9
        shifted = [value + offset for value in raw]
        self.assertEqual(self.func(shifted), baseline)

    def test_iter_list_same(self):
        # An iterator and a list over the same data give the same result.

        # UnivariateCommonMixin has a similar test; this explicit one is
        # kept because an earlier design had variance and friends swap
        # between one- and two-pass algorithms, which would sometimes
        # give different results.
        sample = [random.uniform(-3, 8) for _ in range(1000)]
        from_list = self.func(sample)
        from_iterator = self.func(iter(sample))
        self.assertEqual(from_iterator, from_list)
2019
2020
class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Population variance tests.
    def setUp(self):
        self.func = statistics.pvariance

    def test_exact_uniform(self):
        # Compare against the exact variance of the shuffled ints 0..9999.
        sample = list(range(10000))
        random.shuffle(sample)
        exact = (10000**2 - 1) / 12  # Exact value.
        self.assertEqual(self.func(sample), exact)

    def test_ints(self):
        # Population variance of int data.
        self.assertEqual(self.func([4, 7, 13, 16]), 22.5)

    def test_fractions(self):
        # Population variance of Fraction data stays a Fraction.
        sample = [Fraction(top, 4) for top in (1, 1, 3, 7)]
        outcome = self.func(sample)
        self.assertEqual(outcome, Fraction(3, 8))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Population variance of Decimal data stays a Decimal.
        sample = [Decimal(text) for text in ("12.1", "12.2", "12.5", "12.9")]
        outcome = self.func(sample)
        self.assertEqual(outcome, Decimal('0.096875'))
        self.assertIsInstance(outcome, Decimal)
2056
2057
class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin):
    # Sample variance tests.
    def setUp(self):
        self.func = statistics.variance

    def test_single_value(self):
        # Override method from VarianceStdevMixin.
        # A single data point must raise StatisticsError.
        singles = (35, 24.7, 8.2e15, Fraction(19, 30), Decimal('4.2084'))
        for value in singles:
            with self.assertRaises(statistics.StatisticsError):
                self.func([value])

    def test_ints(self):
        # Sample variance of int data.
        self.assertEqual(self.func([4, 7, 13, 16]), 30)

    def test_fractions(self):
        # Sample variance of Fraction data stays a Fraction.
        sample = [Fraction(top, 4) for top in (1, 1, 3, 7)]
        outcome = self.func(sample)
        self.assertEqual(outcome, Fraction(1, 2))
        self.assertIsInstance(outcome, Fraction)

    def test_decimals(self):
        # Sample variance of Decimal data stays a Decimal.
        sample = [Decimal(whole) for whole in (2, 2, 7, 9)]
        exact = 4 * Decimal('9.5') / Decimal(3)
        outcome = self.func(sample)
        self.assertEqual(outcome, exact)
        self.assertIsInstance(outcome, Decimal)
2091
2092
class TestPStdev(VarianceStdevMixin, NumericTestCase):
    # Population standard deviation tests.
    def setUp(self):
        self.func = statistics.pstdev

    def test_compare_to_variance(self):
        # pstdev must equal the square root of pvariance on the same data.
        sample = [random.uniform(-17, 24) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.pvariance(sample))
        self.assertEqual(self.func(sample), root_of_variance)
2103
2104
class TestStdev(VarianceStdevMixin, NumericTestCase):
    # Sample standard deviation tests.
    def setUp(self):
        self.func = statistics.stdev

    def test_single_value(self):
        # Override method from VarianceStdevMixin.
        # A single data point must raise StatisticsError.
        singles = (81, 203.74, 3.9e14, Fraction(5, 21), Decimal('35.719'))
        for value in singles:
            with self.assertRaises(statistics.StatisticsError):
                self.func([value])

    def test_compare_to_variance(self):
        # stdev must equal the square root of variance on the same data.
        sample = [random.uniform(-2, 9) for _ in range(1000)]
        root_of_variance = math.sqrt(statistics.variance(sample))
        self.assertEqual(self.func(sample), root_of_variance)
2120
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002121
class TestGeometricMean(unittest.TestCase):

    def test_basics(self):
        geometric_mean = statistics.geometric_mean
        simple_cases = [
            ([54, 24, 36], 36.0),
            ([4.0, 9.0], 6.0),
            ([17.625], 17.625),
        ]
        for sample, expected in simple_cases:
            self.assertAlmostEqual(geometric_mean(sample), expected)

        # Cross-check against a reference value computed with Decimal
        # arithmetic over a variety of data sets.
        random.seed(86753095551212)
        datasets = [
            range(1, 100),
            range(1, 1_000),
            range(1, 10_000),
            range(500, 10_000, 3),
            range(10_000, 500, -3),
            [12, 17, 13, 5, 120, 7],
            [random.expovariate(50.0) for _ in range(1_000)],
            [random.lognormvariate(20.0, 3.0) for _ in range(2_000)],
            [random.triangular(2000, 3000, 2200) for _ in range(3_000)],
        ]
        for dataset in datasets:
            exponent = Decimal(1) / len(dataset)
            reference = math.prod(map(Decimal, dataset)) ** exponent
            outcome = geometric_mean(dataset)
            self.assertTrue(math.isclose(outcome, float(reference)))

    def test_various_input_types(self):
        geometric_mean = statistics.geometric_mean
        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
        expected = 4.18886
        cases = [
            ([3.5, 4.0, 5.25], 'floats'),
            ([Decimal('3.5'), Decimal('4.0'), Decimal('5.25')], 'decimals'),
            ([Fraction(7, 2), Fraction(4, 1), Fraction(21, 4)], 'fractions'),
            ([3.5, 4, Fraction(21, 4)], 'mixed types'),
            ((3.5, 4.0, 5.25), 'tuple'),
            (iter([3.5, 4.0, 5.25]), 'iterator'),
        ]
        for sample, label in cases:
            outcome = geometric_mean(sample)
            self.assertIs(type(outcome), float, label)
            self.assertAlmostEqual(outcome, expected, places=5)

    def test_big_and_small(self):
        geometric_mean = statistics.geometric_mean
        base = (54.0, 24.0, 36.0)

        # Avoid overflow to infinity
        large = 2.0 ** 1000
        big_gm = geometric_mean([value * large for value in base])
        self.assertTrue(math.isclose(big_gm, 36.0 * large))
        self.assertFalse(math.isinf(big_gm))

        # Avoid underflow to zero
        small = 2.0 ** -1000
        small_gm = geometric_mean([value * small for value in base])
        self.assertTrue(math.isclose(small_gm, 36.0 * small))
        self.assertNotEqual(small_gm, 0.0)

    def test_error_cases(self):
        geometric_mean = statistics.geometric_mean
        StatisticsError = statistics.StatisticsError
        statistics_errors = [
            [],                     # empty input
            [3.5, 0.0, 5.25],       # zero input
            [3.5, -4.0, 5.25],      # negative input
            iter([]),               # empty iterator
        ]
        for bad_data in statistics_errors:
            with self.assertRaises(StatisticsError):
                geometric_mean(bad_data)
        self.assertRaises(TypeError, geometric_mean, None)              # non-iterable input
        self.assertRaises(TypeError, geometric_mean, [10, None, 20])    # non-numeric input
        self.assertRaises(TypeError, geometric_mean)                    # missing data argument
        self.assertRaises(TypeError, geometric_mean, [10, 20, 60], 70)  # too many arguments

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        geometric_mean = statistics.geometric_mean
        nan = float('Nan')
        inf = float('Inf')
        nan_cases = [
            ([10, nan], 'nan'),
            ([nan, inf], 'nan and infinity'),
        ]
        for sample, label in nan_cases:
            self.assertTrue(math.isnan(geometric_mean(sample)), label)
        self.assertTrue(math.isinf(geometric_mean([10, inf])), 'infinity')
        self.assertRaises(ValueError, geometric_mean, [inf, -inf])
2209
Raymond Hettinger9013ccf2019-04-23 00:06:35 -07002210
class TestQuantiles(unittest.TestCase):
    # Tests for statistics.quantiles() with both the default (exclusive)
    # method and method='inclusive'.

    def test_specific_cases(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.EXC function in MS Excel.
        quantiles = statistics.quantiles
        data = [120, 200, 250, 320, 350]
        random.shuffle(data)

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        for n, expected in [
            (1, []),
            (2, [250.0]),
            (3, [200.0, 320.0]),
            (4, [160.0, 250.0, 335.0]),
            (5, [136.0, 220.0, 292.0, 344.0]),
            (6, [120.0, 200.0, 250.0, 320.0, 350.0]),
            (8, [100.0, 160.0, 212.5, 250.0, 302.5, 335.0, 357.5]),
            (10, [88.0, 136.0, 184.0, 220.0, 250.0, 292.0, 326.0, 344.0, 362.0]),
            (12, [80.0, 120.0, 160.0, 200.0, 225.0, 250.0, 285.0, 320.0, 335.0,
                  350.0, 365.0]),
            (15, [72.0, 104.0, 136.0, 168.0, 200.0, 220.0, 240.0, 264.0, 292.0,
                  320.0, 332.0, 344.0, 356.0, 368.0]),
        ]:
            self.assertEqual(expected, quantiles(data, n=n))
            self.assertEqual(len(quantiles(data, n=n)), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n)
                # The generator must sit inside all(); otherwise assertTrue
                # receives a (always truthy) generator object and the type
                # check never actually runs.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Quantiles should be idempotent
            if len(expected) >= 2:
                self.assertEqual(quantiles(expected, n=n), expected)
            # Cross-check against method='inclusive' which should give
            # the same result after adding in minimum and maximum values
            # extrapolated from the two lowest and two highest points.
            sdata = sorted(data)
            lo = 2 * sdata[0] - sdata[1]
            hi = 2 * sdata[-1] - sdata[-2]
            padded_data = data + [lo, hi]
            self.assertEqual(
                quantiles(data, n=n),
                quantiles(padded_data, n=n, method='inclusive'),
                (n, data),
            )
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n)
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data)
            self.assertEqual(q2, statistics.median(data))

    def test_specific_cases_inclusive(self):
        # Match results computed by hand and cross-checked
        # against the PERCENTILE.INC function in MS Excel
        # and against the quantile() function in SciPy.
        quantiles = statistics.quantiles
        data = [100, 200, 400, 800]
        random.shuffle(data)

        def f(x):
            # Arbitrary affine map used for the translation/scaling check.
            return 3.5 * x - 1234.675

        for n, expected in [
            (1, []),
            (2, [300.0]),
            (3, [200.0, 400.0]),
            (4, [175.0, 300.0, 500.0]),
            (5, [160.0, 240.0, 360.0, 560.0]),
            (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
            (8, [137.5, 175, 225.0, 300.0, 375.0, 500.0, 650.0]),
            (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
            (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0,
                  500.0, 600.0, 700.0]),
            (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0,
                  400.0, 480.0, 560.0, 640.0, 720.0]),
        ]:
            self.assertEqual(expected, quantiles(data, n=n, method="inclusive"))
            self.assertEqual(len(quantiles(data, n=n, method="inclusive")), n - 1)
            # Preserve datatype when possible
            for datatype in (float, Decimal, Fraction):
                result = quantiles(map(datatype, data), n=n, method="inclusive")
                # See test_specific_cases: the generator belongs inside
                # all() so that every element's type is really checked.
                self.assertTrue(all(type(x) is datatype for x in result))
                self.assertEqual(result, list(map(datatype, expected)))
            # Invariant under translation and scaling
            exp = list(map(f, expected))
            act = quantiles(map(f, data), n=n, method="inclusive")
            self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
        # Natural deciles
        self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
        # Whenever n is smaller than the number of data points, running
        # method='inclusive' should give the same result as method='exclusive'
        # after the two included extreme points are removed.
        data = [random.randrange(10_000) for i in range(501)]
        actual = quantiles(data, n=32, method='inclusive')
        data.remove(min(data))
        data.remove(max(data))
        expected = quantiles(data, n=32)
        self.assertEqual(expected, actual)
        # Q2 agrees with median()
        for k in range(2, 60):
            data = random.choices(range(100), k=k)
            q1, q2, q3 = quantiles(data, method='inclusive')
            self.assertEqual(q2, statistics.median(data))

    def test_equal_inputs(self):
        # Data consisting of one repeated value yields that value for
        # every quartile, with either method.
        quantiles = statistics.quantiles
        for n in range(2, 10):
            data = [10.0] * n
            self.assertEqual(quantiles(data), [10.0, 10.0, 10.0])
            self.assertEqual(quantiles(data, method='inclusive'),
                             [10.0, 10.0, 10.0])

    def test_equal_sized_groups(self):
        # The cut points should split the sorted data into groups of
        # (nearly) equal size.
        quantiles = statistics.quantiles
        total = 10_000
        data = [random.expovariate(0.2) for i in range(total)]
        while len(set(data)) != total:
            data.append(random.expovariate(0.2))
        data.sort()

        # Cases where the group size exactly divides the total
        for n in (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000):
            group_size = total // n
            self.assertEqual(
                [bisect.bisect(data, q) for q in quantiles(data, n=n)],
                list(range(group_size, total, group_size)))

        # When the group sizes can't be exactly equal, they should
        # differ by no more than one
        for n in (13, 19, 59, 109, 211, 571, 1019, 1907, 5261, 9769):
            group_sizes = {total // n, total // n + 1}
            pos = [bisect.bisect(data, q) for q in quantiles(data, n=n)]
            sizes = {q - p for p, q in zip(pos, pos[1:])}
            self.assertTrue(sizes <= group_sizes)

    def test_error_cases(self):
        quantiles = statistics.quantiles
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(TypeError):
            quantiles()                         # Missing arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 13, n=4)    # Too many arguments
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], 4)          # n is a positional argument
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=0)        # n is zero
        with self.assertRaises(StatisticsError):
            quantiles([10, 20, 30], n=-1)       # n is negative
        with self.assertRaises(TypeError):
            quantiles([10, 20, 30], n=1.5)      # n is not an integer
        with self.assertRaises(ValueError):
            quantiles([10, 20, 30], method='X') # method is unknown
        with self.assertRaises(StatisticsError):
            quantiles([10], n=4)                # not enough data points
        with self.assertRaises(TypeError):
            quantiles([10, None, 30], n=4)      # data is non-numeric
2373
2374
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002375class TestNormalDist:
Raymond Hettinger11c79532019-02-23 14:44:07 -08002376
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002377 # General note on precision: The pdf(), cdf(), and overlap() methods
2378 # depend on functions in the math libraries that do not make
2379 # explicit accuracy guarantees. Accordingly, some of the accuracy
2380 # tests below may fail if the underlying math functions are
2381 # inaccurate. There isn't much we can do about this short of
2382 # implementing our own implementations from scratch.
2383
Raymond Hettinger11c79532019-02-23 14:44:07 -08002384 def test_slots(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002385 nd = self.module.NormalDist(300, 23)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002386 with self.assertRaises(TypeError):
2387 vars(nd)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002388 self.assertEqual(tuple(nd.__slots__), ('_mu', '_sigma'))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002389
2390 def test_instantiation_and_attributes(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002391 nd = self.module.NormalDist(500, 17)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002392 self.assertEqual(nd.mean, 500)
2393 self.assertEqual(nd.stdev, 17)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002394 self.assertEqual(nd.variance, 17**2)
2395
2396 # default arguments
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002397 nd = self.module.NormalDist()
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002398 self.assertEqual(nd.mean, 0)
2399 self.assertEqual(nd.stdev, 1)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002400 self.assertEqual(nd.variance, 1**2)
2401
2402 # error case: negative sigma
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002403 with self.assertRaises(self.module.StatisticsError):
2404 self.module.NormalDist(500, -10)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002405
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002406 # verify that subclass type is honored
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002407 class NewNormalDist(self.module.NormalDist):
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002408 pass
2409 nnd = NewNormalDist(200, 5)
2410 self.assertEqual(type(nnd), NewNormalDist)
2411
Raymond Hettinger11c79532019-02-23 14:44:07 -08002412 def test_alternative_constructor(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002413 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002414 data = [96, 107, 90, 92, 110]
2415 # list input
2416 self.assertEqual(NormalDist.from_samples(data), NormalDist(99, 9))
2417 # tuple input
2418 self.assertEqual(NormalDist.from_samples(tuple(data)), NormalDist(99, 9))
2419 # iterator input
2420 self.assertEqual(NormalDist.from_samples(iter(data)), NormalDist(99, 9))
2421 # error cases
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002422 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002423 NormalDist.from_samples([]) # empty input
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002424 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002425 NormalDist.from_samples([10]) # only one input
2426
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002427 # verify that subclass type is honored
2428 class NewNormalDist(NormalDist):
2429 pass
2430 nnd = NewNormalDist.from_samples(data)
2431 self.assertEqual(type(nnd), NewNormalDist)
2432
Raymond Hettinger11c79532019-02-23 14:44:07 -08002433 def test_sample_generation(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002434 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002435 mu, sigma = 10_000, 3.0
2436 X = NormalDist(mu, sigma)
2437 n = 1_000
2438 data = X.samples(n)
2439 self.assertEqual(len(data), n)
2440 self.assertEqual(set(map(type, data)), {float})
2441 # mean(data) expected to fall within 8 standard deviations
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002442 xbar = self.module.mean(data)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002443 self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)
2444
2445 # verify that seeding makes reproducible sequences
2446 n = 100
2447 data1 = X.samples(n, seed='happiness and joy')
2448 data2 = X.samples(n, seed='trouble and despair')
2449 data3 = X.samples(n, seed='happiness and joy')
2450 data4 = X.samples(n, seed='trouble and despair')
2451 self.assertEqual(data1, data3)
2452 self.assertEqual(data2, data4)
2453 self.assertNotEqual(data1, data2)
2454
Raymond Hettinger11c79532019-02-23 14:44:07 -08002455 def test_pdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002456 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002457 X = NormalDist(100, 15)
2458 # Verify peak around center
2459 self.assertLess(X.pdf(99), X.pdf(100))
2460 self.assertLess(X.pdf(101), X.pdf(100))
2461 # Test symmetry
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002462 for i in range(50):
2463 self.assertAlmostEqual(X.pdf(100 - i), X.pdf(100 + i))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002464 # Test vs CDF
2465 dx = 2.0 ** -10
2466 for x in range(90, 111):
2467 est_pdf = (X.cdf(x + dx) - X.cdf(x)) / dx
2468 self.assertAlmostEqual(X.pdf(x), est_pdf, places=4)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002469 # Test vs table of known values -- CRC 26th Edition
2470 Z = NormalDist()
2471 for x, px in enumerate([
2472 0.3989, 0.3989, 0.3989, 0.3988, 0.3986,
2473 0.3984, 0.3982, 0.3980, 0.3977, 0.3973,
2474 0.3970, 0.3965, 0.3961, 0.3956, 0.3951,
2475 0.3945, 0.3939, 0.3932, 0.3925, 0.3918,
2476 0.3910, 0.3902, 0.3894, 0.3885, 0.3876,
2477 0.3867, 0.3857, 0.3847, 0.3836, 0.3825,
2478 0.3814, 0.3802, 0.3790, 0.3778, 0.3765,
2479 0.3752, 0.3739, 0.3725, 0.3712, 0.3697,
2480 0.3683, 0.3668, 0.3653, 0.3637, 0.3621,
2481 0.3605, 0.3589, 0.3572, 0.3555, 0.3538,
2482 ]):
2483 self.assertAlmostEqual(Z.pdf(x / 100.0), px, places=4)
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002484 self.assertAlmostEqual(Z.pdf(-x / 100.0), px, places=4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002485 # Error case: variance is zero
2486 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002487 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002488 Y.pdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002489 # Special values
2490 self.assertEqual(X.pdf(float('-Inf')), 0.0)
2491 self.assertEqual(X.pdf(float('Inf')), 0.0)
2492 self.assertTrue(math.isnan(X.pdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002493
2494 def test_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002495 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002496 X = NormalDist(100, 15)
2497 cdfs = [X.cdf(x) for x in range(1, 200)]
2498 self.assertEqual(set(map(type, cdfs)), {float})
2499 # Verify montonic
2500 self.assertEqual(cdfs, sorted(cdfs))
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002501 # Verify center (should be exact)
2502 self.assertEqual(X.cdf(100), 0.50)
Raymond Hettinger18ee50d2019-03-06 02:31:14 -08002503 # Check against a table of known values
2504 # https://en.wikipedia.org/wiki/Standard_normal_table#Cumulative
2505 Z = NormalDist()
2506 for z, cum_prob in [
2507 (0.00, 0.50000), (0.01, 0.50399), (0.02, 0.50798),
2508 (0.14, 0.55567), (0.29, 0.61409), (0.33, 0.62930),
2509 (0.54, 0.70540), (0.60, 0.72575), (1.17, 0.87900),
2510 (1.60, 0.94520), (2.05, 0.97982), (2.89, 0.99807),
2511 (3.52, 0.99978), (3.98, 0.99997), (4.07, 0.99998),
2512 ]:
2513 self.assertAlmostEqual(Z.cdf(z), cum_prob, places=5)
2514 self.assertAlmostEqual(Z.cdf(-z), 1.0 - cum_prob, places=5)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002515 # Error case: variance is zero
2516 Y = NormalDist(100, 0)
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002517 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger11c79532019-02-23 14:44:07 -08002518 Y.cdf(90)
Raymond Hettingeref17fdb2019-02-28 09:16:25 -08002519 # Special values
2520 self.assertEqual(X.cdf(float('-Inf')), 0.0)
2521 self.assertEqual(X.cdf(float('Inf')), 1.0)
2522 self.assertTrue(math.isnan(X.cdf(float('NaN'))))
Raymond Hettinger11c79532019-02-23 14:44:07 -08002523
Neil Schemenauer52a48e62019-07-30 11:08:18 -07002524 @support.skip_if_pgo_task
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002525 def test_inv_cdf(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002526 NormalDist = self.module.NormalDist
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002527
2528 # Center case should be exact.
2529 iq = NormalDist(100, 15)
2530 self.assertEqual(iq.inv_cdf(0.50), iq.mean)
2531
2532 # Test versus a published table of known percentage points.
2533 # See the second table at the bottom of the page here:
2534 # http://people.bath.ac.uk/masss/tables/normaltable.pdf
2535 Z = NormalDist()
2536 pp = {5.0: (0.000, 1.645, 2.576, 3.291, 3.891,
2537 4.417, 4.892, 5.327, 5.731, 6.109),
2538 2.5: (0.674, 1.960, 2.807, 3.481, 4.056,
2539 4.565, 5.026, 5.451, 5.847, 6.219),
2540 1.0: (1.282, 2.326, 3.090, 3.719, 4.265,
2541 4.753, 5.199, 5.612, 5.998, 6.361)}
2542 for base, row in pp.items():
2543 for exp, x in enumerate(row, start=1):
2544 p = base * 10.0 ** (-exp)
2545 self.assertAlmostEqual(-Z.inv_cdf(p), x, places=3)
2546 p = 1.0 - p
2547 self.assertAlmostEqual(Z.inv_cdf(p), x, places=3)
2548
2549 # Match published example for MS Excel
2550 # https://support.office.com/en-us/article/norm-inv-function-54b30935-fee7-493c-bedb-2278a9db7e13
2551 self.assertAlmostEqual(NormalDist(40, 1.5).inv_cdf(0.908789), 42.000002)
2552
2553 # One million equally spaced probabilities
2554 n = 2**20
2555 for p in range(1, n):
2556 p /= n
2557 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2558
2559 # One hundred ever smaller probabilities to test tails out to
2560 # extreme probabilities: 1 / 2**50 and (2**50-1) / 2 ** 50
2561 for e in range(1, 51):
2562 p = 2.0 ** (-e)
2563 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2564 p = 1.0 - p
2565 self.assertAlmostEqual(iq.cdf(iq.inv_cdf(p)), p)
2566
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002567 # Now apply cdf() first. Near the tails, the round-trip loses
2568 # precision and is ill-conditioned (small changes in the inputs
2569 # give large changes in the output), so only check to 5 places.
2570 for x in range(200):
2571 self.assertAlmostEqual(iq.inv_cdf(iq.cdf(x)), x, places=5)
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002572
2573 # Error cases:
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002574 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002575 iq.inv_cdf(0.0) # p is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002576 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002577 iq.inv_cdf(-0.1) # p under zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002578 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002579 iq.inv_cdf(1.0) # p is one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002580 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002581 iq.inv_cdf(1.1) # p over one
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002582 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002583 iq = NormalDist(100, 0) # sigma is zero
Raymond Hettinger714c60d2019-03-18 20:17:14 -07002584 iq.inv_cdf(0.5)
2585
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002586 # Special values
2587 self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
2588
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002589 def test_quantiles(self):
2590 # Quartiles of a standard normal distribution
2591 Z = self.module.NormalDist()
2592 for n, expected in [
2593 (1, []),
2594 (2, [0.0]),
2595 (3, [-0.4307, 0.4307]),
2596 (4 ,[-0.6745, 0.0, 0.6745]),
2597 ]:
2598 actual = Z.quantiles(n=n)
2599 self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
2600 for e, a in zip(expected, actual)))
2601
Raymond Hettinger318d5372019-03-06 22:59:40 -08002602 def test_overlap(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002603 NormalDist = self.module.NormalDist
Raymond Hettinger318d5372019-03-06 22:59:40 -08002604
2605 # Match examples from Imman and Bradley
2606 for X1, X2, published_result in [
2607 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0), 0.80258),
2608 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0), 0.60993),
2609 ]:
2610 self.assertAlmostEqual(X1.overlap(X2), published_result, places=4)
2611 self.assertAlmostEqual(X2.overlap(X1), published_result, places=4)
2612
2613 # Check against integration of the PDF
2614 def overlap_numeric(X, Y, *, steps=8_192, z=5):
2615 'Numerical integration cross-check for overlap() '
2616 fsum = math.fsum
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002617 center = (X.mean + Y.mean) / 2.0
2618 width = z * max(X.stdev, Y.stdev)
Raymond Hettinger318d5372019-03-06 22:59:40 -08002619 start = center - width
2620 dx = 2.0 * width / steps
2621 x_arr = [start + i*dx for i in range(steps)]
2622 xp = list(map(X.pdf, x_arr))
2623 yp = list(map(Y.pdf, x_arr))
2624 total = max(fsum(xp), fsum(yp))
2625 return fsum(map(min, xp, yp)) / total
2626
2627 for X1, X2 in [
2628 # Examples from Imman and Bradley
2629 (NormalDist(0.0, 2.0), NormalDist(1.0, 2.0)),
2630 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2631 # Example from https://www.rasch.org/rmt/rmt101r.htm
2632 (NormalDist(0.0, 1.0), NormalDist(1.0, 2.0)),
2633 # Gender heights from http://www.usablestats.com/lessons/normal
2634 (NormalDist(70, 4), NormalDist(65, 3.5)),
2635 # Misc cases with equal standard deviations
2636 (NormalDist(100, 15), NormalDist(110, 15)),
2637 (NormalDist(-100, 15), NormalDist(110, 15)),
2638 (NormalDist(-100, 15), NormalDist(-110, 15)),
2639 # Misc cases with unequal standard deviations
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002640 (NormalDist(100, 12), NormalDist(100, 15)),
Raymond Hettinger318d5372019-03-06 22:59:40 -08002641 (NormalDist(100, 12), NormalDist(110, 15)),
2642 (NormalDist(100, 12), NormalDist(150, 15)),
2643 (NormalDist(100, 12), NormalDist(150, 35)),
2644 # Misc cases with small values
2645 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.003)),
2646 (NormalDist(1.000, 0.002), NormalDist(1.006, 0.0003)),
2647 (NormalDist(1.000, 0.002), NormalDist(1.001, 0.099)),
2648 ]:
2649 self.assertAlmostEqual(X1.overlap(X2), overlap_numeric(X1, X2), places=5)
2650 self.assertAlmostEqual(X2.overlap(X1), overlap_numeric(X1, X2), places=5)
2651
2652 # Error cases
2653 X = NormalDist()
2654 with self.assertRaises(TypeError):
2655 X.overlap() # too few arguments
2656 with self.assertRaises(TypeError):
2657 X.overlap(X, X) # too may arguments
2658 with self.assertRaises(TypeError):
2659 X.overlap(None) # right operand not a NormalDist
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002660 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002661 X.overlap(NormalDist(1, 0)) # right operand sigma is zero
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002662 with self.assertRaises(self.module.StatisticsError):
Raymond Hettinger318d5372019-03-06 22:59:40 -08002663 NormalDist(1, 0).overlap(X) # left operand sigma is zero
2664
Raymond Hettinger70f027d2020-04-16 10:25:14 -07002665 def test_zscore(self):
2666 NormalDist = self.module.NormalDist
2667 X = NormalDist(100, 15)
2668 self.assertEqual(X.zscore(142), 2.8)
2669 self.assertEqual(X.zscore(58), -2.8)
2670 self.assertEqual(X.zscore(100), 0.0)
2671 with self.assertRaises(TypeError):
2672 X.zscore() # too few arguments
2673 with self.assertRaises(TypeError):
2674 X.zscore(1, 1) # too may arguments
2675 with self.assertRaises(TypeError):
2676 X.zscore(None) # non-numeric type
2677 with self.assertRaises(self.module.StatisticsError):
2678 NormalDist(1, 0).zscore(100) # sigma is zero
2679
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002680 def test_properties(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002681 X = self.module.NormalDist(100, 15)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002682 self.assertEqual(X.mean, 100)
Raymond Hettinger4db25d52019-09-08 16:57:58 -07002683 self.assertEqual(X.median, 100)
2684 self.assertEqual(X.mode, 100)
Raymond Hettinger9e456bc2019-02-24 11:44:55 -08002685 self.assertEqual(X.stdev, 15)
2686 self.assertEqual(X.variance, 225)
2687
Raymond Hettinger11c79532019-02-23 14:44:07 -08002688 def test_same_type_addition_and_subtraction(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002689 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002690 X = NormalDist(100, 12)
2691 Y = NormalDist(40, 5)
2692 self.assertEqual(X + Y, NormalDist(140, 13)) # __add__
2693 self.assertEqual(X - Y, NormalDist(60, 13)) # __sub__
2694
2695 def test_translation_and_scaling(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002696 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002697 X = NormalDist(100, 15)
2698 y = 10
2699 self.assertEqual(+X, NormalDist(100, 15)) # __pos__
2700 self.assertEqual(-X, NormalDist(-100, 15)) # __neg__
2701 self.assertEqual(X + y, NormalDist(110, 15)) # __add__
2702 self.assertEqual(y + X, NormalDist(110, 15)) # __radd__
2703 self.assertEqual(X - y, NormalDist(90, 15)) # __sub__
2704 self.assertEqual(y - X, NormalDist(-90, 15)) # __rsub__
2705 self.assertEqual(X * y, NormalDist(1000, 150)) # __mul__
2706 self.assertEqual(y * X, NormalDist(1000, 150)) # __rmul__
2707 self.assertEqual(X / y, NormalDist(10, 1.5)) # __truediv__
Raymond Hettinger1f58f4f2019-03-06 23:23:55 -08002708 with self.assertRaises(TypeError): # __rtruediv__
Raymond Hettinger11c79532019-02-23 14:44:07 -08002709 y / X
2710
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002711 def test_unary_operations(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002712 NormalDist = self.module.NormalDist
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002713 X = NormalDist(100, 12)
2714 Y = +X
2715 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002716 self.assertEqual(X.mean, Y.mean)
2717 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002718 Y = -X
2719 self.assertIsNot(X, Y)
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002720 self.assertEqual(X.mean, -Y.mean)
2721 self.assertEqual(X.stdev, Y.stdev)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002722
Raymond Hettinger11c79532019-02-23 14:44:07 -08002723 def test_equality(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002724 NormalDist = self.module.NormalDist
Raymond Hettinger11c79532019-02-23 14:44:07 -08002725 nd1 = NormalDist()
2726 nd2 = NormalDist(2, 4)
2727 nd3 = NormalDist()
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002728 nd4 = NormalDist(2, 4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002729 nd5 = NormalDist(2, 8)
2730 nd6 = NormalDist(8, 4)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002731 self.assertNotEqual(nd1, nd2)
2732 self.assertEqual(nd1, nd3)
Raymond Hettinger2afb5982019-03-20 13:28:59 -07002733 self.assertEqual(nd2, nd4)
Raymond Hettinger5eabec02019-10-18 14:20:35 -07002734 self.assertNotEqual(nd2, nd5)
2735 self.assertNotEqual(nd2, nd6)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002736
2737 # Test NotImplemented when types are different
2738 class A:
2739 def __eq__(self, other):
2740 return 10
2741 a = A()
2742 self.assertEqual(nd1.__eq__(a), NotImplemented)
2743 self.assertEqual(nd1 == a, 10)
2744 self.assertEqual(a == nd1, 10)
2745
2746 # All subclasses to compare equal giving the same behavior
2747 # as list, tuple, int, float, complex, str, dict, set, etc.
2748 class SizedNormalDist(NormalDist):
2749 def __init__(self, mu, sigma, n):
2750 super().__init__(mu, sigma)
2751 self.n = n
2752 s = SizedNormalDist(100, 15, 57)
2753 nd4 = NormalDist(100, 15)
2754 self.assertEqual(s, nd4)
2755
2756 # Don't allow duck type equality because we wouldn't
2757 # want a lognormal distribution to compare equal
2758 # to a normal distribution with the same parameters
2759 class LognormalDist:
2760 def __init__(self, mu, sigma):
2761 self.mu = mu
2762 self.sigma = sigma
2763 lnd = LognormalDist(100, 15)
2764 nd = NormalDist(100, 15)
2765 self.assertNotEqual(nd, lnd)
2766
2767 def test_pickle_and_copy(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002768 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002769 nd1 = copy.copy(nd)
2770 self.assertEqual(nd, nd1)
2771 nd2 = copy.deepcopy(nd)
2772 self.assertEqual(nd, nd2)
2773 nd3 = pickle.loads(pickle.dumps(nd))
2774 self.assertEqual(nd, nd3)
2775
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002776 def test_hashability(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002777 ND = self.module.NormalDist
Raymond Hettinger02c91f52019-07-21 00:34:47 -07002778 s = {ND(100, 15), ND(100.0, 15.0), ND(100, 10), ND(95, 15), ND(100, 15)}
2779 self.assertEqual(len(s), 3)
2780
Raymond Hettinger11c79532019-02-23 14:44:07 -08002781 def test_repr(self):
Dong-hee Na8ad22a42019-08-25 02:51:20 +09002782 nd = self.module.NormalDist(37.5, 5.625)
Raymond Hettinger11c79532019-02-23 14:44:07 -08002783 self.assertEqual(repr(nd), 'NormalDist(mu=37.5, sigma=5.625)')
2784
# Swapping sys.modules['statistics'] is needed to avoid this error:
#   _pickle.PicklingError:
#   Can't pickle <class 'statistics.NormalDist'>:
#   it's not the same object as statistics.NormalDist
class TestNormalDistPython(unittest.TestCase, TestNormalDist):
    """Run the TestNormalDist checks with ``module`` bound to py_statistics."""

    module = py_statistics

    def setUp(self):
        # Register the implementation under test as 'statistics' for the
        # duration of each test.
        sys.modules.update(statistics=self.module)

    def tearDown(self):
        # Put the normally imported statistics module back.
        sys.modules.update(statistics=statistics)
2796
2797
@unittest.skipUnless(c_statistics, 'requires _statistics')
class TestNormalDistC(unittest.TestCase, TestNormalDist):
    """Run the TestNormalDist checks with ``module`` bound to c_statistics.

    The whole class is skipped when ``c_statistics`` is falsy.
    """

    module = c_statistics

    def setUp(self):
        # Register the implementation under test as 'statistics' for the
        # duration of each test.
        sys.modules.update(statistics=self.module)

    def tearDown(self):
        # Put the normally imported statistics module back.
        sys.modules.update(statistics=statistics)
2805 sys.modules['statistics'] = statistics
2806
Larry Hastingsf5e987b2013-10-19 11:50:09 -07002807
2808# === Run tests ===
2809
def load_tests(loader, tests, ignore):
    """Hook for the unittest load_tests protocol.

    Appends this module's doctests to *tests* and returns that same
    suite.  *loader* and *ignore* are accepted but not used.
    """
    # DocTestSuite() is called with no argument so that it collects the
    # doctests of the module this function lives in.
    docstring_checks = doctest.DocTestSuite()
    tests.addTests(docstring_checks)
    return tests
2814
2815
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()