from test import support, seq_tests
import unittest

import gc
import pickle

# For tuple hashes, we normally only run a test to ensure that we get
# the same results across platforms in a handful of cases. If that's
# so, there's no real point to running more. Set RUN_ALL_HASH_TESTS to
# run more anyway. That's usually of real interest only when analyzing,
# or changing, the hash algorithm. In which case it's usually also
# most useful to set JUST_SHOW_HASH_RESULTS, to see all the results
# instead of wrestling with test "failures". See the bottom of the
# file for extensive notes on what we're testing here and why.
RUN_ALL_HASH_TESTS = False
JUST_SHOW_HASH_RESULTS = False # if RUN_ALL_HASH_TESTS, just display
                               # results instead of failing

class TupleTest(seq_tests.CommonTest):
    type2test = tuple

    def test_getitem_error(self):
        msg = "tuple indices must be integers or slices"
        with self.assertRaisesRegex(TypeError, msg):
            ()['a']

    def test_constructors(self):
        super().test_constructors()
        # calling built-in types without argument must return empty
        self.assertEqual(tuple(), ())
        t0_3 = (0, 1, 2, 3)
        t0_3_bis = tuple(t0_3)
        self.assertTrue(t0_3 is t0_3_bis)
        self.assertEqual(tuple([]), ())
        self.assertEqual(tuple([0, 1, 2, 3]), (0, 1, 2, 3))
        self.assertEqual(tuple(''), ())
        self.assertEqual(tuple('spam'), ('s', 'p', 'a', 'm'))
        self.assertEqual(tuple(x for x in range(10) if x % 2),
                         (1, 3, 5, 7, 9))

    def test_keyword_args(self):
        with self.assertRaisesRegex(TypeError, 'keyword argument'):
            tuple(sequence=())

    def test_truth(self):
        super().test_truth()
        self.assertTrue(not ())
        self.assertTrue((42, ))

    def test_len(self):
        super().test_len()
        self.assertEqual(len(()), 0)
        self.assertEqual(len((0,)), 1)
        self.assertEqual(len((0, 1, 2)), 3)

    def test_iadd(self):
        super().test_iadd()
        u = (0, 1)
        u2 = u
        u += (2, 3)
        self.assertTrue(u is not u2)

    def test_imul(self):
        super().test_imul()
        u = (0, 1)
        u2 = u
        u *= 3
        self.assertTrue(u is not u2)

    def test_tupleresizebug(self):
        # Check that a specific bug in _PyTuple_Resize() is squashed.
        def f():
            for i in range(1000):
                yield i
        self.assertEqual(list(tuple(f())), list(range(1000)))

    # We expect tuples whose base components have deterministic hashes to
    # have deterministic hashes too - and, indeed, the same hashes across
    # platforms with hash codes of the same bit width.
    def test_hash_exact(self):
        def check_one_exact(t, e32, e64):
            got = hash(t)
            expected = e32 if support.NHASHBITS == 32 else e64
            if got != expected:
                msg = f"FAIL hash({t!r}) == {got} != {expected}"
                self.fail(msg)

        check_one_exact((), 750394483, 5740354900026072187)
        check_one_exact((0,), 1214856301, -8753497827991233192)
        check_one_exact((0, 0), -168982784, -8458139203682520985)
        check_one_exact((0.5,), 2077348973, -408149959306781352)
        check_one_exact((0.5, (), (-2, 3, (4, 6))), 714642271,
                        -1845940830829704396)

    # Various tests for hashing of tuples to check that we get few collisions.
    # Does something only if RUN_ALL_HASH_TESTS is true.
    #
    # Earlier versions of the tuple hash algorithm had massive collisions
    # reported at:
    # - https://bugs.python.org/issue942952
    # - https://bugs.python.org/issue34751
    def test_hash_optional(self):
        from itertools import product

        if not RUN_ALL_HASH_TESTS:
            return

        # If specified, `expected` is a 2-tuple of expected
        # (number_of_collisions, pileup) values, and the test fails if
        # those aren't the values we get. Also if specified, the test
        # fails if z > `zlimit`.
        def tryone_inner(tag, nbins, hashes, expected=None, zlimit=None):
            from collections import Counter

            nballs = len(hashes)
            mean, sdev = support.collision_stats(nbins, nballs)
            c = Counter(hashes)
            collisions = nballs - len(c)
            z = (collisions - mean) / sdev
            pileup = max(c.values()) - 1
            del c
            got = (collisions, pileup)
            failed = False
            prefix = ""
            if zlimit is not None and z > zlimit:
                failed = True
                prefix = f"FAIL z > {zlimit}; "
            if expected is not None and got != expected:
                failed = True
                prefix += f"FAIL {got} != {expected}; "
            if failed or JUST_SHOW_HASH_RESULTS:
                msg = f"{prefix}{tag}; pileup {pileup:,} mean {mean:.1f} "
                msg += f"coll {collisions:,} z {z:+.1f}"
                if JUST_SHOW_HASH_RESULTS:
                    import sys
                    print(msg, file=sys.__stdout__)
                else:
                    self.fail(msg)

        def tryone(tag, xs,
                   native32=None, native64=None, hi32=None, lo32=None,
                   zlimit=None):
            NHASHBITS = support.NHASHBITS
            hashes = list(map(hash, xs))
            tryone_inner(tag + f"; {NHASHBITS}-bit hash codes",
                         1 << NHASHBITS,
                         hashes,
                         native32 if NHASHBITS == 32 else native64,
                         zlimit)

            if NHASHBITS > 32:
                shift = NHASHBITS - 32
                tryone_inner(tag + "; 32-bit upper hash codes",
                             1 << 32,
                             [h >> shift for h in hashes],
                             hi32,
                             zlimit)

                mask = (1 << 32) - 1
                tryone_inner(tag + "; 32-bit lower hash codes",
                             1 << 32,
                             [h & mask for h in hashes],
                             lo32,
                             zlimit)
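
        # Each tryone() call below passes its expected (collisions,
        # pileup) pairs positionally, in the signature's order:
        # native32, native64, hi32, lo32.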

        # Tuples of smallish positive integers are common - nice if we
        # get "better than random" for these.
        tryone("range(100) by 3", list(product(range(100), repeat=3)),
               (0, 0), (0, 0), (4, 1), (0, 0))

        # A previous hash had systematic problems when mixing integers of
        # similar magnitude but opposite sign, obscurely related to the
        # fact that j ^ -2 == -j when j is odd.
        cands = list(range(-10, -1)) + list(range(9))

        # Note: -1 is omitted because hash(-1) == hash(-2) == -2, and
        # there's nothing the tuple hash can do to avoid collisions
        # inherited from collisions in the tuple components' hashes.
        tryone("-10 .. 8 by 4", list(product(cands, repeat=4)),
               (0, 0), (0, 0), (0, 0), (0, 0))
        del cands

        # The hashes here are a weird mix of values where all the
        # variation is in the lowest bits and across a single high-order
        # bit - the middle bits are all zeroes. A decent hash has to
        # both propagate low bits to the left and high bits to the
        # right. This is also complicated a bit in that there are
        # collisions among the hashes of the integers in L alone.
        L = [n << 60 for n in range(100)]
        tryone("0..99 << 60 by 3", list(product(L, repeat=3)),
               (0, 0), (0, 0), (0, 0), (324, 1))
        del L

        # Used to suffer a massive number of collisions.
        tryone("[-3, 3] by 18", list(product([-3, 3], repeat=18)),
               (7, 1), (0, 0), (7, 1), (6, 1))

        # And even worse. hash(0.5) has only a single bit set, at the
        # high end. A decent hash needs to propagate high bits right.
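        # (On CPython, hash(0.5) == 2**60 on 64-bit builds and 2**30 on
        # 32-bit builds: float hashing reduces modulo 2**61 - 1 or
        # 2**31 - 1, under which 0.5 maps to the modular inverse of 2.)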
        tryone("[0, 0.5] by 18", list(product([0, 0.5], repeat=18)),
               (5, 1), (0, 0), (9, 1), (12, 1))

        # Hashes of ints and floats are the same across platforms.
        # String hashes vary even on a single platform across runs, due
        # to hash randomization for strings. So we can't say exactly
        # what this should do. Instead we insist that the number of
        # collisions is no more than 4 sdevs above the theoretically
        # random mean. Even if the tuple hash can't achieve that on its
        # own, the string hash is trying to be decently pseudo-random
        # (in all bit positions) on _its_ own. We can at least test
        # that the tuple hash doesn't systematically ruin that.
        tryone("4-char tuples",
               list(product("abcdefghijklmnopqrstuvwxyz", repeat=4)),
               zlimit=4.0)

        # The "old tuple test". See https://bugs.python.org/issue942952.
        # Ensures, for example, that the hash:
        #   is non-commutative
        #   spreads closely spaced values
        #   doesn't exhibit cancellation in tuples like (x,(x,y))
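        # (Non-commutative means, e.g., that (1, 2) and (2, 1) should
        # hash differently; "cancellation" would let the two copies of
        # x in (x, (x, y)) cancel out of the combined hash.)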
        N = 50
        base = list(range(N))
        xp = list(product(base, repeat=2))
        inps = base + list(product(base, xp)) + \
               list(product(xp, base)) + xp + list(zip(base))
        tryone("old tuple test", inps,
               (2, 1), (0, 0), (52, 49), (7, 1))
        del base, xp, inps

        # The "new tuple test". See https://bugs.python.org/issue34751.
        # Even more tortured nesting, and a mix of signed ints of very
        # small magnitude.
        n = 5
        A = [x for x in range(-n, n+1) if x != -1]
        B = A + [(a,) for a in A]
        L2 = list(product(A, repeat=2))
        L3 = L2 + list(product(A, repeat=3))
        L4 = L3 + list(product(A, repeat=4))
        # T = list of testcases. These consist of all (possibly nested
        # at most 2 levels deep) tuples containing at most 4 items from
        # the set A.
        T = A
        T += [(a,) for a in B + L4]
        T += product(L3, B)
        T += product(L2, repeat=2)
        T += product(B, L3)
        T += product(B, B, L2)
        T += product(B, L2, B)
        T += product(L2, B, B)
        T += product(B, repeat=4)
        assert len(T) == 345130
        tryone("new tuple test", T,
               (9, 1), (0, 0), (21, 5), (6, 1))

    def test_repr(self):
        l0 = tuple()
        l2 = (0, 1, 2)
        a0 = self.type2test(l0)
        a2 = self.type2test(l2)

        self.assertEqual(str(a0), repr(l0))
        self.assertEqual(str(a2), repr(l2))
        self.assertEqual(repr(a0), "()")
        self.assertEqual(repr(a2), "(0, 1, 2)")

    def _not_tracked(self, t):
        # Nested tuples can take several collections to untrack
        gc.collect()
        gc.collect()
        self.assertFalse(gc.is_tracked(t), t)

    def _tracked(self, t):
        self.assertTrue(gc.is_tracked(t), t)
        gc.collect()
        gc.collect()
        self.assertTrue(gc.is_tracked(t), t)

    @support.cpython_only
    def test_track_literals(self):
        # Test GC-optimization of tuple literals
        x, y, z = 1.5, "a", []

        self._not_tracked(())
        self._not_tracked((1,))
        self._not_tracked((1, 2))
        self._not_tracked((1, 2, "a"))
        self._not_tracked((1, 2, (None, True, False, ()), int))
        self._not_tracked((object(),))
        self._not_tracked(((1, x), y, (2, 3)))

        # Tuples with mutable elements are always tracked, even if those
        # elements are not tracked right now.
        self._tracked(([],))
        self._tracked(([1],))
        self._tracked(({},))
        self._tracked((set(),))
        self._tracked((x, y, z))

    def check_track_dynamic(self, tp, always_track):
        x, y, z = 1.5, "a", []

        check = self._tracked if always_track else self._not_tracked
        check(tp())
        check(tp([]))
        check(tp(set()))
        check(tp([1, x, y]))
        check(tp(obj for obj in [1, x, y]))
        check(tp(set([1, x, y])))
        check(tp(tuple([obj]) for obj in [1, x, y]))
        check(tuple(tp([obj]) for obj in [1, x, y]))

        self._tracked(tp([z]))
        self._tracked(tp([[x, y]]))
        self._tracked(tp([{x: y}]))
        self._tracked(tp(obj for obj in [x, y, z]))
        self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
        self._tracked(tuple(tp([obj]) for obj in [x, y, z]))

    @support.cpython_only
    def test_track_dynamic(self):
        # Test GC-optimization of dynamically constructed tuples.
        self.check_track_dynamic(tuple, False)

    @support.cpython_only
    def test_track_subtypes(self):
        # Tuple subtypes must always be tracked
        class MyTuple(tuple):
            pass
        self.check_track_dynamic(MyTuple, True)

    @support.cpython_only
    def test_bug7466(self):
        # Trying to untrack an unfinished tuple could crash Python
        self._not_tracked(tuple(gc.collect() for i in range(101)))

    def test_repr_large(self):
        # Check the repr of large tuple objects
        def check(n):
            l = (0,) * n
            s = repr(l)
            self.assertEqual(s,
                             '(' + ', '.join(['0'] * n) + ')')
        check(10)       # check our checking code
        check(1000000)

    def test_iterator_pickle(self):
        # UserList iterators don't support pickling yet since
        # they are based on generators.
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = iter(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(data))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(data)[1:])

    def test_reversed_pickle(self):
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = reversed(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(reversed(data)))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(reversed(data))[1:])

    def test_no_comdat_folding(self):
        # Issue 8847: In the PGO build, the MSVC linker's COMDAT folding
        # optimization causes failures in code that relies on distinct
        # function addresses.
        class T(tuple): pass
        with self.assertRaises(TypeError):
            [3,] + T((1,2))

    def test_lexicographic_ordering(self):
        # Issue 21100
        a = self.type2test([1, 2])
        b = self.type2test([1, 2, 0])
        c = self.type2test([1, 3])
        self.assertLess(a, b)
        self.assertLess(b, c)

# Notes on testing hash codes. The primary thing is that Python doesn't
# care about "random" hash codes. To the contrary, we like them to be
# very regular when possible, so that the low-order bits are as evenly
# distributed as possible. For integers this is easy: hash(i) == i for
# all not-huge i except i==-1.
#
# For tuples of mixed type there's really no hope of that, so we want
# "randomish" here instead. But getting close to pseudo-random in all
# bit positions is more expensive than we've been willing to pay for.
#
# We can tolerate large deviations from random - what we don't want is
# catastrophic pileups on a relative handful of hash codes. The dict
# and set lookup routines remain effective provided that full-width hash
# codes for not-equal objects are distinct.
#
# So we compute various statistics here based on what a "truly random"
# hash would do, but don't automate "pass or fail" based on those
# results. Instead those are viewed as inputs to human judgment, and the
# automated tests merely ensure we get the _same_ results across
# platforms. In fact, we normally don't bother to run them at all -
# set RUN_ALL_HASH_TESTS to force it.
#
# When global JUST_SHOW_HASH_RESULTS is True, the tuple hash statistics
# are just displayed to stdout. A typical output line looks like:
#
# old tuple test; 32-bit upper hash codes; \
#     pileup 49 mean 7.4 coll 52 z +16.4
#
# "old tuple test" is just a string name for the test being run.
#
# "32-bit upper hash codes" means this was run under a 64-bit build and
# we've shifted away the lower 32 bits of the hash codes.
#
# "pileup" is 0 if there were no collisions across those hash codes.
# It's 1 less than the maximum number of times any single hash code was
# seen. So in this case, there was (at least) one hash code that was
# seen 50 times: that hash code "piled up" 49 more times than ideal.
#
# "mean" is the number of collisions a perfectly random hash function
# would have yielded, on average.
#
# "coll" is the number of collisions actually seen.
#
434# "z" is "coll - mean" divided by the standard deviation of the number
435# of collisions a perfectly random hash function would suffer. A
436# positive value is "worse than random", and negative value "better than
437# random". Anything of magnitude greater than 3 would be highly suspect
438# for a hash function that claimed to be random. It's essentially
439# impossible that a truly random function would deliver a result 16.4
440# sdevs "worse than random".
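#
# (Concretely, z is computed just as in tryone_inner() above:
# z = (coll - mean) / sdev. For the sample line, (52 - 7.4) / sdev
# == +16.4, so the sdev for that test was about 2.7.)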
#
# But we don't care here! That's why the test isn't coded to fail.
# Knowing something about how the high-order hash code bits behave
# provides insight, but is irrelevant to how the dict and set lookup
# code performs. The low-order bits are much more important to that,
# and on the same test those did "just like random":
#
# old tuple test; 32-bit lower hash codes; \
#     pileup 1 mean 7.4 coll 7 z -0.2
#
# So there are always tradeoffs to consider. For another:
#
# 0..99 << 60 by 3; 32-bit hash codes; \
#     pileup 0 mean 116.4 coll 0 z -10.8
#
# That was run under a 32-bit build, and is spectacularly "better than
# random". On a 64-bit build the wider hash codes are fine too:
#
# 0..99 << 60 by 3; 64-bit hash codes; \
#     pileup 0 mean 0.0 coll 0 z -0.0
#
# but their lower 32 bits are poor:
#
# 0..99 << 60 by 3; 32-bit lower hash codes; \
#     pileup 1 mean 116.4 coll 324 z +19.2
#
# In a statistical sense that's waaaaay too many collisions, but (a) 324
# collisions out of a million hash codes isn't anywhere near being a
# real problem; and, (b) the worst pileup on a single hash code is a
# measly 1 extra. It's a relatively poor case for the tuple hash, but
# still fine for practical use.
#
# The following isn't fine: it's what Python 3.7.1 produced for the
# hashes of itertools.product([0, 0.5], repeat=18). Even with a fat
# 64-bit hashcode, the highest pileup was over 16,000 - making a
# dict/set lookup on one of the colliding values thousands of times
# slower (on average) than we expect.
#
# [0, 0.5] by 18; 64-bit hash codes; \
#     pileup 16,383 mean 0.0 coll 262,128 z +6073641856.9
# [0, 0.5] by 18; 32-bit lower hash codes; \
#     pileup 262,143 mean 8.0 coll 262,143 z +92683.6

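# A rough sketch of where "mean" comes from, under the balls-in-bins
# model the notes assume: throwing nballs hashes uniformly at random
# into nbins buckets, the expected number of collisions is nballs minus
# the expected number of occupied buckets. support.collision_stats()
# computes this (and the standard deviation) carefully; the naive float
# version below loses all precision when nbins is huge (e.g. 2**64), so
# treat it as an illustration, not as what the tests actually use.
def _naive_collision_mean(nbins, nballs):
    # Each throw leaves a given bucket empty with probability
    # (nbins - 1) / nbins, so after nballs independent throws the
    # expected number of occupied buckets is:
    occupied = nbins * (1.0 - ((nbins - 1) / nbins) ** nballs)
    return nballs - occupied

# For instance, _naive_collision_mean(2**32, 10**6) is about 116.4,
# matching the "mean 116.4" in the 32-bit lines above.
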
if __name__ == "__main__":
    unittest.main()