blob: 90722f7abc6a2240b574d244f9ddb06d2b40b9b2 [file] [log] [blame]
"""Sort performance test.

See main() for command line syntax.
See tabulate() for output format.

"""
Guido van Rossumea176b61998-05-10 18:20:05 +00007
8import sys
9import time
Andrew M. Kuchlinga9745612002-04-10 14:54:39 +000010import random
Guido van Rossumea176b61998-05-10 18:20:05 +000011import marshal
12import tempfile
Guido van Rossumea176b61998-05-10 18:20:05 +000013import os
14
# Directory in which randfloats() keeps its cached data files
# (the platform's default temporary directory).
td = tempfile.gettempdir()
16
def _load_cached_floats(fn, n):
    """Return the list of n values cached in file fn, or None if unusable.

    A missing or unreadable file, truncated or corrupt marshal data, and a
    payload that is not a list of exactly n items all count as "no cache".
    """
    try:
        with open(fn, "rb") as fp:
            # NOTE(review): marshal offers no protection against maliciously
            # constructed data, and fn has a predictable name inside the
            # temp directory.  Acceptable for a private benchmark machine;
            # reconsider before running this on a shared host.
            data = marshal.load(fp)
    except (OSError, EOFError, ValueError, TypeError):
        return None
    if isinstance(data, list) and len(data) == n:
        return data
    return None


def _store_floats(fn, values):
    """Best-effort write of values to file fn in marshal format.

    An OSError is reported on stdout and otherwise ignored; the caller keeps
    using the in-memory list.  A partially written file is removed so that a
    later run never reads back a truncated cache.
    """
    created = False
    try:
        try:
            with open(fn, "wb") as fp:
                created = True
                marshal.dump(values, fp)
        except BaseException:
            # The with-statement has already closed the file here, so the
            # unlink also works on platforms that refuse to delete open
            # files.  Only remove a file this call actually created.
            if created:
                try:
                    os.unlink(fn)
                except OSError:
                    pass
            raise
    except OSError as msg:
        print("can't write", fn, ":", msg)


def randfloats(n):
    """Return a list of n random floats in [0, 1).

    Generating floats is expensive, so the list is written out to a file in
    the temp directory `td`.  If a usable file already exists, its contents
    are read back instead.  Either way the list is shuffled a bit before it
    is returned, so repeated calls do not see identical orderings.

    Failure to write the cache file is reported on stdout and is not fatal.
    """
    fn = os.path.join(td, "rr%06d" % n)
    result = _load_cached_floats(fn, n)
    if result is None:
        r = random.random
        result = [r() for _ in range(n)]
        _store_floats(fn, result)
    # Shuffle it a bit: ten times, move a random-length prefix to the end,
    # reversed.  Skipped for an empty list because randrange(0) raises.
    if n > 0:
        for _ in range(10):
            cut = random.randrange(n)
            head = result[:cut]
            del result[:cut]
            result.extend(reversed(head))
    assert len(result) == n
    return result
55
def flush():
    """Force any buffered standard-output text to be written out now."""
    out = sys.stdout
    out.flush()
58
def doit(L):
    """Sort the list L in place and print how long the sort took.

    The elapsed time in seconds is printed as a 6-wide field with two
    decimals, followed by a space instead of a newline, so that successive
    calls build up one row of the table.  Output is flushed immediately.
    """
    clock = time.perf_counter
    started = clock()
    L.sort()
    elapsed = clock() - started
    print("%6.2f" % elapsed, end=' ')
    flush()
Guido van Rossumea176b61998-05-10 18:20:05 +000065
def tabulate(r):
    r"""Tabulate sort speed for lists of various sizes.

    The sizes are 2**i for i in r (the argument, an iterable of ints).

    The output displays i, 2**i, and the time to sort arrays of 2**i
    floating point numbers with the following properties:

    *sort: random data
    \sort: descending data
    /sort: ascending data
    3sort: ascending, then 3 random exchanges
    +sort: ascending, then 10 random at the end
    %sort: ascending, then randomly replace 1% of the elements w/ random values
    ~sort: many duplicates
    =sort: all equal
    !sort: worst case scenario

    """
    # NOTE: the docstring is a raw string because "\sort" would otherwise
    # contain an invalid escape sequence.
    cases = tuple(ch + "sort" for ch in r"*\/3+%~=!")
    fmt = "%2s %7s" + " %6s" * len(cases)
    print(fmt % (("i", "2**i") + cases))
    for i in r:
        n = 1 << i
        L = randfloats(n)
        print("%2d %7d" % (i, n), end=' ')
        flush()
        doit(L)  # *sort
        # L is now ascending; reversing it gives the descending case, and
        # the sort that follows leaves it ascending again for the next one.
        L.reverse()
        doit(L)  # \sort
        doit(L)  # /sort

        # Do 3 random exchanges.
        for dummy in range(3):
            i1 = random.randrange(n)
            i2 = random.randrange(n)
            L[i1], L[i2] = L[i2], L[i1]
        doit(L)  # 3sort

        # Replace the last 10 with random floats.
        if n >= 10:
            L[-10:] = [random.random() for dummy in range(10)]
        doit(L)  # +sort

        # Replace 1% of the elements at random.
        for dummy in range(n // 100):
            L[random.randrange(n)] = random.random()
        doit(L)  # %sort

        # Arrange for lots of duplicates: keep 4 values and repeat them.
        if n > 4:
            del L[4:]
            L = L * (n // 4)
            # Force the elements to be distinct objects, else timings can be
            # artificially low.  Negating twice keeps each value unchanged.
            L = [-(-x) for x in L]
        doit(L)  # ~sort
        del L

        # All equal.  Again, force the elements to be distinct objects.
        L = list(map(abs, [-0.5] * n))
        doit(L)  # =sort
        del L

        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
        # for an older implementation of quicksort, which used the median
        # of the first, last and middle elements as the pivot.
        half = n // 2
        L = list(range(half - 1, -1, -1))
        L.extend(range(half))
        # Force to float, so that the timings are comparable.  This is
        # significantly faster if we leave them as ints.
        L = list(map(float, L))
        doit(L)  # !sort
        print()
Guido van Rossumea176b61998-05-10 18:20:05 +0000141
def main():
    """Main program when invoked as a script.

    One argument: tabulate a single row.
    Two arguments: tabulate a range (inclusive).
    Extra arguments are used to seed the random generator.

    The seed is derived from the bytes of the extra arguments, so the same
    arguments select the same seed on every run.

    Raises ValueError if either of the first two arguments is not an
    integer literal.
    """
    # default range (inclusive)
    k1 = 15
    k2 = 20
    args = sys.argv[1:]
    if args:
        # one argument: single point
        k1 = k2 = int(args[0])
        if len(args) >= 2:
            # two arguments: specify range
            k2 = int(args[1])
        seed_args = args[2:]
        if seed_args:
            # Derive the random seed from the remaining arguments.  The
            # built-in hash() is not used for this because str hashes are
            # salted per process (PYTHONHASHSEED), which would make the
            # "same" seed produce different data on every run.
            x = 1
            for a in seed_args:
                # surrogatepass lets arguments that are not valid Unicode
                # still be turned into bytes.
                raw = a.encode("utf-8", "surrogatepass")
                x = 69069 * x + int.from_bytes(raw, "big")
            random.seed(x)
    r = range(k1, k2 + 1)  # include the end point
    tabulate(r)
167
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()