Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 1 | """Sort performance test. |
| 2 | |
| 3 | See main() for command line syntax. |
| 4 | See tabulate() for output format. |
| 5 | |
| 6 | """ |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 7 | |
| 8 | import sys |
| 9 | import time |
Andrew M. Kuchling | a974561 | 2002-04-10 14:54:39 +0000 | [diff] [blame] | 10 | import random |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 11 | import marshal |
| 12 | import tempfile |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 13 | import os |
| 14 | |
| 15 | td = tempfile.gettempdir() |
| 16 | |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 17 | def randfloats(n): |
| 18 | """Return a list of n random floats in [0, 1).""" |
| 19 | # Generating floats is expensive, so this writes them out to a file in |
| 20 | # a temp directory. If the file already exists, it just reads them |
| 21 | # back in and shuffles them a bit. |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 22 | fn = os.path.join(td, "rr%06d" % n) |
| 23 | try: |
| 24 | fp = open(fn, "rb") |
| 25 | except IOError: |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 26 | r = random.random |
| 27 | result = [r() for i in xrange(n)] |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 28 | try: |
| 29 | try: |
| 30 | fp = open(fn, "wb") |
| 31 | marshal.dump(result, fp) |
| 32 | fp.close() |
| 33 | fp = None |
| 34 | finally: |
| 35 | if fp: |
| 36 | try: |
| 37 | os.unlink(fn) |
| 38 | except os.error: |
| 39 | pass |
| 40 | except IOError, msg: |
| 41 | print "can't write", fn, ":", msg |
| 42 | else: |
| 43 | result = marshal.load(fp) |
| 44 | fp.close() |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 45 | # Shuffle it a bit... |
| 46 | for i in range(10): |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 47 | i = random.randrange(n) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 48 | temp = result[:i] |
| 49 | del result[:i] |
| 50 | temp.reverse() |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 51 | result.extend(temp) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 52 | del temp |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 53 | assert len(result) == n |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 54 | return result |
| 55 | |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 56 | def flush(): |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 57 | sys.stdout.flush() |
| 58 | |
| 59 | def doit(L): |
| 60 | t0 = time.clock() |
| 61 | L.sort() |
| 62 | t1 = time.clock() |
| 63 | print "%6.2f" % (t1-t0), |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 64 | flush() |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 65 | |
| 66 | def tabulate(r): |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 67 | """Tabulate sort speed for lists of various sizes. |
| 68 | |
| 69 | The sizes are 2**i for i in r (the argument, a list). |
| 70 | |
| 71 | The output displays i, 2**i, and the time to sort arrays of 2**i |
| 72 | floating point numbers with the following properties: |
| 73 | |
| 74 | *sort: random data |
| 75 | \sort: descending data |
| 76 | /sort: ascending data |
Tim Peters | 7ea39b1 | 2002-07-21 17:37:03 +0000 | [diff] [blame] | 77 | 3sort: ascending, then 3 random exchanges |
| 78 | +sort: ascending, then 10 random at the end |
Tim Peters | d5f4359 | 2002-08-02 05:46:09 +0000 | [diff] [blame] | 79 | %sort: ascending, then randomly replace 1% of the elements w/ random values |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 80 | ~sort: many duplicates |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 81 | =sort: all equal |
Guido van Rossum | 16653cb | 1998-05-26 15:05:12 +0000 | [diff] [blame] | 82 | !sort: worst case scenario |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 83 | |
| 84 | """ |
Tim Peters | d5f4359 | 2002-08-02 05:46:09 +0000 | [diff] [blame] | 85 | cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 86 | fmt = ("%2s %7s" + " %6s"*len(cases)) |
Guido van Rossum | 16653cb | 1998-05-26 15:05:12 +0000 | [diff] [blame] | 87 | print fmt % (("i", "2**i") + cases) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 88 | for i in r: |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 89 | n = 1 << i |
| 90 | L = randfloats(n) |
| 91 | print "%2d %7d" % (i, n), |
| 92 | flush() |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 93 | doit(L) # *sort |
| 94 | L.reverse() |
| 95 | doit(L) # \sort |
| 96 | doit(L) # /sort |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 97 | |
Tim Peters | 0a30e64 | 2002-07-20 04:21:51 +0000 | [diff] [blame] | 98 | # Do 3 random exchanges. |
| 99 | for dummy in range(3): |
| 100 | i1 = random.randrange(n) |
| 101 | i2 = random.randrange(n) |
| 102 | L[i1], L[i2] = L[i2], L[i1] |
| 103 | doit(L) # 3sort |
| 104 | |
Tim Peters | 7ea39b1 | 2002-07-21 17:37:03 +0000 | [diff] [blame] | 105 | # Replace the last 10 with random floats. |
| 106 | if n >= 10: |
| 107 | L[-10:] = [random.random() for dummy in range(10)] |
| 108 | doit(L) # +sort |
| 109 | |
Tim Peters | d5f4359 | 2002-08-02 05:46:09 +0000 | [diff] [blame] | 110 | # Replace 1% of the elements at random. |
| 111 | for dummy in xrange(n // 100): |
| 112 | L[random.randrange(n)] = random.random() |
| 113 | doit(L) # %sort |
| 114 | |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 115 | # Arrange for lots of duplicates. |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 116 | if n > 4: |
Guido van Rossum | b298a30 | 1998-05-12 13:21:31 +0000 | [diff] [blame] | 117 | del L[4:] |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 118 | L = L * (n // 4) |
| 119 | # Force the elements to be distinct objects, else timings can be |
| 120 | # artificially low. |
Guido van Rossum | b298a30 | 1998-05-12 13:21:31 +0000 | [diff] [blame] | 121 | L = map(lambda x: --x, L) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 122 | doit(L) # ~sort |
Guido van Rossum | b298a30 | 1998-05-12 13:21:31 +0000 | [diff] [blame] | 123 | del L |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 124 | |
| 125 | # All equal. Again, force the elements to be distinct objects. |
| 126 | L = map(abs, [-0.5] * n) |
| 127 | doit(L) # =sort |
| 128 | del L |
| 129 | |
| 130 | # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case |
| 131 | # for an older implementation of quicksort, which used the median |
Tim Peters | 7ea39b1 | 2002-07-21 17:37:03 +0000 | [diff] [blame] | 132 | # of the first, last and middle elements as the pivot. |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 133 | half = n // 2 |
| 134 | L = range(half - 1, -1, -1) |
| 135 | L.extend(range(half)) |
| 136 | # Force to float, so that the timings are comparable. This is |
| 137 | # significantly faster if we leave tham as ints. |
| 138 | L = map(float, L) |
Guido van Rossum | 16653cb | 1998-05-26 15:05:12 +0000 | [diff] [blame] | 139 | doit(L) # !sort |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 140 | print |
| 141 | |
| 142 | def main(): |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 143 | """Main program when invoked as a script. |
| 144 | |
| 145 | One argument: tabulate a single row. |
| 146 | Two arguments: tabulate a range (inclusive). |
| 147 | Extra arguments are used to seed the random generator. |
| 148 | |
| 149 | """ |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 150 | # default range (inclusive) |
| 151 | k1 = 15 |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 152 | k2 = 20 |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 153 | if sys.argv[1:]: |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 154 | # one argument: single point |
Eric S. Raymond | fc170b1 | 2001-02-09 11:51:27 +0000 | [diff] [blame] | 155 | k1 = k2 = int(sys.argv[1]) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 156 | if sys.argv[2:]: |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 157 | # two arguments: specify range |
Eric S. Raymond | fc170b1 | 2001-02-09 11:51:27 +0000 | [diff] [blame] | 158 | k2 = int(sys.argv[2]) |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 159 | if sys.argv[3:]: |
| 160 | # derive random seed from remaining arguments |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 161 | x = 1 |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 162 | for a in sys.argv[3:]: |
Tim Peters | 8b6ec79 | 2002-07-18 15:53:32 +0000 | [diff] [blame] | 163 | x = 69069 * x + hash(a) |
| 164 | random.seed(x) |
Guido van Rossum | 03e35c5 | 1998-05-10 18:27:29 +0000 | [diff] [blame] | 165 | r = range(k1, k2+1) # include the end point |
Guido van Rossum | ea176b6 | 1998-05-10 18:20:05 +0000 | [diff] [blame] | 166 | tabulate(r) |
| 167 | |
| 168 | if __name__ == '__main__': |
| 169 | main() |