Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 1 | from test_support import verbose |
| 2 | import random |
| 3 | |
| 4 | # From SF bug #422121: Insecurities in dict comparison. |
| 5 | |
Tim Peters | 8c3e91e | 2001-05-10 19:40:30 +0000 | [diff] [blame] | 6 | # Safety of code doing comparisons has been an historical Python weak spot. |
| 7 | # The problem is that comparison of structures written in C *naturally* |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 8 | # wants to hold on to things like the size of the container, or "the |
| 9 | # biggest" containee so far, across a traversal of the container; but |
| 10 | # code to do containee comparisons can call back into Python and mutate |
| 11 | # the container in arbitrary ways while the C loop is in midstream. If the |
| 12 | # C code isn't extremely paranoid about digging things out of memory on |
| 13 | # each trip, and artificially boosting refcounts for the duration, anything |
| 14 | # from infinite loops to OS crashes can result (yes, I use Windows <wink>). |
| 15 | # |
| 16 | # The other problem is that code designed to provoke a weakness is usually |
| 17 | # white-box code, and so catches only the particular vulnerabilities the |
| 18 | # author knew to protect against. For example, Python's list.sort() code |
| 19 | # went thru many iterations as one "new" vulnerability after another was |
| 20 | # discovered. |
| 21 | # |
| 22 | # So the dict comparison test here uses a black-box approach instead, |
| 23 | # generating dicts of various sizes at random, and performing random |
| 24 | # mutations on them at random times. This proved very effective, |
| 25 | # triggering at least six distinct failure modes the first 20 times I |
| 26 | # ran it. Indeed, at the start, the driver never got beyond 6 iterations |
| 27 | # before the test died. |
| 28 | |
# Shared module-level state.  The two dicts under test are globals so that
# maybe_mutate() can reach and mutate them from inside a comparison.
dict1, dict2 = {}, {}

# Parallel lists holding the current keys of dict1 and dict2.  Keeping
# them as lists makes it cheap to pick a key at random.
dict1keys, dict2keys = [], []

# Global switch telling maybe_mutate() whether to *consider* mutating.
mutate = 0
| 40 | |
| 41 | # If global mutate is true, consider mutating a dict. May or may not |
| 42 | # mutate a dict even if mutate is true. If it does decide to mutate a |
| 43 | # dict, it picks one of {dict1, dict2} at random, and deletes a random |
Tim Peters | 4c02fec | 2001-05-10 20:18:30 +0000 | [diff] [blame] | 44 | # entry from it; or, more rarely, adds a random element. |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 45 | |
def maybe_mutate():
    """Possibly mutate dict1 or dict2; invoked from Horrid.__cmp__.

    Does nothing while the global ``mutate`` flag is false, and otherwise
    still does nothing about half the time.  When it does act, it picks
    one of the two global dicts at random and usually deletes a random
    entry from it; about one time in five it inserts a new entry instead.
    The matching key list (dict1keys/dict2keys) is updated alongside.
    """
    global mutate

    # Only consider mutating while enabled, and then only ~50% of the time.
    # The 'or' short-circuits, so no random number is drawn when disabled.
    if not mutate or random.random() < 0.5:
        return

    # Choose the victim dict together with its parallel key list.
    if random.random() < 0.5:
        victim, victim_keys = dict1, dict1keys
    else:
        victim, victim_keys = dict2, dict2keys

    if random.random() >= 0.2:
        # Common case: delete a key at random (if there is one).
        if victim_keys:
            idx = random.randrange(len(victim_keys))
            key = victim_keys[idx]
            del victim[key]
            # Delete by index rather than victim_keys.remove(key):
            # .remove() would run more comparisons, and so more calls to
            # this routine.  We're mutating often enough without that.
            del victim_keys[idx]
    else:
        # Rare case: insert a new key.  Mutation is switched off until
        # the key has been inserted, so the membership test and the
        # store below cannot recurse into another mutation.
        mutate = 0
        while 1:
            candidate = Horrid(random.randrange(100))
            if candidate not in victim:
                break
        victim[candidate] = Horrid(random.randrange(100))
        victim_keys.append(candidate)
        mutate = 1
| 79 | |
| 80 | # A horrid class that triggers random mutations of dict1 and dict2 when |
| 81 | # instances are compared. |
| 82 | |
class Horrid:
    """Object whose comparisons may mutate dict1 and dict2 as a side effect.

    Ordering is decided by the integer ``i``; the hash is an unrelated
    random number.
    """

    def __init__(self, i):
        # Comparison outcomes are determined solely by this value.
        self.i = i

        # Pick the hash code at random so there is no systematic
        # relationship between comparison outcomes (based on self.i and
        # other.i) and position within the hash vector (based on the
        # hash code).
        self.hashcode = random.randrange(1000000000)

    def __hash__(self):
        return self.hashcode

    def __cmp__(self, other):
        # The point of the test: give the dicts a chance to change
        # underneath whoever asked for this comparison.
        maybe_mutate()
        mine, theirs = self.i, other.i
        return cmp(mine, theirs)

    def __repr__(self):
        return "Horrid(%d)" % (self.i,)
| 103 | |
| 104 | # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, |
| 105 | # where i and j are selected at random from the candidates list. |
| 106 | # Return d.keys() after filling. |
| 107 | |
def fill_dict(d, candidates, numentries):
    """Empty d, then store numentries random Horrid -> Horrid pairs in it.

    The integers wrapped by each key and value are drawn at random from
    the candidates list.  Returns d.keys() once filling is finished.
    """
    d.clear()
    for _unused in xrange(numentries):
        # Build the value before the key, so the random draws happen in
        # value-then-key order.
        value = Horrid(random.choice(candidates))
        key = Horrid(random.choice(candidates))
        d[key] = value
    return d.keys()
| 114 | |
# Test one pair of randomly generated dicts, each with n entries.
# Note that dict comparison is trivial if they don't have the same number
# of entries (then the "shorter" dict is instantly considered to be the
# smaller one, without even looking at the entries).
| 119 | |
| 120 | def test_one(n): |
| 121 | global mutate, dict1, dict2, dict1keys, dict2keys |
| 122 | |
| 123 | # Fill the dicts without mutating them. |
| 124 | mutate = 0 |
| 125 | dict1keys = fill_dict(dict1, range(n), n) |
| 126 | dict2keys = fill_dict(dict2, range(n), n) |
| 127 | |
| 128 | # Enable mutation, then compare the dicts so long as they have the |
| 129 | # same size. |
| 130 | mutate = 1 |
| 131 | if verbose: |
| 132 | print "trying w/ lengths", len(dict1), len(dict2), |
| 133 | while dict1 and len(dict1) == len(dict2): |
| 134 | if verbose: |
| 135 | print ".", |
| 136 | c = cmp(dict1, dict2) |
| 137 | if verbose: |
| 138 | print |
| 139 | |
| 140 | # Run test_one n times. At the start (before the bugs were fixed), 20 |
| 141 | # consecutive runs of this test each blew up on or before the sixth time |
| 142 | # test_one was run. So n doesn't have to be large to get an interesting |
| 143 | # test. |
| 144 | # OTOH, calling with large n is also interesting, to ensure that the fixed |
| 145 | # code doesn't hold on to refcounts *too* long (in which case memory would |
| 146 | # leak). |
| 147 | |
def test(n):
    """Call test_one n times, each with a random size in range(1, 100)."""
    for _round in xrange(n):
        size = random.randrange(1, 100)
        test_one(size)
| 151 | |
# Run the driver at import time; see the comment block above test() for
# clues about good values for n.
test(n=100)