Barry Warsaw | 04f357c | 2002-07-23 19:04:11 +0000 | [diff] [blame] | 1 | from test.test_support import verbose, TESTFN |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 2 | import random |
Tim Peters | 23cf6be | 2001-06-02 08:02:56 +0000 | [diff] [blame] | 3 | import os |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 4 | |
| 5 | # From SF bug #422121: Insecurities in dict comparison. |
| 6 | |
Tim Peters | 8c3e91e | 2001-05-10 19:40:30 +0000 | [diff] [blame] | 7 | # Safety of code doing comparisons has been an historical Python weak spot. |
| 8 | # The problem is that comparison of structures written in C *naturally* |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 9 | # wants to hold on to things like the size of the container, or "the |
| 10 | # biggest" containee so far, across a traversal of the container; but |
| 11 | # code to do containee comparisons can call back into Python and mutate |
| 12 | # the container in arbitrary ways while the C loop is in midstream. If the |
| 13 | # C code isn't extremely paranoid about digging things out of memory on |
| 14 | # each trip, and artificially boosting refcounts for the duration, anything |
| 15 | # from infinite loops to OS crashes can result (yes, I use Windows <wink>). |
| 16 | # |
| 17 | # The other problem is that code designed to provoke a weakness is usually |
| 18 | # white-box code, and so catches only the particular vulnerabilities the |
| 19 | # author knew to protect against. For example, Python's list.sort() code |
| 20 | # went thru many iterations as one "new" vulnerability after another was |
| 21 | # discovered. |
| 22 | # |
| 23 | # So the dict comparison test here uses a black-box approach instead, |
| 24 | # generating dicts of various sizes at random, and performing random |
| 25 | # mutations on them at random times. This proved very effective, |
| 26 | # triggering at least six distinct failure modes the first 20 times I |
| 27 | # ran it. Indeed, at the start, the driver never got beyond 6 iterations |
| 28 | # before the test died. |
| 29 | |
| 30 | # The dicts are global to make it easy to mutate tham from within functions. |
| 31 | dict1 = {} |
| 32 | dict2 = {} |
| 33 | |
| 34 | # The current set of keys in dict1 and dict2. These are materialized as |
| 35 | # lists to make it easy to pick a dict key at random. |
| 36 | dict1keys = [] |
| 37 | dict2keys = [] |
| 38 | |
| 39 | # Global flag telling maybe_mutate() wether to *consider* mutating. |
| 40 | mutate = 0 |
| 41 | |
| 42 | # If global mutate is true, consider mutating a dict. May or may not |
| 43 | # mutate a dict even if mutate is true. If it does decide to mutate a |
| 44 | # dict, it picks one of {dict1, dict2} at random, and deletes a random |
Tim Peters | 4c02fec | 2001-05-10 20:18:30 +0000 | [diff] [blame] | 45 | # entry from it; or, more rarely, adds a random element. |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 46 | |
| 47 | def maybe_mutate(): |
Tim Peters | 4c02fec | 2001-05-10 20:18:30 +0000 | [diff] [blame] | 48 | global mutate |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 49 | if not mutate: |
| 50 | return |
| 51 | if random.random() < 0.5: |
| 52 | return |
Tim Peters | 4c02fec | 2001-05-10 20:18:30 +0000 | [diff] [blame] | 53 | |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 54 | if random.random() < 0.5: |
| 55 | target, keys = dict1, dict1keys |
| 56 | else: |
| 57 | target, keys = dict2, dict2keys |
Tim Peters | 4c02fec | 2001-05-10 20:18:30 +0000 | [diff] [blame] | 58 | |
| 59 | if random.random() < 0.2: |
| 60 | # Insert a new key. |
| 61 | mutate = 0 # disable mutation until key inserted |
| 62 | while 1: |
| 63 | newkey = Horrid(random.randrange(100)) |
| 64 | if newkey not in target: |
| 65 | break |
| 66 | target[newkey] = Horrid(random.randrange(100)) |
| 67 | keys.append(newkey) |
| 68 | mutate = 1 |
| 69 | |
| 70 | elif keys: |
| 71 | # Delete a key at random. |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 72 | i = random.randrange(len(keys)) |
| 73 | key = keys[i] |
| 74 | del target[key] |
| 75 | # CAUTION: don't use keys.remove(key) here. Or do <wink>. The |
| 76 | # point is that .remove() would trigger more comparisons, and so |
| 77 | # also more calls to this routine. We're mutating often enough |
| 78 | # without that. |
| 79 | del keys[i] |
| 80 | |
| 81 | # A horrid class that triggers random mutations of dict1 and dict2 when |
| 82 | # instances are compared. |
| 83 | |
| 84 | class Horrid: |
| 85 | def __init__(self, i): |
| 86 | # Comparison outcomes are determined by the value of i. |
| 87 | self.i = i |
| 88 | |
| 89 | # An artificial hashcode is selected at random so that we don't |
Tim Peters | 8c3e91e | 2001-05-10 19:40:30 +0000 | [diff] [blame] | 90 | # have any systematic relationship between comparison outcomes |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 91 | # (based on self.i and other.i) and relative position within the |
Tim Peters | 8c3e91e | 2001-05-10 19:40:30 +0000 | [diff] [blame] | 92 | # hash vector (based on hashcode). |
Tim Peters | 95bf939 | 2001-05-10 08:32:44 +0000 | [diff] [blame] | 93 | self.hashcode = random.randrange(1000000000) |
| 94 | |
| 95 | def __hash__(self): |
| 96 | return self.hashcode |
| 97 | |
| 98 | def __cmp__(self, other): |
| 99 | maybe_mutate() # The point of the test. |
| 100 | return cmp(self.i, other.i) |
| 101 | |
| 102 | def __repr__(self): |
| 103 | return "Horrid(%d)" % self.i |
| 104 | |
| 105 | # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, |
| 106 | # where i and j are selected at random from the candidates list. |
| 107 | # Return d.keys() after filling. |
| 108 | |
| 109 | def fill_dict(d, candidates, numentries): |
| 110 | d.clear() |
| 111 | for i in xrange(numentries): |
| 112 | d[Horrid(random.choice(candidates))] = \ |
| 113 | Horrid(random.choice(candidates)) |
| 114 | return d.keys() |
| 115 | |
| 116 | # Test one pair of randomly generated dicts, each with n entries. |
| 117 | # Note that dict comparison is trivial if they don't have the same number |
| 118 | # of entires (then the "shorter" dict is instantly considered to be the |
| 119 | # smaller one, without even looking at the entries). |
| 120 | |
| 121 | def test_one(n): |
| 122 | global mutate, dict1, dict2, dict1keys, dict2keys |
| 123 | |
| 124 | # Fill the dicts without mutating them. |
| 125 | mutate = 0 |
| 126 | dict1keys = fill_dict(dict1, range(n), n) |
| 127 | dict2keys = fill_dict(dict2, range(n), n) |
| 128 | |
| 129 | # Enable mutation, then compare the dicts so long as they have the |
| 130 | # same size. |
| 131 | mutate = 1 |
| 132 | if verbose: |
| 133 | print "trying w/ lengths", len(dict1), len(dict2), |
| 134 | while dict1 and len(dict1) == len(dict2): |
| 135 | if verbose: |
| 136 | print ".", |
| 137 | c = cmp(dict1, dict2) |
| 138 | if verbose: |
| 139 | print |
| 140 | |
| 141 | # Run test_one n times. At the start (before the bugs were fixed), 20 |
| 142 | # consecutive runs of this test each blew up on or before the sixth time |
| 143 | # test_one was run. So n doesn't have to be large to get an interesting |
| 144 | # test. |
| 145 | # OTOH, calling with large n is also interesting, to ensure that the fixed |
| 146 | # code doesn't hold on to refcounts *too* long (in which case memory would |
| 147 | # leak). |
| 148 | |
| 149 | def test(n): |
| 150 | for i in xrange(n): |
| 151 | test_one(random.randrange(1, 100)) |
| 152 | |
| 153 | # See last comment block for clues about good values for n. |
| 154 | test(100) |
Tim Peters | 23cf6be | 2001-06-02 08:02:56 +0000 | [diff] [blame] | 155 | |
| 156 | ########################################################################## |
Tim Peters | fa517b2 | 2001-06-02 08:18:58 +0000 | [diff] [blame] | 157 | # Another segfault bug, distilled by Michael Hudson from a c.l.py post. |
Tim Peters | 23cf6be | 2001-06-02 08:02:56 +0000 | [diff] [blame] | 158 | |
| 159 | class Child: |
| 160 | def __init__(self, parent): |
| 161 | self.__dict__['parent'] = parent |
| 162 | def __getattr__(self, attr): |
| 163 | self.parent.a = 1 |
| 164 | self.parent.b = 1 |
| 165 | self.parent.c = 1 |
| 166 | self.parent.d = 1 |
| 167 | self.parent.e = 1 |
| 168 | self.parent.f = 1 |
| 169 | self.parent.g = 1 |
| 170 | self.parent.h = 1 |
| 171 | self.parent.i = 1 |
| 172 | return getattr(self.parent, attr) |
| 173 | |
| 174 | class Parent: |
| 175 | def __init__(self): |
| 176 | self.a = Child(self) |
| 177 | |
| 178 | # Hard to say what this will print! May vary from time to time. But |
| 179 | # we're specifically trying to test the tp_print slot here, and this is |
| 180 | # the clearest way to do it. We print the result to a temp file so that |
| 181 | # the expected-output file doesn't need to change. |
| 182 | |
| 183 | f = open(TESTFN, "w") |
| 184 | print >> f, Parent().__dict__ |
| 185 | f.close() |
| 186 | os.unlink(TESTFN) |
| 187 | |
| 188 | ########################################################################## |
| 189 | # And another core-dumper from Michael Hudson. |
| 190 | |
| 191 | dict = {} |
| 192 | |
| 193 | # Force dict to malloc its table. |
| 194 | for i in range(1, 10): |
| 195 | dict[i] = i |
| 196 | |
| 197 | f = open(TESTFN, "w") |
| 198 | |
| 199 | class Machiavelli: |
| 200 | def __repr__(self): |
| 201 | dict.clear() |
| 202 | |
| 203 | # Michael sez: "doesn't crash without this. don't know why." |
| 204 | # Tim sez: "luck of the draw; crashes with or without for me." |
| 205 | print >> f |
| 206 | |
| 207 | return `"machiavelli"` |
| 208 | |
| 209 | def __hash__(self): |
| 210 | return 0 |
| 211 | |
| 212 | dict[Machiavelli()] = Machiavelli() |
| 213 | |
| 214 | print >> f, str(dict) |
| 215 | f.close() |
| 216 | os.unlink(TESTFN) |
| 217 | del f, dict |
Tim Peters | 453163d | 2001-06-03 04:54:32 +0000 | [diff] [blame] | 218 | |
| 219 | |
| 220 | ########################################################################## |
| 221 | # And another core-dumper from Michael Hudson. |
| 222 | |
| 223 | dict = {} |
| 224 | |
| 225 | # let's force dict to malloc its table |
| 226 | for i in range(1, 10): |
| 227 | dict[i] = i |
| 228 | |
| 229 | class Machiavelli2: |
| 230 | def __eq__(self, other): |
| 231 | dict.clear() |
| 232 | return 1 |
| 233 | |
| 234 | def __hash__(self): |
| 235 | return 0 |
| 236 | |
| 237 | dict[Machiavelli2()] = Machiavelli2() |
| 238 | |
| 239 | try: |
| 240 | dict[Machiavelli2()] |
| 241 | except KeyError: |
| 242 | pass |
| 243 | |
| 244 | del dict |
| 245 | |
| 246 | ########################################################################## |
| 247 | # And another core-dumper from Michael Hudson. |
| 248 | |
| 249 | dict = {} |
| 250 | |
| 251 | # let's force dict to malloc its table |
| 252 | for i in range(1, 10): |
| 253 | dict[i] = i |
| 254 | |
| 255 | class Machiavelli3: |
| 256 | def __init__(self, id): |
| 257 | self.id = id |
| 258 | |
| 259 | def __eq__(self, other): |
| 260 | if self.id == other.id: |
| 261 | dict.clear() |
| 262 | return 1 |
| 263 | else: |
| 264 | return 0 |
| 265 | |
| 266 | def __repr__(self): |
| 267 | return "%s(%s)"%(self.__class__.__name__, self.id) |
| 268 | |
| 269 | def __hash__(self): |
| 270 | return 0 |
| 271 | |
| 272 | dict[Machiavelli3(1)] = Machiavelli3(0) |
| 273 | dict[Machiavelli3(2)] = Machiavelli3(0) |
| 274 | |
| 275 | f = open(TESTFN, "w") |
| 276 | try: |
| 277 | try: |
| 278 | print >> f, dict[Machiavelli3(2)] |
| 279 | except KeyError: |
| 280 | pass |
| 281 | finally: |
| 282 | f.close() |
| 283 | os.unlink(TESTFN) |
| 284 | |
| 285 | del dict |