Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | combinerefs path |
| 5 | |
| 6 | A helper for analyzing PYTHONDUMPREFS output. |
| 7 | |
| 8 | When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown |
| 9 | time Py_Finalize() prints the list of all live objects twice: first it |
| 10 | prints the repr() of each object while the interpreter is still fully intact. |
| 11 | After cleaning up everything it can, it prints all remaining live objects |
| 12 | again, but the second time just prints their addresses, refcounts, and type |
Tim Peters | 53f72d7 | 2003-04-19 18:21:04 +0000 | [diff] [blame] | 13 | names (because the interpreter has been torn down, calling repr methods at |
| 14 | this point can get into infinite loops or blow up). |
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 15 | |
| 16 | Save all this output into a file, then run this script passing the path to |
| 17 | that file. The script finds both output chunks, combines them, then prints |
| 18 | a line of output for each object still alive at the end: |
| 19 | |
| 20 | address refcnt typename repr |
| 21 | |
| 22 | address is the address of the object, in whatever format the platform C |
| 23 | produces for a %p format code. |
| 24 | |
| 25 | refcnt is of the form |
| 26 | |
| 27 | "[" ref "]" |
| 28 | |
| 29 | when the object's refcount is the same in both PYTHONDUMPREFS output blocks, |
| 30 | or |
| 31 | |
| 32 | "[" ref_before "->" ref_after "]" |
| 33 | |
| 34 | if the refcount changed. |
| 35 | |
| 36 | typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS |
| 37 | output block. |
| 38 | |
| 39 | repr is repr(object), extracted from the first PYTHONDUMPREFS output block. |
Tim Peters | 53f72d7 | 2003-04-19 18:21:04 +0000 | [diff] [blame] | 40 | CAUTION: If object is a container type, it may not actually contain all the |
| 41 | objects shown in the repr: the repr was captured from the first output block, |
| 42 | and some of the containees may have been released since then. For example, |
| 43 | it's common for the line showing the dict of interned strings to display |
| 44 | strings that no longer exist at the end of Py_Finalize; this can be recognized |
| 45 | (albeit painfully) because such containees don't have a line of their own. |
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 46 | |
| 47 | The objects are listed in allocation order, with most-recently allocated |
| 48 | printed first, and the first object allocated printed last. |
| 49 | |
| 50 | |
| 51 | Simple examples: |
| 52 | |
| 53 | 00857060 [14] str '__len__' |
| 54 | |
| 55 | The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS |
| 56 | output blocks said there were 14 references to it. This is probably due to |
| 57 | C modules that intern the string "__len__" and keep a reference to it in a |
| 58 | file static. |
| 59 | |
| 60 | 00857038 [46->5] tuple () |
| 61 | |
| 62 | 46-5 = 41 references to the empty tuple were removed by the cleanup actions |
| 63 | between the times PYTHONDUMPREFS produced output. |
| 64 | |
| 65 | 00858028 [1025->1456] str '<dummy key>' |
| 66 | |
Tim Peters | 53f72d7 | 2003-04-19 18:21:04 +0000 | [diff] [blame] | 67 | The string '<dummy key>', which is used in dictobject.c to overwrite a real |
| 68 | key that gets deleted, grew several hundred references during cleanup. It |
| 69 | suggests that stuff did get removed from dicts by cleanup, but that the dicts |
| 70 | themselves are staying alive for some reason. """ |
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 71 | |
| 72 | import re |
| 73 | import sys |
| 74 | |
| 75 | # Generate lines from fileiter. If whilematch is true, continue reading |
| 76 | # while the regexp object pat matches line. If whilematch is false, lines |
| 77 | # are read so long as pat doesn't match them. In any case, the first line |
| 78 | # that doesn't match pat (when whilematch is true), or that does match pat |
| 79 | # (when whilematch is false), is lost, and fileiter will resume at the line |
| 80 | # following it. |
def read(fileiter, pat, whilematch):
    """Yield lines from fileiter until the match state of pat flips.

    With whilematch true, lines are yielded for as long as pat matches
    them; with whilematch false, for as long as pat does not match them.
    The first line that ends the run is consumed from fileiter but never
    yielded, so the caller's iterator resumes at the line after it.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            # This terminating line has already been pulled from fileiter
            # and is dropped on purpose.
            return
        yield text
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 87 | |
def combine(fname):
    """Combine the two PYTHONDUMPREFS output blocks stored in file fname.

    Everything up to and including the 'Remaining objects:' line is
    skipped.  Lines up to the 'Remaining object addresses:' line are then
    parsed as "address [refcount] rest" and remembered by address; lines
    that do not parse are reported with a '??? skipped:' message.  The
    following run of lines that parse the same way forms the second block:
    for each one whose address was seen in the first block, a line

        address [refcnt] typename repr

    is printed, where refcnt is shown as "before->after" if the refcount
    differs between the blocks.  Addresses that only appear in the second
    block are reported with a '???' message instead.  A final summary line
    gives the number of parsed lines in each block.
    """
    start_first = re.compile(r'^Remaining objects:$')
    start_second = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    # Use a with-block so the file is closed even if parsing or printing
    # raises part-way through.
    with open(fname) as f:
        fi = iter(f)

        # Discard everything before the first block (read() also consumes
        # the header line itself).
        for line in read(fi, start_first, False):
            pass

        addr2rc = {}
        addr2guts = {}
        before = 0
        for line in read(fi, start_second, False):
            m = crack.match(line)
            if m:
                # Targets are assigned left to right, so addr is bound
                # before it is used as the key for the two dicts.
                addr, addr2rc[addr], addr2guts[addr] = m.groups()
                before += 1
            else:
                print('??? skipped:', line)

        after = 0
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m  # read() only yields lines that crack matches
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down:',
                      line.rstrip())
                continue
            print(addr, end=' ')
            if rc == addr2rc[addr]:
                print('[%s]' % rc, end=' ')
            else:
                print('[%s->%s]' % (addr2rc[addr], rc), end=' ')
            print(guts, addr2guts[addr])

    print("%d objects before, %d after" % (before, after))
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 126 | |
if __name__ == '__main__':
    # The script needs the path of the saved PYTHONDUMPREFS output as its
    # first argument; exit with a usage message rather than an IndexError
    # traceback when it is missing.  Extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit('usage: %s path' % sys.argv[0])
    combine(sys.argv[1])