Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | |
"""
combinerefs path

A helper for analyzing PYTHONDUMPREFS output.

When the PYTHONDUMPREFS environment variable is set in a debug build, at
Python shutdown time Py_Finalize() prints the list of all live objects twice:
first it prints the repr() of each object while the interpreter is still fully
intact.  After cleaning up everything it can, it prints all remaining live
objects again, but the second time just prints their addresses, refcounts, and
type names.

Save all this output into a file, then run this script passing the path to
that file.  The script finds both output chunks, combines them, then prints
a line of output for each object still alive at the end:

    address refcnt typename repr

address is the address of the object, in whatever format the platform C
produces for a %p format code.

refcnt is of the form

    "[" ref "]"

when the object's refcount is the same in both PYTHONDUMPREFS output blocks,
or

    "[" ref_before "->" ref_after "]"

if the refcount changed.

typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS
output block.

repr is repr(object), extracted from the first PYTHONDUMPREFS output block.

The objects are listed in allocation order, with most-recently allocated
printed first, and the first object allocated printed last.


Simple examples:

    00857060 [14] str '__len__'

The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS
output blocks said there were 14 references to it.  This is probably due to
C modules that intern the string "__len__" and keep a reference to it in a
file static.

    00857038 [46->5] tuple ()

46-5 = 41 references to the empty tuple were removed by the cleanup actions
between the times PYTHONDUMPREFS produced output.

    00858028 [1025->1456] str '<dummy key>'

The string '<dummy key>', which is used in dictobject.c as the name of the
dummy key that overwrites a real key that gets deleted, actually grew
several hundred references during cleanup.  It suggests that stuff did get
removed from dicts by cleanup, but that the dicts themselves are staying
alive for some reason.
"""
| 66 | |
| 67 | import re |
| 68 | import sys |
| 69 | |
# Generate lines from fileiter.  If whilematch is true, continue reading
# while the regexp object pat matches line.  If whilematch is false, lines
# are read so long as pat doesn't match them.  In any case, the first line
# that doesn't match pat (when whilematch is true), or that does match pat
# (when whilematch is false), is consumed but not yielded (it is lost), and
# fileiter will resume at the line following it.
def read(fileiter, pat, whilematch):
    """Yield the leading run of lines from fileiter selected by pat.

    pat is a compiled regexp object.  With whilematch true, lines are
    yielded for as long as pat matches them; with whilematch false, for as
    long as pat does not match them.  The first line that ends the run is
    consumed from fileiter but never yielded, so any later reader of
    fileiter resumes at the line following it.
    """
    for text in fileiter:
        if bool(pat.match(text)) != whilematch:
            # Terminating line: it has already been pulled from fileiter,
            # so it is dropped here.
            return
        yield text
Tim Peters | 21d7d4d | 2003-04-18 00:45:59 +0000 | [diff] [blame] | 82 | |
def combine(fname):
    """Combine the two PYTHONDUMPREFS output blocks stored in file fname.

    The file is scanned for the block following a "Remaining objects:" line
    (address, refcount and repr of each object) and then for the block
    following a "Remaining object addresses:" line (address, refcount and
    type name).  For every object in the second block that was also seen in
    the first, one line is written to sys.stdout:

        address [refcnt] typename repr

    where the refcount is shown as "[before->after]" if it differs between
    the two blocks.  Lines of the first block that cannot be parsed, and
    objects that appear only in the second block, are reported on lines
    starting with "???".  A final summary line gives the number of objects
    parsed from each block.

    Output goes through sys.stdout.write rather than print so the function
    behaves the same on Python 2 and Python 3.
    """
    # One entry per object: address, "[refcount]", then the rest of the line
    # (the repr in the first block, the type name in the second).
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')
    emit = sys.stdout.write
    addr2rc = {}
    addr2guts = {}
    before = 0
    after = 0

    f = open(fname)
    try:
        fi = iter(f)

        # Discard everything up to and including the first block's header.
        for line in read(fi, re.compile(r'^Remaining objects:$'), False):
            pass

        # First block: runs until the second block's header, which read()
        # consumes for us.
        for line in read(fi, re.compile(r'^Remaining object addresses:$'),
                         False):
            m = crack.match(line)
            if m:
                addr, rc, guts = m.groups()     # guts is the repr here
                addr2rc[addr] = rc
                addr2guts[addr] = guts
                before += 1
            else:
                # Strip the line's own newline so no blank line is emitted
                # after the message.
                emit('??? skipped: %s\n' % line.rstrip())

        # Second block: runs for as long as lines look like object entries.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            # read() yielded this line only because crack matched it.
            assert m
            addr, rc, guts = m.groups()         # guts is the type name here
            if addr not in addr2rc:
                emit('??? new object created while tearing down: %s\n'
                     % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                refcnt = '[%s]' % rc
            else:
                refcnt = '[%s->%s]' % (addr2rc[addr], rc)
            emit('%s %s %s %s\n' % (addr, refcnt, guts, addr2guts[addr]))
    finally:
        # Close the file even if parsing or writing raised.
        f.close()

    emit("%d objects before, %d after\n" % (before, after))
| 120 | |
if __name__ == '__main__':
    # The script takes the path of the saved PYTHONDUMPREFS output as its
    # first argument; report a usage error instead of an IndexError
    # traceback when it is missing.  Extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit("usage: combinerefs path")
    combine(sys.argv[1])