| #! /usr/bin/env python |
| |
| """ |
| combinerefs path |
| |
| A helper for analyzing PYTHONDUMPREFS output. |
| |
| When the PYTHONDUMPREFS environment variable is set in a debug build, at Python shutdown |
| time Py_Finalize() prints the list of all live objects twice: first it |
| prints the repr() of each object while the interpreter is still fully intact. |
| After cleaning up everything it can, it prints all remaining live objects |
| again, but the second time just prints their addresses, refcounts, and type |
| names (because the interpreter has been torn down, calling repr methods at |
| this point can get into infinite loops or blow up). |
| |
| Save all this output into a file, then run this script passing the path to |
| that file. The script finds both output chunks, combines them, then prints |
| a line of output for each object still alive at the end: |
| |
| address refcnt typename repr |
| |
| address is the address of the object, in whatever format the platform C |
| produces for a %p format code. |
| |
| refcnt is of the form |
| |
| "[" ref "]" |
| |
| when the object's refcount is the same in both PYTHONDUMPREFS output blocks, |
| or |
| |
| "[" ref_before "->" ref_after "]" |
| |
| if the refcount changed. |
| |
| typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS |
| output block. |
| |
| repr is repr(object), extracted from the first PYTHONDUMPREFS output block. |
| CAUTION: If object is a container type, it may not actually contain all the |
| objects shown in the repr: the repr was captured from the first output block, |
| and some of the containees may have been released since then. For example, |
| it's common for the line showing the dict of interned strings to display |
| strings that no longer exist at the end of Py_Finalize; this can be recognized |
| (albeit painfully) because such containees don't have a line of their own. |
| |
| The objects are listed in allocation order, with most-recently allocated |
| printed first, and the first object allocated printed last. |
| |
| |
| Simple examples: |
| |
| 00857060 [14] str '__len__' |
| |
| The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS |
| output blocks said there were 14 references to it. This is probably due to |
| C modules that intern the string "__len__" and keep a reference to it in a |
| file static. |
| |
| 00857038 [46->5] tuple () |
| |
| 46-5 = 41 references to the empty tuple were removed by the cleanup actions |
| between the times PYTHONDUMPREFS produced output. |
| |
| 00858028 [1025->1456] str '<dummy key>' |
| |
| The string '<dummy key>', which is used in dictobject.c to overwrite a real |
| key that gets deleted, grew several hundred references during cleanup. It |
| suggests that stuff did get removed from dicts by cleanup, but that the dicts |
| themselves are staying alive for some reason. """ |
| |
| import re |
| import sys |
| |
| # Generate lines from fileiter. If whilematch is true, continue reading |
| # while the regexp object pat matches line. If whilematch is false, lines |
| # are read so long as pat doesn't match them. In any case, the first line |
| # that doesn't match pat (when whilematch is true), or that does match pat |
| # (when whilematch is false), is lost, and fileiter will resume at the line |
| # following it. |
def read(fileiter, pat, whilematch):
    """Yield lines from fileiter for as long as they agree with whilematch.

    A line agrees when the truth value of pat.match(line) equals
    whilematch.  Generation stops at the first line that disagrees; that
    line has already been taken from fileiter and is not yielded, so when
    fileiter is an iterator the caller resumes with the line after it.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            # The terminating line is consumed here and dropped.
            return
        yield text
| |
def combine(fname):
    """Merge the two PYTHONDUMPREFS dumps stored in fname and print the result.

    Everything up to the 'Remaining objects:' header is discarded.  The
    lines that follow, up to the 'Remaining object addresses:' header, form
    the first dump; each is expected to look like 'address [refcnt] repr'.
    The second dump is the run of consecutive lines after that header that
    have the same shape, where the trailing text is the type name.

    For every object in the second dump that also appeared in the first,
    one line is printed:

        address [refcnt] typename repr

    or, when the refcount differs between the two dumps:

        address [before->after] typename repr

    First-dump lines that cannot be parsed, and second-dump objects that
    were absent from the first dump, are reported on lines starting with
    '???'.  A final line gives the number of objects counted in each dump.

    The file is opened inside a with-statement so that it is closed even
    if processing raises.  Every output line is built as one string and
    handed to print in parentheses, which produces the same text whether
    print is a statement or a function.
    """
    first_header = re.compile(r'^Remaining objects:$')
    second_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    addr2rc = {}
    addr2guts = {}
    before = 0
    after = 0

    with open(fname) as f:
        fi = iter(f)

        # Discard everything up to and including the first header.
        for line in read(fi, first_header, False):
            pass

        # First dump: remember each object's refcount and repr by address.
        for line in read(fi, second_header, False):
            m = crack.match(line)
            if m:
                addr, rc, guts = m.groups()
                addr2rc[addr] = rc
                addr2guts[addr] = guts
                before += 1
            else:
                # line still carries its own line ending here.
                print('??? skipped: %s' % line)

        # Second dump: read() only yields lines that crack matches.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                refs = '[%s]' % rc
            else:
                refs = '[%s->%s]' % (addr2rc[addr], rc)
            print('%s %s %s %s' % (addr, refs, guts, addr2guts[addr]))

    print("%d objects before, %d after" % (before, after))
| |
if __name__ == '__main__':
    # The path of the saved PYTHONDUMPREFS output is the one required
    # argument; without it, exit with a usage message rather than an
    # IndexError traceback.  Any additional arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit('usage: %s path' % sys.argv[0])
    combine(sys.argv[1])