blob: 772602079143bfad1ec9b85874fe690669cc7a89 [file] [log] [blame]
#!/usr/bin/python
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types.  Documentation on this API can be seen at:
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html


This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython.  In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.

This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.

In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process.  For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
  ["foo", "bar", "baz"]

Doing so can be expensive for complicated graphs of objects, and could take
some time, so we also have a "write_repr" method that writes a representation
of the data to a file-like object.  This allows us to stop the traversal by
having the file-like object raise an exception if it gets too much data.

With both "proxyval" and "write_repr" we keep track of the set of all addresses
visited so far in the traversal, to avoid infinite recursion due to cycles in
the graph of object references.

We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.

The module also extends gdb with some python-specific commands.
'''
42from __future__ import with_statement
43import gdb
44
# Look up the gdb.Type for some standard types:
_type_char_ptr = gdb.lookup_type('char').pointer()  # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer()  # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer()  # void*
_type_size_t = gdb.lookup_type('size_t')

# Size of a (void*) as reported by gdb; used for the alignment arithmetic in
# _PyObject_VAR_SIZE and HeapTypeObjectPtr.get_attr_dict
SIZEOF_VOID_P = _type_void_ptr.sizeof
52
53
# Bit masks tested against a type object's tp_flags field (see
# PyObjectPtr.subclass_from_type).  Written as plain integer shifts: the
# values are identical to the former "1L << n" spelling, without relying on
# the Python-2-only long-literal suffix.
Py_TPFLAGS_HEAPTYPE = (1 << 9)

Py_TPFLAGS_INT_SUBCLASS = (1 << 23)
Py_TPFLAGS_LONG_SUBCLASS = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26)
Py_TPFLAGS_STRING_SUBCLASS = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31)


# Upper bound on the length of the repr emitted by PyObjectPtrPrinter
MAX_OUTPUT_LEN = 1024
68
class NullPyObjectPtr(RuntimeError):
    """
    Raised by PyObjectPtr.field() when a field is requested from a NULL
    pointer.

    Being a RuntimeError, it is also swallowed by the generic
    "except RuntimeError" fail-safe handlers in this module.
    """
    pass
71
72
def safety_limit(val):
    """
    Given an integer value from the process being debugged, limit it to some
    safety threshold so that arbitrary breakage within said process doesn't
    break the gdb process too much (e.g. sizes of iterations, sizes of lists)

    Returns the smaller of "val" and 1000.
    """
    return min(val, 1000)
78
79
def safe_range(val):
    """
    As per xrange, but don't trust the value too much: cap it to a safety
    threshold (see safety_limit) in case the data was corrupted
    """
    return xrange(safety_limit(val))
84
85
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once its length limit is hit."""
    pass
88
class TruncatedStringIO(object):
    '''Similar to cStringIO, but can truncate the output by raising a
    StringTruncated exception'''

    def __init__(self, maxlen=None):
        # maxlen: maximum number of characters to accumulate; a false value
        # (None or 0) means "no limit"
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        '''
        Append "data" to the buffer.

        If doing so would exceed maxlen, keep only as much of "data" as fits
        and raise StringTruncated, so that the caller can abandon whatever
        traversal was generating the output.
        '''
        if self.maxlen:
            if len(data) + len(self._val) > self.maxlen:
                # Truncation:
                self._val += data[0:self.maxlen - len(self._val)]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far, as a single string'''
        return self._val
107
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct type that get_gdb_type() looks up; overridden by
    # subclasses
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        gdbval: the gdb.Value holding the pointer
        cast_to: optional gdb.Type; if given, the pointer is cast to it
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            try:
                # Python 2:
                return self._gdbval.dereference()[name]
            except RuntimeError:
                # Python 3:
                return self._gdbval.dereference()['ob_base'][name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Get a PyTypeObjectPtr wrapping this object's ob_type field'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Get the tp_name of this object's type as a string, or 'unknown' if
        it can't be read
        '''
        try:
            return self.type().field('tp_name').string()
        except NullPyObjectPtr:
            # NULL tp_name?
            return 'unknown'
        except RuntimeError:
            # Can't even read the object at all?
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'instance': PyInstanceObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set': PySetObjectPtr,
                    'frozenset': PySetObjectPtr,
                    'builtin_function_or_method': PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
            return PyIntObjectPtr
        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
            return PyStringObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.

        If anything goes wrong whilst doing so (e.g. a NULL pointer), fall
        back to wrapping the un-cast pointer in "cls" itself.
        '''
        try:
            p = PyObjectPtr(gdbval)
            # Deliberately not rebinding "cls": if constructing the refined
            # subclass fails, the fallback below must use the class we were
            # called on, not the subclass that has just failed.
            subcls = cls.subclass_from_type(p.type())
            return subcls(gdbval, cast_to=subcls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Get the gdb.Type for a pointer to this class's C struct'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Get the wrapped pointer as an integer address'''
        return long(self._gdbval)
357
358
class ProxyAlreadyVisited(object):
    '''
    Placeholder proxy to use when protecting against infinite recursion due to
    loops in the object graph.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # rep: the exact string that repr() of this placeholder should give
        self._rep = rep

    def __repr__(self):
        return self._rep
371
372
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by old-style and new-style classes:
    write a representation to file-like object "out"

    out: file-like object to write to
    visited: set of addresses already visited in this traversal
    name: class name to display
    pyop_attrdict: the instance's attribute dictionary; only written out if
        it is a PyDictObjectPtr
    address: address of the instance, as an integer
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        first = True
        for pyop_arg, pyop_val in pyop_attrdict.iteritems():
            if not first:
                out.write(', ')
            first = False
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
392
393
class InstanceProxy(object):
    '''
    Proxy for an instance of a class within the inferior process, with a
    repr() showing the class name, the attributes and the remote address
    '''

    def __init__(self, cl_name, attrdict, address):
        # cl_name: name of the class
        # attrdict: proxy for the attribute dictionary (a dict, or anything
        #     else if the attributes could not be scraped)
        # address: address of the instance, as an integer
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if isinstance(self.attrdict, dict):
            # .items() rather than .iteritems(): same pairs, and works on
            # both Python 2 and Python 3
            kwargs = ', '.join(["%s=%r" % (arg, val)
                                for arg, val in self.attrdict.items()])
            return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                                kwargs, self.address)
        else:
            return '<%s at remote 0x%x>' % (self.cl_name,
                                           self.address)
410
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
    rounded up to a multiple of SIZEOF_VOID_P, as a gdb.Value cast to size_t

    typeobj: PyObjectPtr wrapping the type object
    nitems: number of variable-size items
    '''
    return ( ( typeobj.field('tp_basicsize') +
               nitems * typeobj.field('tp_itemsize') +
               (SIZEOF_VOID_P - 1)
             ) & ~(SIZEOF_VOID_P - 1)
           ).cast(_type_size_t)
417
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyObject* whose type has
    Py_TPFLAGS_HEAPTYPE set, i.e. an instance of a new-style class
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # Negative offset: relative to the end of the
                    # variable-sized object
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the (possibly corrupt) inferior,
                    # so validate explicitly rather than with "assert": an
                    # AssertionError would escape the RuntimeError handler
                    # below, and asserts vanish entirely under "python -O".
                    if dictoffset <= 0 or dictoffset % SIZEOF_VOID_P != 0:
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<typename(attr=value, ...) at remote 0x...>" to "out", without
        building a full proxy value
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
483
class ProxyException(Exception):
    '''
    Proxy for an exception instance within the inferior process, with a
    repr() of the form "TypeName(args...)"
    '''
    def __init__(self, tp_name, args):
        # tp_name: name of the exception's type
        # args: proxy for the exception's "args" field
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        return '%s%r' % (self.tp_name, self.args)
491
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Build a ProxyException from the type name and the "args" field'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(self.as_address())
        arg_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(),
                              arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name followed by the repr of the "args" field'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
517
class PyBoolObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    _typename = 'PyBoolObject'

    def proxyval(self, visited):
        '''Return True if the ob_ival field is non-zero, else False'''
        return bool(int_from_int(self.field('ob_ival')))
530
531
class PyClassObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
    instance within the process being debugged.
    """
    _typename = 'PyClassObject'
538
539
class BuiltInFunctionProxy(object):
    '''Proxy for a built-in function: one with no bound "self" object'''
    def __init__(self, ml_name):
        # ml_name: the function's name, as a string
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % self.ml_name
546
class BuiltInMethodProxy(object):
    '''Proxy for a built-in method bound to an object in the inferior'''
    def __init__(self, ml_name, pyop_m_self):
        # ml_name: the method's name, as a string
        # pyop_m_self: PyObjectPtr for the object the method is bound to
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        return ('<built-in method %s of %s object at remote 0x%x>'
                % (self.ml_name,
                   self.pyop_m_self.safe_tp_name(),
                   self.pyop_m_self.as_address())
                )
558
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy if m_self is NULL, otherwise a
        BuiltInMethodProxy bound to m_self
        '''
        m_ml = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = m_ml['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if pyop_m_self.is_null():
            return BuiltInFunctionProxy(ml_name)
        else:
            return BuiltInMethodProxy(ml_name, pyop_m_self)
575
576
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab is consumed as (address increment, line increment) pairs
        addr = 0
        for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
            addr += ord(addr_incr)
            if addr > addrq:
                return lineno
            lineno += ord(line_incr)
        return lineno
604
605
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()
        '''
        # Walk the slots of ma_table, skipping those with a NULL value
        for i in safe_range(self.field('ma_mask') + 1):
            ep = self.field('ma_table') + i
            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pyop_value.is_null():
                pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Build a dict mapping the proxy of each key to that of its value'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(self.as_address())

        result = {}
        for pyop_key, pyop_value in self.iteritems():
            proxy_key = pyop_key.proxyval(visited)
            proxy_value = pyop_value.proxyval(visited)
            result[proxy_key] = proxy_value
        return result

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to "out"'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('{...}')
            return
        visited.add(self.as_address())

        out.write('{')
        first = True
        for pyop_key, pyop_value in self.iteritems():
            if not first:
                out.write(', ')
            first = False
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
655
class PyInstanceObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyInstanceObject* i.e. an instance
    of an old-style class within the process being debugged.
    '''
    _typename = 'PyInstanceObject'

    def proxyval(self, visited):
        '''Build an InstanceProxy from the class name and in_dict'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        # Get name of class:
        in_class = self.pyop_field('in_class')
        cl_name = in_class.pyop_field('cl_name').proxyval(visited)

        # Get dictionary of instance attributes:
        in_dict = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(cl_name, in_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<classname(attr=value, ...) at remote 0x...>" to "out", without
        building a full proxy value
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        # Old-style class:

        # Get name of class:
        in_class = self.pyop_field('in_class')
        cl_name = in_class.pyop_field('cl_name').proxyval(visited)

        # Get dictionary of instance attributes:
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             cl_name, pyop_in_dict, self.as_address())
693
class PyIntObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyIntObject*'''
    _typename = 'PyIntObject'

    def proxyval(self, visited):
        '''Return the ob_ival field as a local integer'''
        return int_from_int(self.field('ob_ival'))
700
class PyListObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyListObject*'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        field_ob_item = self.field('ob_item')
        return field_ob_item[i]

    def proxyval(self, visited):
        '''Build a list of the proxy values of the elements'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(self.as_address())

        result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
                  for i in safe_range(int_from_int(self.field('ob_size')))]
        return result

    def write_repr(self, out, visited):
        '''Write "[elem, ...]" to "out"'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('[...]')
            return
        visited.add(self.as_address())

        out.write('[')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            element.write_repr(out, visited)
        out.write(']')
733
class PyLongObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyLongObject*'''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            # Spelled long(0) rather than with a long literal; same value
            # and type
            return long(0)

        ob_digit = self.field('ob_digit')

        # The size of the "digit" type tells us which SHIFT is in use
        if gdb.lookup_type('digit').sizeof == 2:
            SHIFT = 15
        else:
            SHIFT = 30

        digits = [long(ob_digit[i]) * 2**(SHIFT*i)
                  for i in safe_range(abs(ob_size))]
        result = sum(digits)
        if ob_size < 0:
            result = -result
        return result
772
773
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for None is simply None'''
        return None
783
784
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame of
    python code within the process being debugged.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        As per PyObjectPtr.__init__ (cast_to is optional, matching the base
        class signature), additionally caching various details of the frame
        and its code object, provided the frame hasn't been optimized out.
        '''
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            # This is not a generator, so a bare "return" would hand None to
            # callers that iterate over the result; give them an empty
            # iterable instead
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            # As per iter_globals: an empty iterable rather than None
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame has been optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        with open(self.filename(), 'r') as f:
            all_lines = f.readlines()
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]

    def write_repr(self, out, visited):
        '''
        Write a one-line summary of the frame to "out": address, file, line,
        function name, and the names and values of the local variables
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename,
                     self.current_line_num(),
                     self.co_name))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')
911
class PySetObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PySetObject* i.e. a set or frozenset
    within the process being debugged.
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''
        Build a set (or a frozenset, if the type name is 'frozenset') of the
        proxy values of the members
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                # Skip entries holding the '<dummy key>' placeholder string
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''Write "typename([member, ...])" to "out"'''
        out.write(self.safe_tp_name())

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('([')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # We need the key's proxy value only to filter out the
                # '<dummy key>' placeholder.  Compute it against a scratch
                # copy of "visited": passing the real set would mark
                # container keys (e.g. tuples) as already visited, so that
                # the write_repr call below would emit "(...)" for them.
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('])')
958 out.write('])')
959
960
class PyStringObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyStringObject*'''
    _typename = 'PyStringObject'

    def __str__(self):
        # Read ob_size characters (subject to the safe_range cap) one at a
        # time, starting at the address of the ob_sval field
        field_ob_size = self.field('ob_size')
        field_ob_sval = self.field('ob_sval')
        char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
        return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])

    def proxyval(self, visited):
        '''Return the string's content as a local str'''
        return str(self)
972
class PyTupleObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyTupleObject*'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        field_ob_item = self.field('ob_item')
        return field_ob_item[i]

    def proxyval(self, visited):
        '''Build a tuple of the proxy values of the elements'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(self.as_address())

        result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
                        for i in safe_range(int_from_int(self.field('ob_size')))])
        return result

    def write_repr(self, out, visited):
        '''Write "(elem, ...)" to "out", with a trailing comma for 1-tuples'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('(')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            element.write_repr(out, visited)
        if self.field('ob_size') == 1:
            out.write(',)')
        else:
            out.write(')')
1008
class PyTypeObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyTypeObject*'''
    _typename = 'PyTypeObject'
1011
1012
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyUnicodeObject*'''
    _typename = 'PyUnicodeObject'

    def proxyval(self, visited):
        '''Return the string's content as a local unicode instance'''
        # From unicodeobject.h:
        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
        #     Py_UNICODE *str;    /* Raw Unicode buffer */
        field_length = long(self.field('length'))
        field_str = self.field('str')

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-2 or UCS-4 code points:
        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance:
        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
        return result
1031
1032
def int_from_int(gdbval):
    '''Convert "gdbval" to a python int by way of its string form'''
    as_text = str(gdbval)
    return int(as_text)
1035
1036
def stringify(val):
    '''Return a string rendering of "val" (currently always its repr())'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_repr = True
    if not use_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1045
1046
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # The value this printer was created for
        self.gdbval = gdbval

    def to_string(self):
        '''Return the text to display for the wrapped value'''
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)

        # Hard-wired choice between the two renderings:
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))

        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1062
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to a
    PyObject or a PyFrameObject, otherwise None'''
    gdbtype = gdbval.type.unqualified()
    if gdbtype.code != gdb.TYPE_CODE_PTR:
        # Only pointers are of interest
        return None

    # Compare on the name of the pointed-to type, ignoring qualifiers:
    target_name = str(gdbtype.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1070
1071"""
1072During development, I've been manually invoking the code in this way:
1073(gdb) python
1074
1075import sys
1076sys.path.append('/home/david/coding/python-gdb')
1077import libpython
1078end
1079
1080then reloading it after each edit like this:
1081(gdb) python reload(libpython)
1082
1083The following code should ensure that the prettyprinter is registered
1084if the code is autoloaded by gdb when visiting libpython.so, provided
1085that this python file is installed to the same path as the library (or its
1086.debug file) plus a "-gdb.py" suffix, e.g:
1087 /usr/lib/libpython2.6.so.1.0-gdb.py
1088 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1089"""
def register(obj):
    '''Append pretty_printer_lookup to the pretty_printers list of "obj".

    If "obj" is None the gdb module itself is used instead.
    '''
    # Identity test: None is a singleton, and "==" could be answered by
    # whatever comparison the object passed in happens to implement.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1098
1099
Martin v. Löwis24f09fd2010-04-17 22:40:40 +00001100
1101# Unfortunately, the exact API exposed by the gdb module varies somewhat
1102# from build to build
1103# See http://bugs.python.org/issue8279?#msg102276
1104
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        # The underlying gdb.Frame
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the next older frame, or None if there is none'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        return None

    def newer(self):
        '''Return a Frame for the next newer frame, or None if there is none'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame.  Walk the underlying
        # gdb frames directly so that each step costs a single newer()
        # lookup and no temporary wrapper objects.
        gdbframe = self._gdbframe.newer()
        while gdbframe:
            index += 1
            gdbframe = gdbframe.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if not self._gdbframe.function():
            return False
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False

        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True

        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr built from this frame's variable "f",
        or None if a ValueError is raised while obtaining it'''
        try:
            f = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(f)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a wrapper for gdb's selected frame, or None if gdb does
        not report one'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return cls(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()

        # Search from the selected frame towards the older frames:
        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a short description of this frame to sys.stdout: its index,
        plus the python frame and current line when they can be read'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
                sys.stdout.write(pyop.current_line())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            sys.stdout.write('#%i\n' % self.get_index())
1204
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print lines of the python source file of the selected python frame.

        "args" is the text typed after the command name: empty, "START" or
        "START, END".  With no arguments the five lines either side of the
        current line are listed.  The current line is marked with ">".
        '''
        import re

        start = None
        end = None

        # "py-list START": list from START to START + 10
        m = re.match(r'\s*(\d+)\s*', args)
        if m:
            # Take the captured digits, not the whole match (which also
            # contains the surrounding whitespace):
            start = int(m.group(1))
            end = start + 10

        # "py-list START, END": takes precedence over the form above
        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if m:
            start, end = map(int, m.groups())

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        if start is None:
            start = lineno - 5
            end = lineno + 5

        if start<1:
            start = 1

        # The file named by the inferior may not be readable from where gdb
        # is running; report that instead of failing with a traceback.
        try:
            f = open(filename, 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n' % (filename, err))
            return

        with f:
            all_lines = f.readlines()

        # start and end are 1-based, all_lines is 0-based;
        # so [start-1:end] as a python slice gives us [start, end] as a
        # closed interval
        for i, line in enumerate(all_lines[start-1:end]):
            linestr = str(i+start)
            # Highlight current line:
            if i + start == lineno:
                linestr = '>' + linestr
            sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1274
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    while current:
        # "up" means towards the older frames, "down" towards the newer:
        candidate = current.older() if move_up else current.newer()

        if not candidate:
            # Ran off the end of the stack
            break

        if candidate.is_evalframeex():
            # Result: print the summary only if the frame could be selected
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    # No python frame was found in the requested direction:
    if move_up:
        message = 'Unable to find an older python frame'
    else:
        message = 'Unable to find a newer python frame'
    print(message)
1298 print 'Unable to find a newer python frame'
1299
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Register under the name "py-up" in the stack command class
        gdb.Command.__init__(self, "py-up", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Neither argument is used: always move to the calling python frame
        move_in_stack(move_up=True)


# Instantiate the command:
PyUp()
1313
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Register under the name "py-down" in the stack command class
        gdb.Command.__init__(self, "py-down", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Neither argument is used: always move to the called python frame
        move_in_stack(move_up=False)


# Instantiate the command:
PyDown()
1327
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Register under the name "py-bt" in the stack command class
        gdb.Command.__init__(self, "py-bt", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Start at the selected python frame and walk towards the oldest
        # frame, summarising only the PyEval_EvalFrameEx frames on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()


# Instantiate the command:
PyBacktrace()
1345
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Register under the name "py-print" in the data command class
        gdb.Command.__init__(self, "py-print", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument text is taken as the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        # Printed as: <scope> '<name>' = <truncated repr of the value>
        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))


# Instantiate the command:
PyPrint()
1379
class PyLocals(gdb.Command):
    'Print the names and values of the local variables of the selected python frame'

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # "args" is not used: the command takes no arguments.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One line per local: the name as a plain string, then the
        # truncated repr of its value.
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()