#!/usr/bin/python
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types.  Documentation on this API can be seen at:
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html


This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython.  In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.

This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.

In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process.  For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyBytesObject* instances, we can
generate a proxy value within the gdb process that is a list of bytes
instances:
  [b"foo", b"bar", b"baz"]

Doing so can be expensive for complicated graphs of objects, and could take
some time, so we also have a "write_repr" method that writes a representation
of the data to a file-like object.  This allows us to stop the traversal by
having the file-like object raise an exception if it gets too much data.

With both "proxyval" and "write_repr" we keep track of the set of all addresses
visited so far in the traversal, to avoid infinite recursion due to cycles in
the graph of object references.

We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.

The module also extends gdb with some python-specific commands.
'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
48# Look up the gdb.Type for some standard types:
49_type_char_ptr = gdb.lookup_type('char').pointer() # char*
50_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
51_type_void_ptr = gdb.lookup_type('void').pointer() # void*
Martin v. Löwis24fa9832011-09-28 08:35:25 +020052_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer()
53_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000054
Victor Stinner0c4fbff2011-12-08 00:08:22 +010055# value computed later, see PyUnicodeObjectPtr.proxy()
56_is_pep393 = None
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020057
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000058SIZEOF_VOID_P = _type_void_ptr.sizeof
59
60
61Py_TPFLAGS_HEAPTYPE = (1L << 9)
62
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000063Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
64Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
65Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Martin v. Löwis5ae68102010-04-21 22:38:42 +000066Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000067Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
68Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
69Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
70Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
71
72
73MAX_OUTPUT_LEN=1024
74
Martin v. Löwis5ae68102010-04-21 22:38:42 +000075hexdigits = "0123456789abcdef"
76
Victor Stinner150016f2010-05-19 23:04:56 +000077ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000078
class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested from a NULL (PyObject*)."""
81
82
def safety_limit(val):
    """
    Clamp an integer value read from the process being debugged to a safety
    threshold of 1000.

    Arbitrary breakage within said process must not break the gdb process
    too much (e.g. sizes of iterations, sizes of lists), so values above the
    threshold are replaced by the threshold; smaller values pass through.
    """
    ceiling = 1000
    return min(val, ceiling)
88
89
def safe_range(val):
    """
    As per xrange, but don't trust "val" too much: it is first capped by
    safety_limit() in case the data it was read from was corrupted.
    """
    capped = safety_limit(val)
    return xrange(capped)
94
def write_unicode(file, text):
    """
    Write a byte or unicode string to "file".

    Unicode strings are encoded to the ENCODING encoding with the
    'backslashreplace' error handler, to avoid UnicodeEncodeError.  Byte
    strings are written unchanged.
    """
    if isinstance(text, unicode):
        payload = text.encode(ENCODING, 'backslashreplace')
    else:
        payload = text
    file.write(payload)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000102
def os_fsencode(filename):
    """
    Encode a unicode filename to a byte string using the filesystem
    encoding; anything that is not a unicode object is returned unchanged.

    Code points in the range U+DC80..U+DCFF are mapped back to the single
    bytes 0x80..0xFF (the "surrogateescape" error handler), except for the
    'mbcs' encoding, which is applied to the whole string directly.
    """
    if not isinstance(filename, unicode):
        return filename

    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(fs_encoding)

    def _encode_char(ch):
        # surrogateescape error handler
        codepoint = ord(ch)
        if 0xDC80 <= codepoint <= 0xDCFF:
            return chr(codepoint - 0xDC00)
        return ch.encode(fs_encoding)

    return ''.join([_encode_char(ch) for ch in filename])
119
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once its length limit is hit."""
122
class TruncatedStringIO(object):
    '''Similar to cStringIO, but can truncate the output by raising a
    StringTruncated exception'''

    def __init__(self, maxlen=None):
        # A falsy maxlen (None or 0) means "no limit".
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''
        Append "data" to the buffer.

        If a limit is set and the data would overflow it, append only what
        still fits and raise StringTruncated.
        '''
        limit = self.maxlen
        if limit:
            room = limit - len(self._val)
            if len(data) > room:
                # Truncation:
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far.'''
        return self._val
141
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap the gdb.Value "gdbval", first casting it to the gdb.Type
        "cast_to" if one is given.
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Get a PyTypeObjectPtr wrapping this object's "ob_type" field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Get the tp_name of this object's type as a string, or 'unknown' if
        it cannot be read or decoded.
        '''
        try:
            return self.type().field('tp_name').string()
        except (RuntimeError, UnicodeDecodeError):
            # RuntimeError: can't even read the object at all?  This also
            #   covers NullPyObjectPtr (a RuntimeError subclass), i.e. a
            #   NULL pointer on the way to tp_name.
            # UnicodeDecodeError: the tp_name bytes of a corrupt type object
            #   could not be decoded.
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs, unreadable memory or
            # an undecodable tp_name by simply using the base class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        # Py_TPFLAGS_TYPE_SUBCLASS is deliberately not mapped to
        # PyTypeObjectPtr here.

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply wrapping the
            # pointer without a cast
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Get the gdb.Type for a pointer to this class's "_typename".'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Get the wrapped pointer as a plain integer address.'''
        return long(self._gdbval)
384
class PyVarObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyVarObject*; PyObjectPtr.field()
    casts to this type in order to read "ob_size".
    """
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000387
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy returned in place of an object that has already been
    visited, protecting against infinite recursion due to loops in the
    object graph.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''

    def __init__(self, rep):
        # "rep" is the exact text that repr() of this placeholder gives back.
        self._rep = rep

    def __repr__(self):
        placeholder = self._rep
        return placeholder
400
401
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by all classes:
    write a representation to file-like object "out"

    The output has the form "<NAME(attr=value, ...) at remote 0xADDR>"; the
    parenthesised attribute list is only written when "pyop_attrdict" is a
    PyDictObjectPtr.'''
    emit = out.write
    emit('<')
    emit(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        emit('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                # Every entry after the first is preceded by a separator.
                emit(', ')
            emit(pyop_arg.proxyval(visited))
            emit('=')
            pyop_val.write_repr(out, visited)
        emit(')')

    emit(' at remote 0x%x>' % address)
421
422
class InstanceProxy(object):
    """
    Proxy standing in for an instance in the inferior process: it records the
    class name, a proxy for the attribute dictionary and the remote address,
    and renders them in its repr().
    """

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only the class name and address
        # are shown.
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
439
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
    rounded up to a multiple of SIZEOF_VOID_P, as a gdb.Value of type size_t.

    The gdb.Type for size_t is looked up on first use and cached as an
    attribute of this function.
    '''
    if _PyObject_VAR_SIZE._type_size_t is None:
        _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t')

    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    aligned = unaligned & ~align_mask
    return aligned.cast(_PyObject_VAR_SIZE._type_size_t)
_PyObject_VAR_SIZE._type_size_t = None
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000450
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value for an object whose type has
    Py_TPFLAGS_HEAPTYPE set; it is shown together with the contents of its
    attribute dictionary.
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset == 0:
                # Not found:
                return None

            if dictoffset < 0:
                # A negative offset is adjusted by the size of the variable
                # sized object before being used.
                var_ptr_type = gdb.lookup_type('PyVarObject').pointer()
                nitems = int_from_int(self._gdbval.cast(var_ptr_type)['ob_size'])
                nitems = -nitems if nitems < 0 else nitems
                dictoffset += _PyObject_VAR_SIZE(typeobj, nitems)
                assert dictoffset > 0
                assert dictoffset % SIZEOF_VOID_P == 0

            # Step "dictoffset" bytes into the object and read the
            # (PyObject*) stored there:
            slot = self._gdbval.cast(_type_char_ptr) + dictoffset
            slot = slot.cast(PyObjectPtr.get_gdb_type().pointer())
            return PyObjectPtr.from_pyobject_ptr(slot.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        pyop_attr_dict = self.get_attr_dict()
        attr_dict = pyop_attr_dict.proxyval(visited) if pyop_attr_dict else {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<NAME(attr=value, ...) at remote 0xADDR>" for this object to
        "out", or '<...>' if it has already been visited.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        pyop_attrdict = self.get_attr_dict()
        tp_name = self.safe_tp_name()
        _write_instance_repr(out, visited, tp_name, pyop_attrdict,
                             self.as_address())
516
class ProxyException(Exception):
    """
    Proxy for an exception in the inferior process: its repr() is the remote
    type name immediately followed by the repr of its args.
    """

    def __init__(self, tp_name, args):
        self.args = args
        self.tp_name = tp_name

    def __repr__(self):
        name, arguments = self.tp_name, self.args
        return '%s%r' % (name, arguments)
524
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Build a ProxyException from the remote type name and a proxy of the
        exception's "args" field.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        pyop_args = self.pyop_field('args')
        arg_proxy = pyop_args.proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, arg_proxy)

    def write_repr(self, out, visited):
        '''
        Write the remote type name followed by the repr of the "args" field
        to "out".
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        tp_name = self.safe_tp_name()
        out.write(tp_name)
        self.write_field_repr('args', out, visited)
550
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
557
558
class BuiltInFunctionProxy(object):
    """Proxy whose repr() mimics that of a built-in function."""

    def __init__(self, ml_name):
        # Name of the function, as shown in the repr.
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
565
class BuiltInMethodProxy(object):
    """
    Proxy whose repr() mimics that of a built-in method bound to an object
    in the inferior process.
    """

    def __init__(self, ml_name, pyop_m_self):
        # "pyop_m_self" must provide safe_tp_name() and as_address().
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        fields = (self.ml_name,
                  owner.safe_tp_name(),
                  owner.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % fields
577
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when "m_self" is NULL, otherwise a
        BuiltInMethodProxy bound to "m_self".
        '''
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
594
595
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab is walked as consecutive (address increment,
        # line increment) pairs; a trailing unpaired entry is ignored.
        entries = iter(co_lnotab)
        addr = 0
        for addr_incr, line_incr in zip(entries, entries):
            addr += ord(addr_incr)
            if addr > addrq:
                break
            lineno += ord(line_incr)
        return lineno
623
624
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        for slot in safe_range(keys['dk_size']):
            entry = keys['dk_entries'].address + slot
            # When "ma_values" is non-NULL the value is read from that array,
            # otherwise from the entry itself.
            raw_value = values[slot] if long(values) else entry['me_value']
            pyop_value = PyObjectPtr.from_pyobject_ptr(raw_value)
            if pyop_value.is_null():
                # Entries with a NULL value are skipped.
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Build a dict of proxy keys to proxy values, or a '{...}' placeholder
        if this dict has already been visited.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        # Within each pair the key proxy is built before the value proxy.
        return dict((pyop_key.proxyval(visited), pyop_value.proxyval(visited))
                    for pyop_key, pyop_value in self.iteritems())

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" for this dict to "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
679
class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Build a list of proxies for the elements, or a '[...]' placeholder if
        this list has already been visited.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        result = []
        for index in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            result.append(element.proxyval(visited))
        return result

    def write_repr(self, out, visited):
        '''Write "[element, ...]" for this list to "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
712
713class PyLongObjectPtr(PyObjectPtr):
714 _typename = 'PyLongObject'
715
716 def proxyval(self, visited):
717 '''
718 Python's Include/longobjrep.h has this declaration:
719 struct _longobject {
720 PyObject_VAR_HEAD
721 digit ob_digit[1];
722 };
723
724 with this description:
725 The absolute value of a number is equal to
726 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
727 Negative numbers are represented with ob_size < 0;
728 zero is represented by ob_size == 0.
729
730 where SHIFT can be either:
731 #define PyLong_SHIFT 30
732 #define PyLong_SHIFT 15
733 '''
734 ob_size = long(self.field('ob_size'))
735 if ob_size == 0:
736 return 0L
737
738 ob_digit = self.field('ob_digit')
739
740 if gdb.lookup_type('digit').sizeof == 2:
741 SHIFT = 15L
742 else:
743 SHIFT = 30L
744
745 digits = [long(ob_digit[i]) * 2**(SHIFT*i)
746 for i in safe_range(abs(ob_size))]
747 result = sum(digits)
748 if ob_size < 0:
749 result = -result
750 return result
751
Martin v. Löwis5ae68102010-04-21 22:38:42 +0000752 def write_repr(self, out, visited):
753 # Write this out as a Python 3 int literal, i.e. without the "L" suffix
754 proxy = self.proxyval(visited)
755 out.write("%s" % proxy)
756
757
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """

    def proxyval(self, visited):
        # The integer value is scraped as for any PyLongObject and then
        # reduced to True or False.
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000768
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The proxy for the remote None is simply the local None.
        proxy = None
        return proxy
778
779
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame within
    the process being debugged.

    Unless the frame pointer has been optimized out, the constructor caches
    the code object and a few frequently used fields as attributes.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            # Slots holding a NULL pointer are skipped:
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame has been optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns None if the source file cannot be opened, or if the current
        line number does not fall within the file as it exists on disk (for
        example because the file was edited after the program started).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()

        lineno = self.current_line_num()
        # An out-of-range line number would otherwise raise IndexError, or
        # (for line 0) silently select the last line of the file.
        if not 1 <= lineno <= len(all_lines):
            return None
        # Convert from 1-based current_line_num to 0-based list offset:
        return all_lines[lineno - 1]

    def write_repr(self, out, visited):
        '''
        Write a one-line description of the frame to "out": its address,
        file, line, function name and local variables.
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''Write a traceback-style "File ..., line ..., in ..." line for this
        frame to sys.stdout.'''
        if self.is_optimized_out():
            sys.stdout.write('  (frame information optimized out)\n')
            return
        visited = set()
        sys.stdout.write('  File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
921
class PySetObjectPtr(PyObjectPtr):
    '''
    Wrapper for a PySetObject in the inferior process; tp_name is used to
    tell "set" and "frozenset" apart.
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''Build a set (or frozenset) of proxy values for the live keys.

        Returns a ProxyAlreadyVisited placeholder if this object is already
        in "visited".'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                # Skip the placeholder entry, recognised by its proxy value:
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''Write a Python 3 style repr of the set to "out".'''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # The proxy is only needed to recognise the dummy entry.
                # Build it against a copy of "visited": proxyval() records
                # container addresses as visited, and doing that on the real
                # set would make the write_repr() call below render a tuple
                # or frozenset key as "(...)" instead of its contents.
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000984
985
class PyBytesObjectPtr(PyObjectPtr):
    _typename = 'PyBytesObject'

    def __str__(self):
        # Read ob_size items starting at ob_sval, converting each with chr()
        length = self.field('ob_size')
        buf = self.field('ob_sval').address.cast(_type_unsigned_char_ptr)
        chars = []
        for offset in safe_range(length):
            chars.append(chr(buf[offset]))
        return ''.join(chars)

    def proxyval(self, visited):
        # The proxy for a bytes object is simply its str() form
        return str(self)

    def write_repr(self, out, visited):
        # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix

        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.  Double quotes are used only when the data
        # contains a single quote and no double quote:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in whitespace_escapes:
                out.write(whitespace_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1028
class PyTupleObjectPtr(PyObjectPtr):
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(self.as_address())

        # Collect the proxy of each element in index order:
        size = int_from_int(self.field('ob_size'))
        items = []
        for i in safe_range(size):
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('(')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
        # A one-element tuple needs the trailing comma:
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1064
class PyTypeObjectPtr(PyObjectPtr):
    # Subclass of PyObjectPtr whose _typename is 'PyTypeObject'; it adds no
    # behaviour of its own.
    _typename = 'PyTypeObject'
1067
1068
def _unichr_is_printable(char):
    '''Return True if "char" should be written as-is in a repr, False if it
    must be escaped.

    ASCII space is printable; any other character is printable unless its
    Unicode general category is in the "C" (other/control) or "Z"
    (separator) major class.'''
    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
    if char == u" ":
        return True
    import unicodedata
    # category() returns a two-letter code such as "Cc" or "Zs"; only the
    # first letter (the major class) decides printability.
    return unicodedata.category(char)[0] not in ("C", "Z")
1075
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        '''Return the unicode string for code point x, using a surrogate
        pair when x does not fit in 16 bits.'''
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001087
1088
class PyUnicodeObjectPtr(PyObjectPtr):
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return the size gdb reports for the Py_UNICODE type.'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a unicode string (in the gdb process) built from the code
        points stored in this object.'''
        global _is_pep393
        if _is_pep393 is None:
            # Detect the layout once: it is treated as the PEP 393 layout
            # when PyUnicodeObject has a "data" field.
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii_base = compact['_base']
            state = ascii_base['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii_base['wstr']
            else:
                field_length = long(ascii_base['length'])
                if is_compact_ascii:
                    field_str = ascii_base.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Lone high surrogate: keep it (as is already done when
                    # it is the last code unit) rather than dropping it, and
                    # let the next iteration handle the following unit.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write a Python 3 style str literal for this object to "out".'''
        # Write this out as a Python 3 str literal, i.e. without a "u" prefix

        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', \n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  The DEL test must
            # compare code points: a character never equals the int 0x7F.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # Characters that cannot be encoded with ENCODING are
                    # escaped as well:
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        out.write(hexdigits[(code >> 28) & 0x0000000F])
                        out.write(hexdigits[(code >> 24) & 0x0000000F])
                        out.write(hexdigits[(code >> 20) & 0x0000000F])
                        out.write(hexdigits[(code >> 16) & 0x0000000F])
                        out.write(hexdigits[(code >> 12) & 0x0000000F])
                        out.write(hexdigits[(code >> 8) & 0x0000000F])
                        out.write(hexdigits[(code >> 4) & 0x0000000F])
                        out.write(hexdigits[code & 0x0000000F])
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        out.write(hexdigits[(code >> 12) & 0x000F])
                        out.write(hexdigits[(code >> 8) & 0x000F])
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1273
1274
1275
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001276
def int_from_int(gdbval):
    '''Convert gdbval to a Python int by parsing its str() form.'''
    text = str(gdbval)
    return int(text)
1279
1280
def stringify(val):
    '''Return the string form of val used for display.'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_pformat = False
    if use_pformat:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1289
1290
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # Keep the value; it is only wrapped when to_string() is called
        self.gdbval = gdbval

    def to_string(self):
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1306
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for gdbval if it is a pointer to one of
    the handled struct types, otherwise None.'''
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None
    target_name = str(value_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1314
1315"""
1316During development, I've been manually invoking the code in this way:
1317(gdb) python
1318
1319import sys
1320sys.path.append('/home/david/coding/python-gdb')
1321import libpython
1322end
1323
1324then reloading it after each edit like this:
1325(gdb) python reload(libpython)
1326
1327The following code should ensure that the prettyprinter is registered
1328if the code is autoloaded by gdb when visiting libpython.so, provided
1329that this python file is installed to the same path as the library (or its
1330.debug file) plus a "-gdb.py" suffix, e.g:
1331 /usr/lib/libpython2.6.so.1.0-gdb.py
1332 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1333"""
def register(obj):
    '''Append pretty_printer_lookup to obj.pretty_printers.

    If obj is None, the gdb module itself is used instead.'''
    # Identity test: "== None" would go through the object's own __eq__
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1342
1343
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001344
1345# Unfortunately, the exact API exposed by the gdb module varies somewhat
1346# from build to build
1347# See http://bugs.python.org/issue8279?#msg102276
1348
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the next older gdb frame, or None.'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame for the next newer gdb frame, or None.'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking each frame for
        # its newer neighbour only once per step:
        iter_frame = self.newer()
        while iter_frame:
            index += 1
            iter_frame = iter_frame.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a PyEval_EvalFrameEx frame:
                return True

        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr built from this frame's "f" variable.

        If that value is optimized out, the "f" variable of the calling gdb
        frame is tried instead; if that does not help either, the original
        (optimized out) wrapper is returned.  Returns None if a ValueError
        is raised while reading the variables.'''
        try:
            f = self._gdbframe.read_var('f')
            frame = PyFrameObjectPtr.from_pyobject_ptr(f)
            if not frame.is_optimized_out():
                return frame
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            orig_frame = frame
            caller = self._gdbframe.older()
            if caller:
                f = caller.read_var('f')
                frame = PyFrameObjectPtr.from_pyobject_ptr(f)
                if not frame.is_optimized_out():
                    return frame
            return orig_frame
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame for gdb's selected frame, or None.'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()

        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def _write_current_line(self, pyop):
        '''Write the stripped current source line of pyop to stdout, unless
        the frame is optimized out or the line is unavailable.'''
        if not pyop.is_optimized_out():
            line = pyop.current_line()
            if line is not None:
                sys.stdout.write(' %s\n' % line.strip())

    def print_summary(self):
        '''Write "#<index> <frame repr>" and the current source line to
        stdout.'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                self._write_current_line(pyop)
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write a traceback-style entry and the current source line to
        stdout.'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                pyop.print_traceback()
                self._write_current_line(pyop)
            else:
                sys.stdout.write(' (unable to read python frame information)\n')
        else:
            sys.stdout.write(' (not a python frame)\n')
1478
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
    py-list START
    to list at a different line number within the python source.

    Use
    py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self, "py-list", gdb.COMMAND_FILES,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START, END" gives an explicit range; a lone "START" lists up to
        # line START + 10.
        range_match = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if range_match:
            start, end = map(int, range_match.groups())
        else:
            single_match = re.match(r'\s*(\d+)\s*', args)
            if single_match:
                start = int(single_match.group(0))
                end = start + 10

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # Without arguments, show five lines either side of the current one:
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n' % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, line in enumerate(all_lines[start-1:end], start):
                linestr = str(number)
                # Highlight current line:
                if number == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s %s' % (linestr, line))


# ...and register the command:
PyList()
1554
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    frame = Frame.get_selected_python_frame()
    while frame:
        # Step to the neighbouring gdb frame in the requested direction:
        iter_frame = frame.older() if move_up else frame.newer()
        if not iter_frame:
            break

        if iter_frame.is_evalframeex():
            # Result:
            if iter_frame.select():
                iter_frame.print_summary()
            return

        frame = iter_frame

    # Ran out of frames without finding another python frame:
    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1579
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'
    def __init__(self):
        gdb.Command.__init__(self, "py-up", gdb.COMMAND_STACK,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; always move one python frame up
        move_in_stack(move_up=True)
1591
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'
    def __init__(self):
        gdb.Command.__init__(self, "py-down", gdb.COMMAND_STACK,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; always move one python frame down
        move_in_stack(move_up=False)
1603
# Not all builds of gdb have gdb.Frame.select; py-up and py-down are only
# registered when it is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001608
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        gdb.Command.__init__(self, "py-bt-full", gdb.COMMAND_STACK,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards older frames, printing
        # a summary for each PyEval_EvalFrameEx frame found on the way:
        frame = Frame.get_selected_python_frame()
        while frame:
            if frame.is_evalframeex():
                frame.print_summary()
            frame = frame.older()

PyBacktraceFull()
1626
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        gdb.Command.__init__(self, "py-bt", gdb.COMMAND_STACK,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        sys.stdout.write('Traceback (most recent call first):\n')
        # Walk from the selected python frame towards older frames, printing
        # a traceback entry for each PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_python_frame()
        while frame:
            if frame.is_evalframeex():
                frame.print_traceback()
            frame = frame.older()

PyBacktrace()
1645
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        gdb.Command.__init__(self, "py-print", gdb.COMMAND_DATA,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        # Report the scope the variable was found in along with its value:
        print('%s %r = %s'
              % (scope,
                 name,
                 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))

PyPrint()
1679
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'
    def __init__(self):
        gdb.Command.__init__(self, "py-locals", gdb.COMMAND_DATA,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # This command takes no arguments; "args" is ignored.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local variable of the frame:
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print('%s = %s'
                  % (pyop_name.proxyval(set()),
                     pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()