blob: 30347cbc9320b08e4271f302cf4cb0ba6bbf7058 [file] [log] [blame]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types. Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython. In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process. For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
Victor Stinner67df3a42010-04-21 13:53:05 +000022holding three PyObject* that turn out to be PyBytesObject* instances, we can
Martin v. Löwis5ae68102010-04-21 22:38:42 +000023generate a proxy value within the gdb process that is a list of bytes
24instances:
25 [b"foo", b"bar", b"baz"]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000026
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object. This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
# Resolve, once at import time, the gdb.Type objects for the pointer-to-
# primitive C types that the rest of the module casts values to.
def _pointer_to(c_type_name):
    '''Return the gdb.Type describing a pointer to the named C type.'''
    return gdb.lookup_type(c_type_name).pointer()

_type_char_ptr = _pointer_to('char')                      # char*
_type_unsigned_char_ptr = _pointer_to('unsigned char')    # unsigned char*
_type_void_ptr = _pointer_to('void')                      # void*
_type_unsigned_short_ptr = _pointer_to('unsigned short')  # unsigned short*
_type_unsigned_int_ptr = _pointer_to('unsigned int')      # unsigned int*

# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Size in bytes of a pointer in the inferior process
SIZEOF_VOID_P = _type_void_ptr.sizeof
59
60
# Bit masks tested against a type's tp_flags value (see
# PyObjectPtr.subclass_from_type) to pick the wrapper class for an object.
#
# These are written as plain integer literals rather than with the "L"
# suffix: the suffix is a syntax error under Python 3, whereas under
# Python 2 a plain int is promoted to a long automatically when the shift
# overflows, so the values are the same either way.
Py_TPFLAGS_HEAPTYPE = (1 << 9)

Py_TPFLAGS_LONG_SUBCLASS = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26)
Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31)


MAX_OUTPUT_LEN = 1024

hexdigits = "0123456789abcdef"

# Encoding used by write_unicode() when writing unicode text to a file
ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000078
class NullPyObjectPtr(RuntimeError):
    '''Raised by PyObjectPtr.field() when the wrapped pointer is NULL.'''
81
82
def safety_limit(val):
    '''
    Clamp an integer read from the process being debugged to at most 1000.

    Values scraped from the inferior may be garbage, so anything used as an
    iteration count or container size is capped to stop a corrupt value from
    making the gdb process do an unreasonable amount of work.
    '''
    ceiling = 1000
    return min(val, ceiling)
88
89
def safe_range(val):
    '''
    Like xrange(val), but with the upper bound passed through
    safety_limit() first, in case the value came from corrupt data.
    '''
    capped = safety_limit(val)
    return xrange(capped)
94
def write_unicode(file, text):
    '''
    Write a byte or unicode string to "file".

    Byte strings are written unchanged.  Unicode strings are first encoded
    to ENCODING using the 'backslashreplace' error handler, so that
    unencodable characters cannot raise UnicodeEncodeError.
    '''
    if not isinstance(text, unicode):
        file.write(text)
        return
    file.write(text.encode(ENCODING, 'backslashreplace'))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000102
def os_fsencode(filename):
    '''
    Encode a unicode filename to a byte string using the filesystem
    encoding; anything that is not a unicode string is returned unchanged.

    Except for the 'mbcs' encoding (which is used directly), code points in
    the range U+DC80..U+DCFF are mapped back to the single bytes 0x80..0xFF,
    emulating the "surrogateescape" error handler.
    '''
    if not isinstance(filename, unicode):
        return filename

    encoding = sys.getfilesystemencoding()
    if encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(encoding)

    def encode_char(char):
        # surrogateescape error handler
        codepoint = ord(char)
        if 0xDC80 <= codepoint <= 0xDCFF:
            return chr(codepoint - 0xDC00)
        return char.encode(encoding)

    return ''.join([encode_char(char) for char in filename])
119
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write() once its length limit is hit.'''
122
class TruncatedStringIO(object):
    '''Similar to cStringIO, but can truncate the output by raising a
    StringTruncated exception'''

    def __init__(self, maxlen=None):
        # Text accumulated so far
        self._val = ''
        # Maximum length to accumulate; a false value (None, 0) means
        # "no limit"
        self.maxlen = maxlen

    def write(self, data):
        '''
        Append "data".  If that would take the accumulated text past
        maxlen, keep only as much of "data" as fits and raise
        StringTruncated.
        '''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation:
            room = limit - len(self._val)
            self._val = self._val + data[:room]
            raise StringTruncated()

        self._val = self._val + data

    def getvalue(self):
        '''Return everything accumulated so far.'''
        return self._val
141
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct; get_gdb_type() looks up a pointer to this type
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        gdbval: the gdb.Value holding the pointer
        cast_to: optional gdb.Type that gdbval is cast to before storing
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's ob_type field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the tp_name of this object's type as a string, or 'unknown'
        if it cannot be obtained.
        '''
        try:
            return self.type().field('tp_name').string()
        except (RuntimeError, UnicodeDecodeError):
            # RuntimeError: can't even read the object at all?  This also
            #   covers NullPyObjectPtr (a RuntimeError subclass), i.e. a
            #   NULL object or type pointer.
            # UnicodeDecodeError: the bytes at tp_name could not be decoded
            #   as a string (e.g. tp_name points into corrupt memory).
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns "cls" itself if the type cannot be read or nothing more
        specific matches.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs, unreadable memory or
            # an undecodable tp_name by simply using the base class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        # NOTE: Py_TPFLAGS_TYPE_SUBCLASS is not dispatched on here, so type
        # objects fall through to the base class below.

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by wrapping the
            # pointer as-is, without a cast
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to this class's C struct.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer as an integer address.'''
        return long(self._gdbval)
384
class PyVarObjectPtr(PyObjectPtr):
    '''
    PyObjectPtr whose underlying gdb type is a pointer to PyVarObject;
    PyObjectPtr.field() casts to this type in order to read "ob_size".
    '''
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000387
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value returned when an object is reached a second time
    during a traversal, so that loops in the object graph cannot cause
    infinite recursion.

    Its repr() is exactly the string it was built with, analogous to the
    values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''

    def __init__(self, rep):
        # Text to hand back from __repr__, e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        return self._rep
400
401
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by all classes:
    write a representation to file-like object "out"

    The output has the form
        <NAME(ATTR=VALUE, ...) at remote 0xADDRESS>
    where the parenthesised part is only written when pyop_attrdict is a
    PyDictObjectPtr.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
421
422
class InstanceProxy(object):
    '''
    Proxy value for an instance in the inferior process: a class name, the
    proxied attribute dictionary (or some non-dict value if there isn't
    one) and the remote address.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(pairs),
                                            self.address)
439
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
    rounded up to a multiple of SIZEOF_VOID_P, and cast to size_t.

    The gdb.Type for size_t is looked up on first use and cached as an
    attribute of this function.
    '''
    size_t = _PyObject_VAR_SIZE._type_size_t
    if size_t is None:
        size_t = gdb.lookup_type('size_t')
        _PyObject_VAR_SIZE._type_size_t = size_t

    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    return (unaligned & ~align_mask).cast(size_t)
_PyObject_VAR_SIZE._type_size_t = None
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000450
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper for objects whose type has Py_TPFLAGS_HEAPTYPE set (see
    PyObjectPtr.subclass_from_type); they are displayed as a type name
    plus their attribute dictionary.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # A negative offset is relative to the end of the
                    # object, so work out the object's size first
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the inferior process and may
                    # be garbage.  Fail safe with an explicit check: an
                    # "assert" would escape the RuntimeError handler below
                    # and would be stripped entirely under "python -O".
                    if dictoffset <= 0 or dictoffset % SIZEOF_VOID_P != 0:
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<TYPENAME(ATTR=VALUE, ...) at remote 0xADDRESS>" to "out",
        or "<...>" if this object is already being visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
516
class ProxyException(Exception):
    '''
    Proxy value for an exception instance in the inferior process; its
    repr() is the remote type name immediately followed by the repr of
    the arguments.
    '''

    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        return '%s%s' % (self.tp_name, repr(self.args))
524
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the type name and the proxied
        "args" field, or a '(...)' placeholder if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        proxied_args = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), proxied_args)

    def write_repr(self, out, visited):
        '''
        Write the type name followed by the repr of the "args" field, or
        '(...)' if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
550
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyClassObject*, i.e. a <classobj>
    instance within the process being debugged.  Only the underlying gdb
    type differs from the base class.
    """
    _typename = 'PyClassObject'
557
558
class BuiltInFunctionProxy(object):
    '''Proxy value for a built-in function, identified by its ml_name.'''

    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % (self.ml_name,)
565
class BuiltInMethodProxy(object):
    '''
    Proxy value for a built-in method: the method's ml_name plus the
    PyObjectPtr for the object it is bound to.
    '''

    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        return ('<built-in method %s of %s object at remote 0x%x>'
                % (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address()))
577
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy if m_self is NULL, otherwise a
        BuiltInMethodProxy bound to m_self.
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        name = method_def['ml_name'].string()

        bound_self = self.pyop_field('m_self')
        if not bound_self.is_null():
            return BuiltInMethodProxy(name, bound_self)
        return BuiltInFunctionProxy(name)
594
595
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as per PyCode_Addr2Line, not from 0 as
        # lnotab_notes.txt has it:
        line = int_from_int(self.field('co_firstlineno'))

        # co_lnotab is a flat sequence of (address increment, line
        # increment) pairs
        bytecode_addr = 0
        for addr_step, line_step in zip(lnotab[0::2], lnotab[1::2]):
            bytecode_addr += ord(addr_step)
            if bytecode_addr > addrq:
                break
            line += ord(line_step)
        return line
623
624
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Walks the ma_mask + 1 slots of ma_table (capped by safe_range),
        skipping slots whose me_value is NULL.
        '''
        for slot in safe_range(self.field('ma_mask') + 1):
            entry = self.field('ma_table') + slot
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict mapping proxied keys to proxied values, or a '{...}'
        placeholder if this dict is already being visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        return dict((pyop_key.proxyval(visited), pyop_value.proxyval(visited))
                    for pyop_key, pyop_value in self.iteritems())

    def write_repr(self, out, visited):
        '''
        Write "{KEY: VALUE, ...}" to "out", or '{...}' if this dict is
        already being visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
674
class PyListObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyListObject*'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Return a list of the proxied elements (at most safety_limit() of
        them), or a '[...]' placeholder if already being visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''
        Write "[ELEMENT, ...]" to "out", or '[...]' if already being
        visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
707
class PyLongObjectPtr(PyObjectPtr):
    '''Class wrapping a gdb.Value that's a PyLongObject*'''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return long(0)

        ob_digit = self.field('ob_digit')

        # A 2-byte "digit" type means 15-bit digits; otherwise 30-bit
        if gdb.lookup_type('digit').sizeof == 2:
            shift = long(15)
        else:
            shift = long(30)

        magnitude = long(0)
        for index in safe_range(abs(ob_size)):
            magnitude += long(ob_digit[index]) * 2 ** (shift * index)

        if ob_size < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        out.write("%s" % self.proxyval(visited))
751
752
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    def proxyval(self, visited):
        # Read the value as an integer and report its truth value
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000763
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for the inferior's None is simply this process's None.'''
        return None
773
774
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame of
    Python code within the process being debugged.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # The attributes below only exist when the frame pointer is visible
        # to the debugger; every method checks is_optimized_out() first.
        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose slot in f_localsplus is NULL are skipped.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame information is optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, exactly as
        read from the source file (so normally with a trailing newline)

        Returns None if the source file cannot be opened, or if it has
        fewer lines than the current line number (e.g. the file on disk no
        longer matches the code being run).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()
        try:
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num() - 1]
        except IndexError:
            # The line number comes from the inferior process; the file on
            # disk may be shorter than the code that is actually running
            return None

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file FILE, line N, in NAME (LOCAL=VALUE, ...)"
        to "out".
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''
        Write a traceback-style 'File "...", line N, in NAME' line for this
        frame to sys.stdout.
        '''
        if self.is_optimized_out():
            sys.stdout.write('  (frame information optimized out)\n')
            return
        visited = set()
        sys.stdout.write('  File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
916
class PySetObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a PySetObject*; used for both "set"
    and "frozenset" (see PyObjectPtr.subclass_from_type).
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''
        Return a set (or a frozenset, when the type name is 'frozenset') of
        the proxied members, or a 'TYPENAME(...)' placeholder if already
        being visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        collected = []
        table = self.field('table')
        for slot in safe_range(self.field('mask')+1):
            key = table[slot]['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                # Skip slots holding the dummy key
                if key_proxy == '<dummy key>':
                    continue
                collected.append(key_proxy)

        if self.safe_tp_name() == 'frozenset':
            return frozenset(collected)
        return set(collected)

    def write_repr(self, out, visited):
        '''
        Write the set in the style of Python 3's set_repr: "{A, B}" for a
        set, "TYPENAME({A, B})" for other type names, "TYPENAME()" when
        empty, and "(...)" if already being visited.
        '''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        needs_wrapper = (tp_name != 'set')
        if needs_wrapper:
            out.write(tp_name)
            out.write('(')

        out.write('{')
        wrote_any = False
        table = self.field('table')
        for slot in safe_range(self.field('mask')+1):
            key = table[slot]['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                key_proxy = pyop_key.proxyval(visited) # FIXME!
                if key_proxy == '<dummy key>':
                    continue
                if wrote_any:
                    out.write(', ')
                wrote_any = True
                pyop_key.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000979
980
class PyBytesObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyBytesObject*) in the inferior process.'''
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Return the ob_size bytes stored at ob_sval as a str, one
        character per byte.'''
        field_ob_size = self.field('ob_size')
        field_ob_sval = self.field('ob_sval')
        char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
        chars = []
        for i in safe_range(field_ob_size):
            chars.append(chr(char_ptr[i]))
        return ''.join(chars)

    def proxyval(self, visited):
        '''The proxy value is simply the str built by __str__.'''
        return str(self)

    def write_repr(self, out, visited):
        '''Write the value to "out" as a Python 3 bytes literal, i.e. with
        a "b" prefix.'''
        # Local str copy of the inferior's bytes:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.  Double quotes are chosen only when the data
        # contains a single quote and no double quote.
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte == quote or byte == '\\':
                out.write('\\')
                out.write(byte)
            elif byte in whitespace_escapes:
                out.write(whitespace_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Anything else outside printable ASCII becomes \xhh
                value = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(value & 0xf0) >> 4])
                out.write(hexdigits[value & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1023
class PyTupleObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyTupleObject*) in the inferior process.'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index i.'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a tuple holding the proxy value of every element.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write the tuple to "out" using Python's tuple repr syntax,
        including the trailing comma of a one-element tuple.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)

        # A one-element tuple needs a trailing comma before the ")"
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1059
class PyTypeObjectPtr(PyObjectPtr):
    # Wrapper for a (PyTypeObject*) in the inferior process.  It adds no
    # behaviour of its own; it only names the C struct it corresponds to
    # (presumably consumed by the PyObjectPtr base class -- confirm there).
    _typename = 'PyTypeObject'
1062
1063
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001064def _unichr_is_printable(char):
1065 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1066 if char == u" ":
1067 return True
1068 import unicodedata
Antoine Pitroub41e1282010-09-08 20:57:48 +00001069 return unicodedata.category(char) not in ("C", "Z")
1070
# _unichr(x) maps an integer code point to a unicode string.
if sys.maxunicode < 0x10000:
    # sizeof(Py_UNICODE) is 2 in gdb: unichr() cannot produce characters
    # beyond the BMP, so build a surrogate pair for those by hand.
    def _unichr(x):
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    # The builtin already covers the full code point range.
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001082
1083
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyUnicodeObject*) in the inferior process.'''
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return sizeof(Py_UNICODE) as reported by gdb for the inferior.'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a unicode instance, local to gdb's own Python, built from
        the code points stored in the inferior's string.'''
        global _is_pep393
        # Detect the struct layout once and cache the answer: the PEP 393
        # layout is recognised by PyUnicodeObject having a 'data' field.
        if _is_pep393 is None:
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                # Locate the character data: directly after the ascii or
                # compact struct, or via the 'data' field otherwise.
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # 'kind' selects the width of each stored code unit
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # NOTE(review): a high surrogate that is not followed
                    # by a low surrogate is dropped here rather than
                    # appended -- confirm that this is intended.
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write the string to "out" as a Python 3 str literal, i.e.
        without a "u" prefix.'''
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  DEL must be tested via
            # ord(): comparing the character itself with the int 0x7F is
            # never true.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded with ENCODING is
                    # escaped as well
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    if code <= 0xff:
                        # Map 8-bit characters to '\\xhh'
                        out.write('\\x')
                        shifts = (4, 0)
                    elif code >= 0x10000:
                        # Map 21-bit characters to '\U00xxxxxx'
                        out.write('\\U')
                        shifts = (28, 24, 20, 16, 12, 8, 4, 0)
                    else:
                        # Map 16-bit characters to '\uxxxx'
                        out.write('\\u')
                        shifts = (12, 8, 4, 0)
                    # One hex digit per write, most significant first
                    for shift in shifts:
                        out.write(hexdigits[(code >> shift) & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1268
1269
1270
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001271
def int_from_int(gdbval):
    '''Convert "gdbval" to an int by parsing its string form.'''
    text = str(gdbval)
    return int(text)
1274
1275
def stringify(val):
    '''Return the string form of "val" used when displaying proxy values.'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to very long results; this function isolates the choice
    use_pformat = False
    if use_pformat:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1284
1285
class PyObjectPtrPrinter:
    "Pretty-printer object wrapping the gdb.Value of a (PyObject*)"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        '''Return the text to display for the wrapped value.'''
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        # Hard-wired choice: the truncated repr is used.  The alternative,
        # generating the full proxy value and stringifying it, could be
        # expensive.
        use_full_proxy = False
        if not use_full_proxy:
            return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        return stringify(pyop.proxyval(set()))
1301
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of
    the handled struct types; otherwise return None.'''
    val_type = gdbval.type.unqualified()
    if val_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Look at what the pointer points to, ignoring qualifiers.
    target_name = str(val_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1309
1310"""
1311During development, I've been manually invoking the code in this way:
1312(gdb) python
1313
1314import sys
1315sys.path.append('/home/david/coding/python-gdb')
1316import libpython
1317end
1318
1319then reloading it after each edit like this:
1320(gdb) python reload(libpython)
1321
1322The following code should ensure that the prettyprinter is registered
1323if the code is autoloaded by gdb when visiting libpython.so, provided
1324that this python file is installed to the same path as the library (or its
1325.debug file) plus a "-gdb.py" suffix, e.g:
1326 /usr/lib/libpython2.6.so.1.0-gdb.py
1327 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1328"""
def register (obj):
    '''Append pretty_printer_lookup to the pretty_printers list of "obj".

    If "obj" is None, the gdb module itself is used instead.
    '''
    # Identity test: None is a singleton, and "==" could be intercepted by
    # a custom comparison on the object.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1337
1338
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001339
1340# Unfortunately, the exact API exposed by the gdb module varies somewhat
1341# from build to build
1342# See http://bugs.python.org/issue8279?#msg102276
1343
class Frame(object):
    '''
    Thin wrapper around a gdb.Frame that adds Python-specific helpers
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return the wrapped older frame, or None if there is none.'''
        gdb_older = self._gdbframe.older()
        if not gdb_older:
            return None
        return Frame(gdb_older)

    def newer(self):
        '''Return the wrapped newer frame, or None if there is none.'''
        gdb_newer = self._gdbframe.newer()
        if not gdb_newer:
            return None
        return Frame(gdb_newer)

    def select(self):
        '''Select this frame if gdb supports it.

        Returns True on success.  If the underlying frame object has no
        "select" method, a message is printed and False is returned.'''
        if hasattr(self._gdbframe, 'select'):
            self._gdbframe.select()
            return True
        print ('Unable to select frame: '
               'this build of gdb does not expose a gdb.Frame.select method')
        return False

    def get_index(self):
        '''Return the number of frames newer than this one, i.e. 0 for the
        newest frame.'''
        count = 0
        # Walk towards the newest frame, counting the steps taken.
        newer_frame = self.newer()
        while newer_frame:
            count += 1
            newer_frame = newer_frame.newer()
        return count

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False
        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function.
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() != gdb.NORMAL_FRAME:
            return False
        # We have a PyEval_EvalFrameEx frame:
        return True

    def get_pyop(self):
        '''Return a PyFrameObjectPtr built from the variable "f" of this
        frame, or None if reading it raises ValueError.'''
        try:
            own = PyFrameObjectPtr.from_pyobject_ptr(
                self._gdbframe.read_var('f'))
            if not own.is_optimized_out():
                return own
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            caller = self._gdbframe.older()
            if caller:
                from_caller = PyFrameObjectPtr.from_pyobject_ptr(
                    caller.read_var('f'))
                if not from_caller.is_optimized_out():
                    return from_caller
            # Fall back to the optimized-out wrapper of this frame
            return own
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None.'''
        selected = gdb.selected_frame()
        if not selected:
            return None
        return Frame(selected)

    @classmethod
    def get_selected_python_frame(cls):
        '''Starting at the selected frame and moving to older ones, return
        the first PyEval_EvalFrameEx frame, or None if there is none.'''
        candidate = cls.get_selected_frame()
        while candidate:
            if candidate.is_evalframeex():
                return candidate
            candidate = candidate.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write "#<index> <frame repr>" for this frame, followed by the
        current source line when it can be read.'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        summary = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), summary))
        if pyop.is_optimized_out():
            return
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())

    def print_traceback(self):
        '''Write this frame in traceback style, followed by the current
        source line when it can be read.'''
        if not self.is_evalframeex():
            sys.stdout.write(' (not a python frame)\n')
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write(' (unable to read python frame information)\n')
            return

        pyop.print_traceback()
        if pyop.is_optimized_out():
            return
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())
1473
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self,
                             "py-list",
                             gdb.COMMAND_FILES,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Entry point called by gdb with the raw argument string.
        import re

        start = None
        end = None

        # "START": list the lines from START to START + 10
        single = re.match(r'\s*(\d+)\s*', args)
        if single:
            start = int(single.group(1))
            end = start + 10

        # "START, END": an explicit range takes precedence
        ranged = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if ranged:
            start, end = [int(text) for text in ranged.groups()]

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # No argument: show five lines either side of the current line
        if start is None:
            start = lineno - 5
            end = lineno + 5

        # Line numbers are 1-based
        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, text in enumerate(all_lines[start-1:end], start):
                label = str(number)
                # Highlight current line:
                if number == lineno:
                    label = '>' + label
                sys.stdout.write('%4s %s' % (label, text))


# ...and register the command:
PyList()
1549
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    while current:
        # "Up" means towards older frames, "down" towards newer ones.
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            break

        if candidate.is_evalframeex():
            # Found the next python frame; only print it if it could
            # actually be selected.
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1574
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Make the command available under the name "py-up"
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; always step towards older frames.
        move_in_stack(True)
1586
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Make the command available under the name "py-down"
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; always step towards newer frames.
        move_in_stack(False)
1598
# Not all builds of gdb have gdb.Frame.select.  py-up and py-down rely on
# selecting a frame (see move_in_stack), so they are only registered when
# that method is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001603
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Make the command available under the name "py-bt-full"
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards older frames and
        # print a summary for every PyEval_EvalFrameEx frame on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1621
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Make the command available under the name "py-bt"
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        sys.stdout.write('Traceback (most recent call first):\n')
        # Walk from the selected python frame towards older frames and
        # print every PyEval_EvalFrameEx frame in traceback style.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_traceback()
            current = current.older()

PyBacktrace()
1640
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Make the command available under the name "py-print"
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name.
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_text = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_text))

PyPrint()
1674
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'

    def __init__(self):
        # Make the command available under the name "py-locals"
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The command takes no arguments; "args" is ignored.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local; values use the truncated repr
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()