blob: 1e38c26162f437b8d1f9976b2b9a1223a948f146 [file] [log] [blame]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types. Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython. In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations, e.g. a string, a list, a dict, or a
frame giving file/line information and the state of local variables.
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process. For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
Victor Stinner67df3a42010-04-21 13:53:05 +000022holding three PyObject* that turn out to be PyBytesObject* instances, we can
Martin v. Löwis5ae68102010-04-21 22:38:42 +000023generate a proxy value within the gdb process that is a list of bytes
24instances:
25 [b"foo", b"bar", b"baz"]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000026
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object. This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.
40
41The module also extends gdb with some python-specific commands.
42'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
48# Look up the gdb.Type for some standard types:
49_type_char_ptr = gdb.lookup_type('char').pointer() # char*
50_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
51_type_void_ptr = gdb.lookup_type('void').pointer() # void*
52_type_size_t = gdb.lookup_type('size_t')
Martin v. Löwis24fa9832011-09-28 08:35:25 +020053_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer()
54_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000055
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020056_is_pep393 = 'data' in [f.name for f in gdb.lookup_type('PyUnicodeObject').target().fields()]
57
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000058SIZEOF_VOID_P = _type_void_ptr.sizeof
59
60
61Py_TPFLAGS_HEAPTYPE = (1L << 9)
62
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000063Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
64Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
65Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Martin v. Löwis5ae68102010-04-21 22:38:42 +000066Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000067Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
68Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
69Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
70Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
71
72
73MAX_OUTPUT_LEN=1024
74
Martin v. Löwis5ae68102010-04-21 22:38:42 +000075hexdigits = "0123456789abcdef"
76
Victor Stinner150016f2010-05-19 23:04:56 +000077ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000078
class NullPyObjectPtr(RuntimeError):
    '''
    Raised by PyObjectPtr.field() when the wrapped pointer is NULL.

    Being a RuntimeError, it is also swallowed by the many
    "except RuntimeError" fail-safe handlers in this module.
    '''
81
82
def safety_limit(val):
    '''
    Clamp "val" (an integer read from the process being debugged) to at
    most 1000.

    Sizes and counts scraped from the inferior cannot be trusted: if that
    process is corrupt they may be arbitrarily large, and iterating over
    them would hang or bloat the gdb process.  Values at or below the
    threshold (including negative ones) are returned unchanged.
    '''
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
88
89
def safe_range(val):
    '''
    Like xrange(val), but with "val" first capped by safety_limit(), so that
    a corrupt size read from the inferior cannot cause a huge iteration.
    '''
    capped = safety_limit(val)
    return xrange(capped)
94
def write_unicode(file, text):
    '''
    Write "text" (a byte string or a unicode string) to "file".

    Byte strings are written untouched.  Unicode strings are first encoded
    to ENCODING using the 'backslashreplace' error handler, so characters
    that the encoding cannot represent are escaped instead of raising
    UnicodeEncodeError.
    '''
    if not isinstance(text, unicode):
        file.write(text)
        return
    file.write(text.encode(ENCODING, 'backslashreplace'))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000102
def os_fsencode(filename):
    '''
    Encode a unicode "filename" to a byte string using the filesystem
    encoding; anything that is not a unicode string is returned unchanged.

    Code points in the range U+DC80..U+DCFF are mapped back to the single
    bytes 0x80..0xFF, emulating the "surrogateescape" error handler.  That
    emulation is skipped for the 'mbcs' encoding, which does not support
    surrogateescape.
    '''
    if not isinstance(filename, unicode):
        return filename

    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(fs_encoding)

    def encode_char(char):
        # One character -> its byte-string form.
        codepoint = ord(char)
        if 0xDC80 <= codepoint <= 0xDCFF:
            # surrogateescape error handler: lone surrogate -> raw byte
            return chr(codepoint - 0xDC00)
        return char.encode(fs_encoding)

    return ''.join([encode_char(char) for char in filename])
119
class StringTruncated(RuntimeError):
    '''
    Raised by TruncatedStringIO.write() when a write would take the buffer
    past its maximum length.
    '''
122
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer, similar to cStringIO, that can cap the
    amount of data it accepts.

    With a truthy "maxlen", a write that would make the buffer longer than
    "maxlen" stores only as much of the data as still fits and then raises
    StringTruncated.  With maxlen of None (or 0) the buffer is unbounded.
    '''
    def __init__(self, maxlen=None):
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''Append "data", raising StringTruncated if the limit is exceeded.'''
        limit = self.maxlen
        if limit:
            room = limit - len(self._val)
            if len(data) > room:
                # Truncation: keep whatever still fits, then abort the
                # caller's traversal.
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything accepted so far.'''
        return self._val
141
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct; get_gdb_type() looks up a pointer to it.
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap "gdbval" (a gdb.Value).  If "cast_to" (a gdb.Type) is given, the
        value is cast to that type before being stored.
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's "ob_type".'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the "tp_name" of this object's type as a string, or 'unknown'
        if it cannot be obtained.
        '''
        try:
            return self.type().field('tp_name').string()
        except (RuntimeError, UnicodeDecodeError):
            # RuntimeError: can't even read the object at all?  This also
            #   covers NullPyObjectPtr (a RuntimeError subclass), i.e. a NULL
            #   pointer somewhere along the way.
            # UnicodeDecodeError: the tp_name bytes read from the (possibly
            #   corrupt) inferior could not be decoded by .string().
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs (NullPyObjectPtr is a
            # RuntimeError) or an undecodable tp_name by simply using the
            # base class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'instance': PyInstanceObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by falling back to an
            # uncast wrapper
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to this class's C struct.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer as an integer address.'''
        return long(self._gdbval)
385
class PyVarObjectPtr(PyObjectPtr):
    '''
    Wrapper for a (PyVarObject*).  PyObjectPtr.field() casts to this class's
    gdb type in order to read the "ob_size" field.
    '''
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000388
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value returned when an object is reached a second time
    during one traversal, so that cycles in the object graph do not cause
    infinite recursion.

    Its repr() is exactly the string it was created with, e.g. '[...]',
    mirroring what the users of Py_ReprEnter and Py_ReprLeave emit.
    '''
    def __init__(self, rep):
        # The text to show in place of the already-visited object.
        self._rep = rep

    def __repr__(self):
        return self._rep
401
402
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Write "<NAME(attr=value, ...) at remote 0xADDRESS>" to the file-like
    object "out".  Shared by old-style and new-style instances.

    The "(attr=value, ...)" part is emitted only when "pyop_attrdict" is a
    PyDictObjectPtr; otherwise the output is "<NAME at remote 0xADDRESS>".
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
422
423
class InstanceProxy(object):
    '''
    Proxy value for a class instance in the inferior process: the class
    name, the scraped attribute dictionary and the remote address.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only name and address are shown.
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(pairs), self.address)
440
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up to
    a multiple of SIZEOF_VOID_P, and return it cast to size_t.
    '''
    alignment_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 alignment_mask)
    return (unaligned & ~alignment_mask).cast(_type_size_t)
447
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper used for objects whose type has Py_TPFLAGS_HEAPTYPE set, i.e.
    instances of new-style classes.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # A negative offset is relative to the end of the
                    # variable-sized object, so work out its size first.
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the inferior and may be corrupt.
                    # Fail safe with None rather than asserting: an
                    # AssertionError would escape the handler below, and
                    # asserts are stripped under -O.
                    if dictoffset <= 0 or dictoffset % SIZEOF_VOID_P != 0:
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<TYPENAME(attr=value, ...) at remote 0xADDRESS>" to "out", or
        '<...>' if this object is already being visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
513
class ProxyException(Exception):
    '''
    Proxy value for an exception instance in the inferior process; its
    repr() is the remote type name followed by the repr of its args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        shown_args = repr(self.args)
        return '%s%s' % (self.tp_name, shown_args)
521
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Return a ProxyException built from the type name and "args".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name immediately followed by the repr of "args".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
547
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
554
555
class BuiltInFunctionProxy(object):
    '''
    Proxy value for a built-in function (a PyCFunctionObject whose m_self
    is NULL); only the function name is kept.
    '''
    def __init__(self, ml_name):
        # Name taken from the PyMethodDef's ml_name.
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
562
class BuiltInMethodProxy(object):
    '''
    Proxy value for a built-in method bound to an object: keeps the method
    name and the PyObjectPtr for the object it is bound to (m_self).
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        return ('<built-in method %s of %s object at remote 0x%x>'
                % (self.ml_name, owner.safe_tp_name(), owner.as_address()))
574
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInMethodProxy when the function is bound to an object
        (non-NULL m_self), otherwise a BuiltInFunctionProxy.
        '''
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
591
592
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCodeObject*, i.e. a <code> instance
    living in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab alternates (address increment, line increment) entries.
        addr_steps = co_lnotab[::2]
        line_steps = co_lnotab[1::2]

        addr = 0
        for addr_incr, line_incr in zip(addr_steps, line_steps):
            addr += ord(addr_incr)
            if addr > addrq:
                # This entry starts beyond the queried offset.
                return lineno
            lineno += ord(line_incr)
        return lineno
620
621
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()
        '''
        for i in safe_range(self.field('ma_mask') + 1):
            ep = self.field('ma_table') + i
            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if pyop_value.is_null():
                # Slots with a NULL value hold no entry.
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict mapping proxy keys to proxy values.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        result = {}
        for pyop_key, pyop_value in self.iteritems():
            # Scrape the key before the value, as both update "visited".
            proxy_key = pyop_key.proxyval(visited)
            proxy_value = pyop_value.proxyval(visited)
            result[proxy_key] = proxy_value
        return result

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to the file-like object "out".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
671
class PyInstanceObjectPtr(PyObjectPtr):
    '''
    Wrapper for a gdb.Value that is a PyInstanceObject*, i.e. an instance of
    an old-style class.
    '''
    _typename = 'PyInstanceObject'

    def proxyval(self, visited):
        '''Return an InstanceProxy carrying the class name and attributes.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        # Name of the class, read through in_class->cl_name:
        cl_name = self.pyop_field('in_class').pyop_field('cl_name').proxyval(visited)

        # Dictionary of instance attributes:
        in_dict = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(cl_name, in_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<CLASSNAME(attr=value, ...) at remote 0xADDRESS>" to "out", or
        '<...>' if this object is already being visited.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        # Old-style class: the name lives on the class object...
        cl_name = self.pyop_field('in_class').pyop_field('cl_name').proxyval(visited)

        # ...and the attributes in the instance's own dictionary:
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             cl_name, pyop_in_dict, self.as_address())
709
class PyListObjectPtr(PyObjectPtr):
    '''Wrapper for a gdb.Value that is a PyListObject*.'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a list holding the proxy value of each element.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        result = []
        for i in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            result.append(element.proxyval(visited))
        return result

    def write_repr(self, out, visited):
        '''Write "[elem, elem, ...]" to the file-like object "out".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
        out.write(']')
742
743class PyLongObjectPtr(PyObjectPtr):
744 _typename = 'PyLongObject'
745
746 def proxyval(self, visited):
747 '''
748 Python's Include/longobjrep.h has this declaration:
749 struct _longobject {
750 PyObject_VAR_HEAD
751 digit ob_digit[1];
752 };
753
754 with this description:
755 The absolute value of a number is equal to
756 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
757 Negative numbers are represented with ob_size < 0;
758 zero is represented by ob_size == 0.
759
760 where SHIFT can be either:
761 #define PyLong_SHIFT 30
762 #define PyLong_SHIFT 15
763 '''
764 ob_size = long(self.field('ob_size'))
765 if ob_size == 0:
766 return 0L
767
768 ob_digit = self.field('ob_digit')
769
770 if gdb.lookup_type('digit').sizeof == 2:
771 SHIFT = 15L
772 else:
773 SHIFT = 30L
774
775 digits = [long(ob_digit[i]) * 2**(SHIFT*i)
776 for i in safe_range(abs(ob_size))]
777 result = sum(digits)
778 if ob_size < 0:
779 result = -result
780 return result
781
Martin v. Löwis5ae68102010-04-21 22:38:42 +0000782 def write_repr(self, out, visited):
783 # Write this out as a Python 3 int literal, i.e. without the "L" suffix
784 proxy = self.proxyval(visited)
785 out.write("%s" % proxy)
786
787
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    def proxyval(self, visited):
        # The integer value is scraped as for any long; only its truth
        # value is of interest here.
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000798
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The inferior's None is simply represented by None in gdb's Python.
        return None
808
809
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Wrapper for a gdb.Value that is a PyFrameObject*.

    Unless the frame pointer has been optimized out, the constructor caches
    the code object (self.co), its name and filename, the frame's f_lineno
    and f_lasti, and the code object's co_nlocals and co_varnames.  None of
    those attributes exist on an optimized-out frame, so every method checks
    is_optimized_out() before using them.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        # cast_to defaults to None, as in PyObjectPtr.__init__, so that the
        # "cls(gdbval)" fallback in PyObjectPtr.from_pyobject_ptr also works
        # for this class.
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame (empty if the frame is optimized
        out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name() loops over the result.
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables (empty if the frame is optimized out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name() loops over the result.
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, as returned
        by readlines() (i.e. normally with a trailing newline character).

        Returns the placeholder '(frame information optimized out)' if the
        frame is optimized out, and None if the source file cannot be opened
        or does not contain the current line.
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()

        lineno = self.current_line_num()
        # The file on disk need not match what the inferior is running, so
        # the line may not exist.  lineno < 1 is rejected as well: a zero or
        # negative list index would silently wrap around to the end of the
        # file and show an unrelated line.
        if lineno < 1 or lineno > len(all_lines):
            return None
        # Convert from 1-based current_line_num to 0-based list offset:
        return all_lines[lineno - 1]

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file FILE, line N, in NAME (local=value, ...)"
        to the file-like object "out".
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''Write a traceback-style 'File "...", line N, in NAME' line for
        this frame to sys.stdout.'''
        if self.is_optimized_out():
            sys.stdout.write('  (frame information optimized out)\n')
            # Nothing more can be shown: co_filename, co_name etc. were never
            # set on an optimized-out frame.
            return
        visited = set()
        sys.stdout.write('  File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
950
class PySetObjectPtr(PyObjectPtr):
    '''
    Wrapper for a gdb.Value that is a PySetObject*; used for both "set" and
    "frozenset" (see the name map in PyObjectPtr.subclass_from_type).
    '''
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''Return a set (or frozenset) of the proxy values of the members.'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                # Skip the placeholder that marks deleted slots:
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''
        Write the set to the file-like object "out" in the style of
        Python 3's set_repr: "{a, b}" for a set, "NAME({a, b})" for any other
        type name, and "NAME()" when empty.
        '''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # The proxy value is only needed to recognise the dummy key.
                # Compute it against a *copy* of "visited": proxyval() records
                # container/instance keys as visited, and if it did so in the
                # real set the write_repr() call below would take the key for
                # a cycle and print '(...)' / '<...>' instead of its contents.
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001013
1014
class PyBytesObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyBytesObject*) in the inferior process.'''
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Copy the ob_size bytes stored at ob_sval into a local string.'''
        size = self.field('ob_size')
        sval = self.field('ob_sval')
        data = sval.address.cast(_type_unsigned_char_ptr)
        pieces = []
        for offset in safe_range(size):
            pieces.append(chr(data[offset]))
        return ''.join(pieces)

    def proxyval(self, visited):
        '''The proxy value is simply the string form of the bytes.'''
        return str(self)

    def write_repr(self, out, visited):
        '''Write the value to "out" as a Python 3 bytes literal, i.e. with
        a "b" prefix.'''
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.  Prefer single quotes unless the data contains
        # single quotes and no double quotes:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        # Characters that have a dedicated backslash escape:
        named_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in named_escapes:
                out.write(named_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                value = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(value & 0xf0) >> 4])
                out.write(hexdigits[value & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1057
class PyTupleObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyTupleObject*) in the inferior process.'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index i.'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a local tuple of the proxy values of the elements.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        elements = []
        for i in safe_range(int_from_int(self.field('ob_size'))):
            pyop = PyObjectPtr.from_pyobject_ptr(self[i])
            elements.append(pyop.proxyval(visited))
        return tuple(elements)

    def write_repr(self, out, visited):
        '''Write a tuple repr to "out"; a 1-tuple gets a trailing comma.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)

        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1093
# Wrapper class associated with the 'PyTypeObject' struct name; it adds no
# behaviour of its own beyond what PyObjectPtr provides.
class PyTypeObjectPtr(PyObjectPtr):
    _typename = 'PyTypeObject'
1096
1097
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001098def _unichr_is_printable(char):
1099 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1100 if char == u" ":
1101 return True
1102 import unicodedata
Antoine Pitroub41e1282010-09-08 20:57:48 +00001103 return unicodedata.category(char) not in ("C", "Z")
1104
# _unichr(x) turns a code point into a unicode string.  When gdb's own
# Python can represent every code point in one character, the builtin is
# used directly; otherwise code points from 0x10000 upwards are returned as
# a surrogate pair.
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        if x >= 0x10000:
            offset = x - 0x10000
            high = 0xD800 | (offset >> 10)
            low = 0xDC00 | (offset & 0x3FF)
            return unichr(high) + unichr(low)
        return unichr(x)
else:
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001116
1117
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyUnicodeObject*) in the inferior process.'''
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return sizeof(Py_UNICODE) as reported by gdb's type lookup.'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a unicode instance in the gdb process that holds the same
        code points as the string in the inferior process.'''
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # Reinterpret the data pointer according to the width of
                # the stored code units:
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Unpaired high surrogate: keep it as its own code
                    # point instead of silently dropping it.  The unit at
                    # index i is handled by the next iteration.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write the value to "out" as a Python 3 str literal, i.e. without
        a "u" prefix, escaping non-printable characters.'''
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  DEL has to be tested
            # through ord(): comparing the character itself with the int
            # 0x7F is never true.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x%02x' % ord(ch))

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded for output is
                    # treated as non-printable and escaped instead:
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\xhh'
                    if code <= 0xff:
                        out.write('\\x%02x' % code)
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U%08x' % code)
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u%04x' % code)
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1298
1299
1300
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001301
def int_from_int(gdbval):
    '''Convert "gdbval" to a Python int by parsing its string form.'''
    text = str(gdbval)
    return int(text)
1304
1305
def stringify(val):
    '''Return the string used to display "val"; currently repr(val).'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_pformat = False
    if use_pformat:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1314
1315
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # Keep the value to print; it is wrapped lazily in to_string().
        self.gdbval = gdbval

    def to_string(self):
        '''Return the truncated repr (at most MAX_OUTPUT_LEN) of the object.'''
        wrapper = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        # Development switch: building the full proxy value and stringifying
        # it is the alternative, but doing so could be expensive.
        build_full_proxy = False
        if build_full_proxy:
            return stringify(wrapper.proxyval(set()))
        return wrapper.get_truncated_repr(MAX_OUTPUT_LEN)
1331
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of
    the handled struct types, otherwise None.'''
    gdbtype = gdbval.type.unqualified()
    if gdbtype.code != gdb.TYPE_CODE_PTR:
        return None

    # Look at what the pointer points to, ignoring qualifiers:
    target_name = str(gdbtype.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1339
1340"""
1341During development, I've been manually invoking the code in this way:
1342(gdb) python
1343
1344import sys
1345sys.path.append('/home/david/coding/python-gdb')
1346import libpython
1347end
1348
1349then reloading it after each edit like this:
1350(gdb) python reload(libpython)
1351
1352The following code should ensure that the prettyprinter is registered
1353if the code is autoloaded by gdb when visiting libpython.so, provided
1354that this python file is installed to the same path as the library (or its
1355.debug file) plus a "-gdb.py" suffix, e.g:
1356 /usr/lib/libpython2.6.so.1.0-gdb.py
1357 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1358"""
def register (obj):
    '''Append pretty_printer_lookup to obj.pretty_printers.

    If "obj" is None, the gdb module itself is used instead.'''
    # Identity test: "== None" would go through obj's own __eq__.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1367
1368
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001369
1370# Unfortunately, the exact API exposed by the gdb module varies somewhat
1371# from build to build
1372# See http://bugs.python.org/issue8279?#msg102276
1373
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the next-older gdb frame, or None.'''
        gdbframe = self._gdbframe.older()
        if not gdbframe:
            return None
        return Frame(gdbframe)

    def newer(self):
        '''Return a Frame for the next-newer gdb frame, or None.'''
        gdbframe = self._gdbframe.newer()
        if not gdbframe:
            return None
        return Frame(gdbframe)

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if hasattr(self._gdbframe, 'select'):
            self._gdbframe.select()
            return True
        print ('Unable to select frame: '
               'this build of gdb does not expose a gdb.Frame.select method')
        return False

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        # Count how many frames are newer than this one:
        index = 0
        cursor = self.newer()
        while cursor:
            index += 1
            cursor = cursor.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False
        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function.
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True
        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr for the variable "f" of this frame, or
        None if reading it raises ValueError.'''
        try:
            frame_var = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(frame_var)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame for gdb's selected frame, or None.'''
        selected = gdb.selected_frame()
        if not selected:
            return None
        return Frame(selected)

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        frame = cls.get_selected_frame()
        # Walk towards older frames until a PyEval_EvalFrameEx frame is
        # found; "frame" ends up as None if there is none.
        while frame and not frame.is_evalframeex():
            frame = frame.older()
        return frame

    def print_summary(self):
        '''Write "#INDEX" plus, for python frames, the frame repr and the
        current source line to sys.stdout.'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        summary = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), summary))
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(source_line)

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout.'''
        if not self.is_evalframeex():
            sys.stdout.write(' (not a python frame)\n')
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write(' (unable to read python frame information)\n')
            return

        pyop.print_traceback()
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())
1488
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self,
                             "py-list",
                             gdb.COMMAND_FILES,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START": list the lines from START to START + 10
        single = re.match(r'\s*(\d+)\s*', args)
        if single:
            start = int(single.group(0))
            end = start + 10

        # "START, END": an explicit range overrides the above
        pair = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if pair:
            start, end = [int(text) for text in pair.groups()]

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # No arguments: show a window around the current line
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()

        # start and end are 1-based, all_lines is 0-based;
        # so [start-1:end] as a python slice gives us [start, end] as a
        # closed interval
        for number, text in enumerate(all_lines[start-1:end], start):
            label = str(number)
            # Highlight current line:
            if number == lineno:
                label = '>' + label
            sys.stdout.write('%4s %s' % (label, text))


# ...and register the command:
PyList()
1564
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    frame = Frame.get_selected_python_frame()
    while frame:
        candidate = frame.older() if move_up else frame.newer()
        if not candidate:
            break

        if candidate.is_evalframeex():
            # Result: only print the summary if the frame could actually
            # be selected, but stop searching either way.
            if candidate.select():
                candidate.print_summary()
            return

        frame = candidate

    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1589
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Towards the caller, i.e. towards older frames:
        move_in_stack(move_up=True)
1601
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Towards the callee, i.e. towards newer frames:
        move_in_stack(move_up=False)
1613
# Not all builds of gdb have gdb.Frame.select; py-up and py-down are only
# registered when frames can actually be selected.
if hasattr(gdb.Frame, 'select'):
    for command_class in (PyUp, PyDown):
        command_class()

Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001618
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards older frames and
        # print a summary for every PyEval_EvalFrameEx frame on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1636
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        sys.stdout.write('Traceback (most recent call first):\n')
        # Walk from the selected python frame towards older frames and
        # print a traceback entry for every PyEval_EvalFrameEx frame.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_traceback()
            current = current.older()

PyBacktrace()
1655
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # get_var_by_name yields the variable and the scope it was found in
        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))

PyPrint()
1689
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'
    # The previous help text was copied from py-print and described a
    # variable lookup; this command takes no variable name and prints every
    # local yielded by the frame's iter_locals().

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # "args" is not used by this command.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local, with the value truncated to
        # MAX_OUTPUT_LEN:
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()