blob: 6972b9b1d6adbd707f0781771c489beec8bf4cde [file] [log] [blame]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types. Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython. In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process. For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
Victor Stinner67df3a42010-04-21 13:53:05 +000022holding three PyObject* that turn out to be PyBytesObject* instances, we can
Martin v. Löwis5ae68102010-04-21 22:38:42 +000023generate a proxy value within the gdb process that is a list of bytes
24instances:
25 [b"foo", b"bar", b"baz"]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000026
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object. This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
# Look up the gdb.Type for some standard types:
# (these lookups run at import time, so gdb must already know the C types)
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
_type_size_t = gdb.lookup_type('size_t')
_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer()
_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer()

# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Pointer size of the inferior, used for alignment arithmetic
# (see _PyObject_VAR_SIZE and HeapTypeObjectPtr.get_attr_dict)
SIZEOF_VOID_P = _type_void_ptr.sizeof


# Bit masks tested against a type object's tp_flags in
# PyObjectPtr.subclass_from_type to choose the wrapper class:
Py_TPFLAGS_HEAPTYPE = (1L << 9)

Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)


# NOTE(review): presumably the default truncation length passed to
# get_truncated_repr by code later in the file -- confirm
MAX_OUTPUT_LEN=1024

# NOTE(review): not used in this part of the file; presumably used when
# escaping bytes/str values -- confirm
hexdigits = "0123456789abcdef"

# Encoding used by write_unicode when encoding unicode text for output
ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000079
class NullPyObjectPtr(RuntimeError):
    """Raised by PyObjectPtr.field() when the wrapped pointer is NULL."""
82
83
def safety_limit(val):
    """
    Clamp an integer read from the process being debugged to at most 1000.

    The inferior's data may be corrupt, so sizes and iteration counts taken
    from it are capped to keep the gdb process itself from misbehaving.
    """
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
89
90
def safe_range(val):
    """
    Like xrange(val), but with the upper bound capped by safety_limit() in
    case the value read from the inferior was corrupted.
    """
    upper_bound = safety_limit(val)
    return xrange(upper_bound)
95
def write_unicode(file, text):
    """
    Write a byte or unicode string to "file".

    Unicode strings are first encoded to ENCODING using the
    'backslashreplace' error handler, which avoids UnicodeEncodeError;
    byte strings are written unchanged.
    """
    payload = text
    if isinstance(payload, unicode):
        payload = payload.encode(ENCODING, 'backslashreplace')
    file.write(payload)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000103
def os_fsencode(filename):
    """
    Encode a unicode filename to a byte string using the filesystem encoding.

    Non-unicode input is returned unchanged. For encodings other than
    'mbcs', characters in the range U+DC80..U+DCFF are mapped back to the
    single bytes 0x80..0xFF, emulating the "surrogateescape" error handler.
    """
    if not isinstance(filename, unicode):
        return filename

    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(fs_encoding)

    def _to_bytes(char):
        code_point = ord(char)
        if code_point < 0xDC80 or code_point > 0xDCFF:
            return char.encode(fs_encoding)
        # surrogateescape error handler
        return chr(code_point - 0xDC00)

    return ''.join([_to_bytes(char) for char in filename])
120
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once the length limit is hit."""
123
class TruncatedStringIO(object):
    '''Similar to cStringIO, but can truncate the output by raising a
    StringTruncated exception'''

    def __init__(self, maxlen=None):
        # A falsy maxlen (None, 0) disables truncation entirely
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''
        Append "data" to the buffer.

        If a maximum length is set and "data" would overflow it, only the
        part that still fits is stored and StringTruncated is raised.
        '''
        if self.maxlen:
            room = self.maxlen - len(self._val)
            if len(data) > room:
                # Truncation:
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far.'''
        return self._val
142
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct; get_gdb_type() looks up a pointer to it
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        gdbval: the gdb.Value to wrap
        cast_to: optional gdb.Type; if given, gdbval is cast to it first
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Get a PyTypeObjectPtr wrapping this object's "ob_type" field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Get the tp_name of this object's type as a string, or 'unknown' if
        it cannot be read.
        '''
        try:
            return self.type().field('tp_name').string()
        except NullPyObjectPtr:
            # NULL tp_name?
            return 'unknown'
        except RuntimeError:
            # Can't even read the object at all?
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns "cls" itself when nothing more specific applies, or when the
        type object cannot be read.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        # Special cases recognised by name; these take precedence over the
        # flag tests below
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'instance': PyInstanceObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # Heap types are checked before the builtin-subclass flags, so a
        # user-defined subclass of e.g. list is wrapped as HeapTypeObjectPtr
        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        # Py_TPFLAGS_TYPE_SUBCLASS is deliberately not mapped to
        # PyTypeObjectPtr here.

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.

        If anything goes wrong (NULL pointer, unreadable type object, C type
        unknown to gdb), fall back to wrapping the uncast pointer in the
        class this method was invoked on.
        '''
        try:
            p = PyObjectPtr(gdbval)
            # Keep "cls" untouched: if the lookup or cast below fails, the
            # fallback must not wrap an uncast pointer in the refined class
            refined = cls.subclass_from_type(p.type())
            return refined(gdbval, cast_to=refined.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the
            # class we were called on
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Get the gdb.Type for a pointer to this class's C struct.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Get the wrapped pointer as a Python integer.'''
        return long(self._gdbval)
386
class PyVarObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyVarObject*; PyObjectPtr.field()
    casts to this type to read "ob_size".
    """
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000389
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value returned when an object is reached a second time
    during a traversal, which stops infinite recursion on cyclic object
    graphs.

    Analogous to the values emitted by the users of Py_ReprEnter and
    Py_ReprLeave
    '''

    def __init__(self, rep):
        # "rep" is the exact text that repr() of this proxy yields
        self._rep = rep

    def __repr__(self):
        placeholder = self._rep
        return placeholder
402
403
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by old-style and new-style classes:
    write a representation to file-like object "out"

    The output has the form <NAME(attr=value, ...) at remote 0xADDR>; the
    parenthesised attribute list is only written when pyop_attrdict is a
    PyDictObjectPtr.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
423
424
class InstanceProxy(object):
    '''
    Proxy value for a class instance in the inferior process: holds the
    class name, the scraped attribute dictionary and the remote address.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only name and address are shown
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(pairs), self.address)
441
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
    rounded up to a multiple of SIZEOF_VOID_P, and cast to size_t.
    '''
    alignment_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 alignment_mask)
    return (unaligned & ~alignment_mask).cast(_type_size_t)
448
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* whose type has
    Py_TPFLAGS_HEAPTYPE set, i.e. an instance of a new-style class.
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # A negative tp_dictoffset is relative to the end of the
                    # variable-sized object, so add the object's size
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the inferior and may be
                    # corrupt: fail safe instead of asserting, since an
                    # AssertionError would escape the handler below
                    if not dictoffset > 0:
                        return None
                    if not dictoffset % SIZEOF_VOID_P == 0:
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited('<...>') if this
        object has already been visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write <TYPENAME(attr=value, ...) at remote 0xADDR> to "out", or
        '<...>' if this object has already been visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
514
class ProxyException(Exception):
    '''
    Proxy value for an exception instance in the inferior process; its repr
    is the remote type name followed by the repr of the args.
    '''

    def __init__(self, tp_name, args):
        self.args = args
        self.tp_name = tp_name

    def __repr__(self):
        type_name, arguments = self.tp_name, self.args
        return '%s%r' % (type_name, arguments)
522
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the remote type name and the
        proxy of the "args" field, or ProxyAlreadyVisited('(...)') if this
        object was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        pyop_args = self.pyop_field('args')
        arg_proxy = pyop_args.proxyval(visited)
        return ProxyException(self.safe_tp_name(), arg_proxy)

    def write_repr(self, out, visited):
        '''
        Write the remote type name followed by the repr of the "args" field
        to "out", or '(...)' if this object was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
548
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
555
556
class BuiltInFunctionProxy(object):
    '''Proxy value for a built-in function, identified by its ml_name.'''

    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        function_name = self.ml_name
        return "<built-in function %s>" % function_name
563
class BuiltInMethodProxy(object):
    '''
    Proxy value for a built-in method bound to an object: holds the method's
    ml_name and the PyObjectPtr of the object it is bound to.
    '''

    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        owner_type = owner.safe_tp_name()
        owner_address = owner.as_address()
        return ('<built-in method %s of %s object at remote 0x%x>'
                % (self.ml_name, owner_type, owner_address))
575
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy bound to the m_self object.
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
592
593
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCodeObject*, i.e. a <code> instance
    living in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        line_number = int_from_int(self.field('co_firstlineno'))

        # The table alternates (bytecode offset increment, line increment)
        addr_increments = lnotab[::2]
        line_increments = lnotab[1::2]

        offset = 0
        for addr_incr, line_incr in zip(addr_increments, line_increments):
            offset += ord(addr_incr)
            if offset > addrq:
                return line_number
            line_number += ord(line_incr)
        return line_number
621
622
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Walks the ma_mask + 1 slots of ma_table (capped by safe_range) and
        skips slots whose me_value is NULL.
        '''
        for slot in safe_range(self.field('ma_mask') + 1):
            entry = self.field('ma_table') + slot
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict mapping key proxies to value proxies, or
        ProxyAlreadyVisited('{...}') if this dict was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            key_proxy = pyop_key.proxyval(visited)
            value_proxy = pyop_value.proxyval(visited)
            proxies[key_proxy] = value_proxy
        return proxies

    def write_repr(self, out, visited):
        '''
        Write a {key: value, ...} representation to "out", or '{...}' if
        this dict was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
672
class PyInstanceObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyInstanceObject*, i.e. an instance
    of an old-style class.
    """
    _typename = 'PyInstanceObject'

    def proxyval(self, visited):
        '''
        Return an InstanceProxy holding the class name, the proxy of the
        instance dictionary and the remote address, or
        ProxyAlreadyVisited('<...>') if this object was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        # Get name of class:
        pyop_class = self.pyop_field('in_class')
        class_name = pyop_class.pyop_field('cl_name').proxyval(visited)

        # Get dictionary of instance attributes:
        attributes = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(class_name, attributes, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write <CLASSNAME(attr=value, ...) at remote 0xADDR> to "out", or
        '<...>' if this object was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        # Old-style class:

        # Get name of class:
        pyop_class = self.pyop_field('in_class')
        class_name = pyop_class.pyop_field('cl_name').proxyval(visited)

        # Get dictionary of instance attributes:
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             class_name, pyop_in_dict, self.as_address())
710
class PyListObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyListObject*, i.e. a list instance
    living in the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        '''Get the gdb.Value for the (PyObject*) with the given index.'''
        items = self.field('ob_item')
        return items[i]

    def proxyval(self, visited):
        '''
        Return a list of element proxies (at most safety_limit entries), or
        ProxyAlreadyVisited('[...]') if this list was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''
        Write a [elem, ...] representation to "out", or '[...]' if this
        list was already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
743
class PyLongObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyLongObject*, i.e. an arbitrary
    precision integer living in the process being debugged.
    """
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return long(0)

        ob_digit = self.field('ob_digit')

        # The size of the "digit" type tells us which PyLong_SHIFT is in use
        if gdb.lookup_type('digit').sizeof == 2:
            SHIFT = long(15)
        else:
            SHIFT = long(30)

        magnitude = 0
        for index in safe_range(abs(ob_size)):
            magnitude += long(ob_digit[index]) * 2**(SHIFT*index)

        if ob_size < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        '''Write the value to "out" in decimal.'''
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        value = self.proxyval(visited)
        out.write("%s" % value)
787
788
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) in the process being debugged.
    """

    def proxyval(self, visited):
        '''Return True or False according to the underlying integer value.'''
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000799
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyObject* pointing to the singleton
    (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for the remote None is this process's own None.'''
        remote_none = None
        return remote_none
809
810
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a <frame>
    instance within the process being debugged.

    When the frame pointer is not optimized out, the constructor caches the
    code object, its name/filename/varnames, and the frame's f_lineno and
    f_lasti as attributes.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        # cast_to defaults to None, matching PyObjectPtr.__init__, so that
        # the cls(gdbval) fallback in from_pyobject_ptr can construct us
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose value pointer is NULL are skipped.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame (empty if the frame is
        optimized out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name loops over the result
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the builtin variables (empty if the frame is optimized out)
        '''
        if self.is_optimized_out():
            # Must be iterable: get_var_by_name loops over the result
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        Locals are searched first, then globals, then builtins.

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns None if the source file cannot be opened, or if it has fewer
        lines than the current line number (e.g. it changed on disk).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()
        try:
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]
        except IndexError:
            # The file on disk does not match the code being run
            return None

    def write_repr(self, out, visited):
        '''
        Write a one-line description of the frame (address, file, line,
        function name and local variables) to "out".
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''
        Write a traceback-style line for this frame (file, line number and
        function name) to sys.stdout.
        '''
        if self.is_optimized_out():
            sys.stdout.write(' (frame information optimized out)\n')
            # None of the co_* attributes exist in this case
            return
        visited = set()
        sys.stdout.write(' File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
951
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PySetObject* i.e. a set or frozenset
    instance within the process being debugged.
    """
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''
        Return a set (or frozenset, if the remote type name is 'frozenset')
        of member proxies, or a ProxyAlreadyVisited if this object was
        already visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                # Skip the placeholder stored in deleted slots
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''
        Write a Python 3 style representation to "out": {a, b} for a
        non-empty set, TYPENAME({a, b}) for other type names, TYPENAME()
        when empty, or '(...)' if this object was already visited.
        '''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # Probe for the dummy key with a copy of "visited": probing
                # with the real set would mark container keys as visited
                # and write_repr below would then emit only a placeholder
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001014
1015
class PyBytesObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyBytesObject*) in the inferior process.'''
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Read ob_size bytes starting at ob_sval and return them as a str.'''
        length = self.field('ob_size')
        data = self.field('ob_sval').address.cast(_type_unsigned_char_ptr)
        chars = []
        for offset in safe_range(length):
            chars.append(chr(data[offset]))
        return ''.join(chars)

    def proxyval(self, visited):
        '''The proxy value is simply the str built by __str__.'''
        return str(self)

    def write_repr(self, out, visited):
        '''Write this out as a Python 3 bytes literal, i.e. with a "b"
        prefix.'''
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.
        # Prefer single quotes unless that would force escaping and double
        # quotes would not:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in whitespace_escapes:
                out.write(whitespace_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh:
                out.write('\\x')
                out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
                out.write(hexdigits[ord(byte) & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1058
class PyTupleObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyTupleObject*) in the inferior process.'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index i.'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a tuple of the proxy values of the items.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(self.as_address())

        item_count = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(item_count):
            items.append(
                PyObjectPtr.from_pyobject_ptr(self[index]).proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write a tuple literal to the file-like "out", including the
        trailing comma for a one-element tuple.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        out.write('(')
        item_count = int_from_int(self.field('ob_size'))
        for index in safe_range(item_count):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        # A 1-tuple needs its trailing comma to read back as a tuple:
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1094
class PyTypeObjectPtr(PyObjectPtr):
    # Wrapper class tagged with the C struct name 'PyTypeObject'; it adds no
    # methods of its own here, so everything else is inherited from
    # PyObjectPtr.
    _typename = 'PyTypeObject'
1097
1098
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001099def _unichr_is_printable(char):
1100 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1101 if char == u" ":
1102 return True
1103 import unicodedata
Antoine Pitroub41e1282010-09-08 20:57:48 +00001104 return unicodedata.category(char) not in ("C", "Z")
1105
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        '''Like unichr(), but return a surrogate pair (a two-character
        unicode string) for code points of 0x10000 and above.'''
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    # unichr() can represent every code point directly in this gdb:
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001117
1118
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyUnicodeObject*) in the inferior process.'''
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return sizeof(Py_UNICODE) in the inferior, as reported by gdb.'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a unicode instance, local to the gdb process, built from
        the code points read out of the inferior's string.'''
        global _is_pep393
        if _is_pep393 is None:
            # Decide the layout once and cache it: a 'data' member in
            # PyUnicodeObject selects the "Python 3.3 and newer" branch.
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # Reinterpret the buffer according to the per-character width:
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Unpaired high surrogate: keep it rather than silently
                    # dropping a code unit; ucs2 itself is examined on the
                    # next iteration.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write this out as a Python 3 str literal, i.e. without a "u"
        prefix, escaping non-printable characters.'''
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.
            # (DEL must be tested via ord(): a character never compares
            # equal to the integer 0x7F.)
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded for output is
                    # treated as non-printable so that it gets escaped:
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    if code <= 0xff:
                        # Map 8-bit characters to '\\xhh'
                        out.write('\\x')
                        shifts = (4, 0)
                    elif code >= 0x10000:
                        # Map 21-bit characters to '\U00xxxxxx'
                        out.write('\\U')
                        shifts = (28, 24, 20, 16, 12, 8, 4, 0)
                    else:
                        # Map 16-bit characters to '\uxxxx'
                        out.write('\\u')
                        shifts = (12, 8, 4, 0)
                    # Emit one hex digit per nibble, most significant first:
                    for shift in shifts:
                        out.write(hexdigits[(code >> shift) & 0x0F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1303
1304
1305
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001306
def int_from_int(gdbval):
    '''Convert gdbval to a Python int by parsing its str() form.'''
    text = str(gdbval)
    return int(text)
1309
1310
def stringify(val):
    '''Return the string form used when displaying a proxy value.'''
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_repr = True
    if not use_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1319
1320
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__ (self, gdbval):
        # The value to be rendered when to_string() is called.
        self.gdbval = gdbval

    def to_string (self):
        '''Return the text to display for the wrapped value: its repr,
        truncated to MAX_OUTPUT_LEN.'''
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        truncate = True
        if not truncate:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            proxyval = pyop.proxyval(set())
            return stringify(proxyval)
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1336
def pretty_printer_lookup(gdbval):
    '''Return a PyObjectPtrPrinter for gdbval if it is a pointer to one of
    the handled struct types; otherwise return None.'''
    val_type = gdbval.type.unqualified()
    if val_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Look through the pointer at the (unqualified) pointed-to type:
    target_name = str(val_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1344
1345"""
1346During development, I've been manually invoking the code in this way:
1347(gdb) python
1348
1349import sys
1350sys.path.append('/home/david/coding/python-gdb')
1351import libpython
1352end
1353
1354then reloading it after each edit like this:
1355(gdb) python reload(libpython)
1356
1357The following code should ensure that the prettyprinter is registered
1358if the code is autoloaded by gdb when visiting libpython.so, provided
1359that this python file is installed to the same path as the library (or its
1360.debug file) plus a "-gdb.py" suffix, e.g:
1361 /usr/lib/libpython2.6.so.1.0-gdb.py
1362 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1363"""
def register (obj):
    '''Append pretty_printer_lookup to obj.pretty_printers.

    When obj is None, the gdb module's own pretty_printers list is used
    instead.'''
    # Identity test: None is a singleton, and "==" would defer to whatever
    # comparison the object passed in happens to implement.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1372
1373
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001374
1375# Unfortunately, the exact API exposed by the gdb module varies somewhat
1376# from build to build
1377# See http://bugs.python.org/issue8279?#msg102276
1378
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the next older gdb frame, or None.'''
        older = self._gdbframe.older()
        return Frame(older) if older else None

    def newer(self):
        '''Return a Frame for the next newer gdb frame, or None.'''
        newer = self._gdbframe.newer()
        return Frame(newer) if newer else None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if hasattr(self._gdbframe, 'select'):
            self._gdbframe.select()
            return True
        print ('Unable to select frame: '
               'this build of gdb does not expose a gdb.Frame.select method')
        return False

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        # Count the steps needed to walk down to the newest frame:
        index = 0
        newer = self.newer()
        while newer:
            index += 1
            newer = newer.newer()
        return index

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False
        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True
        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr built from the variable "f" of this gdb
        frame, or None if a ValueError is raised while obtaining it.'''
        try:
            return PyFrameObjectPtr.from_pyobject_ptr(
                self._gdbframe.read_var('f'))
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None.'''
        _gdbframe = gdb.selected_frame()
        if not _gdbframe:
            return None
        return Frame(_gdbframe)

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        candidate = cls.get_selected_frame()
        # Walk towards older frames until a PyEval_EvalFrameEx one is found:
        while candidate:
            if candidate.is_evalframeex():
                return candidate
            candidate = candidate.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write "#INDEX" plus, for a python frame, its truncated repr and
        current source line to sys.stdout.'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        summary = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), summary))
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(source_line)

    def print_traceback(self):
        '''Write this frame to sys.stdout as a traceback entry followed by
        its (stripped) current source line.'''
        if not self.is_evalframeex():
            sys.stdout.write(' (not a python frame)\n')
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write(' (unable to read python frame information)\n')
            return

        pyop.print_traceback()
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())
1493
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
    py-list START
    to list at a different line number within the python source.

    Use
    py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self, "py-list",
                             gdb.COMMAND_FILES, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = end = None

        # "START": list eleven lines beginning at START
        single = re.match(r'\s*(\d+)\s*', args)
        if single:
            start = int(single.group(0))
            end = start + 10

        # "START, END": an explicit range overrides the single-number form
        span = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if span:
            start, end = [int(text) for text in span.groups()]

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # Without arguments, show a window centred on the current line:
        if start is None:
            start = lineno - 5
            end = lineno + 5

        if start < 1:
            start = 1

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, text in enumerate(all_lines[start-1:end], start):
                label = str(number)
                # Highlight current line:
                if number == lineno:
                    label = '>' + label
                sys.stdout.write('%4s %s' % (label, text))


# ...and register the command:
PyList()
1569
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    while current:
        # Step one gdb frame in the requested direction:
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            break

        if candidate.is_evalframeex():
            # Result: this is the python frame to move to; it is only
            # summarised if gdb was able to select it.
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    # Ran off the end of the stack without meeting another python frame:
    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1594
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The command takes no arguments; "up" means towards older frames.
        move_in_stack(move_up=True)
1606
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The command takes no arguments; "down" means towards newer frames.
        move_in_stack(move_up=False)
1618
# Not all builds of gdb have gdb.Frame.select, so py-up and py-down are
# only registered (by instantiating the command classes) when the method
# is present.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001623
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards older frames, printing
        # a summary of every PyEval_EvalFrameEx frame met on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1641
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        sys.stdout.write('Traceback (most recent call first):\n')
        # Walk from the selected python frame towards older frames, printing
        # a traceback entry for every PyEval_EvalFrameEx frame met.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_traceback()
            current = current.older()

PyBacktrace()
1660
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name.
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))

PyPrint()
1694
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # The command takes no arguments: "args" is deliberately ignored.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print ('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print ('Unable to read information on python frame')
            return

        # One "name = repr" line per local, each repr truncated to
        # MAX_OUTPUT_LEN:
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
1722PyLocals()