blob: 0eef228c4f5c4a39563e8086e8b01225908e78a1 [file] [log] [blame]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types. Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython. In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process. For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
Victor Stinner67df3a42010-04-21 13:53:05 +000022holding three PyObject* that turn out to be PyBytesObject* instances, we can
Martin v. Löwis5ae68102010-04-21 22:38:42 +000023generate a proxy value within the gdb process that is a list of bytes
24instances:
25 [b"foo", b"bar", b"baz"]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000026
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object. This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
# gdb.Type handles for the plain C types used throughout this module.  These
# are resolved once, when the module is loaded.
_type_char_ptr = gdb.lookup_type('char').pointer()  # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer()  # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer()  # void*
_type_size_t = gdb.lookup_type('size_t')
_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer()  # unsigned short*
_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer()  # unsigned int*

# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Size in bytes of a pointer in the inferior, as reported by gdb for (void*).
SIZEOF_VOID_P = _type_void_ptr.sizeof
60
61
62Py_TPFLAGS_HEAPTYPE = (1L << 9)
63
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000064Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
65Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
66Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Martin v. Löwis5ae68102010-04-21 22:38:42 +000067Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000068Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
69Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
70Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
71Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
72
73
74MAX_OUTPUT_LEN=1024
75
Martin v. Löwis5ae68102010-04-21 22:38:42 +000076hexdigits = "0123456789abcdef"
77
Victor Stinner150016f2010-05-19 23:04:56 +000078ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000079
class NullPyObjectPtr(RuntimeError):
    """Raised by PyObjectPtr.field() when the wrapped pointer is NULL."""
82
83
def safety_limit(val, limit=1000):
    '''
    Clamp an integer read from the process being debugged to at most "limit".

    Values such as list sizes or iteration counts come from the inferior and
    may be corrupt; capping them keeps arbitrary breakage within said process
    from breaking the gdb process too much.

    val: the untrusted value
    limit: the largest value that will be returned (defaults to 1000)

    Returns min(val, limit); values below the limit (including negative
    ones) are passed through unchanged.
    '''
    return min(val, limit)
89
90
def safe_range(val):
    '''
    Like xrange(val), except that "val" is first capped by safety_limit(),
    since it usually comes from the inferior and may have been corrupted.
    '''
    capped = safety_limit(val)
    return xrange(capped)
95
def write_unicode(file, text):
    '''
    Write "text" (a byte string or a unicode string) to "file".

    Unicode strings are first encoded to ENCODING using the
    'backslashreplace' error handler, so that unencodable characters cannot
    raise UnicodeEncodeError.  Byte strings are written unchanged.
    '''
    if isinstance(text, unicode):
        file.write(text.encode(ENCODING, 'backslashreplace'))
    else:
        file.write(text)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000103
Victor Stinner6ffbee72010-10-17 19:35:30 +0000104def os_fsencode(filename):
105 if not isinstance(filename, unicode):
106 return filename
107 encoding = sys.getfilesystemencoding()
108 if encoding == 'mbcs':
109 # mbcs doesn't support surrogateescape
110 return filename.encode(encoding)
111 encoded = []
112 for char in filename:
113 # surrogateescape error handler
114 if 0xDC80 <= ord(char) <= 0xDCFF:
115 byte = chr(ord(char) - 0xDC00)
116 else:
117 byte = char.encode(encoding)
118 encoded.append(byte)
119 return ''.join(encoded)
120
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once its length limit is exceeded."""
123
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer, similar to cStringIO, that can cap its
    own length: once a write would push the content past "maxlen", as much
    data as still fits is kept and StringTruncated is raised.

    A falsy maxlen (None, the default, or 0) means "no limit".
    '''

    def __init__(self, maxlen=None):
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        '''Append "data", raising StringTruncated if the limit is exceeded.'''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation: keep only what still fits, then abort the caller
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far.'''
        return self._val
142
class PyObjectPtr(object):
    """
    Wrapper around a gdb.Value that is either a (PyObject*) within the
    inferior process, or a pointer to a more specific struct e.g.
    (PyBytesObject*).

    Every refined PyObject type that we care about gets its own subclass;
    from_pyobject_ptr() picks the most specific one.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        gdbval: the gdb.Value to wrap
        cast_to: optional gdb.Type; if given (and truthy), gdbval is cast to
        it before being stored
        '''
        self._gdbval = gdbval.cast(cast_to) if cast_to else gdbval

    def field(self, name):
        '''
        Return the gdb.Value of the named field of the pointed-to object,
        coping with some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field,
        and the "ob_type" is most easily accessed by casting back to a
        (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            # Reach the header through a (PyObject*) view of the pointer
            as_pyobject = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return as_pyobject.dereference()[name]

        if name == 'ob_size':
            # Reach the header through a (PyVarObject*) view of the pointer
            as_varobject = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return as_varobject.dereference()[name]

        # General case: look it up inside the object:
        target = self._gdbval.dereference()
        return target[name]

    def pyop_field(self, name):
        '''
        Like field(), but for fields that are themselves PyObject*: the
        result is wrapped via PyObjectPtr.from_pyobject_ptr().
        '''
        raw = self.field(name)
        return PyObjectPtr.from_pyobject_ptr(raw)

    def write_field_repr(self, name, out, visited):
        '''
        Write the representation of the PyObject* field called "name" to the
        file-like object "out".
        '''
        self.pyop_field(name).write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen"
        bytes (ending the object graph traversal as soon as you do).  A
        truncated result ends with '...(truncated)'.
        '''
        buf = TruncatedStringIO(maxlen)
        suffix = ''
        try:
            self.write_repr(buf, set())
        except StringTruncated:
            # Truncation occurred: flag it in the result
            suffix = '...(truncated)'
        return buf.getvalue() + suffix

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's ob_type.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return long(self._gdbval) == 0

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the tp_name of this object's type as a string, or 'unknown'
        if it cannot be read.
        '''
        try:
            return self.type().field('tp_name').string()
        except RuntimeError:
            # Covers both a NULL pointer on the way (NullPyObjectPtr is a
            # RuntimeError) and not being able to read the object at all
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Stand-in for a PyObject* value in the inferior process for which
            we have no custom scraper; its only job is to have a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        tp_name = self.safe_tp_name()
        return FakeRepr(tp_name, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.

        This default implementation builds the full proxy value and writes
        its repr(); derived classes specialize this, since building a proxy
        can involve a lot of work for complicated objects.
        '''
        proxy = self.proxyval(visited)
        return out.write(repr(proxy))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        # Special cases recognised purely by type name:
        by_name = {
            'bool': PyBoolObjectPtr,
            'classobj': PyClassObjectPtr,
            'instance': PyInstanceObjectPtr,
            'NoneType': PyNoneStructPtr,
            'frame': PyFrameObjectPtr,
            'set': PySetObjectPtr,
            'frozenset': PySetObjectPtr,
            'builtin_function_or_method': PyCFunctionObjectPtr,
        }
        if tp_name in by_name:
            return by_name[tp_name]

        # Everything else is recognised by tp_flags bits, tested in this
        # order; the first match wins.  Py_TPFLAGS_TYPE_SUBCLASS is currently
        # not mapped to any wrapper class.
        by_flag = (
            (Py_TPFLAGS_HEAPTYPE, HeapTypeObjectPtr),
            (Py_TPFLAGS_LONG_SUBCLASS, PyLongObjectPtr),
            (Py_TPFLAGS_LIST_SUBCLASS, PyListObjectPtr),
            (Py_TPFLAGS_TUPLE_SUBCLASS, PyTupleObjectPtr),
            (Py_TPFLAGS_BYTES_SUBCLASS, PyBytesObjectPtr),
            (Py_TPFLAGS_UNICODE_SUBCLASS, PyUnicodeObjectPtr),
            (Py_TPFLAGS_DICT_SUBCLASS, PyDictObjectPtr),
            (Py_TPFLAGS_BASE_EXC_SUBCLASS, PyBaseExceptionObjectPtr),
        )
        for flag, wrapper in by_flag:
            if tp_flags & flag:
                return wrapper

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.  On any RuntimeError, fall back to wrapping
        the uncast value.
        '''
        try:
            generic = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(generic.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by wrapping the value
            # without a cast
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to this class's C struct.'''
        struct_type = gdb.lookup_type(cls._typename)
        return struct_type.pointer()

    def as_address(self):
        '''Return the wrapped pointer as a Python integer.'''
        return long(self._gdbval)
386
class PyVarObjectPtr(PyObjectPtr):
    """Class wrapping a gdb.Value that's a (PyVarObject*)."""
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000389
class ProxyAlreadyVisited(object):
    '''
    Placeholder proxy returned in place of an object that is already being
    visited, protecting against infinite recursion due to loops in the
    object graph.

    Its repr() is exactly the string it was constructed with.  Analogous to
    the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''

    def __init__(self, rep):
        # rep: the text that repr() of this placeholder should return
        self._rep = rep

    def __repr__(self):
        return self._rep
402
403
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared code for use by all classes:
    write a representation to file-like object "out"

    The output has the form "<name(attr=value, ...) at remote 0xADDR>"; the
    parenthesised attribute list is only written when pyop_attrdict is a
    PyDictObjectPtr.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
423
424
class InstanceProxy(object):
    '''
    Proxy for an instance in the inferior process: remembers the class name,
    the (already proxied) attribute dictionary and the remote address, and
    renders them in its repr().
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if not isinstance(self.attrdict, dict):
            # No usable attribute dictionary: name and address only
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = ["%s=%r" % item for item in self.attrdict.iteritems()]
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
441
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
    rounded up to a multiple of SIZEOF_VOID_P, and return it cast to size_t.
    '''
    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize'))
    rounded = (unaligned + align_mask) & ~align_mask
    return rounded.cast(_type_size_t)
448
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* whose type has
    Py_TPFLAGS_HEAPTYPE set (see PyObjectPtr.subclass_from_type).
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)

        The dictionary pointer is located via the type's tp_dictoffset.  A
        negative offset is relative to the end of the variable-sized object,
        so the object's size is added to it first.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset == 0:
                # This type has no instance dictionary
                return None

            if dictoffset < 0:
                type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                if tsize < 0:
                    tsize = -tsize
                size = _PyObject_VAR_SIZE(typeobj, tsize)
                dictoffset += size
                # These values come from the inferior and may be corrupt, so
                # check them explicitly and fail safe rather than asserting:
                # an AssertionError would escape the handler below, and
                # asserts are stripped under -O.
                if not dictoffset > 0:
                    return None
                if dictoffset % SIZEOF_VOID_P != 0:
                    return None

            dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
            PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
            dictptr = dictptr.cast(PyObjectPtrPtr)
            return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited placeholder if
        this object is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<tp_name(attr=value, ...) at remote 0xADDR>" to "out", or
        '<...>' if this object is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
514
class ProxyException(Exception):
    '''
    Proxy for an exception instance in the inferior process; its repr() is
    the remote type name immediately followed by the repr of its args.
    '''

    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        args_repr = repr(self.args)
        return '%s%s' % (self.tp_name, args_repr)
522
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the remote type name and the
        proxied "args" field, or a placeholder if already visited.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name followed by the repr of "args" to "out".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
548
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
555
556
class BuiltInFunctionProxy(object):
    '''Proxy whose repr() mimics that of a built-in function.'''

    def __init__(self, ml_name):
        # ml_name: the function's name, as a string
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % self.ml_name
563
class BuiltInMethodProxy(object):
    '''
    Proxy whose repr() mimics that of a built-in method bound to an object
    in the inferior process.
    '''

    def __init__(self, ml_name, pyop_m_self):
        # ml_name: the method's name, as a string
        # pyop_m_self: wrapper for the object the method is bound to; must
        # provide safe_tp_name() and as_address()
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        details = (self.ml_name, owner.safe_tp_name(), owner.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
575
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy bound to m_self.
        '''
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
592
593
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt

        co_lnotab is consumed as alternating (address increment, line
        increment) characters.
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as per PyCode_Addr2Line, not from 0 as
        # lnotab_notes.txt has it:
        line = int_from_int(self.field('co_firstlineno'))

        offset = 0
        addr_steps = lnotab[::2]
        line_steps = lnotab[1::2]
        for addr_incr, line_incr in zip(addr_steps, line_steps):
            offset += ord(addr_incr)
            if offset > addrq:
                # Walked past the requested offset: the line is settled
                return line
            line += ord(line_incr)
        return line
621
622
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Walks the ma_mask + 1 slots of ma_table (capped by safe_range) and
        skips every slot whose me_value is NULL.
        '''
        for slot in safe_range(self.field('ma_mask') + 1):
            entry = self.field('ma_table') + slot
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict mapping proxied keys to proxied values, or a
        placeholder if this dict is already in "visited".
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            key = pyop_key.proxyval(visited)
            proxies[key] = pyop_value.proxyval(visited)
        return proxies

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to "out" without building a proxy.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
672
class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index "i".'''
        items = self.field('ob_item')
        return items[i]

    def proxyval(self, visited):
        '''
        Return a list of the proxied elements (at most safe_range's cap of
        them), or a placeholder if this list is already in "visited".
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''Write "[elem, ...]" to "out" without building a proxy.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
705
706class PyLongObjectPtr(PyObjectPtr):
707 _typename = 'PyLongObject'
708
709 def proxyval(self, visited):
710 '''
711 Python's Include/longobjrep.h has this declaration:
712 struct _longobject {
713 PyObject_VAR_HEAD
714 digit ob_digit[1];
715 };
716
717 with this description:
718 The absolute value of a number is equal to
719 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
720 Negative numbers are represented with ob_size < 0;
721 zero is represented by ob_size == 0.
722
723 where SHIFT can be either:
724 #define PyLong_SHIFT 30
725 #define PyLong_SHIFT 15
726 '''
727 ob_size = long(self.field('ob_size'))
728 if ob_size == 0:
729 return 0L
730
731 ob_digit = self.field('ob_digit')
732
733 if gdb.lookup_type('digit').sizeof == 2:
734 SHIFT = 15L
735 else:
736 SHIFT = 30L
737
738 digits = [long(ob_digit[i]) * 2**(SHIFT*i)
739 for i in safe_range(abs(ob_size))]
740 result = sum(digits)
741 if ob_size < 0:
742 result = -result
743 return result
744
Martin v. Löwis5ae68102010-04-21 22:38:42 +0000745 def write_repr(self, out, visited):
746 # Write this out as a Python 3 int literal, i.e. without the "L" suffix
747 proxy = self.proxyval(visited)
748 out.write("%s" % proxy)
749
750
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """

    def proxyval(self, visited):
        '''Return True or False according to the underlying integer value.'''
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000761
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for the inferior's None is simply None.'''
        return None
771
772
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame within
    the process being debugged.

    When the frame pointer is optimized out, none of the co_* / f_*
    attributes are set, and every method falls back to a neutral result.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        # cast_to defaults to None, matching PyObjectPtr.__init__, so that
        # the uncast fallback "cls(gdbval)" in from_pyobject_ptr() works for
        # frames too.
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Slots of f_localsplus holding NULL are skipped.  Yields nothing if
        the frame is optimized out.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame

        Returns an empty iterable if the frame is optimized out.
        '''
        if self.is_optimized_out():
            # Must be iterable: callers loop over the result directly
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the builtin variables

        Returns an empty iterable if the frame is optimized out.
        '''
        if self.is_optimized_out():
            # Must be iterable: callers loop over the result directly
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        Scopes are searched in that order.  If not found, return (None, None)
        '''
        scopes = (('local', self.iter_locals),
                  ('global', self.iter_globals),
                  ('builtin', self.iter_builtins))
        for scope, iter_scope in scopes:
            # Call lazily, so that later scopes are only read when needed
            for pyop_name, pyop_value in iter_scope():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns None if the source file cannot be opened, or if it has fewer
        lines than the current line number (e.g. it changed on disk).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()
        try:
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num() - 1]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file F, line N, in NAME (local=value, ...)"
        to "out".
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''Write a one-line traceback-style entry for this frame to stdout.'''
        if self.is_optimized_out():
            sys.stdout.write(' (frame information optimized out)\n')
            # Nothing more can be read: co_filename etc. were never set
            return
        visited = set()
        sys.stdout.write(' File "%s", line %i, in %s\n'
                         % (self.co_filename.proxyval(visited),
                            self.current_line_num(),
                            self.co_name.proxyval(visited)))
913
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PySetObject* i.e. a set or frozenset
    instance within the process being debugged.
    """
    _typename = 'PySetObject'

    def _iter_key_ptrs(self):
        '''
        Yield a PyObjectPtr for every non-NULL key in the hash table,
        visiting mask + 1 slots (capped by safe_range).  Dummy keys are
        still included; callers filter them out.
        '''
        table = self.field('table')
        for i in safe_range(self.field('mask') + 1):
            key = table[i]['key']
            if key != 0:
                yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        '''
        Return a set (or a frozenset, when tp_name is 'frozenset') of the
        proxied members, or a placeholder if already visited.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        for pyop_key in self._iter_key_ptrs():
            key_proxy = pyop_key.proxyval(visited)
            if key_proxy != '<dummy key>':
                members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''
        Write the set to "out", emulating Python 3's set_repr: "{a, b}" for
        a set, "tp_name({a, b})" for other types, "tp_name()" when empty.
        '''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        for pyop_key in self._iter_key_ptrs():
            # Probe for the dummy-key marker on a *copy* of "visited": the
            # probe must not record the key (or anything it contains) as
            # visited, otherwise the write_repr() below would print
            # container keys as already-visited placeholders.
            key_proxy = pyop_key.proxyval(set(visited))
            if key_proxy != '<dummy key>':
                if not first:
                    out.write(', ')
                first = False
                pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000976
977
class PyBytesObjectPtr(PyObjectPtr):
    """Wrapper for a (PyBytesObject*) in the inferior process."""
    _typename = 'PyBytesObject'

    def __str__(self):
        """Return the ob_size bytes stored at ob_sval as a local string."""
        length = self.field('ob_size')
        data = self.field('ob_sval').address.cast(_type_unsigned_char_ptr)
        chars = [chr(data[i]) for i in safe_range(length)]
        return ''.join(chars)

    def proxyval(self, visited):
        """Return the bytes content as a string in the gdb process."""
        return str(self)

    def write_repr(self, out, visited):
        """Write the value to `out` as a Python 3 bytes literal (b prefix)."""
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.  Double quotes are only used when the content
        # holds a single quote and no double quote.
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        # Whitespace characters that have a dedicated short escape:
        short_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte == quote or byte == '\\':
                out.write('\\')
                out.write(byte)
            elif byte in short_escapes:
                out.write(short_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh
                value = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(value & 0xf0) >> 4])
                out.write(hexdigits[value & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1020
class PyTupleObjectPtr(PyObjectPtr):
    """Wrapper for a (PyTupleObject*) in the inferior process."""
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        """Return the gdb.Value for the (PyObject*) stored at index i."""
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        """Return a local tuple holding the proxy value of each element."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        return tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
                     for i in safe_range(length))

    def write_repr(self, out, visited):
        """Write the tuple's repr to the file-like object `out`."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for i in safe_range(int_from_int(self.field('ob_size'))):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
        # A 1-tuple needs its trailing comma:
        out.write(',)' if self.field('ob_size') == 1 else ')')
1056
class PyTypeObjectPtr(PyObjectPtr):
    """PyObjectPtr subclass registered under the 'PyTypeObject' type name.

    It adds no behaviour of its own; everything comes from PyObjectPtr.
    """
    _typename = 'PyTypeObject'
1059
1060
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001061def _unichr_is_printable(char):
1062 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1063 if char == u" ":
1064 return True
1065 import unicodedata
Antoine Pitroub41e1282010-09-08 20:57:48 +00001066 return unicodedata.category(char) not in ("C", "Z")
1067
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        """Return the unicode string for code point x.

        Code points of 0x10000 and above are returned as a two-character
        surrogate pair.
        """
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = unichr(0xD800 | (offset >> 10))
        low = unichr(0xDC00 | (offset & 0x3FF))
        return high + low
else:
    # The builtin already covers the whole code point range here.
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001079
1080
class PyUnicodeObjectPtr(PyObjectPtr):
    """Wrapper for a (PyUnicodeObject*) in the inferior process.

    Handles both the layout that has a 'data' field (Python 3.3 and newer)
    and the older 'str'/'length' layout.
    """
    _typename = 'PyUnicodeObject'

    @staticmethod
    def _write_hex(out, value, ndigits):
        """Write the low `ndigits` hex digits of `value`, one per write."""
        for shift in range(4 * (ndigits - 1), -1, -4):
            out.write(hexdigits[(value >> shift) & 0xF])

    def char_width(self):
        """Return sizeof(Py_UNICODE) as reported by gdb."""
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        """Return a unicode string in the gdb process with the same content.

        `visited` is accepted for interface compatibility and is not used.
        """
        global _is_pep393
        if _is_pep393 is None:
            # Detect the layout once: it is identified by the presence of
            # a 'data' field in PyUnicodeObject.
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii_obj = compact['_base']
            state = ascii_obj['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii_obj['wstr']
            else:
                field_length = long(ascii_obj['length'])
                if is_compact_ascii:
                    field_str = ascii_obj.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Lone high surrogate: keep it rather than silently
                    # dropping it (the end-of-string case above keeps it
                    # too).  field_str[i] is examined on the next pass.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        """Write the value to `out` as a Python 3 str literal (no u prefix)."""
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  The DEL test must go
            # through ord(): comparing the character itself to the int 0x7F
            # is always False.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                self._write_hex(out, ord(ch), 2)

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # Characters the output encoding cannot represent are
                    # escaped as well.
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        self._write_hex(out, code, 2)
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        self._write_hex(out, code, 8)
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        self._write_hex(out, code, 4)
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1265
1266
1267
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001268
def int_from_int(gdbval):
    """Convert `gdbval` to a Python int by way of its str() form."""
    text = str(gdbval)
    return int(text)
1271
1272
def stringify(val, pretty=False):
    """Return a string representation of `val`.

    repr() puts everything on one line; pprint.pformat can be nicer, but
    can lead to very long results.  This function isolates the choice:
    the default (pretty=False) uses repr(), exactly as before, and
    pretty=True switches to pformat().
    """
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1281
1282
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # The gdb.Value to be printed; wrapped lazily in to_string()
        self.gdbval = gdbval

    def to_string(self):
        """Return the repr of the wrapped value, cut at MAX_OUTPUT_LEN."""
        # Generating the full proxy value and stringifying it is the
        # alternative, but doing so could be expensive; the truncated repr
        # is always used instead.
        wrapper = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return wrapper.get_truncated_repr(MAX_OUTPUT_LEN)
1298
def pretty_printer_lookup(gdbval):
    """Return a PyObjectPtrPrinter for pointers to the handled types.

    Returns None (implicitly declining) for anything that is not a pointer
    to PyObject, PyFrameObject or PyUnicodeObject.
    """
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(value_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1306
1307"""
1308During development, I've been manually invoking the code in this way:
1309(gdb) python
1310
1311import sys
1312sys.path.append('/home/david/coding/python-gdb')
1313import libpython
1314end
1315
1316then reloading it after each edit like this:
1317(gdb) python reload(libpython)
1318
1319The following code should ensure that the prettyprinter is registered
1320if the code is autoloaded by gdb when visiting libpython.so, provided
1321that this python file is installed to the same path as the library (or its
1322.debug file) plus a "-gdb.py" suffix, e.g:
1323 /usr/lib/libpython2.6.so.1.0-gdb.py
1324 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1325"""
def register(obj):
    """Append pretty_printer_lookup to obj.pretty_printers.

    When `obj` is None, the gdb module itself is used as the target.
    """
    # Identity test: None is a singleton, and "== None" would needlessly
    # go through any custom __eq__ defined by obj.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1334
1335
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001336
1337# Unfortunately, the exact API exposed by the gdb module varies somewhat
1338# from build to build
1339# See http://bugs.python.org/issue8279?#msg102276
1340
class Frame(object):
    '''
    Thin wrapper around a gdb.Frame that adds python-specific helpers
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return the wrapped older (calling) frame, or None'''
        gdb_older = self._gdbframe.older()
        return Frame(gdb_older) if gdb_older else None

    def newer(self):
        '''Return the wrapped newer (called) frame, or None'''
        gdb_newer = self._gdbframe.newer()
        return Frame(gdb_newer) if gdb_newer else None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if hasattr(self._gdbframe, 'select'):
            self._gdbframe.select()
            return True
        print ('Unable to select frame: '
               'this build of gdb does not expose a gdb.Frame.select method')
        return False

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        # Count the steps needed to reach the newest frame:
        steps = 0
        cursor = self.newer()
        while cursor:
            steps += 1
            cursor = cursor.newer()
        return steps

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False

        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True
        return False

    def get_pyop(self):
        '''Return the PyFrameObjectPtr for the local variable "f", or None
        if a ValueError is raised while obtaining it'''
        try:
            frame_value = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(frame_value)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame for gdb's selected frame, or None'''
        selected = gdb.selected_frame()
        if not selected:
            return None
        return Frame(selected)

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        candidate = cls.get_selected_frame()
        while candidate:
            if candidate.is_evalframeex():
                return candidate
            candidate = candidate.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a "#index ..." summary of this frame to sys.stdout'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        summary = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), summary))
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(source_line)

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout'''
        if not self.is_evalframeex():
            sys.stdout.write(' (not a python frame)\n')
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write(' (unable to read python frame information)\n')
            return

        pyop.print_traceback()
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())
1455
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self,
                             "py-list",
                             gdb.COMMAND_FILES,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START": list from there.  int() accepts the whitespace that
        # group(0) may carry around the digits.
        single = re.match(r'\s*(\d+)\s*', args)
        if single:
            start = int(single.group(0))
            end = start + 10

        # "START, END": an explicit range overrides the single-number form.
        span = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if span:
            start, end = [int(text) for text in span.groups()]

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # With no arguments, show a window around the current line:
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, text in enumerate(all_lines[start-1:end], start):
                label = str(number)
                # Highlight current line:
                if number == lineno:
                    label = '>' + label
                sys.stdout.write('%4s %s' % (label, text))


# ...and register the command:
PyList()
1531
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    while current:
        # Step one gdb frame in the requested direction:
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            break

        if candidate.is_evalframeex():
            # Result: select it, and print it only if selecting worked
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    direction = 'an older' if move_up else 'a newer'
    print('Unable to find %s python frame' % direction)
1556
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'
    def __init__(self):
        # Registered under the gdb command name "py-up"
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Towards older frames; the arguments are not used
        move_in_stack(True)
1568
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'
    def __init__(self):
        # Registered under the gdb command name "py-down"
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Towards newer frames; the arguments are not used
        move_in_stack(False)

# Not all builds of gdb have gdb.Frame.select, which both commands rely on
# to change frame; only register them when it is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001585
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        # Registered under the gdb command name "py-bt-full"
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk towards older frames, summarising each python frame found
        current = Frame.get_selected_python_frame()
        while current:
            is_python_frame = current.is_evalframeex()
            if is_python_frame:
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1603
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'
    def __init__(self):
        # Registered under the gdb command name "py-bt"
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The header is written even when no python frame is found
        sys.stdout.write('Traceback (most recent call first):\n')
        current = Frame.get_selected_python_frame()
        while current:
            is_python_frame = current.is_evalframeex()
            if is_python_frame:
                current.print_traceback()
            current = current.older()

PyBacktrace()
1622
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'
    def __init__(self):
        # Registered under the gdb command name "py-print"
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))

PyPrint()
1656
class PyLocals(gdb.Command):
    'Print the names and values of the local variables of the selected python frame'
    def __init__(self):
        # Registered under the gdb command name "py-locals"
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # The command takes no arguments; `args` is not used.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local, each value's repr being
        # truncated at MAX_OUTPUT_LEN
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()