blob: b6ec063c93657095ff2464b93aecb3adf1cad431 [file] [log] [blame]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types. Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython. In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process. For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
Victor Stinner67df3a42010-04-21 13:53:05 +000022holding three PyObject* that turn out to be PyBytesObject* instances, we can
Martin v. Löwis5ae68102010-04-21 22:38:42 +000023generate a proxy value within the gdb process that is a list of bytes
24instances:
25 [b"foo", b"bar", b"baz"]
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000026
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object. This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43from __future__ import with_statement
44import gdb
Victor Stinner150016f2010-05-19 23:04:56 +000045import locale
Georg Brandlb639c142010-07-14 08:54:40 +000046import sys
Benjamin Peterson6a6666a2010-04-11 21:49:28 +000047
# Look up the gdb.Type for some standard types.  These lookups run at import
# time, unlike the lookups of python-specific types, which are deferred (see
# the module docstring).
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
_type_size_t = gdb.lookup_type('size_t')
_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() # unsigned short*
_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() # unsigned int*

# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Size of a void* as reported by gdb; used for the pointer-alignment
# arithmetic in _PyObject_VAR_SIZE and HeapTypeObjectPtr.get_attr_dict.
SIZEOF_VOID_P = _type_void_ptr.sizeof


# Bit masks tested against a type object's tp_flags field by
# PyObjectPtr.subclass_from_type to pick a wrapper class.
Py_TPFLAGS_HEAPTYPE = (1L << 9)

Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)


# NOTE(review): presumably the default cap for truncated reprs; its users are
# outside this part of the file -- confirm.
MAX_OUTPUT_LEN=1024

hexdigits = "0123456789abcdef"

# Encoding used by write_unicode() when encoding unicode text for output.
ENCODING = locale.getpreferredencoding()
Martin v. Löwis5ae68102010-04-21 22:38:42 +000079
class NullPyObjectPtr(RuntimeError):
    """Raised by PyObjectPtr.field() when the wrapped pointer is NULL."""
82
83
def safety_limit(val):
    """Clamp a value read from the debugged process to at most 1000.

    Values taken from the inferior may be corrupt, so anything used to size
    an iteration (list lengths, table sizes, ...) is capped to keep the gdb
    process itself from being dragged down.
    """
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
89
90
def safe_range(val):
    """Like xrange(val), but with val first capped by safety_limit().

    The count comes from the inferior process and may be corrupt, so it is
    not trusted beyond the safety threshold.
    """
    capped = safety_limit(val)
    return xrange(capped)
95
def write_unicode(file, text):
    """Write a byte or unicode string to the file-like object *file*.

    Unicode strings are encoded to ENCODING with the 'backslashreplace'
    error handler so that unencodable characters cannot raise
    UnicodeEncodeError; byte strings are written unchanged.
    """
    payload = text
    if isinstance(payload, unicode):
        payload = payload.encode(ENCODING, 'backslashreplace')
    file.write(payload)
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000103
def os_fsencode(filename):
    """Encode a unicode filename to bytes using the filesystem encoding.

    Non-unicode input is returned untouched.  Except for the 'mbcs'
    encoding, lone surrogates in the range U+DC80..U+DCFF are mapped back
    to the single bytes 0x80..0xFF, emulating the 'surrogateescape' error
    handler.
    """
    if not isinstance(filename, unicode):
        return filename

    encoding = sys.getfilesystemencoding()
    if encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(encoding)

    def encode_char(char):
        codepoint = ord(char)
        # surrogateescape error handler
        if 0xDC80 <= codepoint <= 0xDCFF:
            return chr(codepoint - 0xDC00)
        return char.encode(encoding)

    return ''.join([encode_char(char) for char in filename])
120
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once the length cap is hit."""


class TruncatedStringIO(object):
    '''In-memory text sink similar to cStringIO that can cap its length.

    When "maxlen" is set, a write that would exceed it stores only the part
    that still fits and then raises StringTruncated.'''
    def __init__(self, maxlen=None):
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        limit = self.maxlen
        if limit:
            room = limit - len(self._val)
            if len(data) > room:
                # Truncation: keep what fits, then abort the traversal
                self._val += data[0:room]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        return self._val
142
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct; get_gdb_type() looks up a pointer to this type.
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        """
        Wrap "gdbval", first casting it to the gdb.Type "cast_to" if one is
        given.
        """
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's ob_type field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Return True if the wrapped pointer is NULL.'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the tp_name of this object's type as a string, or 'unknown'
        if it cannot be read.
        '''
        try:
            return self.type().field('tp_name').string()
        except NullPyObjectPtr:
            # NULL tp_name?
            return 'unknown'
        except RuntimeError:
            # Can't even read the object at all?
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns "cls" itself when nothing more specific matches or the type
        object cannot be read.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        #print 'tp_flags = 0x%08x' % tp_flags
        #print 'tp_name = %r' % tp_name

        # Exact type names take priority over the flag tests below.
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # Heap types are tested before the builtin-subclass flags, so a
        # heap-allocated subclass of e.g. list is shown as an instance.
        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        # NOTE(review): "cls" may already have been rebound to a subclass
        # above if the failure happened after subclass_from_type() returned.
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to this class's _typename.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer as a Python long.'''
        return long(self._gdbval)
385
class PyVarObjectPtr(PyObjectPtr):
    """
    Wrapper for a (PyVarObject*); PyObjectPtr.field() casts to this type to
    read "ob_size".
    """
    _typename = 'PyVarObject'
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000388
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy returned when an object is reached a second time during a
    traversal, protecting against infinite recursion on cyclic object graphs.

    Its repr() is the placeholder text given at construction, analogous to the
    values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # Placeholder text, e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        return self._rep
401
402
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared helper: write "<name(attr=value, ...) at remote 0xADDR>" to the
    file-like object "out".

    The parenthesised attribute list is only emitted when "pyop_attrdict" is
    a PyDictObjectPtr.'''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
422
423
class InstanceProxy(object):
    '''
    Proxy for an instance in the inferior process: class name, attribute
    dictionary (already converted to proxy values) and remote address.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only name and address are shown
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.iteritems():
            pairs.append("%s=%r" % (arg, val))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(pairs), self.address)
440
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up to
    a multiple of SIZEOF_VOID_P, and cast the result to size_t.
    '''
    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    return (unaligned & ~align_mask).cast(_type_size_t)
447
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper used for objects whose type has Py_TPFLAGS_HEAPTYPE set (see
    PyObjectPtr.subclass_from_type); rendered as
    "<typename(attr=value, ...) at remote 0xADDR>".
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # A negative offset is adjusted by the object's total
                    # size, which depends on abs(ob_size).
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # NOTE(review): AssertionError is not a RuntimeError, so a
                    # failed assertion here propagates to the caller instead
                    # of producing the documented None result.
                    assert dictoffset > 0
                    assert dictoffset % SIZEOF_VOID_P == 0

                # Byte-offset from the start of the object to the slot holding
                # the (PyObject*) for the dict, then read that slot.
                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited('<...>') if this
        object is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write the instance representation to "out", or '<...>' if this object
        is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
513
class ProxyException(Exception):
    '''
    Proxy for an exception instance in the inferior process; its repr() is
    the remote type name immediately followed by the repr of the args.
    '''
    def __init__(self, tp_name, args):
        self.args = args
        self.tp_name = tp_name

    def __repr__(self):
        pieces = (self.tp_name, self.args)
        return '%s%r' % pieces
521
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), args_proxy)

    def write_repr(self, out, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Type name immediately followed by the repr of the "args" field
        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
547
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
554
555
class BuiltInFunctionProxy(object):
    '''Proxy for a built-in function, identified by its ml_name.'''
    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
562
class BuiltInMethodProxy(object):
    '''
    Proxy for a built-in method: its ml_name plus the PyObjectPtr of the
    object it is bound to.
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        owner = self.pyop_m_self
        details = (self.ml_name,
                   owner.safe_tp_name(),
                   owner.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
574
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        # m_ml is a (PyMethodDef*); its ml_name is the C string with the name
        ml_name = self.field('m_ml')['ml_name'].string()

        # A NULL m_self means a plain function rather than a bound method
        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
591
592
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCodeObject*, i.e. a <code> instance
    living in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Map the bytecode offset "addrq" to a source line number.

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as PyCode_Addr2Line does, rather than
        # from 0 as in lnotab_notes.txt:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab alternates (address increment, line increment) entries
        addr_steps = co_lnotab[::2]
        line_steps = co_lnotab[1::2]

        addr = 0
        for addr_step, line_step in zip(addr_steps, line_steps):
            addr += ord(addr_step)
            if addr > addrq:
                break
            lineno += ord(line_step)
        return lineno
620
621
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Generate (PyObjectPtr key, PyObjectPtr value) pairs for every table
        slot with a non-NULL value, analogous to dict.iteritems()
        '''
        for slot in safe_range(self.field('ma_mask') + 1):
            ep = self.field('ma_table') + slot
            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        # Each pair is built key first, then value, before being inserted
        return dict((pyop_key.proxyval(visited), pyop_value.proxyval(visited))
                    for pyop_key, pyop_value in self.iteritems())

    def write_repr(self, out, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
671
class PyListObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyListObject*, i.e. a list instance
    living in the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # The gdb.Value for the (PyObject*) stored at index i of ob_item
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        items = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop_item.proxyval(visited))
        return items

    def write_repr(self, out, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
704
705class PyLongObjectPtr(PyObjectPtr):
706 _typename = 'PyLongObject'
707
708 def proxyval(self, visited):
709 '''
710 Python's Include/longobjrep.h has this declaration:
711 struct _longobject {
712 PyObject_VAR_HEAD
713 digit ob_digit[1];
714 };
715
716 with this description:
717 The absolute value of a number is equal to
718 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
719 Negative numbers are represented with ob_size < 0;
720 zero is represented by ob_size == 0.
721
722 where SHIFT can be either:
723 #define PyLong_SHIFT 30
724 #define PyLong_SHIFT 15
725 '''
726 ob_size = long(self.field('ob_size'))
727 if ob_size == 0:
728 return 0L
729
730 ob_digit = self.field('ob_digit')
731
732 if gdb.lookup_type('digit').sizeof == 2:
733 SHIFT = 15L
734 else:
735 SHIFT = 30L
736
737 digits = [long(ob_digit[i]) * 2**(SHIFT*i)
738 for i in safe_range(abs(ob_size))]
739 result = sum(digits)
740 if ob_size < 0:
741 result = -result
742 return result
743
Martin v. Löwis5ae68102010-04-21 22:38:42 +0000744 def write_repr(self, out, visited):
745 # Write this out as a Python 3 int literal, i.e. without the "L" suffix
746 proxy = self.proxyval(visited)
747 out.write("%s" % proxy)
748
749
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) in the process being debugged.
    """
    def proxyval(self, visited):
        # Truthiness of the underlying integer value gives True/False
        return bool(PyLongObjectPtr.proxyval(self, visited))
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000760
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyObject* pointing at the (hopefully
    unique) _Py_NoneStruct, whose ob_type is PyNone_Type.  Its proxy value
    is simply None.
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        return None
770
771
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a Python frame
    within the process being debugged.

    When the pointer has been optimized out none of the code/line attributes
    (co, co_name, co_filename, f_lineno, f_lasti, co_nlocals, co_varnames)
    are set, so every method checks is_optimized_out() first.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap "gdbval" (optionally cast to "cast_to") and, unless the value is
        optimized out, cache the code object and line bookkeeping fields.

        "cast_to" defaults to None, matching PyObjectPtr.__init__, so that the
        uncast fallback in PyObjectPtr.from_pyobject_ptr can construct this
        class without raising TypeError.
        '''
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Slots of f_localsplus holding NULL are skipped.  Yields nothing for
        an optimized-out frame.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame

        Returns an empty tuple for an optimized-out frame.
        '''
        if self.is_optimized_out():
            # This is not a generator, so a bare "return" would hand None to
            # callers that iterate over the result (e.g. get_var_by_name).
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables

        Returns an empty tuple for an optimized-out frame.
        '''
        if self.is_optimized_out():
            # See iter_globals: must be iterable, not None.
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        Locals are searched first, then globals, then builtins.

        If not found, return (None, None)
        '''
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None for an optimized-out frame.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        else:
            return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns None if the source file cannot be opened or does not contain
        the current line (for instance because it changed on disk since the
        code was compiled).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()

        lineno = self.current_line_num()
        # Reject out-of-range numbers explicitly: a too-large value would
        # raise IndexError, and 0 would silently pick the last line.
        if lineno is None or not 1 <= lineno <= len(all_lines):
            return None
        # Convert from 1-based current_line_num to 0-based list offset:
        return all_lines[lineno - 1]

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file F, line N, in NAME (local=value, ...)"
        to the file-like object "out".
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''
        Write a traceback-style 'File "...", line N, in NAME' line for this
        frame to sys.stdout.
        '''
        if self.is_optimized_out():
            sys.stdout.write(' (frame information optimized out)\n')
            # The code-object attributes used below were never set for an
            # optimized-out frame, so stop here.
            return
        visited = set()
        sys.stdout.write(' File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
912
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PySetObject* i.e. a set or frozenset
    instance within the process being debugged.
    """
    _typename = 'PySetObject'

    def proxyval(self, visited):
        '''
        Return a set (or a frozenset when the remote type is named
        'frozenset') of proxy values for the members, skipping empty table
        slots and dummy keys.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = []
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''
        Write a Python 3 style representation to the file-like object "out":
        "{a, b}" for a set, "tp_name({a, b})" for other types, and
        "tp_name()" when empty.
        '''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('(...)')
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals:
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
            setentry = table[i]
            key = setentry['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # The proxy is only needed to recognise dummy keys.  Build it
                # against a copy of "visited": proxyval() records the key's
                # address, and doing that on the real set would make the
                # write_repr() call below treat the key as already visited
                # and print a recursion placeholder instead of its contents.
                key_proxy = pyop_key.proxyval(set(visited))
                if key_proxy != '<dummy key>':
                    if not first:
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
Benjamin Peterson6a6666a2010-04-11 21:49:28 +0000975
976
class PyBytesObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyBytesObject*) in the inferior process.'''
    _typename = 'PyBytesObject'

    def __str__(self):
        # Read ob_size bytes starting at ob_sval, one character per byte.
        length = self.field('ob_size')
        payload = self.field('ob_sval')
        byte_ptr = payload.address.cast(_type_unsigned_char_ptr)
        chars = []
        for idx in safe_range(length):
            chars.append(chr(byte_ptr[idx]))
        return ''.join(chars)

    def proxyval(self, visited):
        # The proxy for a bytes object is simply a local string.
        return str(self)

    def write_repr(self, out, visited):
        '''Write the value to "out" as a Python 3 bytes literal (with a
        "b" prefix), following Python 3's PyBytes_Repr.'''
        # Local (gdb-side) string holding the bytes:
        text = self.proxyval(visited)

        # Prefer single quotes; switch to double quotes only when that
        # avoids having to escape anything.
        if "'" in text and '"' not in text:
            quote = '"'
        else:
            quote = "'"

        named_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in text:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in named_escapes:
                out.write(named_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1019
class PyTupleObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyTupleObject*) in the inferior process.'''
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # The gdb.Value for the (PyObject*) stored at index i:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a local tuple holding the proxy value of each element.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        items = []
        for idx in safe_range(int_from_int(self.field('ob_size'))):
            element = PyObjectPtr.from_pyobject_ptr(self[idx])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write a tuple literal for this object to the file-like "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for idx in safe_range(int_from_int(self.field('ob_size'))):
            if idx > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[idx]).write_repr(out, visited)

        # A one-element tuple needs its trailing comma:
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1055
class PyTypeObjectPtr(PyObjectPtr):
    # Wrapper for a (PyTypeObject*); everything except the struct name is
    # inherited from PyObjectPtr.
    _typename = 'PyTypeObject'
1058
1059
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001060def _unichr_is_printable(char):
1061 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1062 if char == u" ":
1063 return True
1064 import unicodedata
Antoine Pitroub41e1282010-09-08 20:57:48 +00001065 return unicodedata.category(char) not in ("C", "Z")
1066
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        # Code points inside the BMP map to a single character...
        if x < 0x10000:
            return unichr(x)
        # ...anything above is split into a high/low surrogate pair.
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    # Wide build: the builtin already covers the whole range.
    _unichr = unichr
Martin v. Löwis5ae68102010-04-21 22:38:42 +00001078
1079
class PyUnicodeObjectPtr(PyObjectPtr):
    '''Wrapper for a (PyUnicodeObject*) in the inferior process.

    proxyval() builds a local unicode string from the inferior's character
    data; write_repr() emits it as a Python 3 style str literal.
    '''
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return sizeof(Py_UNICODE) in the inferior process.'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a local unicode instance equal to the inferior's string.'''
        global _is_pep393
        if _is_pep393 is None:
            # Detect the string layout once: it is treated as the
            # "Python 3.3 and newer" layout when the struct has a 'data'
            # field.
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                # "address + 1" is pointer arithmetic on the struct type:
                # it points just past the end of that struct.
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr)
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr)
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr)
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # NOTE(review): an unpaired high surrogate is skipped
                    # here rather than emitted -- confirm this is intended.
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write the string to the file-like "out" as a Python 3 str
        literal, i.e. without a "u" prefix.'''
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.
            # DEL has to be tested through ord(): comparing the character
            # itself with the integer 0x7F is never true.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded for output is
                    # escaped as well.
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Pick the shortest escape that fits: '\xhh' for 8-bit
                    # values, '\Uxxxxxxxx' above the BMP, '\uxxxx' otherwise.
                    if code <= 0xff:
                        out.write('\\x')
                        shifts = (4, 0)
                    elif code >= 0x10000:
                        out.write('\\U')
                        shifts = (28, 24, 20, 16, 12, 8, 4, 0)
                    else:
                        out.write('\\u')
                        shifts = (12, 8, 4, 0)
                    for shift in shifts:
                        out.write(hexdigits[(code >> shift) & 0x0F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1264
1265
1266
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001267
def int_from_int(gdbval):
    # Convert via the value's string form.
    text = str(gdbval)
    return int(text)
1270
1271
def stringify(val):
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to v.long results; this function isolates the choice
    use_pformat = False
    if use_pformat:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1280
1281
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # The gdb.Value to be rendered by to_string().
        self.gdbval = gdbval

    def to_string(self):
        wrapped = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        # Building the complete proxy value and stringifying it could be
        # expensive, so the truncated repr is what is actually used.
        want_full_proxy = False
        if want_full_proxy:
            full_value = wrapped.proxyval(set())
            return stringify(full_value)
        return wrapped.get_truncated_repr(MAX_OUTPUT_LEN)
1297
def pretty_printer_lookup(gdbval):
    # Only pointers to one of the handled struct types get the printer;
    # for anything else return None.
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None

    pointee_name = str(value_type.target().unqualified())
    if pointee_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1305
1306"""
1307During development, I've been manually invoking the code in this way:
1308(gdb) python
1309
1310import sys
1311sys.path.append('/home/david/coding/python-gdb')
1312import libpython
1313end
1314
1315then reloading it after each edit like this:
1316(gdb) python reload(libpython)
1317
1318The following code should ensure that the prettyprinter is registered
1319if the code is autoloaded by gdb when visiting libpython.so, provided
1320that this python file is installed to the same path as the library (or its
1321.debug file) plus a "-gdb.py" suffix, e.g:
1322 /usr/lib/libpython2.6.so.1.0-gdb.py
1323 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1324"""
def register (obj):
    '''Append pretty_printer_lookup to obj.pretty_printers.

    If obj is None, the printer is appended to the gdb module's own
    pretty_printers list instead.
    '''
    # Identity test: "== None" would go through the object's own equality
    # protocol.
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1333
1334
Martin v. Löwis5226fd62010-04-21 06:05:58 +00001335
1336# Unfortunately, the exact API exposed by the gdb module varies somewhat
1337# from build to build
1338# See http://bugs.python.org/issue8279?#msg102276
1339
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame for the calling (older) frame, or None.'''
        outer = self._gdbframe.older()
        return Frame(outer) if outer else None

    def newer(self):
        '''Return a Frame for the called (newer) frame, or None.'''
        inner = self._gdbframe.newer()
        return Frame(inner) if inner else None

    def select(self):
        '''Select this frame in gdb if possible.

        Returns True on success, or False (after printing a message) when
        this build of gdb has no gdb.Frame.select method; it seems to be
        present on Fedora 12 onwards, but absent on Ubuntu buildbot.
        '''
        if hasattr(self._gdbframe, 'select'):
            self._gdbframe.select()
            return True
        print ('Unable to select frame: '
               'this build of gdb does not expose a gdb.Frame.select method')
        return False

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        # Count the hops needed to reach the newest frame:
        hops = 0
        cursor = self.newer()
        while cursor:
            hops += 1
            cursor = cursor.newer()
        return hops

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() != 'PyEval_EvalFrameEx':
            return False

        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function.
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True

        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr for this frame's "f" variable, or None
        if reading it raises ValueError.'''
        try:
            f_value = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(f_value)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None.'''
        selected = gdb.selected_frame()
        if not selected:
            return None
        return Frame(selected)

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        candidate = cls.get_selected_frame()

        # Walk outwards until a PyEval_EvalFrameEx frame turns up:
        while candidate and not candidate.is_evalframeex():
            candidate = candidate.older()

        if candidate:
            return candidate

        # Not found:
        return None

    def print_summary(self):
        '''Write a "#<index> ..." summary of this frame to sys.stdout,
        followed by the current source line when one is available.'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        summary = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
        write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), summary))
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(source_line)

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout.'''
        if not self.is_evalframeex():
            sys.stdout.write(' (not a python frame)\n')
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write(' (unable to read python frame information)\n')
            return

        pyop.print_traceback()
        source_line = pyop.current_line()
        if source_line is not None:
            sys.stdout.write(' %s\n' % source_line.strip())
1454
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__(self, "py-list",
                             gdb.COMMAND_FILES, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START, END" takes precedence over a lone "START" (which lists
        # ten lines beyond START).
        range_match = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if range_match:
            start, end = map(int, range_match.groups())
        else:
            single_match = re.match(r'\s*(\d+)\s*', args)
            if single_match:
                start = int(single_match.group(0))
                end = start + 10

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # Without an explicit range, show a window around the current line:
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, text in enumerate(all_lines[start-1:end], start):
                label = str(number)
                # Highlight current line:
                if number == lineno:
                    label = '>' + label
                sys.stdout.write('%4s %s' % (label, text))


# ...and register the command:
PyList()
1530
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    while current:
        # Step one frame in the requested direction:
        neighbour = current.older() if move_up else current.newer()
        if not neighbour:
            break

        if neighbour.is_evalframeex():
            # Found the target; the summary is shown only if it could
            # actually be selected.
            if neighbour.select():
                neighbour.print_summary()
            return

        current = neighbour

    print('Unable to find an older python frame' if move_up
          else 'Unable to find a newer python frame')
1555
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Registered as the "py-up" stack command, without completion.
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Head towards the caller, i.e. older frames.
        move_in_stack(True)
1567
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Registered as the "py-down" stack command, without completion.
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Head towards the callee, i.e. newer frames.
        move_in_stack(False)

# Not all builds of gdb have gdb.Frame.select
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
Benjamin Peterson6a6666a2010-04-11 21:49:28 +00001584
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Registered as the "py-bt-full" stack command, without completion.
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk outwards from the selected python frame, summarising every
        # PyEval_EvalFrameEx frame met on the way.
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1602
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Registered as the "py-bt" stack command, without completion.
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Header first, then one traceback entry per python frame, walking
        # outwards from the selected python frame.
        sys.stdout.write('Traceback (most recent call first):\n')
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_traceback()
            current = current.older()

PyBacktrace()
1621
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Registered as the "py-print" data command, without completion.
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name.
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        rendered = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, rendered))

PyPrint()
1655
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'
    # The previous help text was copied from py-print and described a
    # variable lookup, which this command does not do.

    def __init__(self):
        # Registered as the "py-locals" data command, without completion.
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        # args is ignored: this command takes no arguments.
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local, with the value's repr
        # truncated to MAX_OUTPUT_LEN.
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print ('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()