Issue #8380: Port gdb/libpython to 3.x.
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index a8f28d7..426fb7b 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -20,8 +20,9 @@
 process, we can generate a "proxy value" within the gdb process.  For example,
 given a PyObject* in the inferior process that is in fact a PyListObject*
 holding three PyObject* that turn out to be PyBytesObject* instances, we can
-generate a proxy value within the gdb process that is a list of strings:
-  ["foo", "bar", "baz"]
+generate a proxy value within the gdb process that is a list of bytes
+instances:
+  [b"foo", b"bar", b"baz"]
 
 Doing so can be expensive for complicated graphs of objects, and could take
 some time, so we also have a "write_repr" method that writes a representation
@@ -57,7 +58,7 @@
 Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
 Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
 Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
-Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS    = (1L << 27)
 Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
 Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
 Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
@@ -66,6 +67,9 @@
 
 MAX_OUTPUT_LEN=1024
 
+hexdigits = "0123456789abcdef"
+
+
 class NullPyObjectPtr(RuntimeError):
     pass
 
@@ -148,12 +152,8 @@
             return pyo_ptr.dereference()[name]
 
         if name == 'ob_size':
-            try:
-            # Python 2:
-                return self._gdbval.dereference()[name]
-            except RuntimeError:
-                # Python 3:
-                return self._gdbval.dereference()['ob_base'][name]
+            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
 
         # General case: look it up inside the object:
         return self._gdbval.dereference()[name]
@@ -318,7 +318,7 @@
             return PyListObjectPtr
         if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
             return PyTupleObjectPtr
-        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
+        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
             return PyBytesObjectPtr
         if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
             return PyUnicodeObjectPtr
@@ -355,6 +355,8 @@
     def as_address(self):
         return long(self._gdbval)
 
+class PyVarObjectPtr(PyObjectPtr):
+    _typename = 'PyVarObject'
 
 class ProxyAlreadyVisited(object):
     '''
@@ -515,20 +517,6 @@
         out.write(self.safe_tp_name())
         self.write_field_repr('args', out, visited)
 
-class PyBoolObjectPtr(PyObjectPtr):
-    """
-    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
-    <bool> instances (Py_True/Py_False) within the process being debugged.
-    """
-    _typename = 'PyBoolObject'
-
-    def proxyval(self, visited):
-        if int_from_int(self.field('ob_ival')):
-            return True
-        else:
-            return False
-
-
 class PyClassObjectPtr(PyObjectPtr):
     """
     Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
@@ -691,13 +679,6 @@
         _write_instance_repr(out, visited,
                              cl_name, pyop_in_dict, self.as_address())
 
-class PyIntObjectPtr(PyObjectPtr):
-    _typename = 'PyIntObject'
-
-    def proxyval(self, visited):
-        result = int_from_int(self.field('ob_ival'))
-        return result
-
 class PyListObjectPtr(PyObjectPtr):
     _typename = 'PyListObject'
 
@@ -770,6 +751,22 @@
             result = -result
         return result
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
+        proxy = self.proxyval(visited)
+        out.write("%s" % proxy)
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    def proxyval(self, visited):
+        if PyLongObjectPtr.proxyval(self, visited):
+            return True
+        else:
+            return False
 
 class PyNoneStructPtr(PyObjectPtr):
     """
@@ -894,9 +891,9 @@
             return
         out.write('Frame 0x%x, for file %s, line %i, in %s ('
                   % (self.as_address(),
-                     self.co_filename,
+                     self.co_filename.proxyval(visited),
                      self.current_line_num(),
-                     self.co_name))
+                     self.co_name.proxyval(visited)))
         first = True
         for pyop_name, pyop_value in self.iter_locals():
             if not first:
@@ -933,7 +930,8 @@
             return set(members)
 
     def write_repr(self, out, visited):
-        out.write(self.safe_tp_name())
+        # Emulate Python 3's set_repr
+        tp_name = self.safe_tp_name()
 
         # Guard against infinite loops:
         if self.as_address() in visited:
@@ -941,7 +939,18 @@
             return
         visited.add(self.as_address())
 
-        out.write('([')
+        # Python 3's set_repr special-cases the empty set:
+        if not self.field('used'):
+            out.write(tp_name)
+            out.write('()')
+            return
+
+        # Python 3 uses {} for set literals:
+        if tp_name != 'set':
+            out.write(tp_name)
+            out.write('(')
+
+        out.write('{')
         first = True
         table = self.field('table')
         for i in safe_range(self.field('mask')+1):
@@ -955,7 +964,10 @@
                         out.write(', ')
                     first = False
                     pyop_key.write_repr(out, visited)
-        out.write('])')
+        out.write('}')
+
+        if tp_name != 'set':
+            out.write(')')
 
 
 class PyBytesObjectPtr(PyObjectPtr):
@@ -970,6 +982,37 @@
     def proxyval(self, visited):
         return str(self)
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
+
+        # Get a PyStringObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+        # to Python 2 code:
+        quote = "'"
+        if "'" in proxy and not '"' in proxy:
+            quote = '"'
+        out.write('b')
+        out.write(quote)
+        for byte in proxy:
+            if byte == quote or byte == '\\':
+                out.write('\\')
+                out.write(byte)
+            elif byte == '\t':
+                out.write('\\t')
+            elif byte == '\n':
+                out.write('\\n')
+            elif byte == '\r':
+                out.write('\\r')
+            elif byte < ' ' or ord(byte) >= 0x7f:
+                out.write('\\x')
+                out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
+                out.write(hexdigits[ord(byte) & 0xf])
+            else:
+                out.write(byte)
+        out.write(quote)
+
 class PyTupleObjectPtr(PyObjectPtr):
     _typename = 'PyTupleObject'
 
@@ -1010,9 +1053,21 @@
     _typename = 'PyTypeObject'
 
 
+def _unichr_is_printable(char):
+    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
+    if char == u" ":
+        return True
+    import unicodedata
+    return unicodedata.category(char)[0] not in ("C", "Z")
+
+
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
+    def char_width(self):
+        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+        return _type_Py_UNICODE.sizeof
+
     def proxyval(self, visited):
         # From unicodeobject.h:
         #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
@@ -1029,6 +1084,102 @@
         result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 str literal, i.e. without a "u" prefix
+
+        # Get a PyUnicodeObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
+        # to Python 2:
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+        out.write(quote)
+
+        i = 0
+        while i < len(proxy):
+            ch = proxy[i]
+            i += 1
+
+            # Escape quotes and backslashes
+            if ch == quote or ch == '\\':
+                out.write('\\')
+                out.write(ch)
+
+            #  Map special whitespace to '\t', \n', '\r'
+            elif ch == '\t':
+                out.write('\\t')
+            elif ch == '\n':
+                out.write('\\n')
+            elif ch == '\r':
+                out.write('\\r')
+
+            # Map non-printable US ASCII to '\xhh' */
+            elif ch < ' ' or ch == 0x7F:
+                out.write('\\x')
+                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+                out.write(hexdigits[ord(ch) & 0x000F])
+
+            # Copy ASCII characters as-is
+            elif ord(ch) < 0x7F:
+                out.write(ch)
+
+            # Non-ASCII characters
+            else:
+                ucs = ch;
+
+                if self.char_width == 2:
+                    ch2 = 0
+                    # Get code point from surrogate pair
+                    if i < len(proxy):
+                        ch2 = proxy[i]
+                        if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
+                            and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
+                            ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+                            i += 1
+
+                # Map Unicode whitespace and control characters
+                # (categories Z* and C* except ASCII space)
+                if not _unichr_is_printable(ucs):
+                    # Unfortuately, Python 2's unicode type doesn't seem
+                    # to expose the "isprintable" method
+
+                    # Map 8-bit characters to '\\xhh'
+                    if ucs <= 0xff:
+                        out.write('\\x')
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
+                        out.write(hexdigits[ord(ucs) & 0x000F])
+                    # Map 21-bit characters to '\U00xxxxxx'
+                    elif ucs >= 0x10000:
+                        out.write('\\U')
+                        out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
+                        out.write(hexdigits[ord(ucs) & 0x0000000F])
+                    # Map 16-bit characters to '\uxxxx'
+                    else:
+                        out.write('\\u')
+                        out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
+                        out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
+                        out.write(hexdigits[ord(ucs) & 0x000F])
+                else:
+                    # Copy characters as-is
+                    out.write(ch)
+                    if self.char_width == 2:
+                        if ord(ucs) >= 0x10000:
+                            out.write(ch2)
+
+        out.write(quote)
+
+
+
 
 def int_from_int(gdbval):
     return int(str(gdbval))
@@ -1065,7 +1216,7 @@
     if type.code == gdb.TYPE_CODE_PTR:
         type = type.target().unqualified()
         t = str(type)
-        if t in ("PyObject", "PyFrameObject"):
+        if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
             return PyObjectPtrPrinter(gdbval)
 
 """