Merged revisions 84655 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines
Issue #9804: ascii() now always represents unicode surrogate pairs as
a single `\UXXXXXXXX`, regardless of whether the character is printable
or not. Also, the "backslashreplace" error handler now joins surrogate
pairs into a single character on UCS-2 builds.
........
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 2a08337..645ef9d 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -174,6 +174,28 @@
a = {}
a[0] = a
self.assertEqual(ascii(a), '{0: {...}}')
+ # Advanced checks for unicode strings
+ def _check_uni(s):
+ self.assertEqual(ascii(s), repr(s))
+ _check_uni("'")
+ _check_uni('"')
+ _check_uni('"\'')
+ _check_uni('\0')
+ _check_uni('\r\n\t .')
+ # Unprintable non-ASCII characters
+ _check_uni('\x85')
+ _check_uni('\u1fff')
+ _check_uni('\U00012fff')
+ # Lone surrogates
+ _check_uni('\ud800')
+ _check_uni('\udfff')
+ # Issue #9804: surrogates should be joined even for printable
+ # wide characters (UCS-2 builds).
+ self.assertEqual(ascii('\U0001d121'), "'\\U0001d121'")
+ # All together
+ s = "'\0\"\n\r\t abcd\x85é\U00012fff\uD800\U0001D121xxx."
+ self.assertEqual(ascii(s),
+ r"""'\'\x00"\n\r\t abcd\x85\xe9\U00012fff\ud800\U0001d121xxx.'""")
def test_neg(self):
x = -sys.maxsize-1