Encode surrogates in UTF-8 even for a wide Py_UNICODE.
Implement sys.maxunicode.
Explicitly wrap around upper/lower computations for wide Py_UNICODE.
When decoding characters outside the BMP (code points above U+FFFF) with
UTF-8, represent the expected test results using the \U notation.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c82ac69..c9732d6 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -386,9 +386,9 @@
''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
# UTF-8 specific decoding tests
verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
- 'utf-8') == u'\ud84d\udc56' )
+ 'utf-8') == u'\U00023456' )
verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
- 'utf-8') == u'\ud800\udc02' )
+ 'utf-8') == u'\U00010002' )
verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
'utf-8') == u'\u20ac' )