Make the unicode-escape and the UTF-16 codecs handle surrogates
correctly, so that both are roundtrip-safe.

Some minor cleanups of the code.

Added tests for roundtrip safety.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c9732d6..eb74854 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -445,11 +445,19 @@
 verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
 verify(u'hello'.encode('latin-1') == 'hello')
 
+# Roundtrip safety for BMP (just the first 1024 chars)
 u = u''.join(map(unichr, range(1024)))
 for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                  'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
     verify(unicode(u.encode(encoding),encoding) == u)
 
+# Roundtrip safety for non-BMP (just a few chars)
+u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
+for encoding in ('utf-8',
+                 'utf-16', 'utf-16-le', 'utf-16-be',
+                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+    verify(unicode(u.encode(encoding),encoding) == u)
+
 u = u''.join(map(unichr, range(256)))
 for encoding in (
     'latin-1',