Implement PEP 393.

commit: d63a3b8beb4a0841cb59fb3515347ccaab34b733 [log] [tgz]
author: Martin v. Löwis <martin@v.loewis.de> Wed Sep 28 07:41:54 2011 +0200
committer: Martin v. Löwis <martin@v.loewis.de> Wed Sep 28 07:41:54 2011 +0200
tree: 3b4e3cc63151c5a5a910c3550a190aefaea96ad4
parent: 48d49497c50e79d14e9df9527d766ca3a0a38be5 [diff] [blame]
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 4355ef5..ed96d76 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py

@@ -833,13 +833,39 @@
         class newstyleclass(object): pass
         check(newstyleclass, s)
         # unicode
-        usize = len('\0'.encode('unicode-internal'))
-        samples = ['', '1'*100]
-        # we need to test for both sizes, because we don't know if the string
-        # has been cached
+        # each sample is a string; its character size follows from max(s)
+        # don't put any static strings here, as they may contain
+        # wchar_t or UTF-8 representations
+        samples = ['1'*100, '\xff'*50,
+                   '\u0100'*40, '\uffff'*100,
+                   '\U00010000'*30, '\U0010ffff'*100]
+        asciifields = h + "PPiP"
+        compactfields = asciifields + "PPP"
+        unicodefields = compactfields + "P"
         for s in samples:
-            basicsize =  size(h + 'PPPiP') + usize * (len(s) + 1)
-            check(s, basicsize)
+            maxchar = ord(max(s))
+            if maxchar < 128:
+                L = size(asciifields) + len(s) + 1
+            elif maxchar < 256:
+                L = size(compactfields) + len(s) + 1
+            elif maxchar < 65536:
+                L = size(compactfields) + 2*(len(s) + 1)
+            else:
+                L = size(compactfields) + 4*(len(s) + 1)
+            check(s, L)
+        # verify that the UTF-8 size is accounted for
+        s = chr(0x4000)   # 4 bytes canonical representation
+        check(s, size(compactfields) + 4)
+        try:
+            # FIXME: codecs.lookup(str) calls encodings.search_function() which
+            # calls __import__ using str in the module name. __import__ encodes
+            # the module name to the file system encoding (which is the locale
+            # encoding), so test_sys fails if the locale encoding is not UTF-8.
+            codecs.lookup(s) # produces 4 bytes UTF-8
+        except LookupError:
+            check(s, size(compactfields) + 4 + 4)
+        # TODO: add check that forces the presence of wchar_t representation
+        # TODO: add check that forces layout of unicodefields
         # weakref
         import weakref
         check(weakref.ref(int), size(h + '2Pl2P'))
commit	d63a3b8beb4a0841cb59fb3515347ccaab34b733	[log] [tgz]
author	Martin v. Löwis <martin@v.loewis.de>	Wed Sep 28 07:41:54 2011 +0200
committer	Martin v. Löwis <martin@v.loewis.de>	Wed Sep 28 07:41:54 2011 +0200
tree	3b4e3cc63151c5a5a910c3550a190aefaea96ad4
parent	48d49497c50e79d14e9df9527d766ca3a0a38be5 [diff] [blame]