blob: 47af8b938f9b0feae1d36155501b4a6d7d5413ae [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Guido van Rossum98297ee2007-11-06 21:34:58 +00008import codecs
9import struct
10import sys
11import unittest
12import warnings
Benjamin Petersonee8712c2008-05-20 21:35:26 +000013from test import support, string_tests
Eric Smitha1eac722011-01-29 11:15:35 +000014import _string
Guido van Rossuma831cac2000-03-10 23:23:21 +000015
# Decorator that skips the decorated test on narrow builds, i.e. when
# sys.maxunicode is 0xFFFF.
requires_wide_build = unittest.skipIf(
    sys.maxunicode == 0xFFFF, 'requires wide build')
19
# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search function returning deliberately broken test codecs.

    "test.unicode1" maps to an encoder/decoder pair that return a bare int
    instead of a tuple; "test.unicode2" maps to a pair that return a tuple
    whose first item is not a string.  Any other name yields None.
    """
    def encode1(input, errors="strict"):
        return 42  # not a tuple

    def decode1(input, errors="strict"):
        return 42  # not a tuple

    def encode2(input, errors="strict"):
        return (42, 42)  # no unicode

    def decode2(input, errors="strict"):
        return (42, 42)  # no unicode

    broken_codecs = (
        ("test.unicode1", (encode1, decode1, None, None)),
        ("test.unicode2", (encode2, decode2, None, None)),
    )
    for codec_name, codec_entry in broken_codecs:
        if encoding == codec_name:
            return codec_entry
    return None


codecs.register(search_function)
37
Brett Cannon226b2302010-03-20 22:22:22 +000038class UnicodeTest(string_tests.CommonTest,
39 string_tests.MixinStrUnicodeUserStringTest,
40 string_tests.MixinStrUnicodeTest):
41
Guido van Rossumef87d6e2007-05-02 19:09:54 +000042 type2test = str
Walter Dörwald0fd583c2003-02-21 12:53:50 +000043
44 def checkequalnofix(self, result, object, methodname, *args):
45 method = getattr(object, methodname)
46 realresult = method(*args)
47 self.assertEqual(realresult, result)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +000048 self.assertTrue(type(realresult) is type(result))
Walter Dörwald0fd583c2003-02-21 12:53:50 +000049
50 # if the original is returned make sure that
51 # this doesn't happen with subclasses
52 if realresult is object:
Guido van Rossumef87d6e2007-05-02 19:09:54 +000053 class usub(str):
Walter Dörwald0fd583c2003-02-21 12:53:50 +000054 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000055 return 'usub(%r)' % str.__repr__(self)
Walter Dörwald0fd583c2003-02-21 12:53:50 +000056 object = usub(object)
57 method = getattr(object, methodname)
58 realresult = method(*args)
59 self.assertEqual(realresult, result)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +000060 self.assertTrue(object is not realresult)
Guido van Rossume4874ae2001-09-21 15:36:41 +000061
Jeremy Hylton504de6b2003-10-06 05:08:26 +000062 def test_literals(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000063 self.assertEqual('\xff', '\u00ff')
64 self.assertEqual('\uffff', '\U0000ffff')
Guido van Rossum36e0a922007-07-20 04:05:57 +000065 self.assertRaises(SyntaxError, eval, '\'\\Ufffffffe\'')
66 self.assertRaises(SyntaxError, eval, '\'\\Uffffffff\'')
67 self.assertRaises(SyntaxError, eval, '\'\\U%08x\'' % 0x110000)
Benjamin Petersoncd76c272008-04-05 15:09:30 +000068 # raw strings should not have unicode escapes
Florent Xiclunaa87b3832010-09-13 02:28:18 +000069 self.assertNotEqual(r"\u0020", " ")
Jeremy Hylton504de6b2003-10-06 05:08:26 +000070
Georg Brandl559e5d72008-06-11 18:37:52 +000071 def test_ascii(self):
72 if not sys.platform.startswith('java'):
73 # Test basic sanity of repr()
74 self.assertEqual(ascii('abc'), "'abc'")
75 self.assertEqual(ascii('ab\\c'), "'ab\\\\c'")
76 self.assertEqual(ascii('ab\\'), "'ab\\\\'")
77 self.assertEqual(ascii('\\c'), "'\\\\c'")
78 self.assertEqual(ascii('\\'), "'\\\\'")
79 self.assertEqual(ascii('\n'), "'\\n'")
80 self.assertEqual(ascii('\r'), "'\\r'")
81 self.assertEqual(ascii('\t'), "'\\t'")
82 self.assertEqual(ascii('\b'), "'\\x08'")
83 self.assertEqual(ascii("'\""), """'\\'"'""")
84 self.assertEqual(ascii("'\""), """'\\'"'""")
85 self.assertEqual(ascii("'"), '''"'"''')
86 self.assertEqual(ascii('"'), """'"'""")
87 latin1repr = (
88 "'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
89 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
90 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
91 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
92 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
93 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
94 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
95 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
96 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
97 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
98 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
99 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
100 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
101 "\\xfe\\xff'")
102 testrepr = ascii(''.join(map(chr, range(256))))
103 self.assertEqual(testrepr, latin1repr)
104 # Test ascii works on wide unicode escapes without overflow.
105 self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096),
106 ascii("\U00010000" * 39 + "\uffff" * 4096))
107
108 class WrongRepr:
109 def __repr__(self):
110 return b'byte-repr'
111 self.assertRaises(TypeError, ascii, WrongRepr())
112
Walter Dörwald28256f22003-01-19 16:59:20 +0000113 def test_repr(self):
114 if not sys.platform.startswith('java'):
115 # Test basic sanity of repr()
Walter Dörwald67e83882007-05-05 12:26:27 +0000116 self.assertEqual(repr('abc'), "'abc'")
117 self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
118 self.assertEqual(repr('ab\\'), "'ab\\\\'")
119 self.assertEqual(repr('\\c'), "'\\\\c'")
120 self.assertEqual(repr('\\'), "'\\\\'")
121 self.assertEqual(repr('\n'), "'\\n'")
122 self.assertEqual(repr('\r'), "'\\r'")
123 self.assertEqual(repr('\t'), "'\\t'")
124 self.assertEqual(repr('\b'), "'\\x08'")
125 self.assertEqual(repr("'\""), """'\\'"'""")
126 self.assertEqual(repr("'\""), """'\\'"'""")
127 self.assertEqual(repr("'"), '''"'"''')
128 self.assertEqual(repr('"'), """'"'""")
Walter Dörwald28256f22003-01-19 16:59:20 +0000129 latin1repr = (
Walter Dörwald67e83882007-05-05 12:26:27 +0000130 "'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
Walter Dörwald28256f22003-01-19 16:59:20 +0000131 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
132 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
133 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
134 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
135 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
Georg Brandl559e5d72008-06-11 18:37:52 +0000136 "\\x9c\\x9d\\x9e\\x9f\\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9"
137 "\xaa\xab\xac\\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
138 "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5"
139 "\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
140 "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1"
141 "\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
142 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd"
143 "\xfe\xff'")
Guido van Rossum805365e2007-05-07 22:24:25 +0000144 testrepr = repr(''.join(map(chr, range(256))))
Walter Dörwald28256f22003-01-19 16:59:20 +0000145 self.assertEqual(testrepr, latin1repr)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000146 # Test repr works on wide unicode escapes without overflow.
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000147 self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
148 repr("\U00010000" * 39 + "\uffff" * 4096))
Walter Dörwald28256f22003-01-19 16:59:20 +0000149
Georg Brandl559e5d72008-06-11 18:37:52 +0000150 class WrongRepr:
151 def __repr__(self):
152 return b'byte-repr'
153 self.assertRaises(TypeError, repr, WrongRepr())
154
Guido van Rossum49d6b072006-08-17 21:11:47 +0000155 def test_iterators(self):
156 # Make sure unicode objects have an __iter__ method
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000157 it = "\u1111\u2222\u3333".__iter__()
158 self.assertEqual(next(it), "\u1111")
159 self.assertEqual(next(it), "\u2222")
160 self.assertEqual(next(it), "\u3333")
Georg Brandla18af4e2007-04-21 15:47:16 +0000161 self.assertRaises(StopIteration, next, it)
Guido van Rossum49d6b072006-08-17 21:11:47 +0000162
Walter Dörwald28256f22003-01-19 16:59:20 +0000163 def test_count(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000164 string_tests.CommonTest.test_count(self)
165 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000166 self.checkequalnofix(3, 'aaa', 'count', 'a')
167 self.checkequalnofix(0, 'aaa', 'count', 'b')
168 self.checkequalnofix(3, 'aaa', 'count', 'a')
169 self.checkequalnofix(0, 'aaa', 'count', 'b')
170 self.checkequalnofix(0, 'aaa', 'count', 'b')
171 self.checkequalnofix(1, 'aaa', 'count', 'a', -1)
172 self.checkequalnofix(3, 'aaa', 'count', 'a', -10)
173 self.checkequalnofix(2, 'aaa', 'count', 'a', 0, -1)
174 self.checkequalnofix(0, 'aaa', 'count', 'a', 0, -10)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000175
Walter Dörwald28256f22003-01-19 16:59:20 +0000176 def test_find(self):
Antoine Pitrouc0bbe7d2011-10-08 22:41:35 +0200177 string_tests.CommonTest.test_find(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000178 self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc')
179 self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1)
180 self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000181
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000182 self.assertRaises(TypeError, 'hello'.find)
183 self.assertRaises(TypeError, 'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000184
Walter Dörwald28256f22003-01-19 16:59:20 +0000185 def test_rfind(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000186 string_tests.CommonTest.test_rfind(self)
187 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000188 self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
189 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
190 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
Guido van Rossum8b264542000-12-19 02:22:31 +0000191
Walter Dörwald28256f22003-01-19 16:59:20 +0000192 def test_index(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000193 string_tests.CommonTest.test_index(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000194 self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
195 self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def')
196 self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc')
197 self.checkequalnofix(9, 'abcdefghiabc', 'index', 'abc', 1)
198 self.assertRaises(ValueError, 'abcdefghiabc'.index, 'hib')
199 self.assertRaises(ValueError, 'abcdefghiab'.index, 'abc', 1)
200 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', 8)
201 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202
Walter Dörwald28256f22003-01-19 16:59:20 +0000203 def test_rindex(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000204 string_tests.CommonTest.test_rindex(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000205 self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '')
206 self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def')
207 self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc')
208 self.checkequalnofix(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000209
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000210 self.assertRaises(ValueError, 'abcdefghiabc'.rindex, 'hib')
211 self.assertRaises(ValueError, 'defghiabc'.rindex, 'def', 1)
212 self.assertRaises(ValueError, 'defghiabc'.rindex, 'abc', 0, -1)
213 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
214 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000215
Georg Brandlceee0772007-11-27 23:48:05 +0000216 def test_maketrans_translate(self):
217 # these work with plain translate()
218 self.checkequalnofix('bbbc', 'abababc', 'translate',
219 {ord('a'): None})
220 self.checkequalnofix('iiic', 'abababc', 'translate',
221 {ord('a'): None, ord('b'): ord('i')})
222 self.checkequalnofix('iiix', 'abababc', 'translate',
223 {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
224 self.checkequalnofix('c', 'abababc', 'translate',
225 {ord('a'): None, ord('b'): ''})
226 self.checkequalnofix('xyyx', 'xzx', 'translate',
227 {ord('z'): 'yy'})
228 # this needs maketrans()
229 self.checkequalnofix('abababc', 'abababc', 'translate',
230 {'b': '<i>'})
231 tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
232 self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
233 # test alternative way of calling maketrans()
234 tbl = self.type2test.maketrans('abc', 'xyz', 'd')
235 self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
236
237 self.assertRaises(TypeError, self.type2test.maketrans)
238 self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
239 self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
240 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
241 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
242 self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
243 self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000244
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000245 self.assertRaises(TypeError, 'hello'.translate)
Walter Dörwald67e83882007-05-05 12:26:27 +0000246 self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000247
Walter Dörwald28256f22003-01-19 16:59:20 +0000248 def test_split(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000249 string_tests.CommonTest.test_split(self)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000250
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000251 # Mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000252 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
253 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
254 self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000255
Walter Dörwald28256f22003-01-19 16:59:20 +0000256 def test_join(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000257 string_tests.MixinStrUnicodeUserStringTest.test_join(self)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000258
Guido van Rossumf1044292007-09-27 18:01:22 +0000259 class MyWrapper:
260 def __init__(self, sval): self.sval = sval
261 def __str__(self): return self.sval
262
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000263 # mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000264 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
265 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
266 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
267 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
268 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
269 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
270 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
Guido van Rossum98297ee2007-11-06 21:34:58 +0000271 self.checkraises(TypeError, ' ', 'join', ['1', '2', MyWrapper('foo')])
272 self.checkraises(TypeError, ' ', 'join', ['1', '2', '3', bytes()])
273 self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
274 self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
Marc-André Lemburge5034372000-08-08 08:04:29 +0000275
Walter Dörwald28256f22003-01-19 16:59:20 +0000276 def test_replace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000277 string_tests.CommonTest.test_replace(self)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000278
Walter Dörwald28256f22003-01-19 16:59:20 +0000279 # method call forwarded from str implementation because of unicode argument
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000280 self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
281 self.assertRaises(TypeError, 'replace'.replace, "r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000282
Guido van Rossum98297ee2007-11-06 21:34:58 +0000283 def test_bytes_comparison(self):
Brett Cannon226b2302010-03-20 22:22:22 +0000284 with support.check_warnings():
285 warnings.simplefilter('ignore', BytesWarning)
286 self.assertEqual('abc' == b'abc', False)
287 self.assertEqual('abc' != b'abc', True)
288 self.assertEqual('abc' == bytearray(b'abc'), False)
289 self.assertEqual('abc' != bytearray(b'abc'), True)
Brett Cannon40430012007-10-22 20:24:51 +0000290
Walter Dörwald28256f22003-01-19 16:59:20 +0000291 def test_comparison(self):
292 # Comparisons:
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000293 self.assertEqual('abc', 'abc')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000294 self.assertTrue('abcd' > 'abc')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000295 self.assertTrue('abc' < 'abcd')
Walter Dörwald28256f22003-01-19 16:59:20 +0000296
297 if 0:
298 # Move these tests to a Unicode collation module test...
299 # Testing UTF-16 code point order comparisons...
300
301 # No surrogates, no fixup required.
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000302 self.assertTrue('\u0061' < '\u20ac')
Walter Dörwald28256f22003-01-19 16:59:20 +0000303 # Non surrogate below surrogate value, no fixup required
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000304 self.assertTrue('\u0061' < '\ud800\udc02')
Walter Dörwald28256f22003-01-19 16:59:20 +0000305
306 # Non surrogate above surrogate value, fixup required
307 def test_lecmp(s, s2):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000308 self.assertTrue(s < s2)
Walter Dörwald28256f22003-01-19 16:59:20 +0000309
310 def test_fixup(s):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000311 s2 = '\ud800\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000312 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000313 s2 = '\ud900\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000314 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000315 s2 = '\uda00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000316 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000317 s2 = '\udb00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000318 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000319 s2 = '\ud800\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000320 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000321 s2 = '\ud900\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000322 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000323 s2 = '\uda00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000324 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000325 s2 = '\udb00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000326 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000327 s2 = '\ud800\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000328 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000329 s2 = '\ud900\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000330 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000331 s2 = '\uda00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000332 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000333 s2 = '\udb00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000334 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000335 s2 = '\ud800\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000336 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000337 s2 = '\ud900\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000338 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000339 s2 = '\uda00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000340 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000341 s2 = '\udb00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000342 test_lecmp(s, s2)
343
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000344 test_fixup('\ue000')
345 test_fixup('\uff61')
Walter Dörwald28256f22003-01-19 16:59:20 +0000346
347 # Surrogates on both sides, no fixup required
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000348 self.assertTrue('\ud800\udc02' < '\ud84d\udc56')
Walter Dörwald28256f22003-01-19 16:59:20 +0000349
Walter Dörwald28256f22003-01-19 16:59:20 +0000350 def test_islower(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000351 string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000352 self.checkequalnofix(False, '\u1FFc', 'islower')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300353 # non-BMP, uppercase
354 self.assertFalse('\U00010401'.islower())
355 self.assertFalse('\U00010427'.islower())
356 # non-BMP, lowercase
357 self.assertTrue('\U00010429'.islower())
358 self.assertTrue('\U0001044E'.islower())
359 # non-BMP, non-cased
360 self.assertFalse('\U0001F40D'.islower())
361 self.assertFalse('\U0001F46F'.islower())
Walter Dörwald28256f22003-01-19 16:59:20 +0000362
363 def test_isupper(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000364 string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
365 if not sys.platform.startswith('java'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000366 self.checkequalnofix(False, '\u1FFc', 'isupper')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300367 # non-BMP, uppercase
368 self.assertTrue('\U00010401'.isupper())
369 self.assertTrue('\U00010427'.isupper())
370 # non-BMP, lowercase
371 self.assertFalse('\U00010429'.isupper())
372 self.assertFalse('\U0001044E'.isupper())
373 # non-BMP, non-cased
374 self.assertFalse('\U0001F40D'.isupper())
375 self.assertFalse('\U0001F46F'.isupper())
Walter Dörwald28256f22003-01-19 16:59:20 +0000376
377 def test_istitle(self):
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300378 string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000379 self.checkequalnofix(True, '\u1FFc', 'istitle')
380 self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000381
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300382 # non-BMP, uppercase + lowercase
383 self.assertTrue('\U00010401\U00010429'.istitle())
384 self.assertTrue('\U00010427\U0001044E'.istitle())
385 # apparently there are no titlecased (Lt) non-BMP chars in Unicode 6
386 for ch in ['\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F']:
387 self.assertFalse(ch.istitle(), '{!a} is not title'.format(ch))
388
Walter Dörwald28256f22003-01-19 16:59:20 +0000389 def test_isspace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000390 string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000391 self.checkequalnofix(True, '\u2000', 'isspace')
392 self.checkequalnofix(True, '\u200a', 'isspace')
393 self.checkequalnofix(False, '\u2014', 'isspace')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300394 # apparently there are no non-BMP spaces chars in Unicode 6
395 for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
396 '\U0001F40D', '\U0001F46F']:
397 self.assertFalse(ch.isspace(), '{!a} is not space.'.format(ch))
398
399 def test_isalnum(self):
400 string_tests.MixinStrUnicodeUserStringTest.test_isalnum(self)
401 for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
402 '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']:
403 self.assertTrue(ch.isalnum(), '{!a} is alnum.'.format(ch))
Walter Dörwald28256f22003-01-19 16:59:20 +0000404
405 def test_isalpha(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000406 string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000407 self.checkequalnofix(True, '\u1FFc', 'isalpha')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300408 # non-BMP, cased
409 self.assertTrue('\U00010401'.isalpha())
410 self.assertTrue('\U00010427'.isalpha())
411 self.assertTrue('\U00010429'.isalpha())
412 self.assertTrue('\U0001044E'.isalpha())
413 # non-BMP, non-cased
414 self.assertFalse('\U0001F40D'.isalpha())
415 self.assertFalse('\U0001F46F'.isalpha())
Walter Dörwald28256f22003-01-19 16:59:20 +0000416
417 def test_isdecimal(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000418 self.checkequalnofix(False, '', 'isdecimal')
419 self.checkequalnofix(False, 'a', 'isdecimal')
420 self.checkequalnofix(True, '0', 'isdecimal')
421 self.checkequalnofix(False, '\u2460', 'isdecimal') # CIRCLED DIGIT ONE
422 self.checkequalnofix(False, '\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
423 self.checkequalnofix(True, '\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
424 self.checkequalnofix(True, '0123456789', 'isdecimal')
425 self.checkequalnofix(False, '0123456789a', 'isdecimal')
Walter Dörwald28256f22003-01-19 16:59:20 +0000426
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000427 self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000428
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300429 for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
430 '\U0001F40D', '\U0001F46F', '\U00011065', '\U0001F107']:
431 self.assertFalse(ch.isdecimal(), '{!a} is not decimal.'.format(ch))
432 for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
433 self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))
434
Walter Dörwald28256f22003-01-19 16:59:20 +0000435 def test_isdigit(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000436 string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000437 self.checkequalnofix(True, '\u2460', 'isdigit')
438 self.checkequalnofix(False, '\xbc', 'isdigit')
439 self.checkequalnofix(True, '\u0660', 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000440
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300441 for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
442 '\U0001F40D', '\U0001F46F', '\U00011065']:
443 self.assertFalse(ch.isdigit(), '{!a} is not a digit.'.format(ch))
444 for ch in ['\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']:
445 self.assertTrue(ch.isdigit(), '{!a} is a digit.'.format(ch))
446
Walter Dörwald28256f22003-01-19 16:59:20 +0000447 def test_isnumeric(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000448 self.checkequalnofix(False, '', 'isnumeric')
449 self.checkequalnofix(False, 'a', 'isnumeric')
450 self.checkequalnofix(True, '0', 'isnumeric')
451 self.checkequalnofix(True, '\u2460', 'isnumeric')
452 self.checkequalnofix(True, '\xbc', 'isnumeric')
453 self.checkequalnofix(True, '\u0660', 'isnumeric')
454 self.checkequalnofix(True, '0123456789', 'isnumeric')
455 self.checkequalnofix(False, '0123456789a', 'isnumeric')
Walter Dörwald28256f22003-01-19 16:59:20 +0000456
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000457 self.assertRaises(TypeError, "abc".isnumeric, 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000458
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300459 for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
460 '\U0001F40D', '\U0001F46F']:
461 self.assertFalse(ch.isnumeric(), '{!a} is not numeric.'.format(ch))
462 for ch in ['\U00011065', '\U0001D7F6', '\U00011066',
463 '\U000104A0', '\U0001F107']:
464 self.assertTrue(ch.isnumeric(), '{!a} is numeric.'.format(ch))
465
Martin v. Löwis47383402007-08-15 07:32:56 +0000466 def test_isidentifier(self):
467 self.assertTrue("a".isidentifier())
468 self.assertTrue("Z".isidentifier())
469 self.assertTrue("_".isidentifier())
470 self.assertTrue("b0".isidentifier())
471 self.assertTrue("bc".isidentifier())
472 self.assertTrue("b_".isidentifier())
Antoine Pitroud72402e2010-10-27 18:52:48 +0000473 self.assertTrue("µ".isidentifier())
Benjamin Petersonf413b802011-08-12 22:17:18 -0500474 self.assertTrue("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".isidentifier())
Martin v. Löwis47383402007-08-15 07:32:56 +0000475
476 self.assertFalse(" ".isidentifier())
477 self.assertFalse("[".isidentifier())
Antoine Pitroud72402e2010-10-27 18:52:48 +0000478 self.assertFalse("©".isidentifier())
Georg Brandld52429f2008-07-04 15:55:02 +0000479 self.assertFalse("0".isidentifier())
Martin v. Löwis47383402007-08-15 07:32:56 +0000480
Georg Brandl559e5d72008-06-11 18:37:52 +0000481 def test_isprintable(self):
482 self.assertTrue("".isprintable())
Benjamin Peterson09832742009-03-26 17:15:46 +0000483 self.assertTrue(" ".isprintable())
Georg Brandl559e5d72008-06-11 18:37:52 +0000484 self.assertTrue("abcdefg".isprintable())
485 self.assertFalse("abcdefg\n".isprintable())
Georg Brandld52429f2008-07-04 15:55:02 +0000486 # some defined Unicode character
487 self.assertTrue("\u0374".isprintable())
488 # undefined character
Amaury Forgeot d'Arca083f1e2008-09-10 23:51:42 +0000489 self.assertFalse("\u0378".isprintable())
Georg Brandld52429f2008-07-04 15:55:02 +0000490 # single surrogate character
Georg Brandl559e5d72008-06-11 18:37:52 +0000491 self.assertFalse("\ud800".isprintable())
492
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300493 self.assertTrue('\U0001F46F'.isprintable())
494 self.assertFalse('\U000E0020'.isprintable())
495
496 def test_surrogates(self):
497 for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800',
498 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'):
499 self.assertTrue(s.islower())
500 self.assertFalse(s.isupper())
501 self.assertFalse(s.istitle())
502 for s in ('A\uD800B\uDFFF', 'A\uDFFFB\uD800',
503 'A\uD800B\uDFFFA', 'A\uDFFFB\uD800A'):
504 self.assertFalse(s.islower())
505 self.assertTrue(s.isupper())
506 self.assertTrue(s.istitle())
507
508 for meth_name in ('islower', 'isupper', 'istitle'):
509 meth = getattr(str, meth_name)
510 for s in ('\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF'):
511 self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
512
513 for meth_name in ('isalpha', 'isalnum', 'isdigit', 'isspace',
514 'isdecimal', 'isnumeric',
515 'isidentifier', 'isprintable'):
516 meth = getattr(str, meth_name)
517 for s in ('\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF',
518 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
519 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'):
520 self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
521
522
Ezio Melottia5c92b42011-08-23 00:37:08 +0300523 @requires_wide_build
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300524 def test_lower(self):
525 string_tests.CommonTest.test_lower(self)
526 self.assertEqual('\U00010427'.lower(), '\U0001044F')
527 self.assertEqual('\U00010427\U00010427'.lower(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300528 '\U0001044F\U0001044F')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300529 self.assertEqual('\U00010427\U0001044F'.lower(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300530 '\U0001044F\U0001044F')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300531 self.assertEqual('X\U00010427x\U0001044F'.lower(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300532 'x\U0001044Fx\U0001044F')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300533
Ezio Melottia5c92b42011-08-23 00:37:08 +0300534 @requires_wide_build
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300535 def test_upper(self):
536 string_tests.CommonTest.test_upper(self)
537 self.assertEqual('\U0001044F'.upper(), '\U00010427')
538 self.assertEqual('\U0001044F\U0001044F'.upper(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300539 '\U00010427\U00010427')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300540 self.assertEqual('\U00010427\U0001044F'.upper(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300541 '\U00010427\U00010427')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300542 self.assertEqual('X\U00010427x\U0001044F'.upper(),
Ezio Melottia5c92b42011-08-23 00:37:08 +0300543 'X\U00010427X\U00010427')
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300544
@requires_wide_build
def test_capitalize(self):
    """Check str.capitalize() on cased characters outside the BMP."""
    # Run the generic checks shared with the other string test classes first.
    string_tests.CommonTest.test_capitalize(self)
    # (input, expected capitalized result)
    expectations = (
        ('\U0001044F', '\U00010427'),
        ('\U0001044F\U0001044F', '\U00010427\U0001044F'),
        ('\U00010427\U0001044F', '\U00010427\U0001044F'),
        ('\U0001044F\U00010427', '\U00010427\U0001044F'),
        ('X\U00010427x\U0001044F', 'X\U0001044Fx\U0001044F'),
    )
    for text, capitalized in expectations:
        self.assertEqual(text.capitalize(), capitalized)
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300557
@requires_wide_build
def test_title(self):
    """Check str.title() on cased characters outside the BMP."""
    # Run the generic checks shared with the other string test classes first.
    string_tests.MixinStrUnicodeUserStringTest.test_title(self)
    # Every two-word input below is expected to title-case to this value.
    two_words = '\U00010427\U0001044F \U00010427\U0001044F'
    # (input, expected title-cased result)
    expectations = (
        ('\U0001044F', '\U00010427'),
        ('\U0001044F\U0001044F', '\U00010427\U0001044F'),
        ('\U0001044F\U0001044F \U0001044F\U0001044F', two_words),
        ('\U00010427\U0001044F \U00010427\U0001044F', two_words),
        ('\U0001044F\U00010427 \U0001044F\U00010427', two_words),
        ('X\U00010427x\U0001044F X\U00010427x\U0001044F',
         'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F'),
    )
    for text, titled in expectations:
        self.assertEqual(text.title(), titled)
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300572
@requires_wide_build
def test_swapcase(self):
    """Check str.swapcase() on cased characters outside the BMP."""
    # Run the generic checks shared with the other string test classes first.
    string_tests.CommonTest.test_swapcase(self)
    # (input, expected case-swapped result)
    expectations = (
        ('\U0001044F', '\U00010427'),
        ('\U00010427', '\U0001044F'),
        ('\U0001044F\U0001044F', '\U00010427\U00010427'),
        ('\U00010427\U0001044F', '\U0001044F\U00010427'),
        ('\U0001044F\U00010427', '\U00010427\U0001044F'),
        ('X\U00010427x\U0001044F', 'x\U0001044FX\U00010427'),
    )
    for text, swapped in expectations:
        self.assertEqual(text.swapcase(), swapped)
Ezio Melotti93e7afc2011-08-22 14:08:38 +0300586
Walter Dörwald28256f22003-01-19 16:59:20 +0000587 def test_contains(self):
588 # Testing Unicode contains method
Benjamin Peterson577473f2010-01-19 00:09:57 +0000589 self.assertIn('a', 'abdb')
590 self.assertIn('a', 'bdab')
591 self.assertIn('a', 'bdaba')
592 self.assertIn('a', 'bdba')
593 self.assertNotIn('a', 'bdb')
594 self.assertIn('a', 'bdba')
595 self.assertIn('a', ('a',1,None))
596 self.assertIn('a', (1,None,'a'))
597 self.assertIn('a', ('a',1,None))
598 self.assertIn('a', (1,None,'a'))
599 self.assertNotIn('a', ('x',1,'y'))
600 self.assertNotIn('a', ('x',1,None))
601 self.assertNotIn('abcd', 'abcxxxx')
602 self.assertIn('ab', 'abcd')
603 self.assertIn('ab', 'abc')
604 self.assertIn('ab', (1,None,'ab'))
605 self.assertIn('', 'abc')
606 self.assertIn('', '')
607 self.assertIn('', 'abc')
608 self.assertNotIn('\0', 'abc')
609 self.assertIn('\0', '\0abc')
610 self.assertIn('\0', 'abc\0')
611 self.assertIn('a', '\0abc')
612 self.assertIn('asdf', 'asdf')
613 self.assertNotIn('asdf', 'asd')
614 self.assertNotIn('asdf', '')
Walter Dörwald28256f22003-01-19 16:59:20 +0000615
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000616 self.assertRaises(TypeError, "abc".__contains__)
Walter Dörwald28256f22003-01-19 16:59:20 +0000617
def test_format(self):
    """Exercise str.format(): escaping, field lookup, specs, conversions, errors.

    Expected values that contain runs of fill characters are written with
    explicit ``' ' * n`` so the required amount of padding is unambiguous
    (a literal with the wrong number of spaces makes the assertion fail).
    """
    self.assertEqual(''.format(), '')
    self.assertEqual('a'.format(), 'a')
    self.assertEqual('ab'.format(), 'ab')
    self.assertEqual('a{{'.format(), 'a{')
    self.assertEqual('a}}'.format(), 'a}')
    self.assertEqual('{{b'.format(), '{b')
    self.assertEqual('}}b'.format(), '}b')
    self.assertEqual('a{{b'.format(), 'a{b')

    # examples from the PEP:
    import datetime
    self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
    self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
                     "My name is Fred")
    self.assertEqual("My name is {0} :-{{}}".format('Fred'),
                     "My name is Fred :-{}")

    d = datetime.date(2007, 8, 18)
    self.assertEqual("The year is {0.year}".format(d),
                     "The year is 2007")

    # classes we'll use for testing

    # __format__ echoes the format spec it receives
    class C:
        def __init__(self, x=100):
            self._x = x
        def __format__(self, spec):
            return spec

    # __format__ ignores the spec and returns str(x)
    class D:
        def __init__(self, x):
            self.x = x
        def __format__(self, spec):
            return str(self.x)

    # class with __str__, but no __format__
    class E:
        def __init__(self, x):
            self.x = x
        def __str__(self):
            return 'E(' + self.x + ')'

    # class with __repr__, but no __format__ or __str__
    class F:
        def __init__(self, x):
            self.x = x
        def __repr__(self):
            return 'F(' + self.x + ')'

    # class with __format__ that forwards to string, for some format_spec's
    class G:
        def __init__(self, x):
            self.x = x
        def __str__(self):
            return "string is " + self.x
        def __format__(self, format_spec):
            if format_spec == 'd':
                return 'G(' + self.x + ')'
            return object.__format__(self, format_spec)

    # date subclass whose format spec is passed to strftime()
    class I(datetime.date):
        def __format__(self, format_spec):
            return self.strftime(format_spec)

    # int subclass that formats twice its value
    class J(int):
        def __format__(self, format_spec):
            return int.__format__(self * 2, format_spec)

    self.assertEqual(''.format(), '')
    self.assertEqual('abc'.format(), 'abc')
    self.assertEqual('{0}'.format('abc'), 'abc')
    self.assertEqual('{0:}'.format('abc'), 'abc')
#        self.assertEqual('{ 0 }'.format('abc'), 'abc')
    self.assertEqual('X{0}'.format('abc'), 'Xabc')
    self.assertEqual('{0}X'.format('abc'), 'abcX')
    self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
    self.assertEqual('{1}'.format(1, 'abc'), 'abc')
    self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
    self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
    self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
    self.assertEqual('{0}'.format(-15), '-15')
    self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
    self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
    self.assertEqual('{{'.format(), '{')
    self.assertEqual('}}'.format(), '}')
    self.assertEqual('{{}}'.format(), '{}')
    self.assertEqual('{{x}}'.format(), '{x}')
    self.assertEqual('{{{0}}}'.format(123), '{123}')
    self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
    self.assertEqual('}}{{'.format(), '}{')
    self.assertEqual('}}x{{'.format(), '}x{')

    # weird field names
    self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
    self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
    self.assertEqual("{0[ ]}".format({' ':3}), '3')

    self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
    self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
    self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
    self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
    self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
    self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
    self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')

    # strings
    self.assertEqual('{0:.3s}'.format('abc'), 'abc')
    self.assertEqual('{0:.3s}'.format('ab'), 'ab')
    self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
    self.assertEqual('{0:.0s}'.format('abcdef'), '')
    self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
    self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
    self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
    self.assertEqual('{0:3.2s}'.format('abc'), 'ab' + ' ')
    self.assertEqual('{0:x<0s}'.format('result'), 'result')
    self.assertEqual('{0:x<5s}'.format('result'), 'result')
    self.assertEqual('{0:x<6s}'.format('result'), 'result')
    self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
    self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
    self.assertEqual('{0: <7s}'.format('result'), 'result' + ' ')
    self.assertEqual('{0:<7s}'.format('result'), 'result' + ' ')
    self.assertEqual('{0:>7s}'.format('result'), ' ' + 'result')
    self.assertEqual('{0:>8s}'.format('result'), ' ' * 2 + 'result')
    self.assertEqual('{0:^8s}'.format('result'), ' ' + 'result' + ' ')
    # with an odd amount of padding the extra fill goes on the right
    self.assertEqual('{0:^9s}'.format('result'), ' ' + 'result' + ' ' * 2)
    self.assertEqual('{0:^10s}'.format('result'),
                     ' ' * 2 + 'result' + ' ' * 2)
    self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
    self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
    self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)

    # format specifiers for user defined type
    self.assertEqual('{0:abc}'.format(C()), 'abc')

    # !r, !s and !a coercions
    self.assertEqual('{0!s}'.format('Hello'), 'Hello')
    self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
    self.assertEqual('{0!s:15}'.format('Hello'), 'Hello' + ' ' * 10)
    self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello' + ' ' * 10)
    self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
    self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
    self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
    self.assertEqual('{0!r}'.format('\u0378'), "'\\u0378'") # nonprintable
    self.assertEqual('{0!r}'.format('\u0374'), "'\u0374'")  # printable
    self.assertEqual('{0!r}'.format(F('\u0374')), 'F(\u0374)')
    self.assertEqual('{0!a}'.format('Hello'), "'Hello'")
    self.assertEqual('{0!a}'.format('\u0378'), "'\\u0378'") # nonprintable
    self.assertEqual('{0!a}'.format('\u0374'), "'\\u0374'") # printable
    self.assertEqual('{0!a:}'.format('Hello'), "'Hello'")
    self.assertEqual('{0!a}'.format(F('Hello')), 'F(Hello)')
    self.assertEqual('{0!a}'.format(F('\u0374')), 'F(\\u0374)')

    # test fallback to object.__format__
    self.assertEqual('{0}'.format({}), '{}')
    self.assertEqual('{0}'.format([]), '[]')
    self.assertEqual('{0}'.format([1]), '[1]')

    self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
    self.assertEqual('{0!s}'.format(G('data')), 'string is data')

    # These reach object.__format__ with a non-empty spec, so the
    # deprecation warning is expected while they run.
    msg = 'object.__format__ with a non-empty format string is deprecated'
    with support.check_warnings((msg, PendingDeprecationWarning)):
        # 'E(data)' is 7 characters: 1 fill on the left, 2 on the right
        self.assertEqual('{0:^10}'.format(E('data')),
                         ' ' + 'E(data)' + ' ' * 2)
        self.assertEqual('{0:^10s}'.format(E('data')),
                         ' ' + 'E(data)' + ' ' * 2)
        self.assertEqual('{0:>15s}'.format(G('data')),
                         ' ' + 'string is data')

    self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
                                                   month=8,
                                                   day=27)),
                     "date: 2007-08-27")

    # test deriving from a builtin type and overriding __format__
    self.assertEqual("{0}".format(J(10)), "20")


    # string format specifiers
    self.assertEqual('{0:}'.format('a'), 'a')

    # computed format specifiers
    self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
    self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
    self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
    self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5),
                     'hello' + ' ' * 5)
    self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'),
                     'hello' + ' ' * 5)

    # test various errors
    self.assertRaises(ValueError, '{'.format)
    self.assertRaises(ValueError, '}'.format)
    self.assertRaises(ValueError, 'a{'.format)
    self.assertRaises(ValueError, 'a}'.format)
    self.assertRaises(ValueError, '{a'.format)
    self.assertRaises(ValueError, '}a'.format)
    self.assertRaises(IndexError, '{0}'.format)
    self.assertRaises(IndexError, '{1}'.format, 'abc')
    self.assertRaises(KeyError,   '{x}'.format)
    self.assertRaises(ValueError, "}{".format)
    self.assertRaises(ValueError, "abc{0:{}".format)
    self.assertRaises(ValueError, "{0".format)
    self.assertRaises(IndexError, "{0.}".format)
    self.assertRaises(ValueError, "{0.}".format, 0)
    self.assertRaises(IndexError, "{0[}".format)
    self.assertRaises(ValueError, "{0[}".format, [])
    self.assertRaises(KeyError,   "{0]}".format)
    self.assertRaises(ValueError, "{0.[]}".format, 0)
    self.assertRaises(ValueError, "{0..foo}".format, 0)
    self.assertRaises(ValueError, "{0[0}".format, 0)
    self.assertRaises(ValueError, "{0[0:foo}".format, 0)
    self.assertRaises(KeyError,   "{c]}".format)
    self.assertRaises(ValueError, "{{ {{{0}}".format, 0)
    self.assertRaises(ValueError, "{0}}".format, 0)
    self.assertRaises(KeyError,   "{foo}".format, bar=3)
    self.assertRaises(ValueError, "{0!x}".format, 3)
    self.assertRaises(ValueError, "{0!}".format, 0)
    self.assertRaises(ValueError, "{0!rs}".format, 0)
    self.assertRaises(ValueError, "{!}".format)
    self.assertRaises(IndexError, "{:}".format)
    self.assertRaises(IndexError, "{:s}".format)
    self.assertRaises(IndexError, "{}".format)
    big = "23098475029384702983476098230754973209482573"
    self.assertRaises(ValueError, ("{" + big + "}").format)
    self.assertRaises(ValueError, ("{[" + big + "]}").format, [0])

    # issue 6089
    self.assertRaises(ValueError, "{0[0]x}".format, [None])
    self.assertRaises(ValueError, "{0[0](10)}".format, [None])

    # can't have a replacement on the field name portion
    self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)

    # exceed maximum recursion depth
    self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
    self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
                      0, 1, 2, 3, 4, 5, 6, 7)

    # string format spec errors
    self.assertRaises(ValueError, "{0:-s}".format, '')
    self.assertRaises(ValueError, format, "", "-")
    self.assertRaises(ValueError, "{0:=s}".format, '')

    # Alternate formatting is not supported
    self.assertRaises(ValueError, format, '', '#')
    self.assertRaises(ValueError, format, '', '#20')
860
Eric Smith27bbca62010-11-04 17:06:58 +0000861 def test_format_map(self):
862 self.assertEqual(''.format_map({}), '')
863 self.assertEqual('a'.format_map({}), 'a')
864 self.assertEqual('ab'.format_map({}), 'ab')
865 self.assertEqual('a{{'.format_map({}), 'a{')
866 self.assertEqual('a}}'.format_map({}), 'a}')
867 self.assertEqual('{{b'.format_map({}), '{b')
868 self.assertEqual('}}b'.format_map({}), '}b')
869 self.assertEqual('a{{b'.format_map({}), 'a{b')
870
871 # using mappings
872 class Mapping(dict):
873 def __missing__(self, key):
874 return key
875 self.assertEqual('{hello}'.format_map(Mapping()), 'hello')
876 self.assertEqual('{a} {world}'.format_map(Mapping(a='hello')), 'hello world')
877
878 class InternalMapping:
879 def __init__(self):
880 self.mapping = {'a': 'hello'}
881 def __getitem__(self, key):
882 return self.mapping[key]
883 self.assertEqual('{a}'.format_map(InternalMapping()), 'hello')
884
885
Eric Smith27bbca62010-11-04 17:06:58 +0000886 class C:
887 def __init__(self, x=100):
888 self._x = x
889 def __format__(self, spec):
890 return spec
Eric Smith27bbca62010-11-04 17:06:58 +0000891 self.assertEqual('{foo._x}'.format_map({'foo': C(20)}), '20')
892
893 # test various errors
Eric V. Smithedbb6ca2012-03-12 15:16:22 -0700894 self.assertRaises(TypeError, ''.format_map)
895 self.assertRaises(TypeError, 'a'.format_map)
896
897 self.assertRaises(ValueError, '{'.format_map, {})
898 self.assertRaises(ValueError, '}'.format_map, {})
899 self.assertRaises(ValueError, 'a{'.format_map, {})
900 self.assertRaises(ValueError, 'a}'.format_map, {})
901 self.assertRaises(ValueError, '{a'.format_map, {})
902 self.assertRaises(ValueError, '}a'.format_map, {})
Eric Smith27bbca62010-11-04 17:06:58 +0000903
Eric V. Smith12ebefc2011-07-18 14:03:41 -0400904 # issue #12579: can't supply positional params to format_map
905 self.assertRaises(ValueError, '{}'.format_map, {'a' : 2})
906 self.assertRaises(ValueError, '{}'.format_map, 'a')
907 self.assertRaises(ValueError, '{a} {}'.format_map, {"a" : 2, "b" : 1})
908
Mark Dickinsonfb90c092012-10-28 10:18:03 +0000909 def test_format_huge_precision(self):
910 format_string = ".{}f".format(sys.maxsize + 1)
911 with self.assertRaises(ValueError):
912 result = format(2.34, format_string)
913
914 def test_format_huge_width(self):
915 format_string = "{}f".format(sys.maxsize + 1)
916 with self.assertRaises(ValueError):
917 result = format(2.34, format_string)
918
919 def test_format_huge_item_number(self):
920 format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
921 with self.assertRaises(ValueError):
922 result = format_string.format(2.34)
923
Eric Smith8ec90442009-03-14 12:29:34 +0000924 def test_format_auto_numbering(self):
925 class C:
926 def __init__(self, x=100):
927 self._x = x
928 def __format__(self, spec):
929 return spec
930
931 self.assertEqual('{}'.format(10), '10')
932 self.assertEqual('{:5}'.format('s'), 's ')
933 self.assertEqual('{!r}'.format('s'), "'s'")
934 self.assertEqual('{._x}'.format(C(10)), '10')
935 self.assertEqual('{[1]}'.format([1, 2]), '2')
936 self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4')
937 self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c')
938
939 self.assertEqual('a{:{}}b'.format('x', '^10'), 'a x b')
940 self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b')
941
942 # can't mix and match numbering and auto-numbering
943 self.assertRaises(ValueError, '{}{1}'.format, 1, 2)
944 self.assertRaises(ValueError, '{1}{}'.format, 1, 2)
945 self.assertRaises(ValueError, '{:{1}}'.format, 1, 2)
946 self.assertRaises(ValueError, '{0:{}}'.format, 1, 2)
947
948 # can mix and match auto-numbering and named
949 self.assertEqual('{f}{}'.format(4, f='test'), 'test4')
950 self.assertEqual('{}{f}'.format(4, f='test'), '4test')
951 self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
952 self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
953
def test_formatting(self):
    """Exercise %-formatting of str.

    Padded expectations are written with explicit ``' ' * n`` so the
    required number of fill characters is unambiguous, and assertions
    that were repeated verbatim are checked once.
    """
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')
    # '%5.2f' right-aligns in 5 columns, so 4-character values get one
    # leading space in addition to the separator space.
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3),
                     'abc, abc, 1, 2.000000, ' + ' 3.00')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, -2, 3),
                     'abc, abc, 1, -2.000000, ' + ' 3.00')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.5),
                     'abc, abc, -1, -2.000000, ' + ' 3.50')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57),
                     'abc, abc, -1, -2.000000, ' + ' 3.57')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57),
                     'abc, abc, -1, -2.000000, ' + '1003.57')
    if not sys.platform.startswith('java'):
        self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
        self.assertEqual("%r" % ("\u1234",), "'\u1234'")
        self.assertEqual("%a" % ("\u1234",), "'\\u1234'")
    self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
    self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')

    self.assertEqual('%c' % 0x1234, '\u1234')
    self.assertEqual('%c' % 0x21483, '\U00021483')
    self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
    self.assertEqual('%c' % '\U00021483', '\U00021483')
    self.assertRaises(TypeError, "%c".__mod__, "aa")
    self.assertRaises(ValueError, "%.1\u1032f".__mod__, (1.0/3))
    self.assertRaises(TypeError, "%i".__mod__, "aa")

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123}, '...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,"abc"), '...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,"abc"), '...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % "abc", '...abc...')
    # '*' takes width/precision from the argument tuple; a negative
    # width left-justifies.
    self.assertEqual('%*s' % (5,'abc',), ' ' * 2 + 'abc')
    self.assertEqual('%*s' % (-5,'abc',), 'abc' + ' ' * 2)
    self.assertEqual('%*.*s' % (5,2,'abc',), ' ' * 3 + 'ab')
    self.assertEqual('%*.*s' % (5,3,'abc',), ' ' * 2 + 'abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,'abc',), '10' + ' ' * 3 + 'abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, 'abc',), '103' + ' ' * 3 + 'abc')
    self.assertEqual('%c' % 'a', 'a')
    class Wrapper:
        def __str__(self):
            return '\u1234'
    self.assertEqual('%s' % Wrapper(), '\u1234')

    # issue 3382
    NAN = float('nan')
    INF = float('inf')
    self.assertEqual('%f' % NAN, 'nan')
    self.assertEqual('%F' % NAN, 'NAN')
    self.assertEqual('%f' % INF, 'inf')
    self.assertEqual('%F' % INF, 'INF')
1007
@support.cpython_only
def test_formatting_huge_precision(self):
    """%-formatting rejects a precision of INT_MAX + 1 with ValueError."""
    from _testcapi import INT_MAX
    format_string = "%.{}f".format(INT_MAX + 1)
    with self.assertRaises(ValueError):
        # Only the exception matters; the result is deliberately discarded.
        format_string % 2.34
1014
1015 def test_formatting_huge_width(self):
1016 format_string = "%{}f".format(sys.maxsize + 1)
1017 with self.assertRaises(ValueError):
1018 result = format_string % 2.34
1019
Ezio Melottiba42fd52011-04-26 06:09:45 +03001020 def test_startswith_endswith_errors(self):
1021 for meth in ('foo'.startswith, 'foo'.endswith):
Ezio Melottif2b3f782011-04-26 06:40:59 +03001022 with self.assertRaises(TypeError) as cm:
Ezio Melottiba42fd52011-04-26 06:09:45 +03001023 meth(['f'])
Ezio Melottif2b3f782011-04-26 06:40:59 +03001024 exc = str(cm.exception)
Ezio Melottiba42fd52011-04-26 06:09:45 +03001025 self.assertIn('str', exc)
1026 self.assertIn('tuple', exc)
1027
@support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    """'%.1f' uses '.' as the decimal separator under the decorator's locales."""
    # should not format with a comma, but always with C locale
    formatted = '%.1f' % 1.0
    self.assertEqual('1.0', formatted)
Georg Brandlda6b1072006-01-20 17:48:54 +00001032
Walter Dörwald28256f22003-01-19 16:59:20 +00001033 def test_constructor(self):
1034 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
1035
1036 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001037 str('unicode remains unicode'),
1038 'unicode remains unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +00001039 )
1040
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001041 class UnicodeSubclass(str):
Marc-André Lemburg79f57832002-12-29 19:44:06 +00001042 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +00001043
Walter Dörwald28256f22003-01-19 16:59:20 +00001044 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001045 str(UnicodeSubclass('unicode subclass becomes unicode')),
1046 'unicode subclass becomes unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +00001047 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001048
Walter Dörwald28256f22003-01-19 16:59:20 +00001049 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001050 str('strings are converted to unicode'),
1051 'strings are converted to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +00001052 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001053
Walter Dörwald28256f22003-01-19 16:59:20 +00001054 class StringCompat:
1055 def __init__(self, x):
1056 self.x = x
1057 def __str__(self):
1058 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001059
Walter Dörwald28256f22003-01-19 16:59:20 +00001060 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001061 str(StringCompat('__str__ compatible objects are recognized')),
1062 '__str__ compatible objects are recognized'
Walter Dörwald28256f22003-01-19 16:59:20 +00001063 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001064
Walter Dörwald28256f22003-01-19 16:59:20 +00001065 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001066
Walter Dörwald28256f22003-01-19 16:59:20 +00001067 o = StringCompat('unicode(obj) is compatible to str()')
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001068 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Walter Dörwald28256f22003-01-19 16:59:20 +00001069 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001070
Guido van Rossume2a383d2007-01-15 16:59:06 +00001071 for obj in (123, 123.45, 123):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001072 self.assertEqual(str(obj), str(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001073
Walter Dörwald28256f22003-01-19 16:59:20 +00001074 # unicode(obj, encoding, error) tests (this maps to
1075 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001076
Walter Dörwald28256f22003-01-19 16:59:20 +00001077 if not sys.platform.startswith('java'):
1078 self.assertRaises(
1079 TypeError,
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001080 str,
1081 'decoding unicode is not supported',
Walter Dörwald28256f22003-01-19 16:59:20 +00001082 'utf-8',
1083 'strict'
1084 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001085
Walter Dörwald28256f22003-01-19 16:59:20 +00001086 self.assertEqual(
Walter Dörwald67e83882007-05-05 12:26:27 +00001087 str(b'strings are decoded to unicode', 'utf-8', 'strict'),
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001088 'strings are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +00001089 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001090
Walter Dörwald28256f22003-01-19 16:59:20 +00001091 if not sys.platform.startswith('java'):
1092 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001093 str(
Guido van Rossumbae07c92007-10-08 02:46:15 +00001094 memoryview(b'character buffers are decoded to unicode'),
Walter Dörwald28256f22003-01-19 16:59:20 +00001095 'utf-8',
1096 'strict'
1097 ),
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001098 'character buffers are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +00001099 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001100
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001101 self.assertRaises(TypeError, str, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +00001102
Chris Jerdonek17fc44c2012-11-20 17:31:02 -08001103 def test_constructor_keyword_args(self):
1104 """Pass various keyword argument combinations to the constructor."""
1105 # The object argument can be passed as a keyword.
1106 self.assertEqual(str(object='foo'), 'foo')
1107 self.assertEqual(str(object=b'foo', encoding='utf-8'), 'foo')
1108 # The errors argument without encoding triggers "decode" mode.
1109 self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
1110 self.assertEqual(str(object=b'foo', errors='strict'), 'foo')
1111
1112 def test_constructor_defaults(self):
1113 """Check the constructor argument defaults."""
1114 # The object argument defaults to '' or b''.
1115 self.assertEqual(str(), '')
1116 self.assertEqual(str(errors='strict'), '')
1117 utf8_cent = '¢'.encode('utf-8')
1118 # The encoding argument defaults to utf-8.
1119 self.assertEqual(str(utf8_cent, errors='strict'), '¢')
1120 # The errors argument defaults to strict.
1121 self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii')
1122
def test_codecs_utf7(self):
    """Check UTF-7 encoding and decoding, including unpaired surrogates."""
    # (text, expected UTF-7 encoding)
    utfTests = [
        ('A\u2262\u0391.', b'A+ImIDkQ.'),             # RFC2152 example
        ('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'),     # RFC2152 example
        ('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'),        # RFC2152 example
        ('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
        ('+', b'+-'),
        ('+-', b'+--'),
        ('+?', b'+-?'),
        # Raw string: '\?' is not a recognized escape sequence, so the
        # non-raw spelling only works by accident (and warns on newer
        # Pythons). The value is still backslash + '?'.
        (r'\?', b'+AFw?'),
        (r'\\?', b'+AFwAXA?'),
        (r'\\\?', b'+AFwAXABc?'),
        (r'++--', b'+-+---'),
        ('\U000abcde', b'+2m/c3g-'),                  # surrogate pairs
        ('/', b'/'),
    ]

    for (x, y) in utfTests:
        self.assertEqual(x.encode('utf-7'), y)

    # Unpaired surrogates are passed through
    self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
    self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
    self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
    self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
    self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
    self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
    self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
    self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')

    self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
    self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')

    # Issue #2242: crash on some Windows/MSVC versions
    self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')

    # Direct encoded characters
    set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
    # Optional direct characters
    set_o = '!"#$%&*;<=>@[]^_`{|}'
    for c in set_d:
        # must encode to themselves and decode back unchanged
        self.assertEqual(c.encode('utf7'), c.encode('ascii'))
        self.assertEqual(c.encode('ascii').decode('utf7'), c)
    for c in set_o:
        # only the decoding direction is checked for these
        self.assertEqual(c.encode('ascii').decode('utf7'), c)
Antoine Pitrou5ffd9e92008-07-25 18:05:24 +00001169
Walter Dörwald28256f22003-01-19 16:59:20 +00001170 def test_codecs_utf8(self):
Walter Dörwald67e83882007-05-05 12:26:27 +00001171 self.assertEqual(''.encode('utf-8'), b'')
1172 self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
Martin v. Löwis74b7e442009-06-01 04:23:07 +00001173 if sys.maxunicode == 65535:
1174 self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
1175 self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
Martin v. Löwise0a2b722009-05-10 08:08:56 +00001176 self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
1177 self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
Martin v. Löwis74b7e442009-06-01 04:23:07 +00001178 if sys.maxunicode == 65535:
1179 self.assertEqual(
1180 ('\ud800\udc02'*1000).encode('utf-8'),
1181 b'\xf0\x90\x80\x82'*1000)
Walter Dörwald28256f22003-01-19 16:59:20 +00001182 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001183 '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
1184 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
1185 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
1186 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
1187 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
1188 ' Nunstuck git und'.encode('utf-8'),
Walter Dörwald67e83882007-05-05 12:26:27 +00001189 b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
1190 b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
1191 b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
1192 b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
1193 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
1194 b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
1195 b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
1196 b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
1197 b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
1198 b'\xe3\x80\x8cWenn ist das Nunstuck git und'
Walter Dörwald28256f22003-01-19 16:59:20 +00001199 )
Guido van Rossumd8855fd2000-03-24 22:14:19 +00001200
Walter Dörwald28256f22003-01-19 16:59:20 +00001201 # UTF-8 specific decoding tests
Walter Dörwald67e83882007-05-05 12:26:27 +00001202 self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
1203 self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
1204 self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +00001205
Walter Dörwald28256f22003-01-19 16:59:20 +00001206 # Other possible utf-8 test cases:
1207 # * strict decoding testing for all of the
1208 # UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +00001209
Ezio Melotti57221d02010-07-01 07:32:02 +00001210 def test_utf8_decode_valid_sequences(self):
1211 sequences = [
1212 # single byte
1213 (b'\x00', '\x00'), (b'a', 'a'), (b'\x7f', '\x7f'),
1214 # 2 bytes
1215 (b'\xc2\x80', '\x80'), (b'\xdf\xbf', '\u07ff'),
1216 # 3 bytes
1217 (b'\xe0\xa0\x80', '\u0800'), (b'\xed\x9f\xbf', '\ud7ff'),
1218 (b'\xee\x80\x80', '\uE000'), (b'\xef\xbf\xbf', '\uffff'),
1219 # 4 bytes
1220 (b'\xF0\x90\x80\x80', '\U00010000'),
1221 (b'\xf4\x8f\xbf\xbf', '\U0010FFFF')
1222 ]
1223 for seq, res in sequences:
1224 self.assertEqual(seq.decode('utf-8'), res)
1225
1226
1227 def test_utf8_decode_invalid_sequences(self):
1228 # continuation bytes in a sequence of 2, 3, or 4 bytes
1229 continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]
1230 # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F
1231 invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]
1232 # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
1233 invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]
1234 invalid_start_bytes = (
1235 continuation_bytes + invalid_2B_seq_start_bytes +
1236 invalid_4B_seq_start_bytes + [bytes([x]) for x in range(0xF7, 0x100)]
1237 )
1238
1239 for byte in invalid_start_bytes:
1240 self.assertRaises(UnicodeDecodeError, byte.decode, 'utf-8')
1241
1242 for sb in invalid_2B_seq_start_bytes:
1243 for cb in continuation_bytes:
1244 self.assertRaises(UnicodeDecodeError, (sb+cb).decode, 'utf-8')
1245
1246 for sb in invalid_4B_seq_start_bytes:
1247 for cb1 in continuation_bytes[:3]:
1248 for cb3 in continuation_bytes[:3]:
1249 self.assertRaises(UnicodeDecodeError,
1250 (sb+cb1+b'\x80'+cb3).decode, 'utf-8')
1251
1252 for cb in [bytes([x]) for x in range(0x80, 0xA0)]:
1253 self.assertRaises(UnicodeDecodeError,
1254 (b'\xE0'+cb+b'\x80').decode, 'utf-8')
1255 self.assertRaises(UnicodeDecodeError,
1256 (b'\xE0'+cb+b'\xBF').decode, 'utf-8')
1257 # surrogates
1258 for cb in [bytes([x]) for x in range(0xA0, 0xC0)]:
1259 self.assertRaises(UnicodeDecodeError,
1260 (b'\xED'+cb+b'\x80').decode, 'utf-8')
1261 self.assertRaises(UnicodeDecodeError,
1262 (b'\xED'+cb+b'\xBF').decode, 'utf-8')
1263 for cb in [bytes([x]) for x in range(0x80, 0x90)]:
1264 self.assertRaises(UnicodeDecodeError,
1265 (b'\xF0'+cb+b'\x80\x80').decode, 'utf-8')
1266 self.assertRaises(UnicodeDecodeError,
1267 (b'\xF0'+cb+b'\xBF\xBF').decode, 'utf-8')
1268 for cb in [bytes([x]) for x in range(0x90, 0xC0)]:
1269 self.assertRaises(UnicodeDecodeError,
1270 (b'\xF4'+cb+b'\x80\x80').decode, 'utf-8')
1271 self.assertRaises(UnicodeDecodeError,
1272 (b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8')
1273
1274 def test_issue8271(self):
1275 # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
1276 # only the start byte and the continuation byte(s) are now considered
1277 # invalid, instead of the number of bytes specified by the start byte.
1278 # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
1279 # table 3-8, Row 2) for more information about the algorithm used.
1280 FFFD = '\ufffd'
1281 sequences = [
1282 # invalid start bytes
1283 (b'\x80', FFFD), # continuation byte
1284 (b'\x80\x80', FFFD*2), # 2 continuation bytes
1285 (b'\xc0', FFFD),
1286 (b'\xc0\xc0', FFFD*2),
1287 (b'\xc1', FFFD),
1288 (b'\xc1\xc0', FFFD*2),
1289 (b'\xc0\xc1', FFFD*2),
1290 # with start byte of a 2-byte sequence
1291 (b'\xc2', FFFD), # only the start byte
1292 (b'\xc2\xc2', FFFD*2), # 2 start bytes
1293 (b'\xc2\xc2\xc2', FFFD*3), # 2 start bytes
1294 (b'\xc2\x41', FFFD+'A'), # invalid continuation byte
1295 # with start byte of a 3-byte sequence
1296 (b'\xe1', FFFD), # only the start byte
1297 (b'\xe1\xe1', FFFD*2), # 2 start bytes
1298 (b'\xe1\xe1\xe1', FFFD*3), # 3 start bytes
1299 (b'\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes
1300 (b'\xe1\x80', FFFD), # only 1 continuation byte
1301 (b'\xe1\x41', FFFD+'A'), # invalid continuation byte
1302 (b'\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb
1303 (b'\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes
1304 (b'\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte
1305 (b'\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid
1306 (b'\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid
1307 # with start byte of a 4-byte sequence
1308 (b'\xf1', FFFD), # only the start byte
1309 (b'\xf1\xf1', FFFD*2), # 2 start bytes
1310 (b'\xf1\xf1\xf1', FFFD*3), # 3 start bytes
1311 (b'\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes
1312 (b'\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes
1313 (b'\xf1\x80', FFFD), # only 1 continuation bytes
1314 (b'\xf1\x80\x80', FFFD), # only 2 continuation bytes
1315 (b'\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid
1316 (b'\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 1 invalid
1317 (b'\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid
1318 (b'\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cv and 1 valid
1319 (b'\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 invalid
1320 (b'\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 invalid
1321 (b'\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 1 valid cb and 1 invalid
1322 (b'\xf1\x41\xf1\x80', FFFD+'A'+FFFD),
1323 (b'\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2),
1324 (b'\xf1\xf1\x80\x41', FFFD*2+'A'),
1325 (b'\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2),
1326 # with invalid start byte of a 4-byte sequence (rfc2279)
1327 (b'\xf5', FFFD), # only the start byte
1328 (b'\xf5\xf5', FFFD*2), # 2 start bytes
1329 (b'\xf5\x80', FFFD*2), # only 1 continuation byte
1330 (b'\xf5\x80\x80', FFFD*3), # only 2 continuation byte
1331 (b'\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes
1332 (b'\xf5\x80\x41', FFFD*2+'A'), # 1 valid cb and 1 invalid
1333 (b'\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD),
1334 (b'\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'),
1335 # with invalid start byte of a 5-byte sequence (rfc2279)
1336 (b'\xf8', FFFD), # only the start byte
1337 (b'\xf8\xf8', FFFD*2), # 2 start bytes
1338 (b'\xf8\x80', FFFD*2), # only one continuation byte
1339 (b'\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid
1340 (b'\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes
1341 # with invalid start byte of a 6-byte sequence (rfc2279)
1342 (b'\xfc', FFFD), # only the start byte
1343 (b'\xfc\xfc', FFFD*2), # 2 start bytes
1344 (b'\xfc\x80\x80', FFFD*3), # only 2 continuation bytes
1345 (b'\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes
1346 # invalid start byte
1347 (b'\xfe', FFFD),
1348 (b'\xfe\x80\x80', FFFD*3),
1349 # other sequences
1350 (b'\xf1\x80\x41\x42\x43', '\ufffd\x41\x42\x43'),
1351 (b'\xf1\x80\xff\x42\x43', '\ufffd\ufffd\x42\x43'),
1352 (b'\xf1\x80\xc2\x81\x43', '\ufffd\x81\x43'),
1353 (b'\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',
1354 '\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),
1355 ]
1356 for n, (seq, res) in enumerate(sequences):
1357 self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict')
1358 self.assertEqual(seq.decode('utf-8', 'replace'), res)
1359 self.assertEqual((seq+b'b').decode('utf-8', 'replace'), res+'b')
1360 self.assertEqual(seq.decode('utf-8', 'ignore'),
1361 res.replace('\uFFFD', ''))
1362
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +00001363 def test_codecs_idna(self):
1364 # Test whether trailing dot is preserved
Walter Dörwald1324c6f2007-05-11 19:57:05 +00001365 self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +00001366
def test_codecs_errors(self):
    """Exercise the error paths of encoding, decoding and conversion."""
    text = 'Andr\202 x'
    raw = b'Andr\202 x'

    # Error handling (encoding): strict is both the default and an
    # explicit choice.
    for handler in ((), ('strict',)):
        self.assertRaises(UnicodeError, text.encode, 'ascii', *handler)
    self.assertEqual(text.encode('ascii', 'ignore'), b"Andr x")
    self.assertEqual(text.encode('ascii', 'replace'), b"Andr? x")
    # Positional and keyword spellings of the arguments must agree.
    self.assertEqual(text.encode('ascii', 'replace'),
                     text.encode('ascii', errors='replace'))
    self.assertEqual(text.encode('ascii', 'ignore'),
                     text.encode(encoding='ascii', errors='ignore'))

    # Error handling (decoding)
    for handler in ((), ('strict',)):
        self.assertRaises(UnicodeError, str, raw, 'ascii', *handler)
    self.assertEqual(str(raw, 'ascii', 'ignore'), "Andr x")
    self.assertEqual(str(raw, 'ascii', 'replace'), 'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, b"\\".decode, "unicode-escape")

    # The test.unicode1 / test.unicode2 codecs are registered at module
    # level and deliberately return malformed results.
    self.assertRaises(TypeError, b"hello".decode, "test.unicode1")
    self.assertRaises(TypeError, str, b"hello", "test.unicode2")
    for codec in ("test.unicode1", "test.unicode2"):
        self.assertRaises(TypeError, "hello".encode, codec)
    # executes PyUnicode_Encode()
    import imp
    missing_module = "non-existing module"
    search_path = ["non-existing dir"]
    self.assertRaises(ImportError, imp.find_module,
                      missing_module, search_path)

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, "hello".encode, 42, 42, 42)

    # Error handling (lone surrogate in PyUnicode_TransformDecimalToASCII())
    for convert in (int, float, complex):
        for lone_surrogate in ("\ud800", "\udf00"):
            self.assertRaises(UnicodeError, convert, lone_surrogate)
Guido van Rossum97064862000-04-10 13:52:48 +00001413
Walter Dörwald28256f22003-01-19 16:59:20 +00001414 def test_codecs(self):
1415 # Encoding
Walter Dörwald67e83882007-05-05 12:26:27 +00001416 self.assertEqual('hello'.encode('ascii'), b'hello')
1417 self.assertEqual('hello'.encode('utf-7'), b'hello')
1418 self.assertEqual('hello'.encode('utf-8'), b'hello')
1419 self.assertEqual('hello'.encode('utf8'), b'hello')
1420 self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
1421 self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
1422 self.assertEqual('hello'.encode('latin-1'), b'hello')
Guido van Rossum97064862000-04-10 13:52:48 +00001423
Walter Dörwald28256f22003-01-19 16:59:20 +00001424 # Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +00001425 for c in range(1024):
Guido van Rossum84fc66d2007-05-03 17:18:26 +00001426 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +00001427 for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
1428 'utf-16-be', 'raw_unicode_escape',
1429 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001430 self.assertEqual(str(u.encode(encoding),encoding), u)
Martin v. Löwis047c05e2002-03-21 08:55:28 +00001431
Walter Dörwald28256f22003-01-19 16:59:20 +00001432 # Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +00001433 for c in range(256):
Guido van Rossum84fc66d2007-05-03 17:18:26 +00001434 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +00001435 for encoding in ('latin-1',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001436 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +00001437
Walter Dörwald28256f22003-01-19 16:59:20 +00001438 # Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +00001439 for c in range(128):
Guido van Rossum84fc66d2007-05-03 17:18:26 +00001440 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +00001441 for encoding in ('ascii',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001442 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +00001443
Walter Dörwald28256f22003-01-19 16:59:20 +00001444 # Roundtrip safety for non-BMP (just a few chars)
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001445 u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
Walter Dörwald28256f22003-01-19 16:59:20 +00001446 for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
1447 #'raw_unicode_escape',
1448 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001449 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +00001450
Walter Dörwald28256f22003-01-19 16:59:20 +00001451 # UTF-8 must be roundtrip safe for all UCS-2 code points
1452 # This excludes surrogates: in the full range, there would be
1453 # a surrogate pair (\udbff\udc00), which gets converted back
1454 # to a non-BMP character (\U0010fc00)
Walter Dörwald1324c6f2007-05-11 19:57:05 +00001455 u = ''.join(map(chr, list(range(0,0xd800)) +
1456 list(range(0xe000,0x10000))))
Walter Dörwald28256f22003-01-19 16:59:20 +00001457 for encoding in ('utf-8',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001458 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001459
Walter Dörwald28256f22003-01-19 16:59:20 +00001460 def test_codecs_charmap(self):
1461 # 0-127
Guido van Rossum805365e2007-05-07 22:24:25 +00001462 s = bytes(range(128))
Walter Dörwald28256f22003-01-19 16:59:20 +00001463 for encoding in (
1464 'cp037', 'cp1026',
Benjamin Peterson5a6214a2010-06-27 22:41:29 +00001465 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
1466 'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
Walter Dörwald28256f22003-01-19 16:59:20 +00001467 'cp863', 'cp865', 'cp866',
1468 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
1469 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
1470 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
1471 'mac_cyrillic', 'mac_latin2',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +00001472
Walter Dörwald28256f22003-01-19 16:59:20 +00001473 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1474 'cp1256', 'cp1257', 'cp1258',
1475 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +00001476
Walter Dörwald28256f22003-01-19 16:59:20 +00001477 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
1478 'cp1006', 'iso8859_8',
Guido van Rossum9e896b32000-04-05 20:11:21 +00001479
Walter Dörwald28256f22003-01-19 16:59:20 +00001480 ### These have undefined mappings:
1481 #'cp424',
Guido van Rossum9e896b32000-04-05 20:11:21 +00001482
Walter Dörwald28256f22003-01-19 16:59:20 +00001483 ### These fail the round-trip:
1484 #'cp875'
Guido van Rossum9e896b32000-04-05 20:11:21 +00001485
Walter Dörwald28256f22003-01-19 16:59:20 +00001486 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001487 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001488
Walter Dörwald28256f22003-01-19 16:59:20 +00001489 # 128-255
Guido van Rossum805365e2007-05-07 22:24:25 +00001490 s = bytes(range(128, 256))
Walter Dörwald28256f22003-01-19 16:59:20 +00001491 for encoding in (
1492 'cp037', 'cp1026',
Benjamin Peterson5a6214a2010-06-27 22:41:29 +00001493 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
1494 'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
Walter Dörwald28256f22003-01-19 16:59:20 +00001495 'cp863', 'cp865', 'cp866',
1496 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
1497 'iso8859_2', 'iso8859_4', 'iso8859_5',
1498 'iso8859_9', 'koi8_r', 'latin_1',
1499 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +00001500
Walter Dörwald28256f22003-01-19 16:59:20 +00001501 ### These have undefined mappings:
1502 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1503 #'cp1256', 'cp1257', 'cp1258',
1504 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
1505 #'iso8859_3', 'iso8859_6', 'iso8859_7',
1506 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +00001507
Walter Dörwald28256f22003-01-19 16:59:20 +00001508 ### These fail the round-trip:
1509 #'cp1006', 'cp875', 'iso8859_8',
Tim Peters2f228e72001-05-13 00:19:31 +00001510
Walter Dörwald28256f22003-01-19 16:59:20 +00001511 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001512 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001513
Walter Dörwald28256f22003-01-19 16:59:20 +00001514 def test_concatenation(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001515 self.assertEqual(("abc" "def"), "abcdef")
1516 self.assertEqual(("abc" "def"), "abcdef")
1517 self.assertEqual(("abc" "def"), "abcdef")
1518 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
1519 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +00001520
Walter Dörwald28256f22003-01-19 16:59:20 +00001521 def test_printing(self):
1522 class BitBucket:
1523 def write(self, text):
1524 pass
Fred Drake004d5e62000-10-23 17:22:08 +00001525
Walter Dörwald28256f22003-01-19 16:59:20 +00001526 out = BitBucket()
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001527 print('abc', file=out)
1528 print('abc', 'def', file=out)
1529 print('abc', 'def', file=out)
1530 print('abc', 'def', file=out)
1531 print('abc\n', file=out)
1532 print('abc\n', end=' ', file=out)
1533 print('abc\n', end=' ', file=out)
1534 print('def\n', file=out)
1535 print('def\n', file=out)
Fred Drake004d5e62000-10-23 17:22:08 +00001536
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +00001537 def test_ucs4(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001538 x = '\U00100000'
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +00001539 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
1540 self.assertEqual(x, y)
1541
Florent Xiclunaa87b3832010-09-13 02:28:18 +00001542 y = br'\U00100000'
1543 x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
1544 self.assertEqual(x, y)
1545 y = br'\U00010000'
1546 x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
1547 self.assertEqual(x, y)
Christian Heimesfe337bf2008-03-23 21:54:12 +00001548
Florent Xiclunaa87b3832010-09-13 02:28:18 +00001549 try:
1550 br'\U11111111'.decode("raw-unicode-escape")
1551 except UnicodeDecodeError as e:
1552 self.assertEqual(e.start, 0)
1553 self.assertEqual(e.end, 10)
1554 else:
1555 self.fail("Should have raised UnicodeDecodeError")
Christian Heimesfe337bf2008-03-23 21:54:12 +00001556
Brett Cannonc3647ac2005-04-26 03:45:26 +00001557 def test_conversion(self):
1558 # Make sure __unicode__() works properly
1559 class Foo0:
1560 def __str__(self):
1561 return "foo"
1562
1563 class Foo1:
Guido van Rossum98297ee2007-11-06 21:34:58 +00001564 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001565 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001566
1567 class Foo2(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001568 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001569 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001570
1571 class Foo3(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001572 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001573 return "foo"
1574
1575 class Foo4(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001576 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001577 return "foo"
1578
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001579 class Foo5(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001580 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001581 return "foo"
1582
1583 class Foo6(str):
1584 def __str__(self):
1585 return "foos"
1586
Guido van Rossum98297ee2007-11-06 21:34:58 +00001587 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001588 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001589
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001590 class Foo7(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001591 def __str__(self):
1592 return "foos"
Guido van Rossum98297ee2007-11-06 21:34:58 +00001593 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001594 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001595
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001596 class Foo8(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001597 def __new__(cls, content=""):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001598 return str.__new__(cls, 2*content)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001599 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001600 return self
1601
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001602 class Foo9(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001603 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001604 return "not unicode"
1605
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001606 self.assertEqual(str(Foo0()), "foo")
1607 self.assertEqual(str(Foo1()), "foo")
1608 self.assertEqual(str(Foo2()), "foo")
1609 self.assertEqual(str(Foo3()), "foo")
1610 self.assertEqual(str(Foo4("bar")), "foo")
1611 self.assertEqual(str(Foo5("bar")), "foo")
1612 self.assertEqual(str(Foo6("bar")), "foou")
1613 self.assertEqual(str(Foo7("bar")), "foou")
1614 self.assertEqual(str(Foo8("foo")), "foofoo")
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001615 self.assertEqual(str(Foo9("foo")), "not unicode")
Brett Cannonc3647ac2005-04-26 03:45:26 +00001616
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001617 def test_unicode_repr(self):
1618 class s1:
1619 def __repr__(self):
1620 return '\\n'
1621
1622 class s2:
1623 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001624 return '\\n'
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001625
1626 self.assertEqual(repr(s1()), '\\n')
1627 self.assertEqual(repr(s2()), '\\n')
1628
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001629 def test_printable_repr(self):
1630 self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
Martin v. Löwisbaecd722010-10-11 22:42:28 +00001631 self.assertEqual(repr('\U00014000'), "'\\U00014000'") # nonprintable
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001632
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001633 def test_expandtabs_overflows_gracefully(self):
1634 # This test only affects 32-bit platforms because expandtabs can only take
1635 # an int as the max value, not a 64-bit C long. If expandtabs is changed
1636 # to take a 64-bit long, this test should apply to all platforms.
Christian Heimesa37d4c62007-12-04 23:02:19 +00001637 if sys.maxsize > (1 << 32) or struct.calcsize('P') != 4:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001638 return
Christian Heimesa37d4c62007-12-04 23:02:19 +00001639 self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001640
Amaury Forgeot d'Arc7888d082008-08-01 01:06:32 +00001641 def test_raiseMemError(self):
1642 # Ensure that the freelist contains a consistent object, even
1643 # when a string allocation fails with a MemoryError.
1644 # This used to crash the interpreter,
1645 # or leak references when the number was smaller.
Antoine Pitroub305aeb2008-09-05 22:13:06 +00001646 charwidth = 4 if sys.maxunicode >= 0x10000 else 2
1647 # Note: sys.maxsize is half of the actual max allocation because of
1648 # the signedness of Py_ssize_t.
1649 alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
Antoine Pitrou3db3e872008-08-17 17:06:51 +00001650 self.assertRaises(MemoryError, alloc)
1651 self.assertRaises(MemoryError, alloc)
1652
Victor Stinner808fc0a2010-03-22 12:50:40 +00001653 def test_format_subclass(self):
1654 class S(str):
1655 def __str__(self):
1656 return '__str__ overridden'
1657 s = S('xxx')
Florent Xiclunaa87b3832010-09-13 02:28:18 +00001658 self.assertEqual("%s" % s, '__str__ overridden')
1659 self.assertEqual("{}".format(s), '__str__ overridden')
Victor Stinner808fc0a2010-03-22 12:50:40 +00001660
# Test PyUnicode_FromFormat()
def test_from_format(self):
    """Call the C function PyUnicode_FromFormat() through ctypes."""
    # NOTE(review): support.import_module presumably skips the test when
    # ctypes is unavailable -- confirm against test.support.
    support.import_module('ctypes')
    from ctypes import pythonapi, py_object, c_int
    # The exported symbol name depends on the build's character width.
    if sys.maxunicode == 65535:
        name = "PyUnicodeUCS2_FromFormat"
    else:
        name = "PyUnicodeUCS4_FromFormat"
    _PyUnicode_FromFormat = getattr(pythonapi, name)
    _PyUnicode_FromFormat.restype = py_object

    def PyUnicode_FromFormat(format, *args):
        # str arguments are wrapped in py_object; all other arguments are
        # handed to ctypes unchanged.
        cargs = tuple(
            py_object(arg) if isinstance(arg, str) else arg
            for arg in args)
        return _PyUnicode_FromFormat(format, *cargs)

    # ascii format, non-ascii argument
    text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
    self.assertEqual(text, 'ascii\x7f=unicode\xe9')

    # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
    # raises an error.  The pattern's first part is a raw literal so that
    # the escaped parentheses are not invalid string escapes.
    self.assertRaisesRegex(ValueError,
        r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
        'string, got a non-ASCII byte: 0xe9$',
        PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')

    # %c with a BMP and with a non-BMP code point
    self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
    self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')

    # other tests
    text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
    self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")

    # %V takes an object and a byte string; here the object is used.
    text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
    self.assertEqual(text, 'repr=abc')

    # Test string decode from the bytes parameter of %V using utf-8.
    # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
    # '\u4eba\u6c11'
    text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
    self.assertEqual(text, 'repr=\u4eba\u6c11')

    # Test replace error handler.
    text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
    self.assertEqual(text, 'repr=abc\ufffd')
1708
# Test PyUnicode_AsWideChar()
def test_aswidechar(self):
    # Drives PyUnicode_AsWideChar() through _testcapi.unicode_aswidechar.
    # Each call passes a string plus an integer (called `buflen` further
    # down, so presumably the buffer length in wide characters -- confirm
    # against the C helper) and unpacks a (wchar, size) pair.

    # Skip cleanly instead of failing with ImportError when the _testcapi
    # extension is unavailable; ctypes already gets the same treatment.
    support.import_module('_testcapi')
    from _testcapi import unicode_aswidechar
    support.import_module('ctypes')
    from ctypes import c_wchar, sizeof

    # Length argument smaller than the string: result is cut to 2 chars.
    wchar, size = unicode_aswidechar('abcdef', 2)
    self.assertEqual(size, 2)
    self.assertEqual(wchar, 'ab')

    # Length argument equal to the string length: no trailing NUL.
    wchar, size = unicode_aswidechar('abc', 3)
    self.assertEqual(size, 3)
    self.assertEqual(wchar, 'abc')

    # One spare slot: a trailing NUL appears but is not counted in size.
    wchar, size = unicode_aswidechar('abc', 4)
    self.assertEqual(size, 3)
    self.assertEqual(wchar, 'abc\0')

    # Plenty of spare room: same result as with exactly one spare slot.
    wchar, size = unicode_aswidechar('abc', 10)
    self.assertEqual(size, 3)
    self.assertEqual(wchar, 'abc\0')

    # An embedded NUL is preserved and counted like any other character.
    wchar, size = unicode_aswidechar('abc\0def', 20)
    self.assertEqual(size, 7)
    self.assertEqual(wchar, 'abc\0def\0')

    # Non-BMP character: the expected size depends on the width of
    # c_wchar (2 wide characters when it is 2 bytes, otherwise 1).
    # buflen leaves one extra slot for the trailing NUL.
    nonbmp = chr(0x10ffff)
    if sizeof(c_wchar) == 2:
        buflen = 3
        nchar = 2
    else:  # sizeof(c_wchar) == 4
        buflen = 2
        nchar = 1
    wchar, size = unicode_aswidechar(nonbmp, buflen)
    self.assertEqual(size, nchar)
    self.assertEqual(wchar, nonbmp + '\0')
Victor Stinner5593d8a2010-10-02 11:11:27 +00001745
# Test PyUnicode_AsWideCharString()
def test_aswidecharstring(self):
    # Drives PyUnicode_AsWideCharString() through
    # _testcapi.unicode_aswidecharstring, which is called with a single
    # string and unpacked as a (wchar, size) pair.

    # Skip cleanly instead of failing with ImportError when the _testcapi
    # extension is unavailable; ctypes already gets the same treatment.
    support.import_module('_testcapi')
    from _testcapi import unicode_aswidecharstring
    support.import_module('ctypes')
    from ctypes import c_wchar, sizeof

    # The result carries a trailing NUL that is not counted in size.
    wchar, size = unicode_aswidecharstring('abc')
    self.assertEqual(size, 3)
    self.assertEqual(wchar, 'abc\0')

    # An embedded NUL is preserved and counted like any other character.
    wchar, size = unicode_aswidecharstring('abc\0def')
    self.assertEqual(size, 7)
    self.assertEqual(wchar, 'abc\0def\0')

    # Non-BMP character: the expected size depends on the width of
    # c_wchar (2 wide characters when it is 2 bytes, otherwise 1).
    nonbmp = chr(0x10ffff)
    if sizeof(c_wchar) == 2:
        nchar = 2
    else:  # sizeof(c_wchar) == 4
        nchar = 1
    wchar, size = unicode_aswidecharstring(nonbmp)
    self.assertEqual(size, nchar)
    self.assertEqual(wchar, nonbmp + '\0')
Victor Stinner5593d8a2010-10-02 11:11:27 +00001768
Victor Stinner1c24bd02010-10-02 11:03:13 +00001769
# Tests for the private _string helpers (formatter_parser and
# formatter_field_name_split) and for two decimal-conversion helpers
# exposed by _testcapi.
class StringModuleTest(unittest.TestCase):
    def test_formatter_parser(self):
        # formatter_parser() yields 4-tuples; judging by the expectations
        # below they are (literal text, field name, format spec,
        # conversion), with None standing in for an absent part.
        def parse(format_string):
            return list(_string.formatter_parser(format_string))

        formatter = parse("prefix {2!s}xxx{0:^+10.3f}{obj.attr!s} {z[0]!s:10}")
        self.assertEqual(formatter, [
            ('prefix ', '2', '', 's'),
            ('xxx', '0', '^+10.3f', None),
            ('', 'obj.attr', '', 's'),
            (' ', 'z[0]', '10', 's'),
        ])

        # Auto-numbered field, followed by trailing literal text.
        formatter = parse("prefix {} suffix")
        self.assertEqual(formatter, [
            ('prefix ', '', '', None),
            (' suffix', None, None, None),
        ])

        # Literal text only.
        formatter = parse("str")
        self.assertEqual(formatter, [
            ('str', None, None, None),
        ])

        # An empty format string yields nothing at all.
        formatter = parse("")
        self.assertEqual(formatter, [])

        # A lone field with no surrounding text.
        formatter = parse("{0}")
        self.assertEqual(formatter, [
            ('', '0', '', None),
        ])

        # Non-string input is rejected.
        self.assertRaises(TypeError, _string.formatter_parser, 1)

    def test_formatter_field_name_split(self):
        # formatter_field_name_split() returns the first name plus an
        # iterable of (flag, name) pairs; in the cases below the flag is
        # True for ".attr" accesses and False for "[key]" accesses.
        def split(name):
            items = list(_string.formatter_field_name_split(name))
            # Materialize the second element so it compares against a list.
            items[1] = list(items[1])
            return items

        self.assertEqual(split("obj"), ["obj", []])
        self.assertEqual(split("obj.arg"), ["obj", [(True, 'arg')]])
        self.assertEqual(split("obj[key]"), ["obj", [(False, 'key')]])
        self.assertEqual(split("obj.arg[key1][key2]"), [
            "obj",
            [(True, 'arg'),
             (False, 'key1'),
             (False, 'key2'),
             ]])
        # Non-string input is rejected.
        self.assertRaises(TypeError, _string.formatter_field_name_split, 1)

    def test_encode_decimal(self):
        # Skip cleanly instead of failing with ImportError when the
        # _testcapi extension is unavailable.
        support.import_module('_testcapi')
        from _testcapi import unicode_encodedecimal

        # ASCII digits pass through; non-ASCII decimal digits and Unicode
        # spaces come back as their ASCII counterparts, as bytes.
        self.assertEqual(unicode_encodedecimal('123'),
                         b'123')
        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
                         b'3.14')
        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
                         b' 3.14 ')

        # U+20AC cannot be encoded: check each error handler in turn.
        self.assertRaises(UnicodeEncodeError,
                          unicode_encodedecimal, "123\u20ac", "strict")
        self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
                         b'123?')
        self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
                         b'123')
        self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
                         b'123&#8364;')
        self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
                         b'123\\u20ac')

        # Encoding carries on correctly after a replaced character.
        self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
                         b'123? ')
        self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
                         b'123??')
        self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
                         b'123?0')

    def test_transform_decimal(self):
        # Skip cleanly instead of failing with ImportError when the
        # _testcapi extension is unavailable.
        support.import_module('_testcapi')
        from _testcapi import unicode_transformdecimaltoascii as transform_decimal

        # Decimal digits become ASCII digits and the result stays a str.
        self.assertEqual(transform_decimal('123'),
                         '123')
        self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
                         '3.14')
        # Unlike in test_encode_decimal, Unicode spaces and other
        # non-digit characters are returned unchanged.
        self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
                         "\N{EM SPACE}3.14\N{EN SPACE}")
        self.assertEqual(transform_decimal('123\u20ac'),
                         '123\u20ac')
1855
Eric Smitha1eac722011-01-29 11:15:35 +00001856
def test_main():
    # Hand this module's name to test.support.run_unittest, which is
    # expected to run the test cases defined here.
    support.run_unittest(__name__)


# Allow the file to be executed directly as a script.
if __name__ == "__main__":
    test_main()