blob: a267cf6142c4d926f3a77bef7a8ad3ff1622e6a3 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Guido van Rossum98297ee2007-11-06 21:34:58 +00009import codecs
10import struct
11import sys
12import unittest
13import warnings
Benjamin Petersonee8712c2008-05-20 21:35:26 +000014from test import support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000015
# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search hook that exposes two deliberately broken codecs.

    ``test.unicode1`` provides an encoder and a decoder that return a bare
    integer instead of a tuple.  ``test.unicode2`` provides an encoder and
    a decoder that return a tuple whose items are integers rather than
    strings.  For every other encoding name ``None`` is returned.
    """
    def encode1(input, errors="strict"):
        return 42  # not a tuple

    def decode1(input, errors="strict"):
        return 42  # not a tuple

    def encode2(input, errors="strict"):
        return (42, 42)  # no unicode

    def decode2(input, errors="strict"):
        return (42, 42)  # no unicode

    if encoding == "test.unicode1":
        return (encode1, decode1, None, None)
    if encoding == "test.unicode2":
        return (encode2, decode2, None, None)
    # not one of ours
    return None


codecs.register(search_function)
33
Walter Dörwald0fd583c2003-02-21 12:53:50 +000034class UnicodeTest(
35 string_tests.CommonTest,
Walter Dörwald57d88e52004-08-26 16:53:04 +000036 string_tests.MixinStrUnicodeUserStringTest,
37 string_tests.MixinStrUnicodeTest,
Walter Dörwald0fd583c2003-02-21 12:53:50 +000038 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000039 type2test = str
Walter Dörwald0fd583c2003-02-21 12:53:50 +000040
Guido van Rossum98297ee2007-11-06 21:34:58 +000041 def setUp(self):
42 self.warning_filters = warnings.filters[:]
43
44 def tearDown(self):
45 warnings.filters = self.warning_filters
46
Walter Dörwald0fd583c2003-02-21 12:53:50 +000047 def checkequalnofix(self, result, object, methodname, *args):
48 method = getattr(object, methodname)
49 realresult = method(*args)
50 self.assertEqual(realresult, result)
51 self.assert_(type(realresult) is type(result))
52
53 # if the original is returned make sure that
54 # this doesn't happen with subclasses
55 if realresult is object:
Guido van Rossumef87d6e2007-05-02 19:09:54 +000056 class usub(str):
Walter Dörwald0fd583c2003-02-21 12:53:50 +000057 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000058 return 'usub(%r)' % str.__repr__(self)
Walter Dörwald0fd583c2003-02-21 12:53:50 +000059 object = usub(object)
60 method = getattr(object, methodname)
61 realresult = method(*args)
62 self.assertEqual(realresult, result)
63 self.assert_(object is not realresult)
Guido van Rossume4874ae2001-09-21 15:36:41 +000064
Jeremy Hylton504de6b2003-10-06 05:08:26 +000065 def test_literals(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000066 self.assertEqual('\xff', '\u00ff')
67 self.assertEqual('\uffff', '\U0000ffff')
Guido van Rossum36e0a922007-07-20 04:05:57 +000068 self.assertRaises(SyntaxError, eval, '\'\\Ufffffffe\'')
69 self.assertRaises(SyntaxError, eval, '\'\\Uffffffff\'')
70 self.assertRaises(SyntaxError, eval, '\'\\U%08x\'' % 0x110000)
Benjamin Petersoncd76c272008-04-05 15:09:30 +000071 # raw strings should not have unicode escapes
72 self.assertNotEquals(r"\u0020", " ")
Jeremy Hylton504de6b2003-10-06 05:08:26 +000073
Walter Dörwald28256f22003-01-19 16:59:20 +000074 def test_repr(self):
75 if not sys.platform.startswith('java'):
76 # Test basic sanity of repr()
Walter Dörwald67e83882007-05-05 12:26:27 +000077 self.assertEqual(repr('abc'), "'abc'")
78 self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
79 self.assertEqual(repr('ab\\'), "'ab\\\\'")
80 self.assertEqual(repr('\\c'), "'\\\\c'")
81 self.assertEqual(repr('\\'), "'\\\\'")
82 self.assertEqual(repr('\n'), "'\\n'")
83 self.assertEqual(repr('\r'), "'\\r'")
84 self.assertEqual(repr('\t'), "'\\t'")
85 self.assertEqual(repr('\b'), "'\\x08'")
86 self.assertEqual(repr("'\""), """'\\'"'""")
87 self.assertEqual(repr("'\""), """'\\'"'""")
88 self.assertEqual(repr("'"), '''"'"''')
89 self.assertEqual(repr('"'), """'"'""")
Walter Dörwald28256f22003-01-19 16:59:20 +000090 latin1repr = (
Walter Dörwald67e83882007-05-05 12:26:27 +000091 "'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
Walter Dörwald28256f22003-01-19 16:59:20 +000092 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
93 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
94 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
95 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
96 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
Georg Brandlf954c4b2008-06-04 11:41:32 +000097 "\\x9c\\x9d\\x9e\\x9f\\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9"
98 "\xaa\xab\xac\\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
99 "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5"
100 "\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
101 "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1"
102 "\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
103 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd"
104 "\xfe\xff'")
Guido van Rossum805365e2007-05-07 22:24:25 +0000105 testrepr = repr(''.join(map(chr, range(256))))
Walter Dörwald28256f22003-01-19 16:59:20 +0000106 self.assertEqual(testrepr, latin1repr)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000107 # Test repr works on wide unicode escapes without overflow.
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000108 self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
109 repr("\U00010000" * 39 + "\uffff" * 4096))
Walter Dörwald28256f22003-01-19 16:59:20 +0000110
Guido van Rossum49d6b072006-08-17 21:11:47 +0000111 def test_iterators(self):
112 # Make sure unicode objects have an __iter__ method
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000113 it = "\u1111\u2222\u3333".__iter__()
114 self.assertEqual(next(it), "\u1111")
115 self.assertEqual(next(it), "\u2222")
116 self.assertEqual(next(it), "\u3333")
Georg Brandla18af4e2007-04-21 15:47:16 +0000117 self.assertRaises(StopIteration, next, it)
Guido van Rossum49d6b072006-08-17 21:11:47 +0000118
Walter Dörwald28256f22003-01-19 16:59:20 +0000119 def test_count(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000120 string_tests.CommonTest.test_count(self)
121 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000122 self.checkequalnofix(3, 'aaa', 'count', 'a')
123 self.checkequalnofix(0, 'aaa', 'count', 'b')
124 self.checkequalnofix(3, 'aaa', 'count', 'a')
125 self.checkequalnofix(0, 'aaa', 'count', 'b')
126 self.checkequalnofix(0, 'aaa', 'count', 'b')
127 self.checkequalnofix(1, 'aaa', 'count', 'a', -1)
128 self.checkequalnofix(3, 'aaa', 'count', 'a', -10)
129 self.checkequalnofix(2, 'aaa', 'count', 'a', 0, -1)
130 self.checkequalnofix(0, 'aaa', 'count', 'a', 0, -10)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000131
Walter Dörwald28256f22003-01-19 16:59:20 +0000132 def test_find(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000133 self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc')
134 self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1)
135 self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000136
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000137 self.assertRaises(TypeError, 'hello'.find)
138 self.assertRaises(TypeError, 'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000139
Walter Dörwald28256f22003-01-19 16:59:20 +0000140 def test_rfind(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000141 string_tests.CommonTest.test_rfind(self)
142 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000143 self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
144 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
145 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
Guido van Rossum8b264542000-12-19 02:22:31 +0000146
Walter Dörwald28256f22003-01-19 16:59:20 +0000147 def test_index(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000148 string_tests.CommonTest.test_index(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000149 self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
150 self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def')
151 self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc')
152 self.checkequalnofix(9, 'abcdefghiabc', 'index', 'abc', 1)
153 self.assertRaises(ValueError, 'abcdefghiabc'.index, 'hib')
154 self.assertRaises(ValueError, 'abcdefghiab'.index, 'abc', 1)
155 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', 8)
156 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000157
Walter Dörwald28256f22003-01-19 16:59:20 +0000158 def test_rindex(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000159 string_tests.CommonTest.test_rindex(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000160 self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '')
161 self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def')
162 self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc')
163 self.checkequalnofix(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000164
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000165 self.assertRaises(ValueError, 'abcdefghiabc'.rindex, 'hib')
166 self.assertRaises(ValueError, 'defghiabc'.rindex, 'def', 1)
167 self.assertRaises(ValueError, 'defghiabc'.rindex, 'abc', 0, -1)
168 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
169 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000170
Georg Brandlceee0772007-11-27 23:48:05 +0000171 def test_maketrans_translate(self):
172 # these work with plain translate()
173 self.checkequalnofix('bbbc', 'abababc', 'translate',
174 {ord('a'): None})
175 self.checkequalnofix('iiic', 'abababc', 'translate',
176 {ord('a'): None, ord('b'): ord('i')})
177 self.checkequalnofix('iiix', 'abababc', 'translate',
178 {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
179 self.checkequalnofix('c', 'abababc', 'translate',
180 {ord('a'): None, ord('b'): ''})
181 self.checkequalnofix('xyyx', 'xzx', 'translate',
182 {ord('z'): 'yy'})
183 # this needs maketrans()
184 self.checkequalnofix('abababc', 'abababc', 'translate',
185 {'b': '<i>'})
186 tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
187 self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
188 # test alternative way of calling maketrans()
189 tbl = self.type2test.maketrans('abc', 'xyz', 'd')
190 self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
191
192 self.assertRaises(TypeError, self.type2test.maketrans)
193 self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
194 self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
195 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
196 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
197 self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
198 self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000199
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000200 self.assertRaises(TypeError, 'hello'.translate)
Walter Dörwald67e83882007-05-05 12:26:27 +0000201 self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202
Walter Dörwald28256f22003-01-19 16:59:20 +0000203 def test_split(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000204 string_tests.CommonTest.test_split(self)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000205
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000206 # Mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000207 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
208 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
209 self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000210
Walter Dörwald28256f22003-01-19 16:59:20 +0000211 def test_join(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000212 string_tests.MixinStrUnicodeUserStringTest.test_join(self)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000213
Guido van Rossumf1044292007-09-27 18:01:22 +0000214 class MyWrapper:
215 def __init__(self, sval): self.sval = sval
216 def __str__(self): return self.sval
217
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000218 # mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000219 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
220 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
221 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
222 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
223 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
224 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
225 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
Guido van Rossum98297ee2007-11-06 21:34:58 +0000226 self.checkraises(TypeError, ' ', 'join', ['1', '2', MyWrapper('foo')])
227 self.checkraises(TypeError, ' ', 'join', ['1', '2', '3', bytes()])
228 self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
229 self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
Marc-André Lemburge5034372000-08-08 08:04:29 +0000230
Walter Dörwald28256f22003-01-19 16:59:20 +0000231 def test_replace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000232 string_tests.CommonTest.test_replace(self)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000233
Walter Dörwald28256f22003-01-19 16:59:20 +0000234 # method call forwarded from str implementation because of unicode argument
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000235 self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
236 self.assertRaises(TypeError, 'replace'.replace, "r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000237
Guido van Rossum98297ee2007-11-06 21:34:58 +0000238 def test_bytes_comparison(self):
239 warnings.simplefilter('ignore', BytesWarning)
240 self.assertEqual('abc' == b'abc', False)
241 self.assertEqual('abc' != b'abc', True)
Guido van Rossum254348e2007-11-21 19:29:53 +0000242 self.assertEqual('abc' == bytearray(b'abc'), False)
243 self.assertEqual('abc' != bytearray(b'abc'), True)
Brett Cannon40430012007-10-22 20:24:51 +0000244
Walter Dörwald28256f22003-01-19 16:59:20 +0000245 def test_comparison(self):
246 # Comparisons:
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000247 self.assertEqual('abc', 'abc')
248 self.assertEqual('abc', 'abc')
249 self.assertEqual('abc', 'abc')
250 self.assert_('abcd' > 'abc')
251 self.assert_('abcd' > 'abc')
252 self.assert_('abcd' > 'abc')
253 self.assert_('abc' < 'abcd')
254 self.assert_('abc' < 'abcd')
255 self.assert_('abc' < 'abcd')
Walter Dörwald28256f22003-01-19 16:59:20 +0000256
257 if 0:
258 # Move these tests to a Unicode collation module test...
259 # Testing UTF-16 code point order comparisons...
260
261 # No surrogates, no fixup required.
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000262 self.assert_('\u0061' < '\u20ac')
Walter Dörwald28256f22003-01-19 16:59:20 +0000263 # Non surrogate below surrogate value, no fixup required
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000264 self.assert_('\u0061' < '\ud800\udc02')
Walter Dörwald28256f22003-01-19 16:59:20 +0000265
266 # Non surrogate above surrogate value, fixup required
267 def test_lecmp(s, s2):
268 self.assert_(s < s2)
269
270 def test_fixup(s):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000271 s2 = '\ud800\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000272 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000273 s2 = '\ud900\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000274 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000275 s2 = '\uda00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000276 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000277 s2 = '\udb00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000278 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000279 s2 = '\ud800\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000280 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000281 s2 = '\ud900\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000282 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000283 s2 = '\uda00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000284 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000285 s2 = '\udb00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000286 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000287 s2 = '\ud800\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000288 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000289 s2 = '\ud900\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000290 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000291 s2 = '\uda00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000292 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000293 s2 = '\udb00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000294 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000295 s2 = '\ud800\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000296 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000297 s2 = '\ud900\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000298 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000299 s2 = '\uda00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000300 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000301 s2 = '\udb00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000302 test_lecmp(s, s2)
303
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000304 test_fixup('\ue000')
305 test_fixup('\uff61')
Walter Dörwald28256f22003-01-19 16:59:20 +0000306
307 # Surrogates on both sides, no fixup required
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000308 self.assert_('\ud800\udc02' < '\ud84d\udc56')
Walter Dörwald28256f22003-01-19 16:59:20 +0000309
Walter Dörwald28256f22003-01-19 16:59:20 +0000310 def test_islower(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000311 string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000312 self.checkequalnofix(False, '\u1FFc', 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000313
314 def test_isupper(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000315 string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
316 if not sys.platform.startswith('java'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000317 self.checkequalnofix(False, '\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000318
319 def test_istitle(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000320 string_tests.MixinStrUnicodeUserStringTest.test_title(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000321 self.checkequalnofix(True, '\u1FFc', 'istitle')
322 self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000323
324 def test_isspace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000325 string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000326 self.checkequalnofix(True, '\u2000', 'isspace')
327 self.checkequalnofix(True, '\u200a', 'isspace')
328 self.checkequalnofix(False, '\u2014', 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000329
330 def test_isalpha(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000331 string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000332 self.checkequalnofix(True, '\u1FFc', 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000333
334 def test_isdecimal(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000335 self.checkequalnofix(False, '', 'isdecimal')
336 self.checkequalnofix(False, 'a', 'isdecimal')
337 self.checkequalnofix(True, '0', 'isdecimal')
338 self.checkequalnofix(False, '\u2460', 'isdecimal') # CIRCLED DIGIT ONE
339 self.checkequalnofix(False, '\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
340 self.checkequalnofix(True, '\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
341 self.checkequalnofix(True, '0123456789', 'isdecimal')
342 self.checkequalnofix(False, '0123456789a', 'isdecimal')
Walter Dörwald28256f22003-01-19 16:59:20 +0000343
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000344 self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000345
346 def test_isdigit(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000347 string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000348 self.checkequalnofix(True, '\u2460', 'isdigit')
349 self.checkequalnofix(False, '\xbc', 'isdigit')
350 self.checkequalnofix(True, '\u0660', 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000351
352 def test_isnumeric(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000353 self.checkequalnofix(False, '', 'isnumeric')
354 self.checkequalnofix(False, 'a', 'isnumeric')
355 self.checkequalnofix(True, '0', 'isnumeric')
356 self.checkequalnofix(True, '\u2460', 'isnumeric')
357 self.checkequalnofix(True, '\xbc', 'isnumeric')
358 self.checkequalnofix(True, '\u0660', 'isnumeric')
359 self.checkequalnofix(True, '0123456789', 'isnumeric')
360 self.checkequalnofix(False, '0123456789a', 'isnumeric')
Walter Dörwald28256f22003-01-19 16:59:20 +0000361
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000362 self.assertRaises(TypeError, "abc".isnumeric, 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000363
Martin v. Löwis47383402007-08-15 07:32:56 +0000364 def test_isidentifier(self):
365 self.assertTrue("a".isidentifier())
366 self.assertTrue("Z".isidentifier())
367 self.assertTrue("_".isidentifier())
368 self.assertTrue("b0".isidentifier())
369 self.assertTrue("bc".isidentifier())
370 self.assertTrue("b_".isidentifier())
371 self.assertTrue("µ".isidentifier())
372
373 self.assertFalse(" ".isidentifier())
374 self.assertFalse("[".isidentifier())
375 self.assertFalse("©".isidentifier())
376
Georg Brandlf954c4b2008-06-04 11:41:32 +0000377 def test_isprintable(self):
378 self.assertTrue("abcdefg".isprintable())
379 self.assertFalse("abcdefg\n".isprintable())
380 self.assertTrue("\u0370".isprintable())
381 self.assertFalse("\ud800".isprintable())
382
Walter Dörwald28256f22003-01-19 16:59:20 +0000383 def test_contains(self):
384 # Testing Unicode contains method
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000385 self.assert_('a' in 'abdb')
386 self.assert_('a' in 'bdab')
387 self.assert_('a' in 'bdaba')
388 self.assert_('a' in 'bdba')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000389 self.assert_('a' not in 'bdb')
390 self.assert_('a' in 'bdba')
Walter Dörwald28256f22003-01-19 16:59:20 +0000391 self.assert_('a' in ('a',1,None))
392 self.assert_('a' in (1,None,'a'))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000393 self.assert_('a' in ('a',1,None))
394 self.assert_('a' in (1,None,'a'))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000395 self.assert_('a' not in ('x',1,'y'))
Walter Dörwald28256f22003-01-19 16:59:20 +0000396 self.assert_('a' not in ('x',1,None))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000397 self.assert_('abcd' not in 'abcxxxx')
398 self.assert_('ab' in 'abcd')
399 self.assert_('ab' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000400 self.assert_('ab' in (1,None,'ab'))
401 self.assert_('' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000402 self.assert_('' in '')
403 self.assert_('' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000404 self.assert_('\0' not in 'abc')
405 self.assert_('\0' in '\0abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000406 self.assert_('\0' in 'abc\0')
407 self.assert_('a' in '\0abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000408 self.assert_('asdf' in 'asdf')
409 self.assert_('asdf' not in 'asd')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000410 self.assert_('asdf' not in '')
Walter Dörwald28256f22003-01-19 16:59:20 +0000411
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000412 self.assertRaises(TypeError, "abc".__contains__)
Walter Dörwald28256f22003-01-19 16:59:20 +0000413
Eric Smith8c663262007-08-25 02:26:07 +0000414 def test_format(self):
415 self.assertEqual(''.format(), '')
416 self.assertEqual('a'.format(), 'a')
417 self.assertEqual('ab'.format(), 'ab')
418 self.assertEqual('a{{'.format(), 'a{')
419 self.assertEqual('a}}'.format(), 'a}')
420 self.assertEqual('{{b'.format(), '{b')
421 self.assertEqual('}}b'.format(), '}b')
422 self.assertEqual('a{{b'.format(), 'a{b')
423
424 # examples from the PEP:
425 import datetime
426 self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
427 self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
428 "My name is Fred")
429 self.assertEqual("My name is {0} :-{{}}".format('Fred'),
430 "My name is Fred :-{}")
431
432 d = datetime.date(2007, 8, 18)
433 self.assertEqual("The year is {0.year}".format(d),
434 "The year is 2007")
435
Eric Smith8c663262007-08-25 02:26:07 +0000436 # classes we'll use for testing
437 class C:
438 def __init__(self, x=100):
439 self._x = x
440 def __format__(self, spec):
441 return spec
442
443 class D:
444 def __init__(self, x):
445 self.x = x
446 def __format__(self, spec):
447 return str(self.x)
448
449 # class with __str__, but no __format__
450 class E:
451 def __init__(self, x):
452 self.x = x
453 def __str__(self):
454 return 'E(' + self.x + ')'
455
456 # class with __repr__, but no __format__ or __str__
457 class F:
458 def __init__(self, x):
459 self.x = x
460 def __repr__(self):
461 return 'F(' + self.x + ')'
462
463 # class with __format__ that forwards to string, for some format_spec's
464 class G:
465 def __init__(self, x):
466 self.x = x
467 def __str__(self):
468 return "string is " + self.x
469 def __format__(self, format_spec):
470 if format_spec == 'd':
471 return 'G(' + self.x + ')'
472 return object.__format__(self, format_spec)
473
474 # class that returns a bad type from __format__
475 class H:
476 def __format__(self, format_spec):
477 return 1.0
478
Eric Smith739e2ad2007-08-27 19:07:22 +0000479 class I(datetime.date):
480 def __format__(self, format_spec):
481 return self.strftime(format_spec)
482
Eric Smith185e30c2007-08-30 22:23:08 +0000483 class J(int):
484 def __format__(self, format_spec):
485 return int.__format__(self * 2, format_spec)
486
Eric Smith8c663262007-08-25 02:26:07 +0000487
488 self.assertEqual(''.format(), '')
489 self.assertEqual('abc'.format(), 'abc')
490 self.assertEqual('{0}'.format('abc'), 'abc')
491 self.assertEqual('{0:}'.format('abc'), 'abc')
492# self.assertEqual('{ 0 }'.format('abc'), 'abc')
493 self.assertEqual('X{0}'.format('abc'), 'Xabc')
494 self.assertEqual('{0}X'.format('abc'), 'abcX')
495 self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
496 self.assertEqual('{1}'.format(1, 'abc'), 'abc')
497 self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
498 self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
499 self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
500 self.assertEqual('{0}'.format(-15), '-15')
501 self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
502 self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
503 self.assertEqual('{{'.format(), '{')
504 self.assertEqual('}}'.format(), '}')
505 self.assertEqual('{{}}'.format(), '{}')
506 self.assertEqual('{{x}}'.format(), '{x}')
507 self.assertEqual('{{{0}}}'.format(123), '{123}')
508 self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
509 self.assertEqual('}}{{'.format(), '}{')
510 self.assertEqual('}}x{{'.format(), '}x{')
511
Eric Smith7ade6482007-08-26 22:27:13 +0000512 # weird field names
513 self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
514 self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000515 self.assertEqual("{0[ ]}".format({' ':3}), '3')
Eric Smith7ade6482007-08-26 22:27:13 +0000516
Eric Smith8c663262007-08-25 02:26:07 +0000517 self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
518 self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
519 self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
520 self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
521 self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
522 self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
523 self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
524
Eric Smith8c663262007-08-25 02:26:07 +0000525 # strings
526 self.assertEqual('{0:.3s}'.format('abc'), 'abc')
527 self.assertEqual('{0:.3s}'.format('ab'), 'ab')
528 self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
529 self.assertEqual('{0:.0s}'.format('abcdef'), '')
530 self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
531 self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
532 self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
533 self.assertEqual('{0:3.2s}'.format('abc'), 'ab ')
534 self.assertEqual('{0:x<0s}'.format('result'), 'result')
535 self.assertEqual('{0:x<5s}'.format('result'), 'result')
536 self.assertEqual('{0:x<6s}'.format('result'), 'result')
537 self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
538 self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
539 self.assertEqual('{0: <7s}'.format('result'), 'result ')
540 self.assertEqual('{0:<7s}'.format('result'), 'result ')
541 self.assertEqual('{0:>7s}'.format('result'), ' result')
542 self.assertEqual('{0:>8s}'.format('result'), ' result')
543 self.assertEqual('{0:^8s}'.format('result'), ' result ')
544 self.assertEqual('{0:^9s}'.format('result'), ' result ')
545 self.assertEqual('{0:^10s}'.format('result'), ' result ')
546 self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
547 self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
548 self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
549
550 # format specifiers for user defined type
551 self.assertEqual('{0:abc}'.format(C()), 'abc')
552
553 # !r and !s coersions
554 self.assertEqual('{0!s}'.format('Hello'), 'Hello')
555 self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
556 self.assertEqual('{0!s:15}'.format('Hello'), 'Hello ')
557 self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello ')
558 self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
559 self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
560 self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
561
Eric Smith8c663262007-08-25 02:26:07 +0000562 # test fallback to object.__format__
563 self.assertEqual('{0}'.format({}), '{}')
564 self.assertEqual('{0}'.format([]), '[]')
565 self.assertEqual('{0}'.format([1]), '[1]')
566 self.assertEqual('{0}'.format(E('data')), 'E(data)')
567 self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ')
568 self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ')
569 self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
570 self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
571 self.assertEqual('{0!s}'.format(G('data')), 'string is data')
572
Eric Smith739e2ad2007-08-27 19:07:22 +0000573 self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
574 month=8,
575 day=27)),
576 "date: 2007-08-27")
577
Eric Smith185e30c2007-08-30 22:23:08 +0000578 # test deriving from a builtin type and overriding __format__
579 self.assertEqual("{0}".format(J(10)), "20")
580
581
Eric Smith8c663262007-08-25 02:26:07 +0000582 # string format specifiers
583 self.assertEqual('{0:}'.format('a'), 'a')
584
585 # computed format specifiers
586 self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
587 self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
588 self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
589 self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ')
590 self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ')
591
592 # test various errors
593 self.assertRaises(ValueError, '{'.format)
594 self.assertRaises(ValueError, '}'.format)
595 self.assertRaises(ValueError, 'a{'.format)
596 self.assertRaises(ValueError, 'a}'.format)
597 self.assertRaises(ValueError, '{a'.format)
598 self.assertRaises(ValueError, '}a'.format)
Eric Smith11529192007-09-04 23:04:22 +0000599 self.assertRaises(IndexError, '{0}'.format)
600 self.assertRaises(IndexError, '{1}'.format, 'abc')
601 self.assertRaises(KeyError, '{x}'.format)
Eric Smith8c663262007-08-25 02:26:07 +0000602 self.assertRaises(ValueError, "}{".format)
603 self.assertRaises(ValueError, "{".format)
604 self.assertRaises(ValueError, "}".format)
605 self.assertRaises(ValueError, "abc{0:{}".format)
606 self.assertRaises(ValueError, "{0".format)
Eric Smith11529192007-09-04 23:04:22 +0000607 self.assertRaises(IndexError, "{0.}".format)
608 self.assertRaises(ValueError, "{0.}".format, 0)
609 self.assertRaises(IndexError, "{0[}".format)
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000610 self.assertRaises(ValueError, "{0[}".format, [])
Eric Smith11529192007-09-04 23:04:22 +0000611 self.assertRaises(KeyError, "{0]}".format)
612 self.assertRaises(ValueError, "{0.[]}".format, 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000613 self.assertRaises(ValueError, "{0..foo}".format, 0)
Eric Smith11529192007-09-04 23:04:22 +0000614 self.assertRaises(ValueError, "{0[0}".format, 0)
615 self.assertRaises(ValueError, "{0[0:foo}".format, 0)
616 self.assertRaises(KeyError, "{c]}".format)
617 self.assertRaises(ValueError, "{{ {{{0}}".format, 0)
618 self.assertRaises(ValueError, "{0}}".format, 0)
619 self.assertRaises(KeyError, "{foo}".format, bar=3)
Eric Smith8c663262007-08-25 02:26:07 +0000620 self.assertRaises(ValueError, "{0!x}".format, 3)
Eric Smith11529192007-09-04 23:04:22 +0000621 self.assertRaises(ValueError, "{0!}".format, 0)
622 self.assertRaises(ValueError, "{0!rs}".format, 0)
Eric Smith8c663262007-08-25 02:26:07 +0000623 self.assertRaises(ValueError, "{!}".format)
624 self.assertRaises(ValueError, "{:}".format)
Eric Smith7ade6482007-08-26 22:27:13 +0000625 self.assertRaises(ValueError, "{:s}".format)
Eric Smith8c663262007-08-25 02:26:07 +0000626 self.assertRaises(ValueError, "{}".format)
627
628 # can't have a replacement on the field name portion
629 self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
630
631 # exceed maximum recursion depth
632 self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
633 self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
634 0, 1, 2, 3, 4, 5, 6, 7)
635
636 # string format spec errors
637 self.assertRaises(ValueError, "{0:-s}".format, '')
638 self.assertRaises(ValueError, format, "", "-")
639 self.assertRaises(ValueError, "{0:=s}".format, '')
640
def test_formatting(self):
    """Exercise printf-style ``%`` formatting of str objects.

    The shared checks in
    string_tests.MixinStrUnicodeUserStringTest.test_formatting run first,
    followed by the str-specific cases below.  Expected values that involve
    a field width spell out their padding in full: a positive width pads
    with spaces on the left, a negative ``*`` width pads on the right.
    """
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')
    # %5.2f right-justifies in a five-character field: '3.00' gains one
    # leading space, whereas '1003.57' is already wider than the field.
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, -2, 3), 'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.5), 'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000,  3.57')
    self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
    if not sys.platform.startswith('java'):
        self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
    self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
    # Mapping keys may contain non-ASCII characters.
    self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')

    self.assertEqual('%c' % 0x1234, '\u1234')
    # %c must reject a code point beyond sys.maxunicode.
    self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    # Extra mapping entries that the format never names are ignored.
    self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123}, '...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,"abc"), '...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,"abc"), '...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % "abc", '...abc...')
    # '*' takes the width (and precision) from the argument tuple.
    self.assertEqual('%*s' % (5,'abc',), '  abc')
    self.assertEqual('%*s' % (-5,'abc',), 'abc  ')
    self.assertEqual('%*.*s' % (5,2,'abc',), '   ab')
    self.assertEqual('%*.*s' % (5,3,'abc',), '  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,'abc',), '10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, 'abc',), '103   abc')
    self.assertEqual('%c' % 'a', 'a')
    class Wrapper:
        # %s must use the non-ASCII result of a user-defined __str__().
        def __str__(self):
            return '\u1234'
    self.assertEqual('%s' % Wrapper(), '\u1234')
Walter Dörwald28256f22003-01-19 16:59:20 +0000679
@support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    """%-formatting a float must use '.' as the decimal point.

    The decorator runs the test under the requested locale settings; the
    result should not be formatted with a comma, but always as in the
    C locale.
    """
    formatted = '%.1f' % 1.0
    self.assertEqual('1.0', formatted)
Georg Brandlda6b1072006-01-20 17:48:54 +0000684
Walter Dörwald28256f22003-01-19 16:59:20 +0000685 def test_constructor(self):
686 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
687
688 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000689 str('unicode remains unicode'),
690 'unicode remains unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000691 )
692
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000693 class UnicodeSubclass(str):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000694 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000695
Walter Dörwald28256f22003-01-19 16:59:20 +0000696 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000697 str(UnicodeSubclass('unicode subclass becomes unicode')),
698 'unicode subclass becomes unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000699 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000700
Walter Dörwald28256f22003-01-19 16:59:20 +0000701 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000702 str('strings are converted to unicode'),
703 'strings are converted to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000704 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000705
Walter Dörwald28256f22003-01-19 16:59:20 +0000706 class StringCompat:
707 def __init__(self, x):
708 self.x = x
709 def __str__(self):
710 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000711
Walter Dörwald28256f22003-01-19 16:59:20 +0000712 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000713 str(StringCompat('__str__ compatible objects are recognized')),
714 '__str__ compatible objects are recognized'
Walter Dörwald28256f22003-01-19 16:59:20 +0000715 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000716
Walter Dörwald28256f22003-01-19 16:59:20 +0000717 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000718
Walter Dörwald28256f22003-01-19 16:59:20 +0000719 o = StringCompat('unicode(obj) is compatible to str()')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000720 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Walter Dörwald28256f22003-01-19 16:59:20 +0000721 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000722
Guido van Rossume2a383d2007-01-15 16:59:06 +0000723 for obj in (123, 123.45, 123):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000724 self.assertEqual(str(obj), str(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000725
Walter Dörwald28256f22003-01-19 16:59:20 +0000726 # unicode(obj, encoding, error) tests (this maps to
727 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000728
Walter Dörwald28256f22003-01-19 16:59:20 +0000729 if not sys.platform.startswith('java'):
730 self.assertRaises(
731 TypeError,
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000732 str,
733 'decoding unicode is not supported',
Walter Dörwald28256f22003-01-19 16:59:20 +0000734 'utf-8',
735 'strict'
736 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000737
Walter Dörwald28256f22003-01-19 16:59:20 +0000738 self.assertEqual(
Walter Dörwald67e83882007-05-05 12:26:27 +0000739 str(b'strings are decoded to unicode', 'utf-8', 'strict'),
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000740 'strings are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000741 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000742
Walter Dörwald28256f22003-01-19 16:59:20 +0000743 if not sys.platform.startswith('java'):
744 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000745 str(
Guido van Rossumbae07c92007-10-08 02:46:15 +0000746 memoryview(b'character buffers are decoded to unicode'),
Walter Dörwald28256f22003-01-19 16:59:20 +0000747 'utf-8',
748 'strict'
749 ),
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000750 'character buffers are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000751 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000752
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000753 self.assertRaises(TypeError, str, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000754
Walter Dörwald28256f22003-01-19 16:59:20 +0000755 def test_codecs_utf7(self):
756 utfTests = [
Walter Dörwald67e83882007-05-05 12:26:27 +0000757 ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
758 ('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example
759 ('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example
760 ('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
761 ('+', b'+-'),
762 ('+-', b'+--'),
763 ('+?', b'+-?'),
764 ('\?', b'+AFw?'),
765 ('+?', b'+-?'),
766 (r'\\?', b'+AFwAXA?'),
767 (r'\\\?', b'+AFwAXABc?'),
768 (r'++--', b'+-+---')
Walter Dörwald28256f22003-01-19 16:59:20 +0000769 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000770
Walter Dörwald28256f22003-01-19 16:59:20 +0000771 for (x, y) in utfTests:
772 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000773
Walter Dörwald28256f22003-01-19 16:59:20 +0000774 # surrogates not supported
Walter Dörwald67e83882007-05-05 12:26:27 +0000775 self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000776
Walter Dörwald67e83882007-05-05 12:26:27 +0000777 self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000778
Walter Dörwald28256f22003-01-19 16:59:20 +0000779 def test_codecs_utf8(self):
Walter Dörwald67e83882007-05-05 12:26:27 +0000780 self.assertEqual(''.encode('utf-8'), b'')
781 self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
782 self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
783 self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
784 self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
785 self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
Walter Dörwald28256f22003-01-19 16:59:20 +0000786 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000787 ('\ud800\udc02'*1000).encode('utf-8'),
Walter Dörwald67e83882007-05-05 12:26:27 +0000788 b'\xf0\x90\x80\x82'*1000
Walter Dörwald28256f22003-01-19 16:59:20 +0000789 )
790 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000791 '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
792 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
793 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
794 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
795 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
796 ' Nunstuck git und'.encode('utf-8'),
Walter Dörwald67e83882007-05-05 12:26:27 +0000797 b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
798 b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
799 b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
800 b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
801 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
802 b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
803 b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
804 b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
805 b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
806 b'\xe3\x80\x8cWenn ist das Nunstuck git und'
Walter Dörwald28256f22003-01-19 16:59:20 +0000807 )
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000808
Walter Dörwald28256f22003-01-19 16:59:20 +0000809 # UTF-8 specific decoding tests
Walter Dörwald67e83882007-05-05 12:26:27 +0000810 self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
811 self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
812 self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000813
Walter Dörwald28256f22003-01-19 16:59:20 +0000814 # Other possible utf-8 test cases:
815 # * strict decoding testing for all of the
816 # UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000817
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +0000818 def test_codecs_idna(self):
819 # Test whether trailing dot is preserved
Walter Dörwald1324c6f2007-05-11 19:57:05 +0000820 self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +0000821
def test_codecs_errors(self):
    """Check codec error handling for encoding, decoding and bad codecs.

    The 'test.unicode1' and 'test.unicode2' encodings used below are looked
    up through the codec registry, so they must have been registered before
    this test runs.
    """
    expect_raises = self.assertRaises
    non_ascii_text = 'Andr\202 x'
    non_ascii_bytes = b'Andr\202 x'

    # Error handling (encoding)
    expect_raises(UnicodeError, non_ascii_text.encode, 'ascii')
    expect_raises(UnicodeError, non_ascii_text.encode, 'ascii','strict')
    self.assertEqual(non_ascii_text.encode('ascii','ignore'), b"Andr x")
    self.assertEqual(non_ascii_text.encode('ascii','replace'), b"Andr? x")

    # Error handling (decoding)
    expect_raises(UnicodeError, str, non_ascii_bytes, 'ascii')
    expect_raises(UnicodeError, str, non_ascii_bytes, 'ascii', 'strict')
    self.assertEqual(str(non_ascii_bytes, 'ascii', 'ignore'), "Andr x")
    self.assertEqual(str(non_ascii_bytes, 'ascii', 'replace'), 'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")

    # Error handling (truncated escape sequence)
    expect_raises(UnicodeError, b"\\".decode, "unicode-escape")

    # Codecs whose encode/decode functions return malformed results.
    expect_raises(TypeError, b"hello".decode, "test.unicode1")
    expect_raises(TypeError, str, b"hello", "test.unicode2")
    expect_raises(TypeError, "hello".encode, "test.unicode1")
    expect_raises(TypeError, "hello".encode, "test.unicode2")

    # executes PyUnicode_Encode()
    import imp
    missing_module = "non-existing module"
    search_path = ["non-existing dir"]
    expect_raises(ImportError, imp.find_module, missing_module, search_path)

    # Error handling (wrong arguments)
    expect_raises(TypeError, "hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    expect_raises(UnicodeError, int, "\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000859
Walter Dörwald28256f22003-01-19 16:59:20 +0000860 def test_codecs(self):
861 # Encoding
Walter Dörwald67e83882007-05-05 12:26:27 +0000862 self.assertEqual('hello'.encode('ascii'), b'hello')
863 self.assertEqual('hello'.encode('utf-7'), b'hello')
864 self.assertEqual('hello'.encode('utf-8'), b'hello')
865 self.assertEqual('hello'.encode('utf8'), b'hello')
866 self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
867 self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
868 self.assertEqual('hello'.encode('latin-1'), b'hello')
Guido van Rossum97064862000-04-10 13:52:48 +0000869
Walter Dörwald28256f22003-01-19 16:59:20 +0000870 # Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000871 for c in range(1024):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000872 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000873 for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
874 'utf-16-be', 'raw_unicode_escape',
875 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000876 self.assertEqual(str(u.encode(encoding),encoding), u)
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000877
Walter Dörwald28256f22003-01-19 16:59:20 +0000878 # Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000879 for c in range(256):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000880 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000881 for encoding in ('latin-1',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000882 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000883
Walter Dörwald28256f22003-01-19 16:59:20 +0000884 # Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000885 for c in range(128):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000886 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000887 for encoding in ('ascii',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000888 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000889
Walter Dörwald28256f22003-01-19 16:59:20 +0000890 # Roundtrip safety for non-BMP (just a few chars)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000891 u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
Walter Dörwald28256f22003-01-19 16:59:20 +0000892 for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
893 #'raw_unicode_escape',
894 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000895 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000896
Walter Dörwald28256f22003-01-19 16:59:20 +0000897 # UTF-8 must be roundtrip safe for all UCS-2 code points
898 # This excludes surrogates: in the full range, there would be
899 # a surrogate pair (\udbff\udc00), which gets converted back
900 # to a non-BMP character (\U0010fc00)
Walter Dörwald1324c6f2007-05-11 19:57:05 +0000901 u = ''.join(map(chr, list(range(0,0xd800)) +
902 list(range(0xe000,0x10000))))
Walter Dörwald28256f22003-01-19 16:59:20 +0000903 for encoding in ('utf-8',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000904 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000905
Walter Dörwald28256f22003-01-19 16:59:20 +0000906 def test_codecs_charmap(self):
907 # 0-127
Guido van Rossum805365e2007-05-07 22:24:25 +0000908 s = bytes(range(128))
Walter Dörwald28256f22003-01-19 16:59:20 +0000909 for encoding in (
910 'cp037', 'cp1026',
911 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
912 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
913 'cp863', 'cp865', 'cp866',
914 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
915 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
916 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
917 'mac_cyrillic', 'mac_latin2',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000918
Walter Dörwald28256f22003-01-19 16:59:20 +0000919 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
920 'cp1256', 'cp1257', 'cp1258',
921 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000922
Walter Dörwald28256f22003-01-19 16:59:20 +0000923 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
924 'cp1006', 'iso8859_8',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000925
Walter Dörwald28256f22003-01-19 16:59:20 +0000926 ### These have undefined mappings:
927 #'cp424',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000928
Walter Dörwald28256f22003-01-19 16:59:20 +0000929 ### These fail the round-trip:
930 #'cp875'
Guido van Rossum9e896b32000-04-05 20:11:21 +0000931
Walter Dörwald28256f22003-01-19 16:59:20 +0000932 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000933 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000934
Walter Dörwald28256f22003-01-19 16:59:20 +0000935 # 128-255
Guido van Rossum805365e2007-05-07 22:24:25 +0000936 s = bytes(range(128, 256))
Walter Dörwald28256f22003-01-19 16:59:20 +0000937 for encoding in (
938 'cp037', 'cp1026',
939 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
940 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
941 'cp863', 'cp865', 'cp866',
942 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
943 'iso8859_2', 'iso8859_4', 'iso8859_5',
944 'iso8859_9', 'koi8_r', 'latin_1',
945 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000946
Walter Dörwald28256f22003-01-19 16:59:20 +0000947 ### These have undefined mappings:
948 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
949 #'cp1256', 'cp1257', 'cp1258',
950 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
951 #'iso8859_3', 'iso8859_6', 'iso8859_7',
952 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000953
Walter Dörwald28256f22003-01-19 16:59:20 +0000954 ### These fail the round-trip:
955 #'cp1006', 'cp875', 'iso8859_8',
Tim Peters2f228e72001-05-13 00:19:31 +0000956
Walter Dörwald28256f22003-01-19 16:59:20 +0000957 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000958 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000959
Walter Dörwald28256f22003-01-19 16:59:20 +0000960 def test_concatenation(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000961 self.assertEqual(("abc" "def"), "abcdef")
962 self.assertEqual(("abc" "def"), "abcdef")
963 self.assertEqual(("abc" "def"), "abcdef")
964 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
965 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +0000966
Walter Dörwald28256f22003-01-19 16:59:20 +0000967 def test_printing(self):
968 class BitBucket:
969 def write(self, text):
970 pass
Fred Drake004d5e62000-10-23 17:22:08 +0000971
Walter Dörwald28256f22003-01-19 16:59:20 +0000972 out = BitBucket()
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000973 print('abc', file=out)
974 print('abc', 'def', file=out)
975 print('abc', 'def', file=out)
976 print('abc', 'def', file=out)
977 print('abc\n', file=out)
978 print('abc\n', end=' ', file=out)
979 print('abc\n', end=' ', file=out)
980 print('def\n', file=out)
981 print('def\n', file=out)
Fred Drake004d5e62000-10-23 17:22:08 +0000982
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000983 def test_ucs4(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000984 x = '\U00100000'
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000985 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
986 self.assertEqual(x, y)
987
Christian Heimesfe337bf2008-03-23 21:54:12 +0000988 # FIXME
989 #y = r'\U00100000'
990 #x = y.encode("raw-unicode-escape").decode("raw-unicode-escape")
991 #self.assertEqual(x, y)
992 #y = r'\U00010000'
993 #x = y.encode("raw-unicode-escape").decode("raw-unicode-escape")
994 #self.assertEqual(x, y)
995
996 #try:
997 # '\U11111111'.decode("raw-unicode-escape")
998 #except UnicodeDecodeError as e:
999 # self.assertEqual(e.start, 0)
1000 # self.assertEqual(e.end, 10)
1001 #else:
1002 # self.fail("Should have raised UnicodeDecodeError")
1003
Brett Cannonc3647ac2005-04-26 03:45:26 +00001004 def test_conversion(self):
1005 # Make sure __unicode__() works properly
1006 class Foo0:
1007 def __str__(self):
1008 return "foo"
1009
1010 class Foo1:
Guido van Rossum98297ee2007-11-06 21:34:58 +00001011 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001012 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001013
1014 class Foo2(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001015 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001016 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001017
1018 class Foo3(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001019 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001020 return "foo"
1021
1022 class Foo4(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001023 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001024 return "foo"
1025
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001026 class Foo5(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001027 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001028 return "foo"
1029
1030 class Foo6(str):
1031 def __str__(self):
1032 return "foos"
1033
Guido van Rossum98297ee2007-11-06 21:34:58 +00001034 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001035 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001036
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001037 class Foo7(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001038 def __str__(self):
1039 return "foos"
Guido van Rossum98297ee2007-11-06 21:34:58 +00001040 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001041 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001042
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001043 class Foo8(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001044 def __new__(cls, content=""):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001045 return str.__new__(cls, 2*content)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001046 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001047 return self
1048
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001049 class Foo9(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001050 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001051 return "not unicode"
1052
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001053 self.assertEqual(str(Foo0()), "foo")
1054 self.assertEqual(str(Foo1()), "foo")
1055 self.assertEqual(str(Foo2()), "foo")
1056 self.assertEqual(str(Foo3()), "foo")
1057 self.assertEqual(str(Foo4("bar")), "foo")
1058 self.assertEqual(str(Foo5("bar")), "foo")
1059 self.assertEqual(str(Foo6("bar")), "foou")
1060 self.assertEqual(str(Foo7("bar")), "foou")
1061 self.assertEqual(str(Foo8("foo")), "foofoo")
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001062 self.assertEqual(str(Foo9("foo")), "not unicode")
Brett Cannonc3647ac2005-04-26 03:45:26 +00001063
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001064 def test_unicode_repr(self):
1065 class s1:
1066 def __repr__(self):
1067 return '\\n'
1068
1069 class s2:
1070 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001071 return '\\n'
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072
1073 self.assertEqual(repr(s1()), '\\n')
1074 self.assertEqual(repr(s2()), '\\n')
1075
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001076 def test_expandtabs_overflows_gracefully(self):
1077 # This test only affects 32-bit platforms because expandtabs can only take
1078 # an int as the max value, not a 64-bit C long. If expandtabs is changed
1079 # to take a 64-bit long, this test should apply to all platforms.
Christian Heimesa37d4c62007-12-04 23:02:19 +00001080 if sys.maxsize > (1 << 32) or struct.calcsize('P') != 4:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001081 return
Christian Heimesa37d4c62007-12-04 23:02:19 +00001082 self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001083
1084
def test_main():
    """Run the tests of this module through support.run_unittest()."""
    this_module = __name__
    support.run_unittest(this_module)


# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()