blob: eed84929cfdeabd27dca13e248a0194ca4a6f618 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Guido van Rossum98297ee2007-11-06 21:34:58 +00009import codecs
10import struct
11import sys
12import unittest
13import warnings
Walter Dörwald0fd583c2003-02-21 12:53:50 +000014from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000015
Neal Norwitz430f68b2005-11-24 22:00:56 +000016# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search hook serving two deliberately broken test codecs.

    "test.unicode1" yields an encoder/decoder pair that return a bare int
    instead of a tuple; "test.unicode2" yields a pair that return a tuple
    whose items are ints rather than text.  Every other name yields None.
    """
    if encoding == "test.unicode1":
        def encode1(input, errors="strict"):
            return 42  # not a tuple

        def decode1(input, errors="strict"):
            return 42  # not a tuple

        return (encode1, decode1, None, None)

    if encoding == "test.unicode2":
        def encode2(input, errors="strict"):
            return (42, 42)  # no unicode

        def decode2(input, errors="strict"):
            return (42, 42)  # no unicode

        return (encode2, decode2, None, None)

    return None


codecs.register(search_function)
33
Walter Dörwald0fd583c2003-02-21 12:53:50 +000034class UnicodeTest(
35 string_tests.CommonTest,
Walter Dörwald57d88e52004-08-26 16:53:04 +000036 string_tests.MixinStrUnicodeUserStringTest,
37 string_tests.MixinStrUnicodeTest,
Walter Dörwald0fd583c2003-02-21 12:53:50 +000038 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000039 type2test = str
Walter Dörwald0fd583c2003-02-21 12:53:50 +000040
Guido van Rossum98297ee2007-11-06 21:34:58 +000041 def setUp(self):
42 self.warning_filters = warnings.filters[:]
43
44 def tearDown(self):
45 warnings.filters = self.warning_filters
46
Walter Dörwald0fd583c2003-02-21 12:53:50 +000047 def checkequalnofix(self, result, object, methodname, *args):
48 method = getattr(object, methodname)
49 realresult = method(*args)
50 self.assertEqual(realresult, result)
51 self.assert_(type(realresult) is type(result))
52
53 # if the original is returned make sure that
54 # this doesn't happen with subclasses
55 if realresult is object:
Guido van Rossumef87d6e2007-05-02 19:09:54 +000056 class usub(str):
Walter Dörwald0fd583c2003-02-21 12:53:50 +000057 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000058 return 'usub(%r)' % str.__repr__(self)
Walter Dörwald0fd583c2003-02-21 12:53:50 +000059 object = usub(object)
60 method = getattr(object, methodname)
61 realresult = method(*args)
62 self.assertEqual(realresult, result)
63 self.assert_(object is not realresult)
Guido van Rossume4874ae2001-09-21 15:36:41 +000064
Jeremy Hylton504de6b2003-10-06 05:08:26 +000065 def test_literals(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +000066 self.assertEqual('\xff', '\u00ff')
67 self.assertEqual('\uffff', '\U0000ffff')
Guido van Rossum36e0a922007-07-20 04:05:57 +000068 self.assertRaises(SyntaxError, eval, '\'\\Ufffffffe\'')
69 self.assertRaises(SyntaxError, eval, '\'\\Uffffffff\'')
70 self.assertRaises(SyntaxError, eval, '\'\\U%08x\'' % 0x110000)
Jeremy Hylton504de6b2003-10-06 05:08:26 +000071
    def test_repr(self):
        # Checks repr() of str values against exact expected text.  The whole
        # body is skipped when sys.platform starts with 'java'.
        if not sys.platform.startswith('java'):
            # Test basic sanity of repr()
            self.assertEqual(repr('abc'), "'abc'")
            # A backslash is doubled in the repr.
            self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
            self.assertEqual(repr('ab\\'), "'ab\\\\'")
            self.assertEqual(repr('\\c'), "'\\\\c'")
            self.assertEqual(repr('\\'), "'\\\\'")
            # Newline, carriage return and tab use their short escapes;
            # backspace is written as \x08.
            self.assertEqual(repr('\n'), "'\\n'")
            self.assertEqual(repr('\r'), "'\\r'")
            self.assertEqual(repr('\t'), "'\\t'")
            self.assertEqual(repr('\b'), "'\\x08'")
            # Quote selection: a lone single quote switches the delimiters to
            # double quotes; with both kinds present the single quote is
            # escaped instead.
            self.assertEqual(repr("'\""), """'\\'"'""")
            self.assertEqual(repr("'\""), """'\\'"'""")
            self.assertEqual(repr("'"), '''"'"''')
            self.assertEqual(repr('"'), """'"'""")
            # Expected repr of the 256 code points 0x00-0xff joined into one
            # string: only space through '~' appear literally (with the single
            # quote and the backslash escaped); everything else is written as
            # \t, \n, \r or \xNN.
            latin1repr = (
                "'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
                "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
                "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
                "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
                "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
                "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
                "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
                "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
                "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
                "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
                "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
                "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
                "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
                "\\xfe\\xff'")
            testrepr = repr(''.join(map(chr, range(256))))
            self.assertEqual(testrepr, latin1repr)
            # Test repr works on wide unicode escapes without overflow.
            # Both sides are the same expression, so this only verifies that
            # repr() of the long string completes.
            self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
                             repr("\U00010000" * 39 + "\uffff" * 4096))
Walter Dörwald28256f22003-01-19 16:59:20 +0000108
Guido van Rossum49d6b072006-08-17 21:11:47 +0000109 def test_iterators(self):
110 # Make sure unicode objects have an __iter__ method
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000111 it = "\u1111\u2222\u3333".__iter__()
112 self.assertEqual(next(it), "\u1111")
113 self.assertEqual(next(it), "\u2222")
114 self.assertEqual(next(it), "\u3333")
Georg Brandla18af4e2007-04-21 15:47:16 +0000115 self.assertRaises(StopIteration, next, it)
Guido van Rossum49d6b072006-08-17 21:11:47 +0000116
Walter Dörwald28256f22003-01-19 16:59:20 +0000117 def test_count(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000118 string_tests.CommonTest.test_count(self)
119 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000120 self.checkequalnofix(3, 'aaa', 'count', 'a')
121 self.checkequalnofix(0, 'aaa', 'count', 'b')
122 self.checkequalnofix(3, 'aaa', 'count', 'a')
123 self.checkequalnofix(0, 'aaa', 'count', 'b')
124 self.checkequalnofix(0, 'aaa', 'count', 'b')
125 self.checkequalnofix(1, 'aaa', 'count', 'a', -1)
126 self.checkequalnofix(3, 'aaa', 'count', 'a', -10)
127 self.checkequalnofix(2, 'aaa', 'count', 'a', 0, -1)
128 self.checkequalnofix(0, 'aaa', 'count', 'a', 0, -10)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000129
Walter Dörwald28256f22003-01-19 16:59:20 +0000130 def test_find(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000131 self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc')
132 self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1)
133 self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000134
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000135 self.assertRaises(TypeError, 'hello'.find)
136 self.assertRaises(TypeError, 'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000137
Walter Dörwald28256f22003-01-19 16:59:20 +0000138 def test_rfind(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000139 string_tests.CommonTest.test_rfind(self)
140 # check mixed argument types
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000141 self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc')
142 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
143 self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '')
Guido van Rossum8b264542000-12-19 02:22:31 +0000144
Walter Dörwald28256f22003-01-19 16:59:20 +0000145 def test_index(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000146 string_tests.CommonTest.test_index(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000147 self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
148 self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def')
149 self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc')
150 self.checkequalnofix(9, 'abcdefghiabc', 'index', 'abc', 1)
151 self.assertRaises(ValueError, 'abcdefghiabc'.index, 'hib')
152 self.assertRaises(ValueError, 'abcdefghiab'.index, 'abc', 1)
153 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', 8)
154 self.assertRaises(ValueError, 'abcdefghi'.index, 'ghi', -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000155
Walter Dörwald28256f22003-01-19 16:59:20 +0000156 def test_rindex(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000157 string_tests.CommonTest.test_rindex(self)
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000158 self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '')
159 self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def')
160 self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc')
161 self.checkequalnofix(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000162
Walter Dörwaldaa97f042007-05-03 21:05:51 +0000163 self.assertRaises(ValueError, 'abcdefghiabc'.rindex, 'hib')
164 self.assertRaises(ValueError, 'defghiabc'.rindex, 'def', 1)
165 self.assertRaises(ValueError, 'defghiabc'.rindex, 'abc', 0, -1)
166 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
167 self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000168
Georg Brandlceee0772007-11-27 23:48:05 +0000169 def test_maketrans_translate(self):
170 # these work with plain translate()
171 self.checkequalnofix('bbbc', 'abababc', 'translate',
172 {ord('a'): None})
173 self.checkequalnofix('iiic', 'abababc', 'translate',
174 {ord('a'): None, ord('b'): ord('i')})
175 self.checkequalnofix('iiix', 'abababc', 'translate',
176 {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
177 self.checkequalnofix('c', 'abababc', 'translate',
178 {ord('a'): None, ord('b'): ''})
179 self.checkequalnofix('xyyx', 'xzx', 'translate',
180 {ord('z'): 'yy'})
181 # this needs maketrans()
182 self.checkequalnofix('abababc', 'abababc', 'translate',
183 {'b': '<i>'})
184 tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
185 self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
186 # test alternative way of calling maketrans()
187 tbl = self.type2test.maketrans('abc', 'xyz', 'd')
188 self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
189
190 self.assertRaises(TypeError, self.type2test.maketrans)
191 self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
192 self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
193 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
194 self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
195 self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
196 self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000197
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000198 self.assertRaises(TypeError, 'hello'.translate)
Walter Dörwald67e83882007-05-05 12:26:27 +0000199 self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000200
Walter Dörwald28256f22003-01-19 16:59:20 +0000201 def test_split(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000202 string_tests.CommonTest.test_split(self)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000203
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000204 # Mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000205 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
206 self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
207 self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000208
Walter Dörwald28256f22003-01-19 16:59:20 +0000209 def test_join(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000210 string_tests.MixinStrUnicodeUserStringTest.test_join(self)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000211
Guido van Rossumf1044292007-09-27 18:01:22 +0000212 class MyWrapper:
213 def __init__(self, sval): self.sval = sval
214 def __str__(self): return self.sval
215
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000216 # mixed arguments
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000217 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
218 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
219 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
220 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
221 self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
222 self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
223 self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
Guido van Rossum98297ee2007-11-06 21:34:58 +0000224 self.checkraises(TypeError, ' ', 'join', ['1', '2', MyWrapper('foo')])
225 self.checkraises(TypeError, ' ', 'join', ['1', '2', '3', bytes()])
226 self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
227 self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
Marc-André Lemburge5034372000-08-08 08:04:29 +0000228
Walter Dörwald28256f22003-01-19 16:59:20 +0000229 def test_replace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000230 string_tests.CommonTest.test_replace(self)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000231
Walter Dörwald28256f22003-01-19 16:59:20 +0000232 # method call forwarded from str implementation because of unicode argument
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000233 self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
234 self.assertRaises(TypeError, 'replace'.replace, "r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000235
Guido van Rossum98297ee2007-11-06 21:34:58 +0000236 def test_bytes_comparison(self):
237 warnings.simplefilter('ignore', BytesWarning)
238 self.assertEqual('abc' == b'abc', False)
239 self.assertEqual('abc' != b'abc', True)
Guido van Rossum254348e2007-11-21 19:29:53 +0000240 self.assertEqual('abc' == bytearray(b'abc'), False)
241 self.assertEqual('abc' != bytearray(b'abc'), True)
Brett Cannon40430012007-10-22 20:24:51 +0000242
Walter Dörwald28256f22003-01-19 16:59:20 +0000243 def test_comparison(self):
244 # Comparisons:
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000245 self.assertEqual('abc', 'abc')
246 self.assertEqual('abc', 'abc')
247 self.assertEqual('abc', 'abc')
248 self.assert_('abcd' > 'abc')
249 self.assert_('abcd' > 'abc')
250 self.assert_('abcd' > 'abc')
251 self.assert_('abc' < 'abcd')
252 self.assert_('abc' < 'abcd')
253 self.assert_('abc' < 'abcd')
Walter Dörwald28256f22003-01-19 16:59:20 +0000254
255 if 0:
256 # Move these tests to a Unicode collation module test...
257 # Testing UTF-16 code point order comparisons...
258
259 # No surrogates, no fixup required.
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000260 self.assert_('\u0061' < '\u20ac')
Walter Dörwald28256f22003-01-19 16:59:20 +0000261 # Non surrogate below surrogate value, no fixup required
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000262 self.assert_('\u0061' < '\ud800\udc02')
Walter Dörwald28256f22003-01-19 16:59:20 +0000263
264 # Non surrogate above surrogate value, fixup required
265 def test_lecmp(s, s2):
266 self.assert_(s < s2)
267
268 def test_fixup(s):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000269 s2 = '\ud800\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000270 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000271 s2 = '\ud900\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000272 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000273 s2 = '\uda00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000274 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000275 s2 = '\udb00\udc01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000276 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000277 s2 = '\ud800\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000278 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000279 s2 = '\ud900\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000280 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000281 s2 = '\uda00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000282 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000283 s2 = '\udb00\udd01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000284 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000285 s2 = '\ud800\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000286 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000287 s2 = '\ud900\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000288 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000289 s2 = '\uda00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000290 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000291 s2 = '\udb00\ude01'
Walter Dörwald28256f22003-01-19 16:59:20 +0000292 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000293 s2 = '\ud800\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000294 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000295 s2 = '\ud900\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000296 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000297 s2 = '\uda00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000298 test_lecmp(s, s2)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000299 s2 = '\udb00\udfff'
Walter Dörwald28256f22003-01-19 16:59:20 +0000300 test_lecmp(s, s2)
301
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000302 test_fixup('\ue000')
303 test_fixup('\uff61')
Walter Dörwald28256f22003-01-19 16:59:20 +0000304
305 # Surrogates on both sides, no fixup required
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000306 self.assert_('\ud800\udc02' < '\ud84d\udc56')
Walter Dörwald28256f22003-01-19 16:59:20 +0000307
Walter Dörwald28256f22003-01-19 16:59:20 +0000308 def test_islower(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000309 string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000310 self.checkequalnofix(False, '\u1FFc', 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000311
312 def test_isupper(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000313 string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
314 if not sys.platform.startswith('java'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000315 self.checkequalnofix(False, '\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000316
317 def test_istitle(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000318 string_tests.MixinStrUnicodeUserStringTest.test_title(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000319 self.checkequalnofix(True, '\u1FFc', 'istitle')
320 self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000321
322 def test_isspace(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000323 string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000324 self.checkequalnofix(True, '\u2000', 'isspace')
325 self.checkequalnofix(True, '\u200a', 'isspace')
326 self.checkequalnofix(False, '\u2014', 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000327
328 def test_isalpha(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000329 string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000330 self.checkequalnofix(True, '\u1FFc', 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000331
332 def test_isdecimal(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000333 self.checkequalnofix(False, '', 'isdecimal')
334 self.checkequalnofix(False, 'a', 'isdecimal')
335 self.checkequalnofix(True, '0', 'isdecimal')
336 self.checkequalnofix(False, '\u2460', 'isdecimal') # CIRCLED DIGIT ONE
337 self.checkequalnofix(False, '\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
338 self.checkequalnofix(True, '\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
339 self.checkequalnofix(True, '0123456789', 'isdecimal')
340 self.checkequalnofix(False, '0123456789a', 'isdecimal')
Walter Dörwald28256f22003-01-19 16:59:20 +0000341
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000342 self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000343
344 def test_isdigit(self):
Walter Dörwald0fd583c2003-02-21 12:53:50 +0000345 string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000346 self.checkequalnofix(True, '\u2460', 'isdigit')
347 self.checkequalnofix(False, '\xbc', 'isdigit')
348 self.checkequalnofix(True, '\u0660', 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000349
350 def test_isnumeric(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000351 self.checkequalnofix(False, '', 'isnumeric')
352 self.checkequalnofix(False, 'a', 'isnumeric')
353 self.checkequalnofix(True, '0', 'isnumeric')
354 self.checkequalnofix(True, '\u2460', 'isnumeric')
355 self.checkequalnofix(True, '\xbc', 'isnumeric')
356 self.checkequalnofix(True, '\u0660', 'isnumeric')
357 self.checkequalnofix(True, '0123456789', 'isnumeric')
358 self.checkequalnofix(False, '0123456789a', 'isnumeric')
Walter Dörwald28256f22003-01-19 16:59:20 +0000359
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000360 self.assertRaises(TypeError, "abc".isnumeric, 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000361
Martin v. Löwis47383402007-08-15 07:32:56 +0000362 def test_isidentifier(self):
363 self.assertTrue("a".isidentifier())
364 self.assertTrue("Z".isidentifier())
365 self.assertTrue("_".isidentifier())
366 self.assertTrue("b0".isidentifier())
367 self.assertTrue("bc".isidentifier())
368 self.assertTrue("b_".isidentifier())
369 self.assertTrue("µ".isidentifier())
370
371 self.assertFalse(" ".isidentifier())
372 self.assertFalse("[".isidentifier())
373 self.assertFalse("©".isidentifier())
374
Walter Dörwald28256f22003-01-19 16:59:20 +0000375 def test_contains(self):
376 # Testing Unicode contains method
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000377 self.assert_('a' in 'abdb')
378 self.assert_('a' in 'bdab')
379 self.assert_('a' in 'bdaba')
380 self.assert_('a' in 'bdba')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000381 self.assert_('a' not in 'bdb')
382 self.assert_('a' in 'bdba')
Walter Dörwald28256f22003-01-19 16:59:20 +0000383 self.assert_('a' in ('a',1,None))
384 self.assert_('a' in (1,None,'a'))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000385 self.assert_('a' in ('a',1,None))
386 self.assert_('a' in (1,None,'a'))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000387 self.assert_('a' not in ('x',1,'y'))
Walter Dörwald28256f22003-01-19 16:59:20 +0000388 self.assert_('a' not in ('x',1,None))
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000389 self.assert_('abcd' not in 'abcxxxx')
390 self.assert_('ab' in 'abcd')
391 self.assert_('ab' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000392 self.assert_('ab' in (1,None,'ab'))
393 self.assert_('' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000394 self.assert_('' in '')
395 self.assert_('' in 'abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000396 self.assert_('\0' not in 'abc')
397 self.assert_('\0' in '\0abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000398 self.assert_('\0' in 'abc\0')
399 self.assert_('a' in '\0abc')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000400 self.assert_('asdf' in 'asdf')
401 self.assert_('asdf' not in 'asd')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000402 self.assert_('asdf' not in '')
Walter Dörwald28256f22003-01-19 16:59:20 +0000403
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000404 self.assertRaises(TypeError, "abc".__contains__)
Walter Dörwald28256f22003-01-19 16:59:20 +0000405
Eric Smith8c663262007-08-25 02:26:07 +0000406 def test_format(self):
407 self.assertEqual(''.format(), '')
408 self.assertEqual('a'.format(), 'a')
409 self.assertEqual('ab'.format(), 'ab')
410 self.assertEqual('a{{'.format(), 'a{')
411 self.assertEqual('a}}'.format(), 'a}')
412 self.assertEqual('{{b'.format(), '{b')
413 self.assertEqual('}}b'.format(), '}b')
414 self.assertEqual('a{{b'.format(), 'a{b')
415
416 # examples from the PEP:
417 import datetime
418 self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
419 self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
420 "My name is Fred")
421 self.assertEqual("My name is {0} :-{{}}".format('Fred'),
422 "My name is Fred :-{}")
423
424 d = datetime.date(2007, 8, 18)
425 self.assertEqual("The year is {0.year}".format(d),
426 "The year is 2007")
427
Eric Smith8c663262007-08-25 02:26:07 +0000428 # classes we'll use for testing
429 class C:
430 def __init__(self, x=100):
431 self._x = x
432 def __format__(self, spec):
433 return spec
434
435 class D:
436 def __init__(self, x):
437 self.x = x
438 def __format__(self, spec):
439 return str(self.x)
440
441 # class with __str__, but no __format__
442 class E:
443 def __init__(self, x):
444 self.x = x
445 def __str__(self):
446 return 'E(' + self.x + ')'
447
448 # class with __repr__, but no __format__ or __str__
449 class F:
450 def __init__(self, x):
451 self.x = x
452 def __repr__(self):
453 return 'F(' + self.x + ')'
454
455 # class with __format__ that forwards to string, for some format_spec's
456 class G:
457 def __init__(self, x):
458 self.x = x
459 def __str__(self):
460 return "string is " + self.x
461 def __format__(self, format_spec):
462 if format_spec == 'd':
463 return 'G(' + self.x + ')'
464 return object.__format__(self, format_spec)
465
466 # class that returns a bad type from __format__
467 class H:
468 def __format__(self, format_spec):
469 return 1.0
470
Eric Smith739e2ad2007-08-27 19:07:22 +0000471 class I(datetime.date):
472 def __format__(self, format_spec):
473 return self.strftime(format_spec)
474
Eric Smith185e30c2007-08-30 22:23:08 +0000475 class J(int):
476 def __format__(self, format_spec):
477 return int.__format__(self * 2, format_spec)
478
Eric Smith8c663262007-08-25 02:26:07 +0000479
480 self.assertEqual(''.format(), '')
481 self.assertEqual('abc'.format(), 'abc')
482 self.assertEqual('{0}'.format('abc'), 'abc')
483 self.assertEqual('{0:}'.format('abc'), 'abc')
484# self.assertEqual('{ 0 }'.format('abc'), 'abc')
485 self.assertEqual('X{0}'.format('abc'), 'Xabc')
486 self.assertEqual('{0}X'.format('abc'), 'abcX')
487 self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
488 self.assertEqual('{1}'.format(1, 'abc'), 'abc')
489 self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
490 self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
491 self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
492 self.assertEqual('{0}'.format(-15), '-15')
493 self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
494 self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
495 self.assertEqual('{{'.format(), '{')
496 self.assertEqual('}}'.format(), '}')
497 self.assertEqual('{{}}'.format(), '{}')
498 self.assertEqual('{{x}}'.format(), '{x}')
499 self.assertEqual('{{{0}}}'.format(123), '{123}')
500 self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
501 self.assertEqual('}}{{'.format(), '}{')
502 self.assertEqual('}}x{{'.format(), '}x{')
503
Eric Smith7ade6482007-08-26 22:27:13 +0000504 # weird field names
505 self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
506 self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000507 self.assertEqual("{0[ ]}".format({' ':3}), '3')
Eric Smith7ade6482007-08-26 22:27:13 +0000508
Eric Smith8c663262007-08-25 02:26:07 +0000509 self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
510 self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
511 self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
512 self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
513 self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
514 self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
515 self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
516
Eric Smith8c663262007-08-25 02:26:07 +0000517 # strings
518 self.assertEqual('{0:.3s}'.format('abc'), 'abc')
519 self.assertEqual('{0:.3s}'.format('ab'), 'ab')
520 self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
521 self.assertEqual('{0:.0s}'.format('abcdef'), '')
522 self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
523 self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
524 self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
525 self.assertEqual('{0:3.2s}'.format('abc'), 'ab ')
526 self.assertEqual('{0:x<0s}'.format('result'), 'result')
527 self.assertEqual('{0:x<5s}'.format('result'), 'result')
528 self.assertEqual('{0:x<6s}'.format('result'), 'result')
529 self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
530 self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
531 self.assertEqual('{0: <7s}'.format('result'), 'result ')
532 self.assertEqual('{0:<7s}'.format('result'), 'result ')
533 self.assertEqual('{0:>7s}'.format('result'), ' result')
534 self.assertEqual('{0:>8s}'.format('result'), ' result')
535 self.assertEqual('{0:^8s}'.format('result'), ' result ')
536 self.assertEqual('{0:^9s}'.format('result'), ' result ')
537 self.assertEqual('{0:^10s}'.format('result'), ' result ')
538 self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
539 self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
540 self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
541
542 # format specifiers for user defined type
543 self.assertEqual('{0:abc}'.format(C()), 'abc')
544
        # !r and !s coercions
546 self.assertEqual('{0!s}'.format('Hello'), 'Hello')
547 self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
548 self.assertEqual('{0!s:15}'.format('Hello'), 'Hello ')
549 self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello ')
550 self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
551 self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
552 self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
553
Eric Smith8c663262007-08-25 02:26:07 +0000554 # test fallback to object.__format__
555 self.assertEqual('{0}'.format({}), '{}')
556 self.assertEqual('{0}'.format([]), '[]')
557 self.assertEqual('{0}'.format([1]), '[1]')
558 self.assertEqual('{0}'.format(E('data')), 'E(data)')
559 self.assertEqual('{0:^10}'.format(E('data')), ' E(data) ')
560 self.assertEqual('{0:^10s}'.format(E('data')), ' E(data) ')
561 self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
562 self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
563 self.assertEqual('{0!s}'.format(G('data')), 'string is data')
564
Eric Smith739e2ad2007-08-27 19:07:22 +0000565 self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
566 month=8,
567 day=27)),
568 "date: 2007-08-27")
569
Eric Smith185e30c2007-08-30 22:23:08 +0000570 # test deriving from a builtin type and overriding __format__
571 self.assertEqual("{0}".format(J(10)), "20")
572
573
Eric Smith8c663262007-08-25 02:26:07 +0000574 # string format specifiers
575 self.assertEqual('{0:}'.format('a'), 'a')
576
577 # computed format specifiers
578 self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
579 self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
580 self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
581 self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ')
582 self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ')
583
584 # test various errors
585 self.assertRaises(ValueError, '{'.format)
586 self.assertRaises(ValueError, '}'.format)
587 self.assertRaises(ValueError, 'a{'.format)
588 self.assertRaises(ValueError, 'a}'.format)
589 self.assertRaises(ValueError, '{a'.format)
590 self.assertRaises(ValueError, '}a'.format)
Eric Smith11529192007-09-04 23:04:22 +0000591 self.assertRaises(IndexError, '{0}'.format)
592 self.assertRaises(IndexError, '{1}'.format, 'abc')
593 self.assertRaises(KeyError, '{x}'.format)
Eric Smith8c663262007-08-25 02:26:07 +0000594 self.assertRaises(ValueError, "}{".format)
595 self.assertRaises(ValueError, "{".format)
596 self.assertRaises(ValueError, "}".format)
597 self.assertRaises(ValueError, "abc{0:{}".format)
598 self.assertRaises(ValueError, "{0".format)
Eric Smith11529192007-09-04 23:04:22 +0000599 self.assertRaises(IndexError, "{0.}".format)
600 self.assertRaises(ValueError, "{0.}".format, 0)
601 self.assertRaises(IndexError, "{0[}".format)
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000602 self.assertRaises(ValueError, "{0[}".format, [])
Eric Smith11529192007-09-04 23:04:22 +0000603 self.assertRaises(KeyError, "{0]}".format)
604 self.assertRaises(ValueError, "{0.[]}".format, 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000605 self.assertRaises(ValueError, "{0..foo}".format, 0)
Eric Smith11529192007-09-04 23:04:22 +0000606 self.assertRaises(ValueError, "{0[0}".format, 0)
607 self.assertRaises(ValueError, "{0[0:foo}".format, 0)
608 self.assertRaises(KeyError, "{c]}".format)
609 self.assertRaises(ValueError, "{{ {{{0}}".format, 0)
610 self.assertRaises(ValueError, "{0}}".format, 0)
611 self.assertRaises(KeyError, "{foo}".format, bar=3)
Eric Smith8c663262007-08-25 02:26:07 +0000612 self.assertRaises(ValueError, "{0!x}".format, 3)
Eric Smith11529192007-09-04 23:04:22 +0000613 self.assertRaises(ValueError, "{0!}".format, 0)
614 self.assertRaises(ValueError, "{0!rs}".format, 0)
Eric Smith8c663262007-08-25 02:26:07 +0000615 self.assertRaises(ValueError, "{!}".format)
616 self.assertRaises(ValueError, "{:}".format)
Eric Smith7ade6482007-08-26 22:27:13 +0000617 self.assertRaises(ValueError, "{:s}".format)
Eric Smith8c663262007-08-25 02:26:07 +0000618 self.assertRaises(ValueError, "{}".format)
619
620 # can't have a replacement on the field name portion
621 self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
622
623 # exceed maximum recursion depth
624 self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
625 self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
626 0, 1, 2, 3, 4, 5, 6, 7)
627
628 # string format spec errors
629 self.assertRaises(ValueError, "{0:-s}".format, '')
630 self.assertRaises(ValueError, format, "", "-")
631 self.assertRaises(ValueError, "{0:=s}".format, '')
632
def test_formatting(self):
    """Exercise ``%``-style formatting on ``str``.

    Runs the shared mixin checks first, then covers positional and mapping
    arguments, ``%c`` with code points, ``*`` width/precision and objects
    that only provide ``__str__``.
    """
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)

    # Testing Unicode formatting strings...
    self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')

    # "%5.2f" pads to five columns, so every value narrower than that is
    # preceded by an extra space in the expected text.
    mixed = "%s, %s, %i, %f, %5.2f"
    for args, expected in (
        (("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000,  3.00'),
        (("abc", "abc", 1, -2, 3), 'abc, abc, 1, -2.000000,  3.00'),
        (("abc", "abc", -1, -2, 3.5), 'abc, abc, -1, -2.000000,  3.50'),
        (("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000,  3.57'),
        (("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57'),
    ):
        self.assertEqual(mixed % args, expected)

    if not sys.platform.startswith('java'):
        self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")

    # Mapping arguments, including a non-ASCII key.
    self.assertEqual("%(x)s, %(y)s" % {'x': "abc", 'y': "def"}, 'abc, def')
    self.assertEqual("%(x)s, %(\xfc)s" % {'x': "abc", '\xfc': "def"}, 'abc, def')

    # %c takes a code point but must reject one past sys.maxunicode.
    self.assertEqual('%c' % 0x1234, '\u1234')
    self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo': "abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {'foo': "abc", 'def': 123}, '...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1, 2, 3, "abc"),
                     '...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1, 2, 3, "abc"),
                     '...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % "abc", '...abc...')

    # "*" pulls width (and precision) from the argument tuple; a negative
    # width left-justifies.
    self.assertEqual('%*s' % (5, 'abc',), '  abc')
    self.assertEqual('%*s' % (-5, 'abc',), 'abc  ')
    self.assertEqual('%*.*s' % (5, 2, 'abc',), '   ab')
    self.assertEqual('%*.*s' % (5, 3, 'abc',), '  abc')
    self.assertEqual('%i %*.*s' % (10, 5, 3, 'abc',), '10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, 'abc',), '103   abc')
    self.assertEqual('%c' % 'a', 'a')

    class Wrapper:
        # Only provides __str__, returning a non-ASCII character.
        def __str__(self):
            return '\u1234'
    self.assertEqual('%s' % Wrapper(), '\u1234')
Walter Dörwald28256f22003-01-19 16:59:20 +0000671
@test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    """Check that '%.1f' renders 1.0 as '1.0' under the decorator's locales."""
    # should not format with a comma, but always with C locale
    rendered = '%.1f' % 1.0
    self.assertEqual('1.0', rendered)
Georg Brandlda6b1072006-01-20 17:48:54 +0000676
Walter Dörwald28256f22003-01-19 16:59:20 +0000677 def test_constructor(self):
678 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
679
680 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000681 str('unicode remains unicode'),
682 'unicode remains unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000683 )
684
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000685 class UnicodeSubclass(str):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000686 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000687
Walter Dörwald28256f22003-01-19 16:59:20 +0000688 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000689 str(UnicodeSubclass('unicode subclass becomes unicode')),
690 'unicode subclass becomes unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000691 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000692
Walter Dörwald28256f22003-01-19 16:59:20 +0000693 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000694 str('strings are converted to unicode'),
695 'strings are converted to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000696 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000697
Walter Dörwald28256f22003-01-19 16:59:20 +0000698 class StringCompat:
699 def __init__(self, x):
700 self.x = x
701 def __str__(self):
702 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000703
Walter Dörwald28256f22003-01-19 16:59:20 +0000704 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000705 str(StringCompat('__str__ compatible objects are recognized')),
706 '__str__ compatible objects are recognized'
Walter Dörwald28256f22003-01-19 16:59:20 +0000707 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000708
Walter Dörwald28256f22003-01-19 16:59:20 +0000709 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000710
Walter Dörwald28256f22003-01-19 16:59:20 +0000711 o = StringCompat('unicode(obj) is compatible to str()')
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000712 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Walter Dörwald28256f22003-01-19 16:59:20 +0000713 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000714
Guido van Rossume2a383d2007-01-15 16:59:06 +0000715 for obj in (123, 123.45, 123):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000716 self.assertEqual(str(obj), str(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000717
Walter Dörwald28256f22003-01-19 16:59:20 +0000718 # unicode(obj, encoding, error) tests (this maps to
719 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000720
Walter Dörwald28256f22003-01-19 16:59:20 +0000721 if not sys.platform.startswith('java'):
722 self.assertRaises(
723 TypeError,
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000724 str,
725 'decoding unicode is not supported',
Walter Dörwald28256f22003-01-19 16:59:20 +0000726 'utf-8',
727 'strict'
728 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000729
Walter Dörwald28256f22003-01-19 16:59:20 +0000730 self.assertEqual(
Walter Dörwald67e83882007-05-05 12:26:27 +0000731 str(b'strings are decoded to unicode', 'utf-8', 'strict'),
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000732 'strings are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000733 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000734
Walter Dörwald28256f22003-01-19 16:59:20 +0000735 if not sys.platform.startswith('java'):
736 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000737 str(
Guido van Rossumbae07c92007-10-08 02:46:15 +0000738 memoryview(b'character buffers are decoded to unicode'),
Walter Dörwald28256f22003-01-19 16:59:20 +0000739 'utf-8',
740 'strict'
741 ),
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000742 'character buffers are decoded to unicode'
Walter Dörwald28256f22003-01-19 16:59:20 +0000743 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000744
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000745 self.assertRaises(TypeError, str, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000746
Walter Dörwald28256f22003-01-19 16:59:20 +0000747 def test_codecs_utf7(self):
748 utfTests = [
Walter Dörwald67e83882007-05-05 12:26:27 +0000749 ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
750 ('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example
751 ('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example
752 ('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
753 ('+', b'+-'),
754 ('+-', b'+--'),
755 ('+?', b'+-?'),
756 ('\?', b'+AFw?'),
757 ('+?', b'+-?'),
758 (r'\\?', b'+AFwAXA?'),
759 (r'\\\?', b'+AFwAXABc?'),
760 (r'++--', b'+-+---')
Walter Dörwald28256f22003-01-19 16:59:20 +0000761 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000762
Walter Dörwald28256f22003-01-19 16:59:20 +0000763 for (x, y) in utfTests:
764 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000765
Walter Dörwald28256f22003-01-19 16:59:20 +0000766 # surrogates not supported
Walter Dörwald67e83882007-05-05 12:26:27 +0000767 self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000768
Walter Dörwald67e83882007-05-05 12:26:27 +0000769 self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000770
Walter Dörwald28256f22003-01-19 16:59:20 +0000771 def test_codecs_utf8(self):
Walter Dörwald67e83882007-05-05 12:26:27 +0000772 self.assertEqual(''.encode('utf-8'), b'')
773 self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
774 self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
775 self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
776 self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
777 self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
Walter Dörwald28256f22003-01-19 16:59:20 +0000778 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000779 ('\ud800\udc02'*1000).encode('utf-8'),
Walter Dörwald67e83882007-05-05 12:26:27 +0000780 b'\xf0\x90\x80\x82'*1000
Walter Dörwald28256f22003-01-19 16:59:20 +0000781 )
782 self.assertEqual(
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000783 '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
784 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
785 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
786 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
787 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
788 ' Nunstuck git und'.encode('utf-8'),
Walter Dörwald67e83882007-05-05 12:26:27 +0000789 b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
790 b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
791 b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
792 b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
793 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
794 b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
795 b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
796 b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
797 b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
798 b'\xe3\x80\x8cWenn ist das Nunstuck git und'
Walter Dörwald28256f22003-01-19 16:59:20 +0000799 )
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000800
Walter Dörwald28256f22003-01-19 16:59:20 +0000801 # UTF-8 specific decoding tests
Walter Dörwald67e83882007-05-05 12:26:27 +0000802 self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
803 self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
804 self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000805
Walter Dörwald28256f22003-01-19 16:59:20 +0000806 # Other possible utf-8 test cases:
807 # * strict decoding testing for all of the
808 # UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000809
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +0000810 def test_codecs_idna(self):
811 # Test whether trailing dot is preserved
Walter Dörwald1324c6f2007-05-11 19:57:05 +0000812 self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
Martin v. Löwis0d8e16c2003-08-05 06:19:47 +0000813
def test_codecs_errors(self):
    """Check how encoding and decoding report, ignore or replace errors."""
    text = 'Andr\202 x'
    data = b'Andr\202 x'

    # Error handling (encoding)
    for extra in ((), ('strict',)):
        self.assertRaises(UnicodeError, text.encode, 'ascii', *extra)
    self.assertEqual(text.encode('ascii', 'ignore'), b"Andr x")
    self.assertEqual(text.encode('ascii', 'replace'), b"Andr? x")

    # Error handling (decoding)
    for extra in ((), ('strict',)):
        self.assertRaises(UnicodeError, str, data, 'ascii', *extra)
    self.assertEqual(str(data, 'ascii', 'ignore'), "Andr x")
    self.assertEqual(str(data, 'ascii', 'replace'), 'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, b"\\".decode, "unicode-escape")

    # test.unicode1 / test.unicode2 are the deliberately broken codecs
    # registered at the top of this file (bad return values).
    self.assertRaises(TypeError, b"hello".decode, "test.unicode1")
    self.assertRaises(TypeError, str, b"hello", "test.unicode2")
    self.assertRaises(TypeError, "hello".encode, "test.unicode1")
    self.assertRaises(TypeError, "hello".encode, "test.unicode2")

    # executes PyUnicode_Encode()
    import imp
    missing_module = "non-existing module"
    search_path = ["non-existing dir"]
    self.assertRaises(ImportError, imp.find_module, missing_module, search_path)

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, "hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, "\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000851
Walter Dörwald28256f22003-01-19 16:59:20 +0000852 def test_codecs(self):
853 # Encoding
Walter Dörwald67e83882007-05-05 12:26:27 +0000854 self.assertEqual('hello'.encode('ascii'), b'hello')
855 self.assertEqual('hello'.encode('utf-7'), b'hello')
856 self.assertEqual('hello'.encode('utf-8'), b'hello')
857 self.assertEqual('hello'.encode('utf8'), b'hello')
858 self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
859 self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
860 self.assertEqual('hello'.encode('latin-1'), b'hello')
Guido van Rossum97064862000-04-10 13:52:48 +0000861
Walter Dörwald28256f22003-01-19 16:59:20 +0000862 # Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000863 for c in range(1024):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000864 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000865 for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
866 'utf-16-be', 'raw_unicode_escape',
867 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000868 self.assertEqual(str(u.encode(encoding),encoding), u)
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000869
Walter Dörwald28256f22003-01-19 16:59:20 +0000870 # Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000871 for c in range(256):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000872 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000873 for encoding in ('latin-1',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000874 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000875
Walter Dörwald28256f22003-01-19 16:59:20 +0000876 # Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossum805365e2007-05-07 22:24:25 +0000877 for c in range(128):
Guido van Rossum84fc66d2007-05-03 17:18:26 +0000878 u = chr(c)
Hye-Shik Chang835b2432005-12-17 04:38:31 +0000879 for encoding in ('ascii',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000880 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000881
Walter Dörwald28256f22003-01-19 16:59:20 +0000882 # Roundtrip safety for non-BMP (just a few chars)
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000883 u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
Walter Dörwald28256f22003-01-19 16:59:20 +0000884 for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
885 #'raw_unicode_escape',
886 'unicode_escape', 'unicode_internal'):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000887 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000888
Walter Dörwald28256f22003-01-19 16:59:20 +0000889 # UTF-8 must be roundtrip safe for all UCS-2 code points
890 # This excludes surrogates: in the full range, there would be
891 # a surrogate pair (\udbff\udc00), which gets converted back
892 # to a non-BMP character (\U0010fc00)
Walter Dörwald1324c6f2007-05-11 19:57:05 +0000893 u = ''.join(map(chr, list(range(0,0xd800)) +
894 list(range(0xe000,0x10000))))
Walter Dörwald28256f22003-01-19 16:59:20 +0000895 for encoding in ('utf-8',):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000896 self.assertEqual(str(u.encode(encoding),encoding), u)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000897
Walter Dörwald28256f22003-01-19 16:59:20 +0000898 def test_codecs_charmap(self):
899 # 0-127
Guido van Rossum805365e2007-05-07 22:24:25 +0000900 s = bytes(range(128))
Walter Dörwald28256f22003-01-19 16:59:20 +0000901 for encoding in (
902 'cp037', 'cp1026',
903 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
904 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
905 'cp863', 'cp865', 'cp866',
906 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
907 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
908 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
909 'mac_cyrillic', 'mac_latin2',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000910
Walter Dörwald28256f22003-01-19 16:59:20 +0000911 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
912 'cp1256', 'cp1257', 'cp1258',
913 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000914
Walter Dörwald28256f22003-01-19 16:59:20 +0000915 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
916 'cp1006', 'iso8859_8',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000917
Walter Dörwald28256f22003-01-19 16:59:20 +0000918 ### These have undefined mappings:
919 #'cp424',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000920
Walter Dörwald28256f22003-01-19 16:59:20 +0000921 ### These fail the round-trip:
922 #'cp875'
Guido van Rossum9e896b32000-04-05 20:11:21 +0000923
Walter Dörwald28256f22003-01-19 16:59:20 +0000924 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000925 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000926
Walter Dörwald28256f22003-01-19 16:59:20 +0000927 # 128-255
Guido van Rossum805365e2007-05-07 22:24:25 +0000928 s = bytes(range(128, 256))
Walter Dörwald28256f22003-01-19 16:59:20 +0000929 for encoding in (
930 'cp037', 'cp1026',
931 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
932 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
933 'cp863', 'cp865', 'cp866',
934 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
935 'iso8859_2', 'iso8859_4', 'iso8859_5',
936 'iso8859_9', 'koi8_r', 'latin_1',
937 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000938
Walter Dörwald28256f22003-01-19 16:59:20 +0000939 ### These have undefined mappings:
940 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
941 #'cp1256', 'cp1257', 'cp1258',
942 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
943 #'iso8859_3', 'iso8859_6', 'iso8859_7',
944 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000945
Walter Dörwald28256f22003-01-19 16:59:20 +0000946 ### These fail the round-trip:
947 #'cp1006', 'cp875', 'iso8859_8',
Tim Peters2f228e72001-05-13 00:19:31 +0000948
Walter Dörwald28256f22003-01-19 16:59:20 +0000949 ):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000950 self.assertEqual(str(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000951
Walter Dörwald28256f22003-01-19 16:59:20 +0000952 def test_concatenation(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000953 self.assertEqual(("abc" "def"), "abcdef")
954 self.assertEqual(("abc" "def"), "abcdef")
955 self.assertEqual(("abc" "def"), "abcdef")
956 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
957 self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +0000958
Walter Dörwald28256f22003-01-19 16:59:20 +0000959 def test_printing(self):
960 class BitBucket:
961 def write(self, text):
962 pass
Fred Drake004d5e62000-10-23 17:22:08 +0000963
Walter Dörwald28256f22003-01-19 16:59:20 +0000964 out = BitBucket()
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000965 print('abc', file=out)
966 print('abc', 'def', file=out)
967 print('abc', 'def', file=out)
968 print('abc', 'def', file=out)
969 print('abc\n', file=out)
970 print('abc\n', end=' ', file=out)
971 print('abc\n', end=' ', file=out)
972 print('def\n', file=out)
973 print('def\n', file=out)
Fred Drake004d5e62000-10-23 17:22:08 +0000974
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000975 def test_ucs4(self):
976 if sys.maxunicode == 0xFFFF:
977 return
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000978 x = '\U00100000'
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000979 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
980 self.assertEqual(x, y)
981
Brett Cannonc3647ac2005-04-26 03:45:26 +0000982 def test_conversion(self):
983 # Make sure __unicode__() works properly
984 class Foo0:
985 def __str__(self):
986 return "foo"
987
988 class Foo1:
Guido van Rossum98297ee2007-11-06 21:34:58 +0000989 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000990 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +0000991
992 class Foo2(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +0000993 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000994 return "foo"
Brett Cannonc3647ac2005-04-26 03:45:26 +0000995
996 class Foo3(object):
Guido van Rossum98297ee2007-11-06 21:34:58 +0000997 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +0000998 return "foo"
999
1000 class Foo4(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001001 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001002 return "foo"
1003
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001004 class Foo5(str):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001005 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001006 return "foo"
1007
1008 class Foo6(str):
1009 def __str__(self):
1010 return "foos"
1011
Guido van Rossum98297ee2007-11-06 21:34:58 +00001012 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001013 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001014
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001015 class Foo7(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001016 def __str__(self):
1017 return "foos"
Guido van Rossum98297ee2007-11-06 21:34:58 +00001018 def __str__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001019 return "foou"
Brett Cannonc3647ac2005-04-26 03:45:26 +00001020
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001021 class Foo8(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001022 def __new__(cls, content=""):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001023 return str.__new__(cls, 2*content)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001024 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001025 return self
1026
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001027 class Foo9(str):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001028 def __str__(self):
Brett Cannonc3647ac2005-04-26 03:45:26 +00001029 return "not unicode"
1030
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001031 self.assertEqual(str(Foo0()), "foo")
1032 self.assertEqual(str(Foo1()), "foo")
1033 self.assertEqual(str(Foo2()), "foo")
1034 self.assertEqual(str(Foo3()), "foo")
1035 self.assertEqual(str(Foo4("bar")), "foo")
1036 self.assertEqual(str(Foo5("bar")), "foo")
1037 self.assertEqual(str(Foo6("bar")), "foou")
1038 self.assertEqual(str(Foo7("bar")), "foou")
1039 self.assertEqual(str(Foo8("foo")), "foofoo")
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001040 self.assertEqual(str(Foo9("foo")), "not unicode")
Brett Cannonc3647ac2005-04-26 03:45:26 +00001041
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001042 def test_unicode_repr(self):
1043 class s1:
1044 def __repr__(self):
1045 return '\\n'
1046
1047 class s2:
1048 def __repr__(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001049 return '\\n'
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001050
1051 self.assertEqual(repr(s1()), '\\n')
1052 self.assertEqual(repr(s2()), '\\n')
1053
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001054 def test_expandtabs_overflows_gracefully(self):
1055 # This test only affects 32-bit platforms because expandtabs can only take
1056 # an int as the max value, not a 64-bit C long. If expandtabs is changed
1057 # to take a 64-bit long, this test should apply to all platforms.
Christian Heimesa37d4c62007-12-04 23:02:19 +00001058 if sys.maxsize > (1 << 32) or struct.calcsize('P') != 4:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001059 return
Christian Heimesa37d4c62007-12-04 23:02:19 +00001060 self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001061
1062
def test_main():
    """Hand this module's name to test_support.run_unittest()."""
    module_name = __name__
    test_support.run_unittest(module_name)


if __name__ == "__main__":
    test_main()