blob: e01a4610f29627938cc02c518a2b26534d162d4a [file] [log] [blame]
# -*- coding: iso-8859-1 -*-
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Walter Dörwald0fd583c2003-02-21 12:53:50 +000012class UnicodeTest(
13 string_tests.CommonTest,
14 string_tests.MixinStrUnicodeUserStringTest
15 ):
16 type2test = unicode
17
def checkequalnofix(self, result, object, methodname, *args):
    """Call ``object.methodname(*args)`` and compare it with ``result``.

    Both the value and the exact type of the return value must match
    ``result``.  ``object`` and ``args`` are used exactly as passed in;
    nothing is converted to another string type first.

    If the method handed back ``object`` itself, the call is repeated on
    an instance of a ``unicode`` subclass, where the method must return
    an equal value that is *not* the subclass instance.
    """
    method = getattr(object, methodname)
    realresult = method(*args)
    self.assertEqual(realresult, result)
    self.assert_(type(realresult) is type(result))

    # if the original is returned make sure that
    # this doesn't happen with subclasses
    if realresult is object:
        class usub(unicode):
            def __repr__(self):
                return 'usub(%r)' % unicode.__repr__(self)
        object = usub(object)
        method = getattr(object, methodname)
        realresult = method(*args)
        self.assertEqual(realresult, result)
        self.assert_(object is not realresult)
Guido van Rossume4874ae2001-09-21 15:36:41 +000035
def test_literals(self):
    # Different escape spellings of the same code point compare equal.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')
    # \U escapes beyond 0x10ffff must be rejected; eval() is used so the
    # bad literal is only compiled when the assertion runs.
    self.assertRaises(UnicodeError, eval, 'u\'\\Ufffffffe\'')
    self.assertRaises(UnicodeError, eval, 'u\'\\Uffffffff\'')
    self.assertRaises(UnicodeError, eval, 'u\'\\U%08x\'' % 0x110000)
42
def test_repr(self):
    # The expected strings are skipped on Java platforms (presumably
    # because repr() output differs there).
    if not sys.platform.startswith('java'):
        # Test basic sanity of repr()
        self.assertEqual(repr(u'abc'), "u'abc'")
        self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
        self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
        self.assertEqual(repr(u'\\c'), "u'\\\\c'")
        self.assertEqual(repr(u'\\'), "u'\\\\'")
        self.assertEqual(repr(u'\n'), "u'\\n'")
        self.assertEqual(repr(u'\r'), "u'\\r'")
        self.assertEqual(repr(u'\t'), "u'\\t'")
        self.assertEqual(repr(u'\b'), "u'\\x08'")
        self.assertEqual(repr(u"'\""), """u'\\'"'""")
        self.assertEqual(repr(u"'"), '''u"'"''')
        self.assertEqual(repr(u'"'), """u'"'""")
        # Expected repr() of the 256 code points U+0000..U+00FF joined
        # into a single string.
        latin1repr = (
            "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
            "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
            "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
            "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
            "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
            "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
            "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
            "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
            "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
            "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
            "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
            "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
            "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
            "\\xfe\\xff'")
        testrepr = repr(u''.join(map(unichr, xrange(256))))
        self.assertEqual(testrepr, latin1repr)
76
def test_count(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    self.checkequalnofix(3, 'aaa', 'count', u'a')
    self.checkequalnofix(0, 'aaa', 'count', u'b')
    self.checkequalnofix(3, u'aaa', 'count', 'a')
    self.checkequalnofix(0, u'aaa', 'count', 'b')
    # negative start / end offsets
    self.checkequalnofix(1, u'aaa', 'count', 'a', -1)
    self.checkequalnofix(3, u'aaa', 'count', 'a', -10)
    self.checkequalnofix(2, u'aaa', 'count', 'a', 0, -1)
    self.checkequalnofix(0, u'aaa', 'count', 'a', 0, -10)
Guido van Rossuma831cac2000-03-10 23:23:21 +000089
def test_find(self):
    # Run the inherited find checks too, as the sibling overrides
    # (test_count, test_rfind, test_index, test_rindex) do; without this
    # call the override shadows them.
    string_tests.CommonTest.test_find(self)
    self.checkequalnofix(0, u'abcdefghiabc', 'find', u'abc')
    self.checkequalnofix(9, u'abcdefghiabc', 'find', u'abc', 1)
    self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)

    # missing or non-string argument
    self.assertRaises(TypeError, u'hello'.find)
    self.assertRaises(TypeError, u'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +000097
def test_rfind(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    self.checkequalnofix(9, 'abcdefghiabc', 'rfind', u'abc')
    self.checkequalnofix(12, 'abcdefghiabc', 'rfind', u'')
    self.checkequalnofix(12, u'abcdefghiabc', 'rfind', '')
Guido van Rossum8b264542000-12-19 02:22:31 +0000104
def test_index(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    # (t1 builds the searched string, t2 the search argument)
    for (t1, t2) in ((str, unicode), (unicode, str)):
        self.checkequalnofix(0, t1('abcdefghiabc'), 'index', t2(''))
        self.checkequalnofix(3, t1('abcdefghiabc'), 'index', t2('def'))
        self.checkequalnofix(0, t1('abcdefghiabc'), 'index', t2('abc'))
        self.checkequalnofix(9, t1('abcdefghiabc'), 'index', t2('abc'), 1)
        self.assertRaises(ValueError, t1('abcdefghiabc').index, t2('hib'))
        self.assertRaises(ValueError, t1('abcdefghiab').index, t2('abc'), 1)
        self.assertRaises(ValueError, t1('abcdefghi').index, t2('ghi'), 8)
        self.assertRaises(ValueError, t1('abcdefghi').index, t2('ghi'), -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000117
def test_rindex(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    # (t1 builds the searched string, t2 the search argument)
    for (t1, t2) in ((str, unicode), (unicode, str)):
        self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex', t2(''))
        self.checkequalnofix(3, t1('abcdefghiabc'), 'rindex', t2('def'))
        self.checkequalnofix(9, t1('abcdefghiabc'), 'rindex', t2('abc'))
        self.checkequalnofix(0, t1('abcdefghiabc'), 'rindex', t2('abc'), 0, -1)

        self.assertRaises(ValueError, t1('abcdefghiabc').rindex, t2('hib'))
        self.assertRaises(ValueError, t1('defghiabc').rindex, t2('def'), 1)
        self.assertRaises(ValueError, t1('defghiabc').rindex, t2('abc'), 0, -1)
        self.assertRaises(ValueError, t1('abcdefghi').rindex, t2('ghi'), 0, 8)
        self.assertRaises(ValueError, t1('abcdefghi').rindex, t2('ghi'), 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000132
def test_translate(self):
    # Mapping values exercised below: None, an ordinal, and unicode
    # strings of length 0, 1 and more.
    self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None})
    self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
    self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
    self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'})
    self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''})
    self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'})

    # missing table, and a byte-string mapping value
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000143
def test_split(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_split(self)

    # Mixed arguments
    self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//')
    self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//')
    self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000151
def test_join(self):
    # Run the inherited join checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments
    self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 'b', u'c', u'd'])
    self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd'))
    self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz'))
    # byte-string separator with unicode items
    self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd'])
    self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd'])
    self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd'))
    self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz'))
Marc-André Lemburge5034372000-08-08 08:04:29 +0000163
def test_strip(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_strip(self)
    # a non-ASCII byte string as the strip argument must raise
    self.assertRaises(UnicodeError, u"hello".strip, "\xff")
Guido van Rossuma831cac2000-03-10 23:23:21 +0000167
def test_replace(self):
    # Run the checks shared by all string types first.
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1)
    self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000174
def test_comparison(self):
    # Comparisons:
    self.assertEqual(u'abc', 'abc')
    self.assertEqual('abc', u'abc')
    self.assertEqual(u'abc', u'abc')
    self.assert_(u'abcd' > 'abc')
    self.assert_('abcd' > u'abc')
    self.assert_(u'abcd' > u'abc')
    self.assert_(u'abc' < 'abcd')
    self.assert_('abc' < u'abcd')
    self.assert_(u'abc' < u'abcd')

    # Deliberately disabled: the branch below never runs.
    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        def test_lecmp(s, s2):
            self.assert_(s < s2)

        def test_fixup(s):
            # s must sort below every surrogate pair listed here
            surrogate_pairs = (
                u'\ud800\udc01', u'\ud900\udc01', u'\uda00\udc01', u'\udb00\udc01',
                u'\ud800\udd01', u'\ud900\udd01', u'\uda00\udd01', u'\udb00\udd01',
                u'\ud800\ude01', u'\ud900\ude01', u'\uda00\ude01', u'\udb00\ude01',
                u'\ud800\udfff', u'\ud900\udfff', u'\uda00\udfff', u'\udb00\udfff',
            )
            for s2 in surrogate_pairs:
                test_lecmp(s, s2)

        test_fixup(u'\ue000')
        test_fixup(u'\uff61')

        # Surrogates on both sides, no fixup required
        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
239
def test_islower(self):
    # Run the inherited islower checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    self.checkequalnofix(False, u'\u1FFc', 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000243
def test_isupper(self):
    # Run the inherited isupper checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    # this check is skipped on Java platforms
    if not sys.platform.startswith('java'):
        self.checkequalnofix(False, u'\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000248
def test_istitle(self):
    # Chain to the inherited istitle() checks.  The previous code called
    # the parent's test_title here, so this override shadowed the
    # inherited test_istitle without ever running it.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000253
def test_isspace(self):
    # Run the inherited isspace checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    # non-ASCII code points
    self.checkequalnofix(True, u'\u2000', 'isspace')
    self.checkequalnofix(True, u'\u200a', 'isspace')
    self.checkequalnofix(False, u'\u2014', 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000259
def test_isalpha(self):
    # Run the inherited isalpha checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    self.checkequalnofix(True, u'\u1FFc', 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000263
def test_isdecimal(self):
    self.checkequalnofix(False, u'', 'isdecimal')
    self.checkequalnofix(False, u'a', 'isdecimal')
    self.checkequalnofix(True, u'0', 'isdecimal')
    self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE
    self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
    self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
    self.checkequalnofix(True, u'0123456789', 'isdecimal')
    self.checkequalnofix(False, u'0123456789a', 'isdecimal')

    # isdecimal() takes no arguments
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000275
def test_isdigit(self):
    # Run the inherited isdigit checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    # same three code points as in test_isdecimal / test_isnumeric
    self.checkequalnofix(True, u'\u2460', 'isdigit')
    self.checkequalnofix(False, u'\xbc', 'isdigit')
    self.checkequalnofix(True, u'\u0660', 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000281
def test_isnumeric(self):
    self.checkequalnofix(False, u'', 'isnumeric')
    self.checkequalnofix(False, u'a', 'isnumeric')
    self.checkequalnofix(True, u'0', 'isnumeric')
    self.checkequalnofix(True, u'\u2460', 'isnumeric')
    self.checkequalnofix(True, u'\xbc', 'isnumeric')
    self.checkequalnofix(True, u'\u0660', 'isnumeric')
    self.checkequalnofix(True, u'0123456789', 'isnumeric')
    self.checkequalnofix(False, u'0123456789a', 'isnumeric')

    # isnumeric() takes no arguments
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
293
def test_iswide(self):
    # The trailing comments name the width category of each sample.
    self.checkequalnofix(False, u'', 'iswide')
    self.checkequalnofix(False, u'\x1f', 'iswide') # Neutral
    self.checkequalnofix(False, u'\x20', 'iswide') # Narrow
    self.checkequalnofix(True, u'\u2329', 'iswide') # Wide
    self.checkequalnofix(False, u'\uff64', 'iswide') # Half
    self.checkequalnofix(True, u'\u3000', 'iswide') # Full
    self.checkequalnofix(False, u'\u2460', 'iswide') # Ambiguous
    # multi-character strings: one non-wide character makes it False
    self.checkequalnofix(True, u'\ud55c\uae00', 'iswide')
    self.checkequalnofix(False, u'\ud55c\u2606\uae00', 'iswide')
304
def test_wide(self):
    # Checks unicode.width(); the expected values below count 2 for each
    # character that test_iswide reports as wide and 1 for the others.
    self.assertEqual(u''.width(), 0)
    self.assertEqual(u'abcd'.width(), 4)
    self.assertEqual(u'\u0187\u01c9'.width(), 2)
    self.assertEqual(u'\u2460\u2329'.width(), 3)
    self.assertEqual(u'\u2329\u2460'.width(), 3)
    self.assertEqual(u'\ud55c\uae00'.width(), 4)
    self.assertEqual(u'\ud55c\u2606\uae00'.width(), 5)
313
def test_contains(self):
    # Testing Unicode contains method
    self.assert_('a' in u'abdb')
    self.assert_('a' in u'bdab')
    self.assert_('a' in u'bdaba')
    self.assert_('a' in u'bdba')
    self.assert_(u'a' in u'bdba')
    self.assert_(u'a' not in u'bdb')
    self.assert_(u'a' not in 'bdb')
    self.assert_(u'a' in 'bdba')
    # membership in tuples mixing str, unicode and other objects
    self.assert_(u'a' in ('a',1,None))
    self.assert_(u'a' in (1,None,'a'))
    self.assert_(u'a' in (1,None,u'a'))
    self.assert_('a' in ('a',1,None))
    self.assert_('a' in (1,None,'a'))
    self.assert_('a' in (1,None,u'a'))
    self.assert_('a' not in ('x',1,u'y'))
    self.assert_('a' not in ('x',1,None))
    # multi-character and empty substrings
    self.assert_(u'abcd' not in u'abcxxxx')
    self.assert_(u'ab' in u'abcd')
    self.assert_('ab' in u'abc')
    self.assert_(u'ab' in 'abc')
    self.assert_(u'ab' in (1,None,u'ab'))
    self.assert_(u'' in u'abc')
    self.assert_('' in u'abc')

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    # every str/unicode combination of needle and haystack
    self.assert_(u'' in '')
    self.assert_('' in u'')
    self.assert_(u'' in u'')
    self.assert_(u'' in 'abc')
    self.assert_('' in u'abc')
    self.assert_(u'' in u'abc')
    self.assert_(u'\0' not in 'abc')
    self.assert_('\0' not in u'abc')
    self.assert_(u'\0' not in u'abc')
    self.assert_(u'\0' in '\0abc')
    self.assert_('\0' in u'\0abc')
    self.assert_(u'\0' in u'\0abc')
    self.assert_(u'\0' in 'abc\0')
    self.assert_('\0' in u'abc\0')
    self.assert_(u'\0' in u'abc\0')
    self.assert_(u'a' in '\0abc')
    self.assert_('a' in u'\0abc')
    self.assert_(u'a' in u'\0abc')
    self.assert_(u'asdf' in 'asdf')
    self.assert_('asdf' in u'asdf')
    self.assert_(u'asdf' in u'asdf')
    self.assert_(u'asdf' not in 'asd')
    self.assert_('asdf' not in u'asd')
    self.assert_(u'asdf' not in u'asd')
    self.assert_(u'asdf' not in '')
    self.assert_('asdf' not in u'')
    self.assert_(u'asdf' not in u'')

    # __contains__ requires an argument
    self.assertRaises(TypeError, u"abc".__contains__)
375
def test_formatting(self):
    # Run the inherited formatting checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    # NOTE: %5.2f pads to five columns, so 4-character values such as
    # "3.00" are preceded by two spaces (separator + one pad space).
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    # only the repr-based check is skipped on Java platforms
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # '*' width/precision: results are padded to five columns
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
Walter Dörwald28256f22003-01-19 16:59:20 +0000410
Walter Dörwald28256f22003-01-19 16:59:20 +0000411
412 def test_constructor(self):
413 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
414
415 self.assertEqual(
416 unicode(u'unicode remains unicode'),
417 u'unicode remains unicode'
418 )
419
420 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000421 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000422
Walter Dörwald28256f22003-01-19 16:59:20 +0000423 self.assertEqual(
424 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
425 u'unicode subclass becomes unicode'
426 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000427
Walter Dörwald28256f22003-01-19 16:59:20 +0000428 self.assertEqual(
429 unicode('strings are converted to unicode'),
430 u'strings are converted to unicode'
431 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000432
Walter Dörwald28256f22003-01-19 16:59:20 +0000433 class UnicodeCompat:
434 def __init__(self, x):
435 self.x = x
436 def __unicode__(self):
437 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000438
Walter Dörwald28256f22003-01-19 16:59:20 +0000439 self.assertEqual(
440 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
441 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000442
Walter Dörwald28256f22003-01-19 16:59:20 +0000443 class StringCompat:
444 def __init__(self, x):
445 self.x = x
446 def __str__(self):
447 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000448
Walter Dörwald28256f22003-01-19 16:59:20 +0000449 self.assertEqual(
450 unicode(StringCompat('__str__ compatible objects are recognized')),
451 u'__str__ compatible objects are recognized'
452 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000453
Walter Dörwald28256f22003-01-19 16:59:20 +0000454 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000455
Walter Dörwald28256f22003-01-19 16:59:20 +0000456 o = StringCompat('unicode(obj) is compatible to str()')
457 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
458 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000459
Walter Dörwald28256f22003-01-19 16:59:20 +0000460 for obj in (123, 123.45, 123L):
461 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000462
Walter Dörwald28256f22003-01-19 16:59:20 +0000463 # unicode(obj, encoding, error) tests (this maps to
464 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000465
Walter Dörwald28256f22003-01-19 16:59:20 +0000466 if not sys.platform.startswith('java'):
467 self.assertRaises(
468 TypeError,
469 unicode,
470 u'decoding unicode is not supported',
471 'utf-8',
472 'strict'
473 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000474
Walter Dörwald28256f22003-01-19 16:59:20 +0000475 self.assertEqual(
476 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
477 u'strings are decoded to unicode'
478 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000479
Walter Dörwald28256f22003-01-19 16:59:20 +0000480 if not sys.platform.startswith('java'):
481 self.assertEqual(
482 unicode(
483 buffer('character buffers are decoded to unicode'),
484 'utf-8',
485 'strict'
486 ),
487 u'character buffers are decoded to unicode'
488 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000489
Walter Dörwald28256f22003-01-19 16:59:20 +0000490 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000491
Walter Dörwald28256f22003-01-19 16:59:20 +0000492 def test_codecs_utf7(self):
493 utfTests = [
494 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
495 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
496 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
497 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
498 (u'+', '+-'),
499 (u'+-', '+--'),
500 (u'+?', '+-?'),
501 (u'\?', '+AFw?'),
502 (u'+?', '+-?'),
503 (ur'\\?', '+AFwAXA?'),
504 (ur'\\\?', '+AFwAXABc?'),
505 (ur'++--', '+-+---')
506 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000507
Walter Dörwald28256f22003-01-19 16:59:20 +0000508 for (x, y) in utfTests:
509 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000510
Walter Dörwald28256f22003-01-19 16:59:20 +0000511 # surrogates not supported
512 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000513
Walter Dörwald28256f22003-01-19 16:59:20 +0000514 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000515
def test_codecs_utf8(self):
    # UTF-8 encoding: empty string, BMP character, surrogate pairs and
    # lone surrogates
    self.assertEqual(u''.encode('utf-8'), '')
    self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
    self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
    self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
    self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
    self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
    self.assertEqual(
        (u'\ud800\udc02'*1000).encode('utf-8'),
        '\xf0\x90\x80\x82'*1000
    )
    # a longer text mixing multi-byte and ASCII characters
    self.assertEqual(
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'.encode('utf-8'),
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )

    # UTF-8 specific decoding tests
    self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
    self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
    self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000554
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
558
def test_codecs_errors(self):
    # Error handling (encoding)
    self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
    self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
    self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
    self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")

    # Error handling (decoding)
    self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
    self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
    self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
    self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Error handling (bad decoder return)
    def search_function(encoding):
        # Codec lookup hook that serves two deliberately broken codecs.
        def decode1(input, errors="strict"):
            return 42 # not a tuple
        def encode1(input, errors="strict"):
            return 42 # not a tuple
        def encode2(input, errors="strict"):
            return (42, 42) # no unicode
        def decode2(input, errors="strict"):
            return (42, 42) # no unicode
        if encoding=="test.unicode1":
            return (encode1, decode1, None, None)
        elif encoding=="test.unicode2":
            return (encode2, decode2, None, None)
        else:
            return None
    # NOTE: the search function is registered and never removed here.
    codecs.register(search_function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
    # executes PyUnicode_Encode()
    import imp
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        [u"non-existing dir"]
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000613
def test_codecs(self):
    # Encoding: pure-ASCII text must produce these exact byte strings
    expected_encodings = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec_name, encoded in expected_encodings:
        self.assertEqual(u'hello'.encode(codec_name), encoded)

    def check_roundtrip(text, codec_names):
        # Encoding and then decoding with the same codec must give
        # back the original unicode text.
        for codec_name in codec_names:
            self.assertEqual(
                unicode(text.encode(codec_name), codec_name), text)

    # Roundtrip safety for BMP (just the first 1024 chars)
    check_roundtrip(
        u''.join([unichr(code) for code in xrange(1024)]),
        ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         'raw_unicode_escape', 'unicode_escape', 'unicode_internal'))

    # Roundtrip safety for BMP (just the first 256 chars)
    check_roundtrip(
        u''.join([unichr(code) for code in xrange(256)]),
        ('latin-1',))

    # Roundtrip safety for BMP (just the first 128 chars)
    check_roundtrip(
        u''.join([unichr(code) for code in xrange(128)]),
        ('ascii',))

    # Roundtrip safety for non-BMP (just a few chars);
    # 'raw_unicode_escape' is currently disabled for this case
    check_roundtrip(
        u'\U00010001\U00020002\U00030003\U00040004\U00050005',
        ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points.
    # Surrogates are excluded: in the full range there would be a
    # surrogate pair (\udbff\udc00), which gets converted back to a
    # non-BMP character (\U0010fc00)
    ucs2_without_surrogates = range(0, 0xd800) + range(0xe000, 0x10000)
    check_roundtrip(
        u''.join([unichr(code) for code in ucs2_without_surrogates]),
        ('utf-8',))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000654
def test_codecs_charmap(self):
    # Codecs checked against the bytes 0-127.
    # Left out: 'cp424' (undefined mappings) and 'cp875' (fails the
    # round-trip).
    low_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',
    )

    # Codecs checked against the bytes 128-255.
    # Left out because they have undefined mappings:
    #   'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #   'cp1256', 'cp1257', 'cp1258',
    #   'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #   'iso8859_3', 'iso8859_6', 'iso8859_7',
    #   'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish'
    # Left out because they fail the round-trip:
    #   'cp1006', 'cp875', 'iso8859_8'
    high_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',
    )

    # Decoding a byte string and re-encoding it with the same codec
    # must give back the original bytes.
    for byte_values, codec_names in (
            (xrange(128), low_half_codecs),
            (xrange(128, 256), high_half_codecs)):
        raw = ''.join([chr(value) for value in byte_values])
        for codec_name in codec_names:
            self.assertEqual(
                unicode(raw, codec_name).encode(codec_name), raw)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000708
def test_concatenation(self):
    # Adjacent string literals, in any mix of str and unicode, must
    # compare equal to the joined unicode text.
    cases = (
        ((u"abc" u"def"), u"abcdef"),
        (("abc" u"def"), u"abcdef"),
        ((u"abc" "def"), u"abcdef"),
        ((u"abc" u"def" "ghi"), u"abcdefghi"),
        (("abc" "def" u"ghi"), u"abcdefghi"),
    )
    for joined, expected in cases:
        self.assertEqual(joined, expected)
Fred Drake004d5e62000-10-23 17:22:08 +0000715
def test_printing(self):
    # Minimal file-like object: accepts writes and discards them
    class NullWriter:
        def write(self, data):
            pass

    sink = NullWriter()

    # Printing unicode objects, alone and mixed with str objects
    print >>sink, u'abc'
    print >>sink, u'abc', u'def'
    print >>sink, u'abc', 'def'
    print >>sink, 'abc', u'def'

    # Text ending in a newline, with and without a trailing comma on
    # the print statement; the order of these statements is kept as is
    print >>sink, u'abc\n'
    print >>sink, u'abc\n',
    print >>sink, u'abc\n',
    print >>sink, u'def\n'
    print >>sink, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000731
def test_ucs4(self):
    # Nothing is checked when sys.maxunicode is 0xFFFF; otherwise a
    # code point above the BMP must survive a raw-unicode-escape
    # encode/decode round trip.
    if sys.maxunicode != 0xFFFF:
        original = u'\U00100000'
        escaped = original.encode("raw-unicode-escape")
        restored = escaped.decode("raw-unicode-escape")
        self.assertEqual(original, restored)
738
def test_main():
    # Hand the test case classes of this module to the test_support runner
    test_classes = (UnicodeTest,)
    test_support.run_unittest(*test_classes)
Barry Warsaw817918c2002-08-06 16:58:21 +0000741
# Run the tests when this file is executed as a script
if "__main__" == __name__:
    test_main()