blob: 34f9371658bfa1a918938c53ade01db92dea70e0 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Neal Norwitz430f68b2005-11-24 22:00:56 +000012# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search hook serving two deliberately broken test codecs.

    "test.unicode1" resolves to an encoder/decoder pair that returns a
    bare integer instead of a tuple; "test.unicode2" resolves to a pair
    that returns a tuple whose first item is not a string.  Every other
    encoding name yields None.
    """
    def encode1(input, errors="strict"):
        return 42 # not a tuple
    def decode1(input, errors="strict"):
        return 42 # not a tuple
    def encode2(input, errors="strict"):
        return (42, 42) # no unicode
    def decode2(input, errors="strict"):
        return (42, 42) # no unicode

    broken_codecs = (
        ("test.unicode1", (encode1, decode1, None, None)),
        ("test.unicode2", (encode2, decode2, None, None)),
    )
    for codec_name, codec_entry in broken_codecs:
        if encoding == codec_name:
            return codec_entry
    return None
28codecs.register(search_function)
29
Walter Dörwald0fd583c2003-02-21 12:53:50 +000030class UnicodeTest(
31 string_tests.CommonTest,
Walter Dörwald57d88e52004-08-26 16:53:04 +000032 string_tests.MixinStrUnicodeUserStringTest,
33 string_tests.MixinStrUnicodeTest,
Walter Dörwald0fd583c2003-02-21 12:53:50 +000034 ):
35 type2test = unicode
36
def checkequalnofix(self, result, object, methodname, *args):
    """Call ``object.methodname(*args)`` and verify value and exact type.

    The arguments are passed through as given.  When the method hands
    back ``object`` itself, the call is repeated on an instance of a
    unicode subclass to make sure that instance is not returned as-is.
    """
    actual = getattr(object, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(type(actual) is type(result))

    if actual is not object:
        return

    # if the original is returned make sure that
    # this doesn't happen with subclasses
    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)
    wrapped = usub(object)
    actual = getattr(wrapped, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(wrapped is not actual)
Guido van Rossume4874ae2001-09-21 15:36:41 +000054
def test_literals(self):
    # Equivalent escape spellings must produce equal strings.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')
    # \U escapes above the valid range must be rejected when evaluated.
    out_of_range_sources = (
        'u\'\\Ufffffffe\'',
        'u\'\\Uffffffff\'',
        'u\'\\U%08x\'' % 0x110000,
    )
    for literal_source in out_of_range_sources:
        self.assertRaises(UnicodeError, eval, literal_source)
61
def test_repr(self):
    # Checks the exact text produced by repr() for unicode objects.  The
    # whole check is skipped when sys.platform starts with 'java'.
    if not sys.platform.startswith('java'):
        # Test basic sanity of repr()
        self.assertEqual(repr(u'abc'), "u'abc'")
        # Backslashes are doubled in the repr.
        self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
        self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
        self.assertEqual(repr(u'\\c'), "u'\\\\c'")
        self.assertEqual(repr(u'\\'), "u'\\\\'")
        # Common control characters use their short escapes; backspace
        # is expected as a \x escape.
        self.assertEqual(repr(u'\n'), "u'\\n'")
        self.assertEqual(repr(u'\r'), "u'\\r'")
        self.assertEqual(repr(u'\t'), "u'\\t'")
        self.assertEqual(repr(u'\b'), "u'\\x08'")
        # Quote selection and escaping.
        self.assertEqual(repr(u"'\""), """u'\\'"'""")
        self.assertEqual(repr(u"'\""), """u'\\'"'""")
        self.assertEqual(repr(u"'"), '''u"'"''')
        self.assertEqual(repr(u'"'), """u'"'""")
        # Expected repr of the string made of code points 0..255 in order.
        latin1repr = (
            "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
            "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
            "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
            "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
            "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
            "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
            "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
            "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
            "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
            "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
            "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
            "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
            "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
            "\\xfe\\xff'")
        testrepr = repr(u''.join(map(unichr, xrange(256))))
        self.assertEqual(testrepr, latin1repr)
95
def test_count(self):
    # Run the shared count() tests first, then the str/unicode mixes.
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    self.checkequalnofix(3, 'aaa', 'count', u'a')
    self.checkequalnofix(0, 'aaa', 'count', u'b')
    self.checkequalnofix(3, u'aaa', 'count', 'a')
    self.checkequalnofix(0, u'aaa', 'count', 'b')
    # negative start / end positions
    self.checkequalnofix(1, u'aaa', 'count', 'a', -1)
    self.checkequalnofix(3, u'aaa', 'count', 'a', -10)
    self.checkequalnofix(2, u'aaa', 'count', 'a', 0, -1)
    self.checkequalnofix(0, u'aaa', 'count', 'a', 0, -10)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000108
def test_find(self):
    # find() on a unicode haystack, with and without a start offset.
    haystack = u'abcdefghiabc'
    for expected, find_args in (
        (0, (u'abc',)),
        (9, (u'abc', 1)),
        (-1, (u'def', 4)),
    ):
        self.checkequalnofix(expected, haystack, 'find', *find_args)

    # A missing or non-string needle must raise TypeError.
    for bad_args in ((), (42,)):
        self.assertRaises(TypeError, u'hello'.find, *bad_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000116
def test_rfind(self):
    # Shared rfind() tests, then str/unicode mixes.
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    mixed_cases = (
        (9, 'abcdefghiabc', u'abc'),
        (12, 'abcdefghiabc', u''),
        (12, u'abcdefghiabc', ''),
    )
    for expected, haystack, needle in mixed_cases:
        self.checkequalnofix(expected, haystack, 'rfind', needle)
Guido van Rossum8b264542000-12-19 02:22:31 +0000123
def test_index(self):
    # Shared index() tests, then every str/unicode pairing of
    # haystack and needle.
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    found_cases = (
        (0, '', ()),
        (3, 'def', ()),
        (0, 'abc', ()),
        (9, 'abc', (1,)),
    )
    missing_cases = (
        ('abcdefghiabc', 'hib', ()),
        ('abcdefghiab', 'abc', (1,)),
        ('abcdefghi', 'ghi', (8,)),
        ('abcdefghi', 'ghi', (-1,)),
    )
    for haystack_type, needle_type in ((str, unicode), (unicode, str)):
        for expected, needle, bounds in found_cases:
            self.checkequalnofix(expected, haystack_type('abcdefghiabc'),
                                 'index', needle_type(needle), *bounds)
        for text, needle, bounds in missing_cases:
            self.assertRaises(ValueError, haystack_type(text).index,
                              needle_type(needle), *bounds)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000136
def test_rindex(self):
    # Shared rindex() tests, then every str/unicode pairing of
    # haystack and needle.
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    found_cases = (
        (12, '', ()),
        (3, 'def', ()),
        (9, 'abc', ()),
        (0, 'abc', (0, -1)),
    )
    missing_cases = (
        ('abcdefghiabc', 'hib', ()),
        ('defghiabc', 'def', (1,)),
        ('defghiabc', 'abc', (0, -1)),
        ('abcdefghi', 'ghi', (0, 8)),
        ('abcdefghi', 'ghi', (0, -1)),
    )
    for haystack_type, needle_type in ((str, unicode), (unicode, str)):
        for expected, needle, bounds in found_cases:
            self.checkequalnofix(expected, haystack_type('abcdefghiabc'),
                                 'rindex', needle_type(needle), *bounds)

        for text, needle, bounds in missing_cases:
            self.assertRaises(ValueError, haystack_type(text).rindex,
                              needle_type(needle), *bounds)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000151
def test_translate(self):
    # Mapping values may be None (delete), an ordinal, or a unicode
    # string of any length.
    a, b, c, z = ord('a'), ord('b'), ord('c'), ord('z')
    translate_cases = (
        (u'bbbc', u'abababc', {a: None}),
        (u'iiic', u'abababc', {a: None, b: ord('i')}),
        (u'iiix', u'abababc', {a: None, b: ord('i'), c: u'x'}),
        (u'<i><i><i>c', u'abababc', {a: None, b: u'<i>'}),
        (u'c', u'abababc', {a: None, b: u''}),
        (u'xyyx', u'xzx', {z: u'yy'}),
    )
    for expected, text, table in translate_cases:
        self.checkequalnofix(expected, text, 'translate', table)

    # A missing table or a str (non-unicode) replacement is a TypeError.
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {ord('a'): ''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000162
def test_split(self):
    # Shared split() tests, then str/unicode mixes of text and separator.
    string_tests.CommonTest.test_split(self)

    # Mixed arguments
    four_parts = [u'a', u'b', u'c', u'd']
    for text, separator in ((u'a//b//c//d', '//'), ('a//b//c//d', u'//')):
        self.checkequalnofix(four_parts, text, 'split', separator)
    self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000170
def test_join(self):
    # Shared join() tests, then str/unicode mixes of separator and items.
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments
    join_cases = (
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    )
    for expected, separator, items in join_cases:
        self.checkequalnofix(expected, separator, 'join', items)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000182
def test_strip(self):
    # Shared strip() tests, then a byte-string argument containing a
    # non-ASCII byte, which is expected to raise UnicodeError.
    string_tests.CommonTest.test_strip(self)
    non_ascii_chars = "\xff"
    self.assertRaises(UnicodeError, u"hello".strip, non_ascii_chars)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000186
def test_replace(self):
    # Shared replace() tests first.
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    source_text = 'one!two!three!'
    self.checkequalnofix(u'one@two!three!', source_text, 'replace', u'!', u'@', 1)
    # a non-string replacement must raise TypeError
    self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000193
def test_comparison(self):
    # Equality and ordering between str and unicode operands in every
    # combination.  The surrogate-ordering checks further down sit behind
    # "if 0:" and therefore never execute.
    # Comparisons:
    self.assertEqual(u'abc', 'abc')
    self.assertEqual('abc', u'abc')
    self.assertEqual(u'abc', u'abc')
    self.assert_(u'abcd' > 'abc')
    self.assert_('abcd' > u'abc')
    self.assert_(u'abcd' > u'abc')
    self.assert_(u'abc' < 'abcd')
    self.assert_('abc' < u'abcd')
    self.assert_(u'abc' < u'abcd')

    # Disabled on purpose: everything below is dead code kept for a
    # future collation test (see the comment inside the block).
    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        def test_lecmp(s, s2):
            # Despite the name, this asserts a strict "less than".
            self.assert_(s < s2)

        def test_fixup(s):
            # Compare s against surrogate pairs built from four high
            # surrogates combined with four low surrogates.
            s2 = u'\ud800\udc01'
            test_lecmp(s, s2)
            s2 = u'\ud900\udc01'
            test_lecmp(s, s2)
            s2 = u'\uda00\udc01'
            test_lecmp(s, s2)
            s2 = u'\udb00\udc01'
            test_lecmp(s, s2)
            s2 = u'\ud800\udd01'
            test_lecmp(s, s2)
            s2 = u'\ud900\udd01'
            test_lecmp(s, s2)
            s2 = u'\uda00\udd01'
            test_lecmp(s, s2)
            s2 = u'\udb00\udd01'
            test_lecmp(s, s2)
            s2 = u'\ud800\ude01'
            test_lecmp(s, s2)
            s2 = u'\ud900\ude01'
            test_lecmp(s, s2)
            s2 = u'\uda00\ude01'
            test_lecmp(s, s2)
            s2 = u'\udb00\ude01'
            test_lecmp(s, s2)
            s2 = u'\ud800\udfff'
            test_lecmp(s, s2)
            s2 = u'\ud900\udfff'
            test_lecmp(s, s2)
            s2 = u'\uda00\udfff'
            test_lecmp(s, s2)
            s2 = u'\udb00\udfff'
            test_lecmp(s, s2)

        test_fixup(u'\ue000')
        test_fixup(u'\uff61')

        # Surrogates on both sides, no fixup required
        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
258
def test_islower(self):
    # Shared islower() tests, plus U+1FFC which must not count as lowercase.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    sample_char = u'\u1FFc'
    self.checkequalnofix(False, sample_char, 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000262
def test_isupper(self):
    # Shared isupper() tests, plus U+1FFC which must not count as
    # uppercase.  The extra check is skipped when sys.platform starts
    # with 'java'.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    if sys.platform.startswith('java'):
        return
    sample_char = u'\u1FFc'
    self.checkequalnofix(False, sample_char, 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000267
def test_istitle(self):
    # Delegate to the shared istitle() tests.  The previous code called
    # the shared test_title() here, so the common istitle() checks were
    # never run for unicode; every sibling test delegates to the base
    # test of the same name.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    # U+1FFC must be accepted as a titlecased character, both on its own
    # and as the first letter of a word.
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000272
def test_isspace(self):
    # Shared isspace() tests, plus a few non-ASCII code points.
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    for expected, char in (
        (True, u'\u2000'),
        (True, u'\u200a'),
        (False, u'\u2014'),
    ):
        self.checkequalnofix(expected, char, 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000278
def test_isalpha(self):
    # Shared isalpha() tests, plus U+1FFC which must count as alphabetic.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    sample_char = u'\u1FFc'
    self.checkequalnofix(True, sample_char, 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000282
def test_isdecimal(self):
    # isdecimal() accepts only decimal digits; other numeric characters
    # and the empty string are rejected.
    decimal_cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'), # CIRCLED DIGIT ONE
        (False, u'\xbc'), # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'), # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in decimal_cases:
        self.checkequalnofix(expected, text, 'isdecimal')

    # isdecimal() takes no arguments
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000294
def test_isdigit(self):
    # Shared isdigit() tests, plus a few non-ASCII code points.
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    for expected, char in (
        (True, u'\u2460'),
        (False, u'\xbc'),
        (True, u'\u0660'),
    ):
        self.checkequalnofix(expected, char, 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000300
def test_isnumeric(self):
    # isnumeric() is checked against the same samples as isdecimal();
    # here the circled digit and the fraction are expected to pass too.
    numeric_cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in numeric_cases:
        self.checkequalnofix(expected, text, 'isnumeric')

    # isnumeric() takes no arguments
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
312
def test_contains(self):
    # Testing Unicode contains method
    #
    # Each entry is (needle, container, expected result of "in").  The
    # original list repeated a few assertions verbatim; every distinct
    # case is kept exactly once here.
    def check(needle, container, expected):
        self.assert_((needle in container) is expected,
                     '%r in %r' % (needle, container))

    for needle, container, expected in (
        ('a', u'abdb', True),
        ('a', u'bdab', True),
        ('a', u'bdaba', True),
        ('a', u'bdba', True),
        (u'a', u'bdba', True),
        (u'a', u'bdb', False),
        (u'a', 'bdb', False),
        (u'a', 'bdba', True),
        (u'a', ('a',1,None), True),
        (u'a', (1,None,'a'), True),
        (u'a', (1,None,u'a'), True),
        ('a', ('a',1,None), True),
        ('a', (1,None,'a'), True),
        ('a', (1,None,u'a'), True),
        ('a', ('x',1,u'y'), False),
        ('a', ('x',1,None), False),
        (u'abcd', u'abcxxxx', False),
        (u'ab', u'abcd', True),
        ('ab', u'abc', True),
        (u'ab', 'abc', True),
        (u'ab', (1,None,u'ab'), True),
    ):
        check(needle, container, expected)

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    # Every case below is run as unicode-in-str, str-in-unicode and
    # unicode-in-unicode, in that order.
    type_pairings = ((unicode, str), (str, unicode), (unicode, unicode))
    for needle, container, expected in (
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    ):
        for needle_type, container_type in type_pairings:
            check(needle_type(needle), container_type(container), expected)

    # __contains__ requires exactly one argument
    self.assertRaises(TypeError, u"abc".__contains__)
374
def test_formatting(self):
    # Shared %-formatting tests first.
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # %5.2f pads to a minimum width of five characters, so four-character
    # results such as '3.00' are preceded by one padding space (giving
    # two spaces after the comma).
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    # '1003.57' is already wider than the minimum, so no padding here.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    # The exact %r output is only checked when sys.platform does not
    # start with 'java'.
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # '*' takes the field width (and precision) from the argument tuple;
    # the expected strings carry the resulting padding: right-aligned to
    # width five, or left-aligned when the width is negative.
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
    # __str__ returning unicode must turn the whole result into unicode
    class Wrapper:
        def __str__(self):
            return u'\u1234'
    self.assertEqual('%s' % Wrapper(), u'\u1234')
Tim Peters4511a712006-05-03 04:46:14 +0000413
@test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    # should not format with a comma, but always with C locale
    formatted = u'%.1f' % 1.0
    self.assertEqual(u'1.0', formatted)
Georg Brandlda6b1072006-01-20 17:48:54 +0000418
Walter Dörwald28256f22003-01-19 16:59:20 +0000419 def test_constructor(self):
420 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
421
422 self.assertEqual(
423 unicode(u'unicode remains unicode'),
424 u'unicode remains unicode'
425 )
426
427 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000428 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000429
Walter Dörwald28256f22003-01-19 16:59:20 +0000430 self.assertEqual(
431 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
432 u'unicode subclass becomes unicode'
433 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000434
Walter Dörwald28256f22003-01-19 16:59:20 +0000435 self.assertEqual(
436 unicode('strings are converted to unicode'),
437 u'strings are converted to unicode'
438 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000439
Walter Dörwald28256f22003-01-19 16:59:20 +0000440 class UnicodeCompat:
441 def __init__(self, x):
442 self.x = x
443 def __unicode__(self):
444 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000445
Walter Dörwald28256f22003-01-19 16:59:20 +0000446 self.assertEqual(
447 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
448 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000449
Walter Dörwald28256f22003-01-19 16:59:20 +0000450 class StringCompat:
451 def __init__(self, x):
452 self.x = x
453 def __str__(self):
454 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000455
Walter Dörwald28256f22003-01-19 16:59:20 +0000456 self.assertEqual(
457 unicode(StringCompat('__str__ compatible objects are recognized')),
458 u'__str__ compatible objects are recognized'
459 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000460
Walter Dörwald28256f22003-01-19 16:59:20 +0000461 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000462
Walter Dörwald28256f22003-01-19 16:59:20 +0000463 o = StringCompat('unicode(obj) is compatible to str()')
464 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
465 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000466
Marc-André Lemburgd25c6502004-07-23 16:13:25 +0000467 # %-formatting and .__unicode__()
468 self.assertEqual(u'%s' %
469 UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
470 u"u'%s' % obj uses obj.__unicode__()")
471 self.assertEqual(u'%s' %
472 UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
473 u"u'%s' % obj falls back to obj.__str__()")
474
Walter Dörwald28256f22003-01-19 16:59:20 +0000475 for obj in (123, 123.45, 123L):
476 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000477
Walter Dörwald28256f22003-01-19 16:59:20 +0000478 # unicode(obj, encoding, error) tests (this maps to
479 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000480
Walter Dörwald28256f22003-01-19 16:59:20 +0000481 if not sys.platform.startswith('java'):
482 self.assertRaises(
483 TypeError,
484 unicode,
485 u'decoding unicode is not supported',
486 'utf-8',
487 'strict'
488 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000489
Walter Dörwald28256f22003-01-19 16:59:20 +0000490 self.assertEqual(
491 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
492 u'strings are decoded to unicode'
493 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000494
Walter Dörwald28256f22003-01-19 16:59:20 +0000495 if not sys.platform.startswith('java'):
496 self.assertEqual(
497 unicode(
498 buffer('character buffers are decoded to unicode'),
499 'utf-8',
500 'strict'
501 ),
502 u'character buffers are decoded to unicode'
503 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000504
Walter Dörwald28256f22003-01-19 16:59:20 +0000505 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000506
Walter Dörwald28256f22003-01-19 16:59:20 +0000507 def test_codecs_utf7(self):
508 utfTests = [
509 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
510 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
511 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
512 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
513 (u'+', '+-'),
514 (u'+-', '+--'),
515 (u'+?', '+-?'),
516 (u'\?', '+AFw?'),
517 (u'+?', '+-?'),
518 (ur'\\?', '+AFwAXA?'),
519 (ur'\\\?', '+AFwAXABc?'),
520 (ur'++--', '+-+---')
521 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000522
Walter Dörwald28256f22003-01-19 16:59:20 +0000523 for (x, y) in utfTests:
524 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000525
Walter Dörwald28256f22003-01-19 16:59:20 +0000526 # surrogates not supported
527 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000528
Walter Dörwald28256f22003-01-19 16:59:20 +0000529 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000530
def test_codecs_utf8(self):
    # Encoding: (unicode text, expected UTF-8 bytes)
    encode_cases = (
        (u'', ''),
        (u'\u20ac', '\xe2\x82\xac'),
        (u'\ud800\udc02', '\xf0\x90\x80\x82'),
        (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
        (u'\ud800', '\xed\xa0\x80'),
        (u'\udc00', '\xed\xb0\x80'),
        (u'\ud800\udc02'*1000, '\xf0\x90\x80\x82'*1000),
    )
    for text, expected_bytes in encode_cases:
        self.assertEqual(text.encode('utf-8'), expected_bytes)

    # A longer sample mixing three-byte sequences with ASCII.
    long_text = (
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'
    )
    long_bytes = (
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )
    self.assertEqual(long_text.encode('utf-8'), long_bytes)

    # UTF-8 specific decoding tests
    decode_cases = (
        ('\xf0\xa3\x91\x96', u'\U00023456'),
        ('\xf0\x90\x80\x82', u'\U00010002'),
        ('\xe2\x82\xac', u'\u20ac'),
    )
    for raw_bytes, expected_text in decode_cases:
        self.assertEqual(unicode(raw_bytes, 'utf-8'), expected_text)

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000569
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    hostname = u"www.python.org."
    self.assertEqual(hostname.encode("idna"), "www.python.org.")
573
def test_codecs_errors(self):
    text_sample = u'Andr\202 x'
    byte_sample = 'Andr\202 x'

    # Error handling (encoding)
    for extra_args in ((), ('strict',)):
        self.assertRaises(UnicodeError, text_sample.encode, 'ascii', *extra_args)
    self.assertEqual(text_sample.encode('ascii','ignore'), "Andr x")
    self.assertEqual(text_sample.encode('ascii','replace'), "Andr? x")

    # Error handling (decoding)
    for extra_args in ((), ('strict',)):
        self.assertRaises(UnicodeError, unicode, byte_sample, 'ascii', *extra_args)
    self.assertEqual(unicode(byte_sample,'ascii','ignore'), u"Andr x")
    self.assertEqual(unicode(byte_sample,'ascii','replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Codecs whose functions return malformed results (the
    # "test.unicode1" / "test.unicode2" names served by search_function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
    # executes PyUnicode_Encode()
    import imp
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        [u"non-existing dir"]
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000611
def test_codecs(self):
    """Check basic encoding results and encode/decode round trips.

    Encodes an ASCII-only unicode string with several codecs and compares
    the result with fixed byte strings, then round-trips ranges of code
    points (and a few non-BMP characters) through the listed codecs.
    """
    # Encoding: ASCII-only text and the byte string each codec must produce
    ascii_text = u'hello'
    expected_bytes = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec, encoded in expected_bytes:
        self.assertEqual(ascii_text.encode(codec), encoded)

    # Roundtrip safety for BMP: every group of codecs is exercised over
    # the first `limit` code points only (1024, 256 and 128 chars).
    roundtrip_groups = (
        (1024, ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                'utf-16-be', 'raw_unicode_escape',
                'unicode_escape', 'unicode_internal')),
        (256, ('latin-1',)),
        (128, ('ascii',)),
    )
    for limit, codec_names in roundtrip_groups:
        for codepoint in xrange(limit):
            char = unichr(codepoint)
            for codec in codec_names:
                self.assertEqual(unicode(char.encode(codec), codec), char)

    # Roundtrip safety for non-BMP (just a few chars)
    non_bmp = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
    for codec in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                  #'raw_unicode_escape',
                  'unicode_escape', 'unicode_internal'):
        self.assertEqual(unicode(non_bmp.encode(codec), codec), non_bmp)

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    below_surrogates = range(0, 0xd800)
    above_surrogates = range(0xe000, 0x10000)
    ucs2_text = u''.join(
        [unichr(cp) for cp in below_surrogates + above_surrogates])
    self.assertEqual(unicode(ucs2_text.encode('utf-8'), 'utf-8'), ucs2_text)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000656
def test_codecs_charmap(self):
    """Round-trip the byte values 0-127 and 128-255 through charmap codecs.

    Each byte string is decoded with a codec and encoded again with the
    same codec; the result must equal the original byte string.
    """
    # 0-127
    low_bytes = ''.join([chr(value) for value in xrange(128)])
    low_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',

        ### These have undefined mappings:
        #'cp424',

        ### These fail the round-trip:
        #'cp875'
    )
    for codec in low_codecs:
        self.assertEqual(unicode(low_bytes, codec).encode(codec), low_bytes)

    # 128-255
    high_bytes = ''.join([chr(value) for value in xrange(128, 256)])
    high_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        ### These have undefined mappings:
        #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        #'cp1256', 'cp1257', 'cp1258',
        #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
        #'iso8859_3', 'iso8859_6', 'iso8859_7',
        #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

        ### These fail the round-trip:
        #'cp1006', 'cp875', 'iso8859_8',
    )
    for codec in high_codecs:
        self.assertEqual(unicode(high_bytes, codec).encode(codec), high_bytes)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000710
Walter Dörwald28256f22003-01-19 16:59:20 +0000711 def test_concatenation(self):
712 self.assertEqual((u"abc" u"def"), u"abcdef")
713 self.assertEqual(("abc" u"def"), u"abcdef")
714 self.assertEqual((u"abc" "def"), u"abcdef")
715 self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
716 self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +0000717
def test_printing(self):
    """Print unicode and str objects to a writer that discards its input.

    Nothing is asserted; the print statements only have to complete
    without raising.
    """
    class DiscardingWriter:
        # Minimal file-like object: accepts text and throws it away.
        def write(self, text):
            pass

    sink = DiscardingWriter()

    # Single and mixed unicode/str arguments.
    print >>sink, u'abc'
    print >>sink, u'abc', u'def'
    print >>sink, u'abc', 'def'
    print >>sink, 'abc', u'def'

    # Text ending in a newline, with and without a trailing comma on the
    # print statement.
    print >>sink, u'abc\n'
    print >>sink, u'abc\n',
    print >>sink, u'abc\n',
    print >>sink, u'def\n'
    print >>sink, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000733
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000734 def test_ucs4(self):
735 if sys.maxunicode == 0xFFFF:
736 return
737 x = u'\U00100000'
738 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
739 self.assertEqual(x, y)
740
def test_conversion(self):
    """Make sure unicode() honours __str__() and __unicode__().

    Defines old-style classes, new-style classes and str/unicode
    subclasses with various combinations of the two hooks and checks the
    value unicode() (and, for the last class, str()) produces for each.
    """
    # Old-style class that only defines __str__.
    class OldStyleStr:
        def __str__(self):
            return "foo"

    # Old-style class with __unicode__ returning unicode.
    class OldStyleUnicode:
        def __unicode__(self):
            return u"foo"

    # New-style class with __unicode__ returning unicode.
    class NewStyleUnicode(object):
        def __unicode__(self):
            return u"foo"

    # New-style class whose __unicode__ returns a str.
    class NewStyleReturnsStr(object):
        def __unicode__(self):
            return "foo"

    # str subclass whose __unicode__ returns a str.
    class StrSubReturnsStr(str):
        def __unicode__(self):
            return "foo"

    # unicode subclass whose __unicode__ returns a str.
    class UnicodeSubReturnsStr(unicode):
        def __unicode__(self):
            return "foo"

    # str subclass defining both hooks with different results.
    class StrSubBothHooks(str):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    # unicode subclass defining both hooks with different results.
    class UnicodeSubBothHooks(unicode):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    # unicode subclass that doubles its content on construction and
    # returns itself from __unicode__.
    class DoublingUnicode(unicode):
        def __new__(cls, content=""):
            return unicode.__new__(cls, 2*content)

        def __unicode__(self):
            return self

    # unicode subclass whose hooks both return str objects.
    class UnicodeSubStrResults(unicode):
        def __str__(self):
            return "string"

        def __unicode__(self):
            return "not unicode"

    self.assertEqual(unicode(OldStyleStr()), u"foo")
    self.assertEqual(unicode(OldStyleUnicode()), u"foo")
    self.assertEqual(unicode(NewStyleUnicode()), u"foo")
    self.assertEqual(unicode(NewStyleReturnsStr()), u"foo")
    self.assertEqual(unicode(StrSubReturnsStr("bar")), u"foo")
    self.assertEqual(unicode(UnicodeSubReturnsStr("bar")), u"foo")
    self.assertEqual(unicode(StrSubBothHooks("bar")), u"foou")
    self.assertEqual(unicode(UnicodeSubBothHooks("bar")), u"foou")
    self.assertEqual(unicode(DoublingUnicode("foo")), u"foofoo")
    self.assertEqual(str(UnicodeSubStrResults("foo")), "string")
    self.assertEqual(unicode(UnicodeSubStrResults("foo")), u"not unicode")
803
Anthony Baxter67b6d512006-03-30 10:54:07 +0000804 def test_unicode_repr(self):
805 class s1:
806 def __repr__(self):
807 return '\\n'
808
809 class s2:
810 def __repr__(self):
811 return u'\\n'
812
813 self.assertEqual(repr(s1()), '\\n')
814 self.assertEqual(repr(s2()), '\\n')
815
816
817
818
819
def test_main():
    """Run the UnicodeTest test case via test_support.run_unittest()."""
    test_classes = (UnicodeTest,)
    test_support.run_unittest(*test_classes)
Barry Warsaw817918c2002-08-06 16:58:21 +0000822
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()