blob: d85f171dbb5b0870364b0792cffa45448a0082e8 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Walter Dörwald0fd583c2003-02-21 12:53:50 +000012class UnicodeTest(
13 string_tests.CommonTest,
Walter Dörwald57d88e52004-08-26 16:53:04 +000014 string_tests.MixinStrUnicodeUserStringTest,
15 string_tests.MixinStrUnicodeTest,
Walter Dörwald0fd583c2003-02-21 12:53:50 +000016 ):
17 type2test = unicode
18
def checkequalnofix(self, result, object, methodname, *args):
    # Call object.methodname(*args) and require that both the value and
    # the exact type of the outcome match `result`.
    outcome = getattr(object, methodname)(*args)
    self.assertEqual(outcome, result)
    self.assert_(type(outcome) is type(result))

    if outcome is not object:
        return

    # The method handed back its receiver unchanged; repeat the call on an
    # instance of a unicode subclass, which must not be returned as-is.
    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)

    wrapped = usub(object)
    outcome = getattr(wrapped, methodname)(*args)
    self.assertEqual(outcome, result)
    self.assert_(wrapped is not outcome)
Guido van Rossume4874ae2001-09-21 15:36:41 +000036
def test_literals(self):
    # \x, \u and \U escapes that spell the same code point compare equal.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')

    # Evaluating a \U escape with one of these values must raise
    # UnicodeError.
    rejected_literals = (
        "u'\\Ufffffffe'",
        "u'\\Uffffffff'",
        "u'\\U%08x'" % 0x110000,
    )
    for literal in rejected_literals:
        self.assertRaises(UnicodeError, eval, literal)
43
def test_repr(self):
    # The exact repr() text is only checked when not running on a
    # 'java' platform.
    if sys.platform.startswith('java'):
        return

    # Test basic sanity of repr()
    simple_cases = (
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    )
    for value, expected in simple_cases:
        self.assertEqual(repr(value), expected)

    # repr() of the first 256 code points joined into one string.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    all_latin1 = u''.join(map(unichr, xrange(256)))
    testrepr = repr(all_latin1)
    self.assertEqual(testrepr, latin1repr)
77
def test_count(self):
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    # Each row: (expected count, receiver, arguments passed to count()).
    mixed_cases = (
        (3, 'aaa', (u'a',)),
        (0, 'aaa', (u'b',)),
        (3, u'aaa', ('a',)),
        (0, u'aaa', ('b',)),
        (0, u'aaa', ('b',)),
        (1, u'aaa', ('a', -1)),
        (3, u'aaa', ('a', -10)),
        (2, u'aaa', ('a', 0, -1)),
        (0, u'aaa', ('a', 0, -10)),
    )
    for expected, receiver, call_args in mixed_cases:
        self.checkequalnofix(expected, receiver, 'count', *call_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
def test_find(self):
    # Each row: (expected index, arguments passed to find()).
    lookups = (
        (0, (u'abc',)),
        (9, (u'abc', 1)),
        (-1, (u'def', 4)),
    )
    for expected, call_args in lookups:
        self.checkequalnofix(expected, u'abcdefghiabc', 'find', *call_args)

    # find() without arguments, or with an integer argument, raises
    # TypeError.
    for bad_args in ((), (42,)):
        self.assertRaises(TypeError, u'hello'.find, *bad_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +000098
def test_rfind(self):
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    mixed_cases = (
        (9, 'abcdefghiabc', u'abc'),
        (12, 'abcdefghiabc', u''),
        (12, u'abcdefghiabc', ''),
    )
    for expected, receiver, needle in mixed_cases:
        self.checkequalnofix(expected, receiver, 'rfind', needle)
Guido van Rossum8b264542000-12-19 02:22:31 +0000105
def test_index(self):
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    # Rows of `hits`: (expected index, receiver text, needle text, extra args).
    hits = (
        (0, 'abcdefghiabc', '', ()),
        (3, 'abcdefghiabc', 'def', ()),
        (0, 'abcdefghiabc', 'abc', ()),
        (9, 'abcdefghiabc', 'abc', (1,)),
    )
    # Rows of `misses`: (receiver text, needle text, extra args); index()
    # must raise ValueError for each.
    misses = (
        ('abcdefghiabc', 'hib', ()),
        ('abcdefghiab', 'abc', (1,)),
        ('abcdefghi', 'ghi', (8,)),
        ('abcdefghi', 'ghi', (-1,)),
    )
    for t1, t2 in ((str, unicode), (unicode, str)):
        for expected, text, needle, extra in hits:
            self.checkequalnofix(expected, t1(text), 'index', t2(needle), *extra)
        for text, needle, extra in misses:
            self.assertRaises(ValueError, t1(text).index, t2(needle), *extra)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000118
def test_rindex(self):
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    # Rows of `hits`: (expected index, receiver text, needle text, extra args).
    hits = (
        (12, 'abcdefghiabc', '', ()),
        (3, 'abcdefghiabc', 'def', ()),
        (9, 'abcdefghiabc', 'abc', ()),
        (0, 'abcdefghiabc', 'abc', (0, -1)),
    )
    # Rows of `misses`: (receiver text, needle text, extra args); rindex()
    # must raise ValueError for each.
    misses = (
        ('abcdefghiabc', 'hib', ()),
        ('defghiabc', 'def', (1,)),
        ('defghiabc', 'abc', (0, -1)),
        ('abcdefghi', 'ghi', (0, 8)),
        ('abcdefghi', 'ghi', (0, -1)),
    )
    for t1, t2 in ((str, unicode), (unicode, str)):
        for expected, text, needle, extra in hits:
            self.checkequalnofix(expected, t1(text), 'rindex', t2(needle), *extra)

        for text, needle, extra in misses:
            self.assertRaises(ValueError, t1(text).rindex, t2(needle), *extra)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000133
def test_translate(self):
    # Each row: (expected result, receiver, translation table).  The
    # tables map ordinals to None, to another ordinal, or to a unicode
    # string.
    drop_a = ord('a')
    b_key = ord('b')
    cases = (
        (u'bbbc', u'abababc', {drop_a: None}),
        (u'iiic', u'abababc', {drop_a: None, b_key: ord('i')}),
        (u'iiix', u'abababc', {drop_a: None, b_key: ord('i'), ord('c'): u'x'}),
        (u'<i><i><i>c', u'abababc', {drop_a: None, b_key: u'<i>'}),
        (u'c', u'abababc', {drop_a: None, b_key: u''}),
        (u'xyyx', u'xzx', {ord('z'): u'yy'}),
    )
    for expected, receiver, table in cases:
        self.checkequalnofix(expected, receiver, 'translate', table)

    # A missing table and a table holding a byte-string value both raise
    # TypeError.
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {drop_a: ''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000144
def test_split(self):
    string_tests.CommonTest.test_split(self)

    # Mixed arguments
    pieces = [u'a', u'b', u'c', u'd']
    for receiver, separator in ((u'a//b//c//d', '//'), ('a//b//c//d', u'//')):
        self.checkequalnofix(pieces, receiver, 'split', separator)
    self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000152
def test_join(self):
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments
    # Each row: (expected result, separator, iterable handed to join()).
    mixed_cases = (
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    )
    for expected, separator, items in mixed_cases:
        self.checkequalnofix(expected, separator, 'join', items)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000164
def test_strip(self):
    string_tests.CommonTest.test_strip(self)
    # Stripping with the byte string "\xff" as the character set must
    # raise UnicodeError.
    strip_hello = u"hello".strip
    self.assertRaises(UnicodeError, strip_hello, "\xff")
Guido van Rossuma831cac2000-03-10 23:23:21 +0000168
def test_replace(self):
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    source_text = 'one!two!three!'
    self.checkequalnofix(u'one@two!three!', source_text, 'replace', u'!', u'@', 1)
    # An integer replacement value raises TypeError.
    replace_method = 'replace'.replace
    self.assertRaises(TypeError, replace_method, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000175
def test_comparison(self):
    # Comparisons:
    for left, right in ((u'abc', 'abc'), ('abc', u'abc'), (u'abc', u'abc')):
        self.assertEqual(left, right)
    for longer, shorter in ((u'abcd', 'abc'), ('abcd', u'abc'), (u'abcd', u'abc')):
        self.assert_(longer > shorter)
    for shorter, longer in ((u'abc', 'abcd'), ('abc', u'abcd'), (u'abc', u'abcd')):
        self.assert_(shorter < longer)

    if 0:
        # Disabled block, never executed.
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        def test_lecmp(s, s2):
            self.assert_(s < s2)

        def test_fixup(s):
            # Compare `s` against every lead/trail combination below.
            for trail in (u'\udc01', u'\udd01', u'\ude01', u'\udfff'):
                for lead in (u'\ud800', u'\ud900', u'\uda00', u'\udb00'):
                    test_lecmp(s, lead + trail)

        test_fixup(u'\ue000')
        test_fixup(u'\uff61')

    # NOTE(review): indentation was not recoverable here; this assertion
    # is assumed to sit outside the disabled block -- confirm.
    # Surrogates on both sides, no fixup required
    self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
240
def test_islower(self):
    # Shared checks first, then one unicode-only case.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    sample = u'\u1FFc'
    self.checkequalnofix(False, sample, 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000244
def test_isupper(self):
    # Shared checks first; the unicode-only case is skipped on 'java'
    # platforms.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    if sys.platform.startswith('java'):
        return
    self.checkequalnofix(False, u'\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000249
def test_istitle(self):
    # Delegate to the shared test of the same name, as the sibling is*()
    # overrides in this class do.  This previously called test_title(), so
    # the shared istitle() checks were never run for unicode.
    # NOTE(review): assumes string_tests.MixinStrUnicodeUserStringTest
    # defines test_istitle -- confirm.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000254
def test_isspace(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    # Each row: (expected isspace() result, single-character receiver).
    for expected, char in ((True, u'\u2000'), (True, u'\u200a'), (False, u'\u2014')):
        self.checkequalnofix(expected, char, 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000260
def test_isalpha(self):
    # Shared checks first, then one unicode-only case.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    sample = u'\u1FFc'
    self.checkequalnofix(True, sample, 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000264
def test_isdecimal(self):
    # Each row: (expected isdecimal() result, receiver).
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'),  # CIRCLED DIGIT ONE
        (False, u'\xbc'),    # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'),   # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, receiver in cases:
        self.checkequalnofix(expected, receiver, 'isdecimal')

    # isdecimal() called with an argument raises TypeError.
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000276
def test_isdigit(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    # Each row: (expected isdigit() result, single-character receiver).
    for expected, char in ((True, u'\u2460'), (False, u'\xbc'), (True, u'\u0660')):
        self.checkequalnofix(expected, char, 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000282
def test_isnumeric(self):
    # Each row: (expected isnumeric() result, receiver).
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, receiver in cases:
        self.checkequalnofix(expected, receiver, 'isnumeric')

    # isnumeric() called with an argument raises TypeError.
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
294
def test_contains(self):
    # Testing Unicode contains method
    def check(needle, container, present):
        # Assert membership or non-membership, according to `present`.
        if present:
            self.assert_(needle in container)
        else:
            self.assert_(needle not in container)

    # Each row: (needle, container, expected membership).
    explicit_cases = (
        ('a', u'abdb', True),
        ('a', u'bdab', True),
        ('a', u'bdaba', True),
        ('a', u'bdba', True),
        ('a', u'bdba', True),
        (u'a', u'bdba', True),
        (u'a', u'bdb', False),
        (u'a', 'bdb', False),
        (u'a', 'bdba', True),
        (u'a', ('a', 1, None), True),
        (u'a', (1, None, 'a'), True),
        (u'a', (1, None, u'a'), True),
        ('a', ('a', 1, None), True),
        ('a', (1, None, 'a'), True),
        ('a', (1, None, u'a'), True),
        ('a', ('x', 1, u'y'), False),
        ('a', ('x', 1, None), False),
        (u'abcd', u'abcxxxx', False),
        (u'ab', u'abcd', True),
        ('ab', u'abc', True),
        (u'ab', 'abc', True),
        (u'ab', (1, None, u'ab'), True),
        (u'', u'abc', True),
        ('', u'abc', True),
    )
    for needle, container, present in explicit_cases:
        check(needle, container, present)

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    # Each ASCII pair below is checked in three spellings, in this order:
    # unicode in str, str in unicode, unicode in unicode.
    ascii_pairs = (
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    )
    for needle, container, present in ascii_pairs:
        check(unicode(needle), container, present)
        check(needle, unicode(container), present)
        check(unicode(needle), unicode(container), present)

    # __contains__() without an argument raises TypeError.
    self.assertRaises(TypeError, u"abc".__contains__)
356
def test_formatting(self):
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # %5.2f pads to a minimum field width of five characters, so values
    # such as 3.00 are preceded by one padding blank in addition to the
    # blank that follows the comma in the template.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    # 1003.57 is already wider than five characters: no padding is added.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # '*' takes the field width (and precision) from the argument tuple.
    # The results are padded to five characters; a negative width
    # left-justifies.
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
    # An object whose __str__() returns unicode, formatted through a byte
    # string template.
    class Wrapper:
        def __str__(self):
            return u'\u1234'
    self.assertEqual('%s' % Wrapper(), u'\u1234')
Walter Dörwald28256f22003-01-19 16:59:20 +0000395
Walter Dörwald28256f22003-01-19 16:59:20 +0000396 def test_constructor(self):
397 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
398
399 self.assertEqual(
400 unicode(u'unicode remains unicode'),
401 u'unicode remains unicode'
402 )
403
404 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000405 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000406
Walter Dörwald28256f22003-01-19 16:59:20 +0000407 self.assertEqual(
408 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
409 u'unicode subclass becomes unicode'
410 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000411
Walter Dörwald28256f22003-01-19 16:59:20 +0000412 self.assertEqual(
413 unicode('strings are converted to unicode'),
414 u'strings are converted to unicode'
415 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000416
Walter Dörwald28256f22003-01-19 16:59:20 +0000417 class UnicodeCompat:
418 def __init__(self, x):
419 self.x = x
420 def __unicode__(self):
421 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000422
Walter Dörwald28256f22003-01-19 16:59:20 +0000423 self.assertEqual(
424 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
425 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000426
Walter Dörwald28256f22003-01-19 16:59:20 +0000427 class StringCompat:
428 def __init__(self, x):
429 self.x = x
430 def __str__(self):
431 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000432
Walter Dörwald28256f22003-01-19 16:59:20 +0000433 self.assertEqual(
434 unicode(StringCompat('__str__ compatible objects are recognized')),
435 u'__str__ compatible objects are recognized'
436 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000437
Walter Dörwald28256f22003-01-19 16:59:20 +0000438 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000439
Walter Dörwald28256f22003-01-19 16:59:20 +0000440 o = StringCompat('unicode(obj) is compatible to str()')
441 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
442 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000443
Marc-André Lemburgd25c6502004-07-23 16:13:25 +0000444 # %-formatting and .__unicode__()
445 self.assertEqual(u'%s' %
446 UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
447 u"u'%s' % obj uses obj.__unicode__()")
448 self.assertEqual(u'%s' %
449 UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
450 u"u'%s' % obj falls back to obj.__str__()")
451
Walter Dörwald28256f22003-01-19 16:59:20 +0000452 for obj in (123, 123.45, 123L):
453 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000454
Walter Dörwald28256f22003-01-19 16:59:20 +0000455 # unicode(obj, encoding, error) tests (this maps to
456 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000457
Walter Dörwald28256f22003-01-19 16:59:20 +0000458 if not sys.platform.startswith('java'):
459 self.assertRaises(
460 TypeError,
461 unicode,
462 u'decoding unicode is not supported',
463 'utf-8',
464 'strict'
465 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000466
Walter Dörwald28256f22003-01-19 16:59:20 +0000467 self.assertEqual(
468 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
469 u'strings are decoded to unicode'
470 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000471
Walter Dörwald28256f22003-01-19 16:59:20 +0000472 if not sys.platform.startswith('java'):
473 self.assertEqual(
474 unicode(
475 buffer('character buffers are decoded to unicode'),
476 'utf-8',
477 'strict'
478 ),
479 u'character buffers are decoded to unicode'
480 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000481
Walter Dörwald28256f22003-01-19 16:59:20 +0000482 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000483
Walter Dörwald28256f22003-01-19 16:59:20 +0000484 def test_codecs_utf7(self):
485 utfTests = [
486 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
487 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
488 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
489 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
490 (u'+', '+-'),
491 (u'+-', '+--'),
492 (u'+?', '+-?'),
493 (u'\?', '+AFw?'),
494 (u'+?', '+-?'),
495 (ur'\\?', '+AFwAXA?'),
496 (ur'\\\?', '+AFwAXABc?'),
497 (ur'++--', '+-+---')
498 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000499
Walter Dörwald28256f22003-01-19 16:59:20 +0000500 for (x, y) in utfTests:
501 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000502
Walter Dörwald28256f22003-01-19 16:59:20 +0000503 # surrogates not supported
504 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000505
Walter Dörwald28256f22003-01-19 16:59:20 +0000506 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000507
def test_codecs_utf8(self):
    # Each row: (unicode text, its expected UTF-8 encoding).
    encode_cases = (
        (u'', ''),
        (u'\u20ac', '\xe2\x82\xac'),
        (u'\ud800\udc02', '\xf0\x90\x80\x82'),
        (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
        (u'\ud800', '\xed\xa0\x80'),
        (u'\udc00', '\xed\xb0\x80'),
        (u'\ud800\udc02'*1000, '\xf0\x90\x80\x82'*1000),
    )
    for text, encoded in encode_cases:
        self.assertEqual(text.encode('utf-8'), encoded)

    # A longer sample that mixes non-ASCII and ASCII characters.
    mixed_text = (
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'
    )
    mixed_bytes = (
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )
    self.assertEqual(mixed_text.encode('utf-8'), mixed_bytes)

    # UTF-8 specific decoding tests
    decode_cases = (
        ('\xf0\xa3\x91\x96', u'\U00023456'),
        ('\xf0\x90\x80\x82', u'\U00010002'),
        ('\xe2\x82\xac', u'\u20ac'),
    )
    for encoded, text in decode_cases:
        self.assertEqual(unicode(encoded, 'utf-8'), text)

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000546
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    encoded = u"www.python.org.".encode("idna")
    self.assertEqual(encoded, "www.python.org.")
550
def test_codecs_errors(self):
    non_ascii_text = u'Andr\202 x'
    non_ascii_bytes = 'Andr\202 x'

    # Error handling (encoding)
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii')
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii', 'strict')
    self.assertEqual(non_ascii_text.encode('ascii', 'ignore'), "Andr x")
    self.assertEqual(non_ascii_text.encode('ascii', 'replace'), "Andr? x")

    # Error handling (decoding)
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii')
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii', 'strict')
    self.assertEqual(unicode(non_ascii_bytes, 'ascii', 'ignore'), u"Andr x")
    self.assertEqual(unicode(non_ascii_bytes, 'ascii', 'replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Error handling (bad decoder return)
    def search_function(encoding):
        def decode1(input, errors="strict"):
            return 42 # not a tuple
        def encode1(input, errors="strict"):
            return 42 # not a tuple
        def encode2(input, errors="strict"):
            return (42, 42) # no unicode
        def decode2(input, errors="strict"):
            return (42, 42) # no unicode
        # Only the two test codec names are served; any other name
        # yields None.
        broken_codecs = {
            "test.unicode1": (encode1, decode1, None, None),
            "test.unicode2": (encode2, decode2, None, None),
        }
        return broken_codecs.get(encoding)
    codecs.register(search_function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    for codec_name in ("test.unicode1", "test.unicode2"):
        self.assertRaises(TypeError, u"hello".encode, codec_name)
    # executes PyUnicode_Encode()
    import imp
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        [u"non-existing dir"]
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000605
def test_codecs(self):
    # Encoding
    hello_encodings = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec_name, encoded in hello_encodings:
        self.assertEqual(u'hello'.encode(codec_name), encoded)

    def check_roundtrip(text, codec_names):
        # Encoding then decoding with each codec must give `text` back.
        for codec_name in codec_names:
            self.assertEqual(unicode(text.encode(codec_name), codec_name), text)

    # Roundtrip safety for BMP (just the first 1024 chars)
    check_roundtrip(
        u''.join(map(unichr, xrange(1024))),
        ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         'raw_unicode_escape', 'unicode_escape', 'unicode_internal'))

    # Roundtrip safety for BMP (just the first 256 chars)
    check_roundtrip(u''.join(map(unichr, xrange(256))), ('latin-1',))

    # Roundtrip safety for BMP (just the first 128 chars)
    check_roundtrip(u''.join(map(unichr, xrange(128))), ('ascii',))

    # Roundtrip safety for non-BMP (just a few chars)
    check_roundtrip(
        u'\U00010001\U00020002\U00030003\U00040004\U00050005',
        ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         #'raw_unicode_escape',
         'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    ucs2_without_surrogates = range(0,0xd800)+range(0xe000,0x10000)
    check_roundtrip(u''.join(map(unichr, ucs2_without_surrogates)), ('utf-8',))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000646
def test_codecs_charmap(self):
    # Decoding a byte string with a charmap codec and encoding the
    # result again with the same codec must reproduce the bytes.

    def check_bytes_roundtrip(data, codec_names):
        for codec in codec_names:
            self.assertEqual(unicode(data, codec).encode(codec), data)

    # 0-127
    # Not listed: 'cp424' (undefined mappings) and 'cp875' (fails the
    # round-trip).
    low_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',
    )
    low_half = ''.join([chr(byte) for byte in xrange(128)])
    check_bytes_roundtrip(low_half, low_half_codecs)

    # 128-255
    # Not listed because they have undefined mappings:
    #   'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #   'cp1256', 'cp1257', 'cp1258',
    #   'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #   'iso8859_3', 'iso8859_6', 'iso8859_7',
    #   'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish'
    # Not listed because they fail the round-trip:
    #   'cp1006', 'cp875', 'iso8859_8'
    high_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',
    )
    high_half = ''.join([chr(byte) for byte in xrange(128, 256)])
    check_bytes_roundtrip(high_half, high_half_codecs)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000700
def test_concatenation(self):
    # Adjacent string literals are joined by the compiler; whenever at
    # least one of them is a unicode literal the result must compare
    # equal to the joined unicode text.
    literal_pairs = (
        (u"abc" u"def", u"abcdef"),
        ("abc" u"def", u"abcdef"),
        (u"abc" "def", u"abcdef"),
        (u"abc" u"def" "ghi", u"abcdefghi"),
        ("abc" "def" u"ghi", u"abcdefghi"),
    )
    for joined, expected in literal_pairs:
        self.assertEqual(joined, expected)
Fred Drake004d5e62000-10-23 17:22:08 +0000707
def test_printing(self):
    # Smoke test for the print statement with unicode arguments.
    # Nothing is asserted: the test passes as long as none of the
    # print statements below raises an exception.
    class BitBucket:
        # Minimal file-like object: accepts write() and discards the text.
        def write(self, text):
            pass

    out = BitBucket()
    # Unicode only, then unicode mixed with byte strings in either order.
    print >>out, u'abc'
    print >>out, u'abc', u'def'
    print >>out, u'abc', 'def'
    print >>out, 'abc', u'def'
    # Text ending in a newline, printed with and without a trailing comma.
    # The statements are intentionally repeated and their order is kept
    # as is.
    # NOTE(review): presumably this exercises print's "softspace"
    # bookkeeping across consecutive calls -- confirm before reordering.
    print >>out, u'abc\n'
    print >>out, u'abc\n',
    print >>out, u'abc\n',
    print >>out, u'def\n'
    print >>out, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000723
def test_ucs4(self):
    # A character beyond the BMP must survive a raw-unicode-escape
    # encode/decode round trip.  Nothing is checked when
    # sys.maxunicode is 0xFFFF.
    if sys.maxunicode != 0xFFFF:
        original = u'\U00100000'
        escaped = original.encode("raw-unicode-escape")
        restored = escaped.decode("raw-unicode-escape")
        self.assertEqual(original, restored)
730
def test_conversion(self):
    # Make sure __unicode__() works properly

    # Classic class that only defines __str__.
    class Foo0:
        def __str__(self):
            return "foo"

    # Classic class whose __unicode__ returns unicode.
    class Foo1:
        def __unicode__(self):
            return u"foo"

    # New-style class whose __unicode__ returns unicode.
    class Foo2(object):
        def __unicode__(self):
            return u"foo"

    # New-style class whose __unicode__ returns a byte string.
    class Foo3(object):
        def __unicode__(self):
            return "foo"

    # str subclass whose __unicode__ returns a byte string.
    class Foo4(str):
        def __unicode__(self):
            return "foo"

    # unicode subclass whose __unicode__ returns a byte string.
    class Foo5(unicode):
        def __unicode__(self):
            return "foo"

    # str subclass defining both conversions with different results.
    class Foo6(str):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    # unicode subclass defining both conversions with different results.
    class Foo7(unicode):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    # unicode subclass that doubles its content on construction and
    # returns itself from __unicode__.
    class Foo8(unicode):
        def __new__(cls, content=""):
            doubled = 2*content
            return unicode.__new__(cls, doubled)

        def __unicode__(self):
            return self

    # unicode subclass whose __str__ and __unicode__ both return byte
    # strings, with different contents.
    class Foo9(unicode):
        def __str__(self):
            return "string"

        def __unicode__(self):
            return "not unicode"

    # unicode() applied to each object must give the paired result.
    unicode_cases = (
        (Foo0(), u"foo"),
        (Foo1(), u"foo"),
        (Foo2(), u"foo"),
        (Foo3(), u"foo"),
        (Foo4("bar"), u"foo"),
        (Foo5("bar"), u"foo"),
        (Foo6("bar"), u"foou"),
        (Foo7("bar"), u"foou"),
        (Foo8("foo"), u"foofoo"),
        (Foo9("foo"), u"not unicode"),
    )
    for instance, expected in unicode_cases:
        self.assertEqual(unicode(instance), expected)

    # str() must still use __str__ on the unicode subclass.
    self.assertEqual(str(Foo9("foo")), "string")
793
def test_main():
    """Run the UnicodeTest test case through test_support.run_unittest()."""
    test_support.run_unittest(UnicodeTest)
Barry Warsaw817918c2002-08-06 16:58:21 +0000796
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()