blob: e298a1433547c2a2b9f98ac6a1764733daa2fcbb [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Neal Norwitz430f68b2005-11-24 22:00:56 +000012# Error handling (bad decoder return)
def search_function(encoding):
    """Codec search hook that serves two deliberately broken test codecs.

    "test.unicode1" maps to encode/decode functions that return a bare
    integer instead of a tuple; "test.unicode2" maps to functions that
    return a tuple whose first item is not a string.  Any other encoding
    name yields None.
    """
    def encode1(input, errors="strict"):
        return 42 # not a tuple

    def decode1(input, errors="strict"):
        return 42 # not a tuple

    def encode2(input, errors="strict"):
        return (42, 42) # no unicode

    def decode2(input, errors="strict"):
        return (42, 42) # no unicode

    if encoding == "test.unicode1":
        return (encode1, decode1, None, None)
    if encoding == "test.unicode2":
        return (encode2, decode2, None, None)
    return None

codecs.register(search_function)
29
Walter Dörwald0fd583c2003-02-21 12:53:50 +000030class UnicodeTest(
31 string_tests.CommonTest,
Walter Dörwald57d88e52004-08-26 16:53:04 +000032 string_tests.MixinStrUnicodeUserStringTest,
33 string_tests.MixinStrUnicodeTest,
Walter Dörwald0fd583c2003-02-21 12:53:50 +000034 ):
35 type2test = unicode
36
def checkequalnofix(self, result, object, methodname, *args):
    # Call object.methodname(*args) and require that the outcome equals
    # `result` and has exactly the same type as `result`.
    actual = getattr(object, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(type(actual) is type(result))

    # Nothing more to check unless the method handed back its receiver.
    if actual is not object:
        return

    # The receiver itself was returned: make sure a unicode subclass
    # instance is not returned unchanged in the same situation.
    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)

    wrapped = usub(object)
    actual = getattr(wrapped, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(wrapped is not actual)
Guido van Rossume4874ae2001-09-21 15:36:41 +000054
def test_literals(self):
    # Different escape spellings of the same code point compare equal.
    for short_form, long_form in (
        (u'\xff', u'\u00ff'),
        (u'\uffff', u'\U0000ffff'),
    ):
        self.assertEqual(short_form, long_form)

    # Evaluating these \U escapes must raise UnicodeError.
    rejected_sources = (
        'u\'\\Ufffffffe\'',
        'u\'\\Uffffffff\'',
        'u\'\\U%08x\'' % 0x110000,
    )
    for source in rejected_sources:
        self.assertRaises(UnicodeError, eval, source)
61
def test_repr(self):
    # All repr() checks are skipped when sys.platform starts with 'java'.
    if sys.platform.startswith('java'):
        return

    # Test basic sanity of repr(): (value, expected repr) pairs
    simple_cases = (
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        # NOTE(review): same case as the previous entry.
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    )
    for value, expected in simple_cases:
        self.assertEqual(repr(value), expected)

    # Expected repr() of the code points 0x00-0xff joined into one string
    expected_latin1 = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    first_256 = u''.join(map(unichr, xrange(256)))
    self.assertEqual(repr(first_256), expected_latin1)
95
def test_count(self):
    # Shared count() checks first, then str/unicode mixed-argument cases.
    string_tests.CommonTest.test_count(self)

    # check mixed argument types: (expected, receiver, count() arguments)
    mixed_cases = (
        (3, 'aaa', (u'a',)),
        (0, 'aaa', (u'b',)),
        (3, u'aaa', ('a',)),
        (0, u'aaa', ('b',)),
        # NOTE(review): same case as the previous entry.
        (0, u'aaa', ('b',)),
        (1, u'aaa', ('a', -1)),
        (3, u'aaa', ('a', -10)),
        (2, u'aaa', ('a', 0, -1)),
        (0, u'aaa', ('a', 0, -10)),
    )
    for expected, receiver, count_args in mixed_cases:
        self.checkequalnofix(expected, receiver, 'count', *count_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000108
def test_find(self):
    # Chain to the shared find() checks.  This override otherwise hides the
    # inherited test, whereas test_count, test_rfind, test_index and
    # test_rindex all call their CommonTest counterpart first.
    # NOTE(review): assumes string_tests.CommonTest defines test_find like
    # its count/rfind/index/rindex siblings -- confirm.
    string_tests.CommonTest.test_find(self)

    # unicode-specific checks (result type is verified as well)
    self.checkequalnofix(0, u'abcdefghiabc', 'find', u'abc')
    self.checkequalnofix(9, u'abcdefghiabc', 'find', u'abc', 1)
    self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)

    # find() needs a substring argument, and it must be string-like
    self.assertRaises(TypeError, u'hello'.find)
    self.assertRaises(TypeError, u'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000116
def test_rfind(self):
    # Shared rfind() checks first, then str/unicode mixed-argument cases.
    string_tests.CommonTest.test_rfind(self)

    # check mixed argument types: (expected, receiver, substring)
    mixed_cases = (
        (9, 'abcdefghiabc', u'abc'),
        (12, 'abcdefghiabc', u''),
        (12, u'abcdefghiabc', ''),
    )
    for expected, receiver, needle in mixed_cases:
        self.checkequalnofix(expected, receiver, 'rfind', needle)
Guido van Rossum8b264542000-12-19 02:22:31 +0000123
def test_index(self):
    # Shared index() checks first, then every case is repeated with the
    # receiver and the argument built from opposite string types.
    string_tests.CommonTest.test_index(self)

    # check mixed argument types
    type_pairs = ((str, unicode), (unicode, str))
    for receiver_type, needle_type in type_pairs:
        # (expected position, receiver text, index() arguments)
        found_cases = (
            (0, 'abcdefghiabc', ('',)),
            (3, 'abcdefghiabc', ('def',)),
            (0, 'abcdefghiabc', ('abc',)),
            (9, 'abcdefghiabc', ('abc', 1)),
        )
        for expected, text, call_args in found_cases:
            needle = needle_type(call_args[0])
            self.checkequalnofix(expected, receiver_type(text), 'index',
                                 needle, *call_args[1:])

        # (receiver text, index() arguments) that must raise ValueError
        missing_cases = (
            ('abcdefghiabc', ('hib',)),
            ('abcdefghiab', ('abc', 1)),
            ('abcdefghi', ('ghi', 8)),
            ('abcdefghi', ('ghi', -1)),
        )
        for text, call_args in missing_cases:
            needle = needle_type(call_args[0])
            self.assertRaises(ValueError, receiver_type(text).index,
                              needle, *call_args[1:])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000136
def test_rindex(self):
    # Shared rindex() checks first, then every case is repeated with the
    # receiver and the argument built from opposite string types.
    string_tests.CommonTest.test_rindex(self)

    # check mixed argument types
    type_pairs = ((str, unicode), (unicode, str))
    for receiver_type, needle_type in type_pairs:
        # (expected position, receiver text, rindex() arguments)
        found_cases = (
            (12, 'abcdefghiabc', ('',)),
            (3, 'abcdefghiabc', ('def',)),
            (9, 'abcdefghiabc', ('abc',)),
            (0, 'abcdefghiabc', ('abc', 0, -1)),
        )
        for expected, text, call_args in found_cases:
            needle = needle_type(call_args[0])
            self.checkequalnofix(expected, receiver_type(text), 'rindex',
                                 needle, *call_args[1:])

        # (receiver text, rindex() arguments) that must raise ValueError
        missing_cases = (
            ('abcdefghiabc', ('hib',)),
            ('defghiabc', ('def', 1)),
            ('defghiabc', ('abc', 0, -1)),
            ('abcdefghi', ('ghi', 0, 8)),
            ('abcdefghi', ('ghi', 0, -1)),
        )
        for text, call_args in missing_cases:
            needle = needle_type(call_args[0])
            self.assertRaises(ValueError, receiver_type(text).rindex,
                              needle, *call_args[1:])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000151
def test_translate(self):
    # (expected, receiver, mapping) triples.  The mappings used here send a
    # code point to None, to another code point, or to a unicode string.
    cases = (
        (u'bbbc', u'abababc', {ord('a'):None}),
        (u'iiic', u'abababc', {ord('a'):None, ord('b'):ord('i')}),
        (u'iiix', u'abababc', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
        (u'<i><i><i>c', u'abababc', {ord('a'):None, ord('b'):u'<i>'}),
        (u'c', u'abababc', {ord('a'):None, ord('b'):u''}),
        (u'xyyx', u'xzx', {ord('z'):u'yy'}),
    )
    for expected, receiver, mapping in cases:
        self.checkequalnofix(expected, receiver, 'translate', mapping)

    # A missing table, or a table with a byte-string value, is a TypeError.
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000162
def test_split(self):
    # Shared split() checks first, then str/unicode mixed-argument cases.
    string_tests.CommonTest.test_split(self)

    # Mixed arguments: the pieces come back as unicode either way round
    letters = [u'a', u'b', u'c', u'd']
    self.checkequalnofix(letters, u'a//b//c//d', 'split', '//')
    self.checkequalnofix(letters, 'a//b//c//d', 'split', u'//')

    # Separator at the very end leaves a trailing empty piece
    self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000170
def test_join(self):
    # Shared join() checks first, then str/unicode mixed-argument cases.
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments: (expected, separator, items to join)
    mixed_cases = (
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    )
    for expected, separator, items in mixed_cases:
        self.checkequalnofix(expected, separator, 'join', items)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000182
def test_strip(self):
    # Shared strip() checks first.
    string_tests.CommonTest.test_strip(self)

    # A byte-string argument containing "\xff" must raise UnicodeError.
    high_byte = "\xff"
    self.assertRaises(UnicodeError, u"hello".strip, high_byte)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000186
def test_replace(self):
    # Shared replace() checks first.
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    byte_text = 'one!two!three!'
    self.checkequalnofix(u'one@two!three!', byte_text, 'replace', u'!', u'@', 1)

    # A non-string replacement argument is rejected with TypeError.
    self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000193
def test_comparison(self):
    # Comparisons: equal text compares equal in every str/unicode pairing
    for left, right in (
        (u'abc', 'abc'),
        ('abc', u'abc'),
        (u'abc', u'abc'),
    ):
        self.assertEqual(left, right)

    # 'abcd' sorts after 'abc' in every str/unicode pairing ...
    for longer, shorter in (
        (u'abcd', 'abc'),
        ('abcd', u'abc'),
        (u'abcd', u'abc'),
    ):
        self.assert_(longer > shorter)

    # ... and 'abc' sorts before 'abcd'.
    for shorter, longer in (
        (u'abc', 'abcd'),
        ('abc', u'abcd'),
        (u'abc', u'abcd'),
    ):
        self.assert_(shorter < longer)

    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        def test_lecmp(s, s2):
            self.assert_(s < s2)

        def test_fixup(s):
            surrogate_pairs = (
                u'\ud800\udc01', u'\ud900\udc01', u'\uda00\udc01', u'\udb00\udc01',
                u'\ud800\udd01', u'\ud900\udd01', u'\uda00\udd01', u'\udb00\udd01',
                u'\ud800\ude01', u'\ud900\ude01', u'\uda00\ude01', u'\udb00\ude01',
                u'\ud800\udfff', u'\ud900\udfff', u'\uda00\udfff', u'\udb00\udfff',
            )
            for s2 in surrogate_pairs:
                test_lecmp(s, s2)

        for bmp_char in (u'\ue000', u'\uff61'):
            test_fixup(bmp_char)

    # Surrogates on both sides, no fixup required
    self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
258
def test_islower(self):
    # Shared islower() checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)

    # U+1FFC must not be reported as lowercase.
    sample = u'\u1FFc'
    self.checkequalnofix(False, sample, 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000262
def test_isupper(self):
    # Shared isupper() checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)

    # The U+1FFC check is skipped when sys.platform starts with 'java'.
    if sys.platform.startswith('java'):
        return
    self.checkequalnofix(False, u'\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000267
def test_istitle(self):
    # Chain to the shared istitle() checks.  The previous code called the
    # parent's test_title, which exercises title() rather than istitle().
    # NOTE(review): assumes MixinStrUnicodeUserStringTest defines
    # test_istitle alongside test_islower/test_isupper -- confirm.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)

    # U+1FFC counts as titlecased, alone and as the first letter of a word.
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000272
def test_isspace(self):
    # Shared isspace() checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)

    # (expected, character) pairs for non-ASCII code points
    for expected, char in (
        (True, u'\u2000'),
        (True, u'\u200a'),
        (False, u'\u2014'),
    ):
        self.checkequalnofix(expected, char, 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000278
def test_isalpha(self):
    # Shared isalpha() checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)

    # U+1FFC must be reported as alphabetic.
    sample = u'\u1FFc'
    self.checkequalnofix(True, sample, 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000282
def test_isdecimal(self):
    # (expected, text) pairs for unicode.isdecimal()
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'),  # CIRCLED DIGIT ONE
        (False, u'\xbc'),  # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'),  # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in cases:
        self.checkequalnofix(expected, text, 'isdecimal')

    # isdecimal() accepts no arguments
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000294
def test_isdigit(self):
    # Shared isdigit() checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)

    # (expected, character) pairs for non-ASCII code points
    for expected, char in (
        (True, u'\u2460'),
        (False, u'\xbc'),
        (True, u'\u0660'),
    ):
        self.checkequalnofix(expected, char, 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000300
def test_isnumeric(self):
    # (expected, text) pairs for unicode.isnumeric()
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in cases:
        self.checkequalnofix(expected, text, 'isnumeric')

    # isnumeric() accepts no arguments
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
312
def test_contains(self):
    # Testing Unicode contains method
    # Each entry is (needle, container, whether `needle in container`).
    first_batch = (
        ('a', u'abdb', True),
        ('a', u'bdab', True),
        ('a', u'bdaba', True),
        ('a', u'bdba', True),
        # NOTE(review): same case as the previous entry.
        ('a', u'bdba', True),
        (u'a', u'bdba', True),
        (u'a', u'bdb', False),
        (u'a', 'bdb', False),
        (u'a', 'bdba', True),
        (u'a', ('a',1,None), True),
        (u'a', (1,None,'a'), True),
        (u'a', (1,None,u'a'), True),
        ('a', ('a',1,None), True),
        ('a', (1,None,'a'), True),
        ('a', (1,None,u'a'), True),
        ('a', ('x',1,u'y'), False),
        ('a', ('x',1,None), False),
        (u'abcd', u'abcxxxx', False),
        (u'ab', u'abcd', True),
        ('ab', u'abc', True),
        (u'ab', 'abc', True),
        (u'ab', (1,None,u'ab'), True),
        (u'', u'abc', True),
        ('', u'abc', True),
    )
    for needle, container, expected in first_batch:
        self.assert_((needle in container) is expected)

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    second_batch = (
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    )
    for needle, container, expected in second_batch:
        self.assert_((needle in container) is expected)

    # __contains__ requires exactly one argument
    self.assertRaises(TypeError, u"abc".__contains__)
374
def test_formatting(self):
    # Shared %-formatting checks first.
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)

    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # %5.2f pads "3.00" to five columns, so the expected text has TWO
    # spaces after the last comma.  1003.57 already exceeds the width and
    # gets no padding.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    # The %r check is skipped when sys.platform starts with 'java'.
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # Width 5 pads to five columns: right-aligned for a positive width,
    # left-aligned for a negative one.  The expected literals therefore
    # carry two (or, after truncation to 'ab', three) padding spaces.
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    # One literal space from the format plus two padding spaces.
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')

    # An object whose __str__ returns unicode makes '%s' yield unicode.
    class Wrapper:
        def __str__(self):
            return u'\u1234'
    self.assertEqual('%s' % Wrapper(), u'\u1234')
Walter Dörwald28256f22003-01-19 16:59:20 +0000413
Walter Dörwald28256f22003-01-19 16:59:20 +0000414 def test_constructor(self):
415 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
416
417 self.assertEqual(
418 unicode(u'unicode remains unicode'),
419 u'unicode remains unicode'
420 )
421
422 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000423 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000424
Walter Dörwald28256f22003-01-19 16:59:20 +0000425 self.assertEqual(
426 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
427 u'unicode subclass becomes unicode'
428 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000429
Walter Dörwald28256f22003-01-19 16:59:20 +0000430 self.assertEqual(
431 unicode('strings are converted to unicode'),
432 u'strings are converted to unicode'
433 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000434
Walter Dörwald28256f22003-01-19 16:59:20 +0000435 class UnicodeCompat:
436 def __init__(self, x):
437 self.x = x
438 def __unicode__(self):
439 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000440
Walter Dörwald28256f22003-01-19 16:59:20 +0000441 self.assertEqual(
442 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
443 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000444
Walter Dörwald28256f22003-01-19 16:59:20 +0000445 class StringCompat:
446 def __init__(self, x):
447 self.x = x
448 def __str__(self):
449 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000450
Walter Dörwald28256f22003-01-19 16:59:20 +0000451 self.assertEqual(
452 unicode(StringCompat('__str__ compatible objects are recognized')),
453 u'__str__ compatible objects are recognized'
454 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000455
Walter Dörwald28256f22003-01-19 16:59:20 +0000456 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000457
Walter Dörwald28256f22003-01-19 16:59:20 +0000458 o = StringCompat('unicode(obj) is compatible to str()')
459 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
460 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000461
Marc-André Lemburgd25c6502004-07-23 16:13:25 +0000462 # %-formatting and .__unicode__()
463 self.assertEqual(u'%s' %
464 UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
465 u"u'%s' % obj uses obj.__unicode__()")
466 self.assertEqual(u'%s' %
467 UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
468 u"u'%s' % obj falls back to obj.__str__()")
469
Walter Dörwald28256f22003-01-19 16:59:20 +0000470 for obj in (123, 123.45, 123L):
471 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000472
Walter Dörwald28256f22003-01-19 16:59:20 +0000473 # unicode(obj, encoding, error) tests (this maps to
474 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000475
Walter Dörwald28256f22003-01-19 16:59:20 +0000476 if not sys.platform.startswith('java'):
477 self.assertRaises(
478 TypeError,
479 unicode,
480 u'decoding unicode is not supported',
481 'utf-8',
482 'strict'
483 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000484
Walter Dörwald28256f22003-01-19 16:59:20 +0000485 self.assertEqual(
486 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
487 u'strings are decoded to unicode'
488 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000489
Walter Dörwald28256f22003-01-19 16:59:20 +0000490 if not sys.platform.startswith('java'):
491 self.assertEqual(
492 unicode(
493 buffer('character buffers are decoded to unicode'),
494 'utf-8',
495 'strict'
496 ),
497 u'character buffers are decoded to unicode'
498 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000499
Walter Dörwald28256f22003-01-19 16:59:20 +0000500 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000501
Walter Dörwald28256f22003-01-19 16:59:20 +0000502 def test_codecs_utf7(self):
503 utfTests = [
504 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
505 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
506 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
507 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
508 (u'+', '+-'),
509 (u'+-', '+--'),
510 (u'+?', '+-?'),
511 (u'\?', '+AFw?'),
512 (u'+?', '+-?'),
513 (ur'\\?', '+AFwAXA?'),
514 (ur'\\\?', '+AFwAXABc?'),
515 (ur'++--', '+-+---')
516 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000517
Walter Dörwald28256f22003-01-19 16:59:20 +0000518 for (x, y) in utfTests:
519 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000520
Walter Dörwald28256f22003-01-19 16:59:20 +0000521 # surrogates not supported
522 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000523
Walter Dörwald28256f22003-01-19 16:59:20 +0000524 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000525
def test_codecs_utf8(self):
    # (unicode text, expected UTF-8 bytes) pairs for short inputs
    short_cases = (
        (u'', ''),
        (u'\u20ac', '\xe2\x82\xac'),
        (u'\ud800\udc02', '\xf0\x90\x80\x82'),
        (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
        (u'\ud800', '\xed\xa0\x80'),
        (u'\udc00', '\xed\xb0\x80'),
    )
    for text, expected in short_cases:
        self.assertEqual(text.encode('utf-8'), expected)

    # The same surrogate pair repeated 1000 times
    repeated_pair = u'\ud800\udc02'*1000
    self.assertEqual(
        repeated_pair.encode('utf-8'),
        '\xf0\x90\x80\x82'*1000
    )

    # A longer passage mixing three-byte characters with ASCII
    passage = (
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'
    )
    passage_utf8 = (
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )
    self.assertEqual(passage.encode('utf-8'), passage_utf8)

    # UTF-8 specific decoding tests
    for encoded, expected in (
        ('\xf0\xa3\x91\x96', u'\U00023456'),
        ('\xf0\x90\x80\x82', u'\U00010002'),
        ('\xe2\x82\xac', u'\u20ac'),
    ):
        self.assertEqual(unicode(encoded, 'utf-8'), expected)

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000564
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    dotted_name = u"www.python.org."
    self.assertEqual(dotted_name.encode("idna"), "www.python.org.")
568
def test_codecs_errors(self):
    # The same text with one non-ASCII character, as unicode and as bytes
    non_ascii_text = u'Andr\202 x'
    non_ascii_bytes = 'Andr\202 x'

    # Error handling (encoding)
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii')
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii','strict')
    self.assertEqual(non_ascii_text.encode('ascii','ignore'), "Andr x")
    self.assertEqual(non_ascii_text.encode('ascii','replace'), "Andr? x")

    # Error handling (decoding)
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii')
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii','strict')
    self.assertEqual(unicode(non_ascii_bytes,'ascii','ignore'), u"Andr x")
    self.assertEqual(unicode(non_ascii_bytes,'ascii','replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Codecs whose functions return the wrong kind of value (these names
    # are expected to resolve through the module-level search function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode2")

    # executes PyUnicode_Encode()
    import imp
    search_path = [u"non-existing dir"]
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        search_path
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000606
def test_codecs(self):
    # Encoding: (codec name, expected bytes) for the text u'hello'
    hello_cases = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec_name, expected in hello_cases:
        self.assertEqual(u'hello'.encode(codec_name), expected)

    # Roundtrip safety for BMP (just the first 1024 chars)
    text = u''.join(map(unichr, xrange(1024)))
    for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                     'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
        encoded = text.encode(encoding)
        self.assertEqual(unicode(encoded, encoding), text)

    # Roundtrip safety for BMP (just the first 256 chars)
    text = u''.join(map(unichr, xrange(256)))
    for encoding in ('latin-1',):
        encoded = text.encode(encoding)
        self.assertEqual(unicode(encoded, encoding), text)

    # Roundtrip safety for BMP (just the first 128 chars)
    text = u''.join(map(unichr, xrange(128)))
    for encoding in ('ascii',):
        encoded = text.encode(encoding)
        self.assertEqual(unicode(encoded, encoding), text)

    # Roundtrip safety for non-BMP (just a few chars)
    text = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
    for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                     #'raw_unicode_escape',
                     'unicode_escape', 'unicode_internal'):
        encoded = text.encode(encoding)
        self.assertEqual(unicode(encoded, encoding), text)

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    code_points = range(0,0xd800)+range(0xe000,0x10000)
    text = u''.join(map(unichr, code_points))
    for encoding in ('utf-8',):
        encoded = text.encode(encoding)
        self.assertEqual(unicode(encoded, encoding), text)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000647
Walter Dörwald28256f22003-01-19 16:59:20 +0000648 def test_codecs_charmap(self):
649 # 0-127
650 s = ''.join(map(chr, xrange(128)))
651 for encoding in (
652 'cp037', 'cp1026',
653 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
654 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
655 'cp863', 'cp865', 'cp866',
656 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
657 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
658 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
659 'mac_cyrillic', 'mac_latin2',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000660
Walter Dörwald28256f22003-01-19 16:59:20 +0000661 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
662 'cp1256', 'cp1257', 'cp1258',
663 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000664
Walter Dörwald28256f22003-01-19 16:59:20 +0000665 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
666 'cp1006', 'iso8859_8',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000667
Walter Dörwald28256f22003-01-19 16:59:20 +0000668 ### These have undefined mappings:
669 #'cp424',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000670
Walter Dörwald28256f22003-01-19 16:59:20 +0000671 ### These fail the round-trip:
672 #'cp875'
Guido van Rossum9e896b32000-04-05 20:11:21 +0000673
Walter Dörwald28256f22003-01-19 16:59:20 +0000674 ):
675 self.assertEqual(unicode(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000676
Walter Dörwald28256f22003-01-19 16:59:20 +0000677 # 128-255
678 s = ''.join(map(chr, xrange(128, 256)))
679 for encoding in (
680 'cp037', 'cp1026',
681 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
682 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
683 'cp863', 'cp865', 'cp866',
684 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
685 'iso8859_2', 'iso8859_4', 'iso8859_5',
686 'iso8859_9', 'koi8_r', 'latin_1',
687 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000688
Walter Dörwald28256f22003-01-19 16:59:20 +0000689 ### These have undefined mappings:
690 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
691 #'cp1256', 'cp1257', 'cp1258',
692 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
693 #'iso8859_3', 'iso8859_6', 'iso8859_7',
694 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000695
Walter Dörwald28256f22003-01-19 16:59:20 +0000696 ### These fail the round-trip:
697 #'cp1006', 'cp875', 'iso8859_8',
Tim Peters2f228e72001-05-13 00:19:31 +0000698
Walter Dörwald28256f22003-01-19 16:59:20 +0000699 ):
700 self.assertEqual(unicode(s, encoding).encode(encoding), s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000701
Walter Dörwald28256f22003-01-19 16:59:20 +0000702 def test_concatenation(self):
703 self.assertEqual((u"abc" u"def"), u"abcdef")
704 self.assertEqual(("abc" u"def"), u"abcdef")
705 self.assertEqual((u"abc" "def"), u"abcdef")
706 self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
707 self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +0000708
    def test_printing(self):
        """Print unicode objects to a file-like object via ``print >>``.

        Nothing is asserted here: the statements only have to complete
        without raising.  All output goes to an object whose write()
        method discards it.
        """
        class BitBucket:
            # Minimal file-like object: write() accepts the text and drops it.
            def write(self, text):
                pass

        out = BitBucket()
        # Unicode alone, and mixed with byte strings in either order.
        print >>out, u'abc'
        print >>out, u'abc', u'def'
        print >>out, u'abc', 'def'
        print >>out, 'abc', u'def'
        # Text that already ends in a newline, printed with and without
        # the trailing comma that suppresses print's own newline.
        print >>out, u'abc\n'
        print >>out, u'abc\n',
        print >>out, u'abc\n',
        print >>out, u'def\n'
        print >>out, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000724
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000725 def test_ucs4(self):
726 if sys.maxunicode == 0xFFFF:
727 return
728 x = u'\U00100000'
729 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
730 self.assertEqual(x, y)
731
Brett Cannonc3647ac2005-04-26 03:45:26 +0000732 def test_conversion(self):
733 # Make sure __unicode__() works properly
734 class Foo0:
735 def __str__(self):
736 return "foo"
737
738 class Foo1:
739 def __unicode__(self):
740 return u"foo"
741
742 class Foo2(object):
743 def __unicode__(self):
744 return u"foo"
745
746 class Foo3(object):
747 def __unicode__(self):
748 return "foo"
749
750 class Foo4(str):
751 def __unicode__(self):
752 return "foo"
753
754 class Foo5(unicode):
755 def __unicode__(self):
756 return "foo"
757
758 class Foo6(str):
759 def __str__(self):
760 return "foos"
761
762 def __unicode__(self):
763 return u"foou"
764
765 class Foo7(unicode):
766 def __str__(self):
767 return "foos"
768 def __unicode__(self):
769 return u"foou"
770
771 class Foo8(unicode):
772 def __new__(cls, content=""):
773 return unicode.__new__(cls, 2*content)
774 def __unicode__(self):
775 return self
776
777 class Foo9(unicode):
778 def __str__(self):
779 return "string"
780 def __unicode__(self):
781 return "not unicode"
782
783 self.assertEqual(unicode(Foo0()), u"foo")
784 self.assertEqual(unicode(Foo1()), u"foo")
785 self.assertEqual(unicode(Foo2()), u"foo")
786 self.assertEqual(unicode(Foo3()), u"foo")
787 self.assertEqual(unicode(Foo4("bar")), u"foo")
788 self.assertEqual(unicode(Foo5("bar")), u"foo")
789 self.assertEqual(unicode(Foo6("bar")), u"foou")
790 self.assertEqual(unicode(Foo7("bar")), u"foou")
791 self.assertEqual(unicode(Foo8("foo")), u"foofoo")
792 self.assertEqual(str(Foo9("foo")), "string")
793 self.assertEqual(unicode(Foo9("foo")), u"not unicode")
794
def test_main():
    """Hand the UnicodeTest class to test_support.run_unittest()."""
    test_support.run_unittest(UnicodeTest)

# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    test_main()