blob: 18a2d46ba98323fd02cc7b463f316bc0d8dc65bc [file] [log] [blame]
# -*- coding: iso-8859-1 -*-
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Walter Dörwald0fd583c2003-02-21 12:53:50 +000012class UnicodeTest(
13 string_tests.CommonTest,
14 string_tests.MixinStrUnicodeUserStringTest
15 ):
16 type2test = unicode
17
def checkequalnofix(self, result, object, methodname, *args):
    """Assert that object.methodname(*args) equals result and has its exact type.

    If the call handed back the very object it was invoked on, the call
    is repeated on an instance of a unicode subclass wrapping the same
    value; that second result must still equal result but must not be
    the subclass instance itself.
    """
    outcome = getattr(object, methodname)(*args)
    self.assertEqual(outcome, result)
    self.assert_(type(outcome) is type(result))

    if outcome is not object:
        return

    # The original came back unchanged: make sure that
    # this doesn't happen with subclasses.
    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)

    wrapped = usub(object)
    outcome = getattr(wrapped, methodname)(*args)
    self.assertEqual(outcome, result)
    self.assert_(wrapped is not outcome)
Guido van Rossume4874ae2001-09-21 15:36:41 +000035
def test_literals(self):
    # Different escape spellings of the same code point compare equal.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')

    # Evaluating a literal whose 8-digit escape is out of range must fail.
    rejected_sources = (
        'u\'\\Ufffffffe\'',
        'u\'\\Uffffffff\'',
        'u\'\\U%08x\'' % 0x110000,
    )
    for source in rejected_sources:
        self.assertRaises(UnicodeError, eval, source)
42
def test_repr(self):
    # Nothing is checked when sys.platform starts with 'java'.
    if sys.platform.startswith('java'):
        return

    # Test basic sanity of repr(); each pair is (value, expected repr).
    # The quote-mixing case used to be asserted twice verbatim; it is
    # listed once here.
    expectations = [
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    ]
    for value, expected in expectations:
        self.assertEqual(repr(value), expected)

    # repr() of the 256 code points 0..255 joined into one string.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    testrepr = repr(u''.join(map(unichr, xrange(256))))
    self.assertEqual(testrepr, latin1repr)
76
def test_count(self):
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    # Each row is (expected count, subject, arguments passed to count()).
    # The (0, u'aaa', 'b') row was previously asserted twice in a row.
    cases = [
        (3, 'aaa', (u'a',)),
        (0, 'aaa', (u'b',)),
        (3, u'aaa', ('a',)),
        (0, u'aaa', ('b',)),
        (1, u'aaa', ('a', -1)),
        (3, u'aaa', ('a', -10)),
        (2, u'aaa', ('a', 0, -1)),
        (0, u'aaa', ('a', 0, -10)),
    ]
    for expected, subject, count_args in cases:
        self.checkequalnofix(expected, subject, 'count', *count_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +000089
def test_find(self):
    # Each row is (expected index, arguments passed to find()).
    haystack = u'abcdefghiabc'
    for expected, find_args in (
        (0, (u'abc',)),
        (9, (u'abc', 1)),
        (-1, (u'def', 4)),
    ):
        self.checkequalnofix(expected, haystack, 'find', *find_args)

    # A missing or non-string argument is a TypeError.
    finder = u'hello'.find
    self.assertRaises(TypeError, finder)
    self.assertRaises(TypeError, finder, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +000097
def test_rfind(self):
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    # Each row is (expected index, subject, substring).
    mixed_cases = (
        (9, 'abcdefghiabc', u'abc'),
        (12, 'abcdefghiabc', u''),
        (12, u'abcdefghiabc', ''),
    )
    for expected, subject, needle in mixed_cases:
        self.checkequalnofix(expected, subject, 'rfind', needle)
Guido van Rossum8b264542000-12-19 02:22:31 +0000104
def test_index(self):
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    # Runs once with a str subject / unicode argument and once reversed.
    for make_text, make_sub in ((str, unicode), (unicode, str)):
        full = make_text('abcdefghiabc')
        self.checkequalnofix(0, full, 'index', make_sub(''))
        self.checkequalnofix(3, full, 'index', make_sub('def'))
        self.checkequalnofix(0, full, 'index', make_sub('abc'))
        self.checkequalnofix(9, full, 'index', make_sub('abc'), 1)

        # Substrings that cannot be found raise ValueError.
        self.assertRaises(ValueError, full.index, make_sub('hib'))
        self.assertRaises(ValueError, make_text('abcdefghiab').index, make_sub('abc'), 1)
        shorter = make_text('abcdefghi')
        self.assertRaises(ValueError, shorter.index, make_sub('ghi'), 8)
        self.assertRaises(ValueError, shorter.index, make_sub('ghi'), -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000117
def test_rindex(self):
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    # Runs once with a str subject / unicode argument and once reversed.
    for make_text, make_sub in ((str, unicode), (unicode, str)):
        full = make_text('abcdefghiabc')
        self.checkequalnofix(12, full, 'rindex', make_sub(''))
        self.checkequalnofix(3, full, 'rindex', make_sub('def'))
        self.checkequalnofix(9, full, 'rindex', make_sub('abc'))
        self.checkequalnofix(0, full, 'rindex', make_sub('abc'), 0, -1)

        # Substrings that cannot be found raise ValueError.
        self.assertRaises(ValueError, full.rindex, make_sub('hib'))
        tail = make_text('defghiabc')
        self.assertRaises(ValueError, tail.rindex, make_sub('def'), 1)
        self.assertRaises(ValueError, tail.rindex, make_sub('abc'), 0, -1)
        head = make_text('abcdefghi')
        self.assertRaises(ValueError, head.rindex, make_sub('ghi'), 0, 8)
        self.assertRaises(ValueError, head.rindex, make_sub('ghi'), 0, -1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000132
def test_translate(self):
    # Each row is (expected result, translation table); table values are
    # None, an ordinal, or a unicode string (possibly empty or longer
    # than one character).
    cases = [
        (u'bbbc', {ord('a'):None}),
        (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
        (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
        (u'<i><i><i>c', {ord('a'):None, ord('b'):u'<i>'}),
        (u'c', {ord('a'):None, ord('b'):u''}),
    ]
    for expected, table in cases:
        self.checkequalnofix(expected, u'abababc', 'translate', table)

    # No table at all, or a table mapping to a byte string, is a TypeError.
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000142
def test_split(self):
    string_tests.CommonTest.test_split(self)

    # Mixed arguments: each row is (expected list, subject, separator).
    letters = [u'a', u'b', u'c', u'd']
    mixed_cases = (
        (letters, u'a//b//c//d', '//'),
        (letters, 'a//b//c//d', u'//'),
        ([u'endcase ', u''], u'endcase test', 'test'),
    )
    for expected, subject, separator in mixed_cases:
        self.checkequalnofix(expected, subject, 'split', separator)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000150
def test_join(self):
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments: each row is (expected result, separator, items)
    mixed_cases = [
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    ]
    for expected, separator, items in mixed_cases:
        self.checkequalnofix(expected, separator, 'join', items)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000162
def test_strip(self):
    string_tests.CommonTest.test_strip(self)
    # A byte-string argument containing "\xff" must raise UnicodeError.
    strip_hello = u"hello".strip
    self.assertRaises(UnicodeError, strip_hello, "\xff")
Guido van Rossuma831cac2000-03-10 23:23:21 +0000166
def test_replace(self):
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    replaced_once = u'one@two!three!'
    self.checkequalnofix(replaced_once, 'one!two!three!', 'replace', u'!', u'@', 1)
    # A non-string replacement value is a TypeError.
    str_replace = 'replace'.replace
    self.assertRaises(TypeError, str_replace, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000173
def test_comparison(self):
    # Comparisons: equality and ordering across str/unicode combinations.
    for left, right in ((u'abc', 'abc'), ('abc', u'abc'), (u'abc', u'abc')):
        self.assertEqual(left, right)
    for longer, shorter in ((u'abcd', 'abc'), ('abcd', u'abc'), (u'abcd', u'abc')):
        self.assert_(longer > shorter)
    for shorter, longer in ((u'abc', 'abcd'), ('abc', u'abcd'), (u'abc', u'abcd')):
        self.assert_(shorter < longer)

    # Deliberately disabled: the constant-false guard keeps this block
    # from ever running.
    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        def test_lecmp(s, s2):
            self.assert_(s < s2)

        def test_fixup(s):
            surrogate_pairs = (
                u'\ud800\udc01', u'\ud900\udc01', u'\uda00\udc01', u'\udb00\udc01',
                u'\ud800\udd01', u'\ud900\udd01', u'\uda00\udd01', u'\udb00\udd01',
                u'\ud800\ude01', u'\ud900\ude01', u'\uda00\ude01', u'\udb00\ude01',
                u'\ud800\udfff', u'\ud900\udfff', u'\uda00\udfff', u'\udb00\udfff',
            )
            for s2 in surrogate_pairs:
                test_lecmp(s, s2)

        test_fixup(u'\ue000')
        test_fixup(u'\uff61')

        # Surrogates on both sides, no fixup required
        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
238
def test_islower(self):
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    # U+1FFC is expected not to be reported as lower case.
    sample = u'\u1FFc'
    self.checkequalnofix(False, sample, 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000242
def test_isupper(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    # The U+1FFC check is skipped when sys.platform starts with 'java'.
    if sys.platform.startswith('java'):
        return
    # U+1FFC is expected not to be reported as upper case.
    self.checkequalnofix(False, u'\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000247
def test_istitle(self):
    # Delegate to the mixin's test_istitle, the method this override
    # replaces. It used to call test_title instead, unlike every sibling
    # override, which calls its same-named parent.
    # NOTE(review): assumes MixinStrUnicodeUserStringTest defines
    # test_istitle - confirm against string_tests.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    # U+1FFC is expected to be title case, alone and inside a phrase.
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000252
def test_isspace(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    # Each row is (expected isspace() result, one-character string).
    for expected, char in (
        (True, u'\u2000'),
        (True, u'\u200a'),
        (False, u'\u2014'),
    ):
        self.checkequalnofix(expected, char, 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000258
def test_isalpha(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    # U+1FFC is expected to be reported as alphabetic.
    sample = u'\u1FFc'
    self.checkequalnofix(True, sample, 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000262
def test_isdecimal(self):
    # Each row is (expected isdecimal() result, subject).
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'),  # CIRCLED DIGIT ONE
        (False, u'\xbc'),  # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'),  # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, subject in cases:
        self.checkequalnofix(expected, subject, 'isdecimal')

    # isdecimal() takes no arguments.
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000274
def test_isdigit(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    # Each row is (expected isdigit() result, one-character string).
    for expected, char in (
        (True, u'\u2460'),
        (False, u'\xbc'),
        (True, u'\u0660'),
    ):
        self.checkequalnofix(expected, char, 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000280
def test_isnumeric(self):
    # Each row is (expected isnumeric() result, subject).
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, subject in cases:
        self.checkequalnofix(expected, subject, 'isnumeric')

    # isnumeric() takes no arguments.
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
292
def test_contains(self):
    # Testing Unicode contains method
    # The checks are table-driven; three assertions that used to appear
    # twice verbatim ('a' in u'bdba', u'' in u'abc', '' in u'abc') are
    # listed once.

    # (needle, container) pairs for which "needle in container" must hold.
    present = [
        ('a', u'abdb'),
        ('a', u'bdab'),
        ('a', u'bdaba'),
        ('a', u'bdba'),
        (u'a', u'bdba'),
        (u'a', 'bdba'),
        (u'a', ('a',1,None)),
        (u'a', (1,None,'a')),
        (u'a', (1,None,u'a')),
        ('a', ('a',1,None)),
        ('a', (1,None,'a')),
        ('a', (1,None,u'a')),
        (u'ab', u'abcd'),
        ('ab', u'abc'),
        (u'ab', 'abc'),
        (u'ab', (1,None,u'ab')),
        (u'', u'abc'),
        ('', u'abc'),
        (u'', ''),
        ('', u''),
        (u'', u''),
        (u'', 'abc'),
        (u'\0', '\0abc'),
        ('\0', u'\0abc'),
        (u'\0', u'\0abc'),
        (u'\0', 'abc\0'),
        ('\0', u'abc\0'),
        (u'\0', u'abc\0'),
        (u'a', '\0abc'),
        ('a', u'\0abc'),
        (u'a', u'\0abc'),
        (u'asdf', 'asdf'),
        ('asdf', u'asdf'),
        (u'asdf', u'asdf'),
    ]
    for needle, container in present:
        self.assert_(needle in container)

    # (needle, container) pairs for which "needle not in container" must hold.
    absent = [
        (u'a', u'bdb'),
        (u'a', 'bdb'),
        ('a', ('x',1,u'y')),
        ('a', ('x',1,None)),
        (u'abcd', u'abcxxxx'),
        (u'\0', 'abc'),
        ('\0', u'abc'),
        (u'\0', u'abc'),
        (u'asdf', 'asd'),
        ('asdf', u'asd'),
        (u'asdf', u'asd'),
        (u'asdf', ''),
        ('asdf', u''),
        (u'asdf', u''),
    ]
    for needle, container in absent:
        self.assert_(needle not in container)

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    # __contains__ requires an argument.
    self.assertRaises(TypeError, u"abc".__contains__)
353 self.assertRaises(TypeError, u"abc".__contains__)
354
def test_formatting(self):
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # '%5.2f' has a minimum field width of five, so a value rendered in
    # four characters ("3.00") is preceded by one padding space in
    # addition to the space of the ", " separator; "1003.57" already
    # exceeds the width and gets no padding.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    # The %r check is skipped when sys.platform starts with 'java'.
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # '*' takes the field width (and precision) from the argument tuple:
    # a width of 5 pads the three-character value with two spaces (on the
    # right for a negative width), and a precision of 2 truncates it to
    # 'ab', leaving three padding spaces.
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
Walter Dörwald28256f22003-01-19 16:59:20 +0000389
Walter Dörwald28256f22003-01-19 16:59:20 +0000390
391 def test_constructor(self):
392 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
393
394 self.assertEqual(
395 unicode(u'unicode remains unicode'),
396 u'unicode remains unicode'
397 )
398
399 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000400 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000401
Walter Dörwald28256f22003-01-19 16:59:20 +0000402 self.assertEqual(
403 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
404 u'unicode subclass becomes unicode'
405 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000406
Walter Dörwald28256f22003-01-19 16:59:20 +0000407 self.assertEqual(
408 unicode('strings are converted to unicode'),
409 u'strings are converted to unicode'
410 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000411
Walter Dörwald28256f22003-01-19 16:59:20 +0000412 class UnicodeCompat:
413 def __init__(self, x):
414 self.x = x
415 def __unicode__(self):
416 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000417
Walter Dörwald28256f22003-01-19 16:59:20 +0000418 self.assertEqual(
419 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
420 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000421
Walter Dörwald28256f22003-01-19 16:59:20 +0000422 class StringCompat:
423 def __init__(self, x):
424 self.x = x
425 def __str__(self):
426 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000427
Walter Dörwald28256f22003-01-19 16:59:20 +0000428 self.assertEqual(
429 unicode(StringCompat('__str__ compatible objects are recognized')),
430 u'__str__ compatible objects are recognized'
431 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000432
Walter Dörwald28256f22003-01-19 16:59:20 +0000433 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000434
Walter Dörwald28256f22003-01-19 16:59:20 +0000435 o = StringCompat('unicode(obj) is compatible to str()')
436 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
437 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000438
Walter Dörwald28256f22003-01-19 16:59:20 +0000439 for obj in (123, 123.45, 123L):
440 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000441
Walter Dörwald28256f22003-01-19 16:59:20 +0000442 # unicode(obj, encoding, error) tests (this maps to
443 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000444
Walter Dörwald28256f22003-01-19 16:59:20 +0000445 if not sys.platform.startswith('java'):
446 self.assertRaises(
447 TypeError,
448 unicode,
449 u'decoding unicode is not supported',
450 'utf-8',
451 'strict'
452 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000453
Walter Dörwald28256f22003-01-19 16:59:20 +0000454 self.assertEqual(
455 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
456 u'strings are decoded to unicode'
457 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000458
Walter Dörwald28256f22003-01-19 16:59:20 +0000459 if not sys.platform.startswith('java'):
460 self.assertEqual(
461 unicode(
462 buffer('character buffers are decoded to unicode'),
463 'utf-8',
464 'strict'
465 ),
466 u'character buffers are decoded to unicode'
467 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000468
Walter Dörwald28256f22003-01-19 16:59:20 +0000469 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000470
Walter Dörwald28256f22003-01-19 16:59:20 +0000471 def test_codecs_utf7(self):
472 utfTests = [
473 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
474 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
475 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
476 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
477 (u'+', '+-'),
478 (u'+-', '+--'),
479 (u'+?', '+-?'),
480 (u'\?', '+AFw?'),
481 (u'+?', '+-?'),
482 (ur'\\?', '+AFwAXA?'),
483 (ur'\\\?', '+AFwAXABc?'),
484 (ur'++--', '+-+---')
485 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000486
Walter Dörwald28256f22003-01-19 16:59:20 +0000487 for (x, y) in utfTests:
488 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000489
Walter Dörwald28256f22003-01-19 16:59:20 +0000490 # surrogates not supported
491 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000492
Walter Dörwald28256f22003-01-19 16:59:20 +0000493 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000494
def test_codecs_utf8(self):
    # Encoding: each row is (unicode text, expected UTF-8 bytes).
    long_text = (
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'
    )
    long_bytes = (
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )
    encode_cases = [
        (u'', ''),
        (u'\u20ac', '\xe2\x82\xac'),
        (u'\ud800\udc02', '\xf0\x90\x80\x82'),
        (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
        (u'\ud800', '\xed\xa0\x80'),
        (u'\udc00', '\xed\xb0\x80'),
        (u'\ud800\udc02'*1000, '\xf0\x90\x80\x82'*1000),
        (long_text, long_bytes),
    ]
    for text, encoded in encode_cases:
        self.assertEqual(text.encode('utf-8'), encoded)

    # UTF-8 specific decoding tests
    decode_cases = [
        ('\xf0\xa3\x91\x96', u'\U00023456'),
        ('\xf0\x90\x80\x82', u'\U00010002'),
        ('\xe2\x82\xac', u'\u20ac'),
    ]
    for encoded, text in decode_cases:
        self.assertEqual(unicode(encoded, 'utf-8'), text)

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000533
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    encoded = u"www.python.org.".encode("idna")
    self.assertEqual(encoded, "www.python.org.")
537
def test_codecs_errors(self):
    # Error handling (encoding)
    non_ascii_text = u'Andr\202 x'
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii')
    self.assertRaises(UnicodeError, non_ascii_text.encode, 'ascii','strict')
    self.assertEqual(non_ascii_text.encode('ascii','ignore'), "Andr x")
    self.assertEqual(non_ascii_text.encode('ascii','replace'), "Andr? x")

    # Error handling (decoding)
    non_ascii_bytes = 'Andr\202 x'
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii')
    self.assertRaises(UnicodeError, unicode, non_ascii_bytes, 'ascii','strict')
    self.assertEqual(unicode(non_ascii_bytes,'ascii','ignore'), u"Andr x")
    self.assertEqual(unicode(non_ascii_bytes,'ascii','replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Error handling (bad decoder return)
    def search_function(encoding):
        # Codec lookup hook that serves two deliberately broken codecs.
        def decode1(input, errors="strict"):
            return 42 # not a tuple
        def encode1(input, errors="strict"):
            return 42 # not a tuple
        def encode2(input, errors="strict"):
            return (42, 42) # no unicode
        def decode2(input, errors="strict"):
            return (42, 42) # no unicode
        if encoding=="test.unicode1":
            return (encode1, decode1, None, None)
        if encoding=="test.unicode2":
            return (encode2, decode2, None, None)
        return None
    codecs.register(search_function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    for broken_codec in ("test.unicode1", "test.unicode2"):
        self.assertRaises(TypeError, u"hello".encode, broken_codec)
    # executes PyUnicode_Encode()
    import imp
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        [u"non-existing dir"]
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000592
def test_codecs(self):
    # Encoding: each row is (codec name, expected bytes for u'hello').
    hello_encodings = (
        ('ascii', 'hello'),
        ('utf-7', 'hello'),
        ('utf-8', 'hello'),
        ('utf8', 'hello'),
        ('utf-16-le', 'h\000e\000l\000l\000o\000'),
        ('utf-16-be', '\000h\000e\000l\000l\000o'),
        ('latin-1', 'hello'),
    )
    for codec_name, expected in hello_encodings:
        self.assertEqual(u'hello'.encode(codec_name), expected)

    def check_roundtrip(text, codec_names):
        # Encoding and then decoding with the same codec must give back text.
        for encoding in codec_names:
            self.assertEqual(unicode(text.encode(encoding),encoding), text)

    # Roundtrip safety for BMP (just the first 1024 chars)
    check_roundtrip(
        u''.join(map(unichr, xrange(1024))),
        ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         'raw_unicode_escape', 'unicode_escape', 'unicode_internal'))

    # Roundtrip safety for BMP (just the first 256 chars)
    check_roundtrip(u''.join(map(unichr, xrange(256))), ('latin-1',))

    # Roundtrip safety for BMP (just the first 128 chars)
    check_roundtrip(u''.join(map(unichr, xrange(128))), ('ascii',))

    # Roundtrip safety for non-BMP (just a few chars)
    check_roundtrip(
        u'\U00010001\U00020002\U00030003\U00040004\U00050005',
        ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         #'raw_unicode_escape',
         'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    non_surrogates = range(0,0xd800)+range(0xe000,0x10000)
    check_roundtrip(u''.join(map(unichr, non_surrogates)), ('utf-8',))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000633
def test_codecs_charmap(self):
    # For each listed charmap codec, decoding an 8-bit string and
    # encoding the result with the same codec must give back the
    # original string.  The lower (0-127) and upper (128-255) halves
    # of the byte range are checked against separate codec lists.

    def roundtrip(raw, codec_names):
        for codec_name in codec_names:
            decoded = unicode(raw, codec_name)
            self.assertEqual(decoded.encode(codec_name), raw)

    # 0-127
    low_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',

        ### These have undefined mappings:
        #'cp424',

        ### These fail the round-trip:
        #'cp875'
    )
    low_bytes = ''.join([chr(code) for code in xrange(128)])
    roundtrip(low_bytes, low_codecs)

    # 128-255
    high_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        ### These have undefined mappings:
        #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        #'cp1256', 'cp1257', 'cp1258',
        #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
        #'iso8859_3', 'iso8859_6', 'iso8859_7',
        #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

        ### These fail the round-trip:
        #'cp1006', 'cp875', 'iso8859_8',
    )
    high_bytes = ''.join([chr(code) for code in xrange(128, 256)])
    roundtrip(high_bytes, high_codecs)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000687
Walter Dörwald28256f22003-01-19 16:59:20 +0000688 def test_concatenation(self):
689 self.assertEqual((u"abc" u"def"), u"abcdef")
690 self.assertEqual(("abc" u"def"), u"abcdef")
691 self.assertEqual((u"abc" "def"), u"abcdef")
692 self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
693 self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
Fred Drake004d5e62000-10-23 17:22:08 +0000694
def test_printing(self):
    # Push unicode objects through the `print >>file` statement in
    # several shapes (alone, mixed with plain strings, with embedded
    # newlines, with and without a trailing comma).  Nothing is
    # asserted; the test passes as long as no statement raises.
    class NullWriter:
        # File-like object whose write() throws its argument away.
        def write(self, data):
            pass

    sink = NullWriter()

    # Single and multiple arguments, unicode and plain strings mixed.
    print >>sink, u'abc'
    print >>sink, u'abc', u'def'
    print >>sink, u'abc', 'def'
    print >>sink, 'abc', u'def'

    # Arguments ending in a newline; the trailing-comma forms and the
    # repeated statements are intentional, so keep this exact order.
    print >>sink, u'abc\n'
    print >>sink, u'abc\n',
    print >>sink, u'abc\n',
    print >>sink, u'def\n'
    print >>sink, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000710
Martin v. Löwis9a3a9f72003-05-18 12:31:09 +0000711 def test_ucs4(self):
712 if sys.maxunicode == 0xFFFF:
713 return
714 x = u'\U00100000'
715 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
716 self.assertEqual(x, y)
717
def test_main():
    # Hand the UnicodeTest case class to test_support.run_unittest().
    case_classes = (UnicodeTest,)
    test_support.run_unittest(*case_classes)
Barry Warsaw817918c2002-08-06 16:58:21 +0000720
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()