blob: 5ce2842f4e14f340b9f53edc2e0a7f7a89519366 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Walter Dörwald0fd583c2003-02-21 12:53:50 +00009import unittest, sys, string, codecs, new
10from test import test_support, string_tests
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Walter Dörwald0fd583c2003-02-21 12:53:50 +000012class UnicodeTest(
13 string_tests.CommonTest,
14 string_tests.MixinStrUnicodeUserStringTest
15 ):
16 type2test = unicode
17
def checkequalnofix(self, result, object, methodname, *args):
    """Check that object.methodname(*args) equals result, with the same type.

    Unlike a fixtype-based check, the arguments are used exactly as given.
    When the method hands back the very object it was called on, the call
    is repeated on a unicode subclass instance, which must not be returned
    as-is.
    """
    actual = getattr(object, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(type(actual) is type(result))

    # Nothing more to verify unless the receiver itself came back.
    if actual is not object:
        return

    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)

    wrapped = usub(object)
    actual = getattr(wrapped, methodname)(*args)
    self.assertEqual(actual, result)
    self.assert_(wrapped is not actual)
Guido van Rossume4874ae2001-09-21 15:36:41 +000035
def test_literals(self):
    # Different escape spellings of the same code point compare equal.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')

    # Each of these literals must raise UnicodeError when evaluated.
    rejected_sources = (
        'u\'\\Ufffffffe\'',
        'u\'\\Uffffffff\'',
        'u\'\\U%08x\'' % 0x110000,
    )
    for source in rejected_sources:
        self.assertRaises(UnicodeError, eval, source)
42
def test_repr(self):
    # The expected strings below are skipped on Jython.
    if sys.platform.startswith('java'):
        return

    # Test basic sanity of repr()
    simple_cases = (
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    )
    for value, expected in simple_cases:
        self.assertEqual(repr(value), expected)

    # repr() of the 256 code points 0..255 joined into one string.
    expected_latin1 = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    all_latin1 = u''.join([unichr(code) for code in xrange(256)])
    self.assertEqual(repr(all_latin1), expected_latin1)
76
def test_count(self):
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    # Each row: expected count, receiver, arguments passed to count().
    cases = (
        (3, 'aaa', (u'a',)),
        (0, 'aaa', (u'b',)),
        (3, u'aaa', ('a',)),
        (0, u'aaa', ('b',)),
        (0, u'aaa', ('b',)),
        (1, u'aaa', ('a', -1)),
        (3, u'aaa', ('a', -10)),
        (2, u'aaa', ('a', 0, -1)),
        (0, u'aaa', ('a', 0, -10)),
    )
    for expected, receiver, call_args in cases:
        self.checkequalnofix(expected, receiver, 'count', *call_args)
Guido van Rossuma831cac2000-03-10 23:23:21 +000089
def test_find(self):
    # Run the shared find() checks as well; defining test_find here
    # overrides the inherited one, so without this call they would be
    # skipped for unicode (test_count, test_rfind, test_index and
    # test_rindex delegate the same way).
    # NOTE(review): assumes string_tests.CommonTest defines test_find,
    # like its rfind/index/rindex siblings -- confirm.
    string_tests.CommonTest.test_find(self)

    # unicode receiver with unicode arguments, result type checked exactly
    self.checkequalnofix(0, u'abcdefghiabc', 'find', u'abc')
    self.checkequalnofix(9, u'abcdefghiabc', 'find', u'abc', 1)
    self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)

    # a missing or non-string argument is a TypeError
    self.assertRaises(TypeError, u'hello'.find)
    self.assertRaises(TypeError, u'hello'.find, 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +000097
def test_rfind(self):
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    # Each row: expected index, receiver, search argument.
    mixed_cases = (
        (9, 'abcdefghiabc', u'abc'),
        (12, 'abcdefghiabc', u''),
        (12, u'abcdefghiabc', ''),
    )
    for expected, receiver, needle in mixed_cases:
        self.checkequalnofix(expected, receiver, 'rfind', needle)
Guido van Rossum8b264542000-12-19 02:22:31 +0000104
def test_index(self):
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    # Each "found" row: expected index, receiver text, index() arguments.
    found = (
        (0, 'abcdefghiabc', ('',)),
        (3, 'abcdefghiabc', ('def',)),
        (0, 'abcdefghiabc', ('abc',)),
        (9, 'abcdefghiabc', ('abc', 1)),
    )
    # Each "missing" row: receiver text, index() arguments -> ValueError.
    missing = (
        ('abcdefghiabc', ('hib',)),
        ('abcdefghiab', ('abc', 1)),
        ('abcdefghi', ('ghi', 8)),
        ('abcdefghi', ('ghi', -1)),
    )
    for (receiver_type, arg_type) in ((str, unicode), (unicode, str)):
        for expected, text, call_args in found:
            needle = arg_type(call_args[0])
            self.checkequalnofix(expected, receiver_type(text), 'index',
                                 needle, *call_args[1:])
        for text, call_args in missing:
            needle = arg_type(call_args[0])
            self.assertRaises(ValueError, receiver_type(text).index,
                              needle, *call_args[1:])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000117
def test_rindex(self):
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    # Each "found" row: expected index, receiver text, rindex() arguments.
    found = (
        (12, 'abcdefghiabc', ('',)),
        (3, 'abcdefghiabc', ('def',)),
        (9, 'abcdefghiabc', ('abc',)),
        (0, 'abcdefghiabc', ('abc', 0, -1)),
    )
    # Each "missing" row: receiver text, rindex() arguments -> ValueError.
    missing = (
        ('abcdefghiabc', ('hib',)),
        ('defghiabc', ('def', 1)),
        ('defghiabc', ('abc', 0, -1)),
        ('abcdefghi', ('ghi', 0, 8)),
        ('abcdefghi', ('ghi', 0, -1)),
    )
    for (receiver_type, arg_type) in ((str, unicode), (unicode, str)):
        for expected, text, call_args in found:
            needle = arg_type(call_args[0])
            self.checkequalnofix(expected, receiver_type(text), 'rindex',
                                 needle, *call_args[1:])

        for text, call_args in missing:
            needle = arg_type(call_args[0])
            self.assertRaises(ValueError, receiver_type(text).rindex,
                              needle, *call_args[1:])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000132
def test_translate(self):
    # Mapping keys are ordinals; values are None, an ordinal, or a
    # unicode string.
    a, b, c = ord('a'), ord('b'), ord('c')
    cases = (
        (u'bbbc', u'abababc', {a: None}),
        (u'iiic', u'abababc', {a: None, b: ord('i')}),
        (u'iiix', u'abababc', {a: None, b: ord('i'), c: u'x'}),
        (u'<i><i><i>c', u'abababc', {a: None, b: u'<i>'}),
        (u'c', u'abababc', {a: None, b: u''}),
        (u'xyyx', u'xzx', {ord('z'): u'yy'}),
    )
    for expected, text, table in cases:
        self.checkequalnofix(expected, text, 'translate', table)

    # No table at all, or a byte-string replacement value, is a TypeError.
    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {a: ''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000143
def test_split(self):
    string_tests.CommonTest.test_split(self)

    # Mixed arguments
    # Each row: expected list, receiver, separator.
    mixed_cases = (
        ([u'a', u'b', u'c', u'd'], u'a//b//c//d', '//'),
        ([u'a', u'b', u'c', u'd'], 'a//b//c//d', u'//'),
        ([u'endcase ', u''], u'endcase test', 'test'),
    )
    for expected, receiver, separator in mixed_cases:
        self.checkequalnofix(expected, receiver, 'split', separator)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000151
def test_join(self):
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments
    # Each row: expected unicode result, separator, items to join.
    mixed_cases = (
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    )
    for expected, separator, items in mixed_cases:
        self.checkequalnofix(expected, separator, 'join', items)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000163
def test_strip(self):
    string_tests.CommonTest.test_strip(self)
    # A byte-string argument holding "\xff" must raise UnicodeError.
    strip_hello = u"hello".strip
    self.assertRaises(UnicodeError, strip_hello, "\xff")
Guido van Rossuma831cac2000-03-10 23:23:21 +0000167
def test_replace(self):
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    byte_text = 'one!two!three!'
    self.checkequalnofix(u'one@two!three!', byte_text, 'replace', u'!', u'@', 1)
    # a non-string replacement value is a TypeError
    self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000174
def test_comparison(self):
    # Comparisons:
    # equality across every str/unicode pairing
    for left, right in (
            (u'abc', 'abc'),
            ('abc', u'abc'),
            (u'abc', u'abc')):
        self.assertEqual(left, right)

    # the longer string sorts after its own prefix
    for longer, shorter in (
            (u'abcd', 'abc'),
            ('abcd', u'abc'),
            (u'abcd', u'abc')):
        self.assert_(longer > shorter)

    # ... and the prefix sorts before the longer string
    for shorter, longer in (
            (u'abc', 'abcd'),
            ('abc', u'abcd'),
            (u'abc', u'abcd')):
        self.assert_(shorter < longer)

    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assert_(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assert_(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        surrogate_pairs = (
            u'\ud800\udc01', u'\ud900\udc01', u'\uda00\udc01', u'\udb00\udc01',
            u'\ud800\udd01', u'\ud900\udd01', u'\uda00\udd01', u'\udb00\udd01',
            u'\ud800\ude01', u'\ud900\ude01', u'\uda00\ude01', u'\udb00\ude01',
            u'\ud800\udfff', u'\ud900\udfff', u'\uda00\udfff', u'\udb00\udfff',
        )
        for s in (u'\ue000', u'\uff61'):
            for s2 in surrogate_pairs:
                self.assert_(s < s2)

        # Surrogates on both sides, no fixup required
        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
239
def test_islower(self):
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    # U+1FFC is expected not to count as lowercase.
    sample = u'\u1FFc'
    self.checkequalnofix(False, sample, 'islower')
Walter Dörwald28256f22003-01-19 16:59:20 +0000243
def test_isupper(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    # The U+1FFC check is skipped on Jython.
    if sys.platform.startswith('java'):
        return
    self.checkequalnofix(False, u'\u1FFc', 'isupper')
Walter Dörwald28256f22003-01-19 16:59:20 +0000248
def test_istitle(self):
    # Delegate to the shared istitle() checks.  This used to call the
    # mixin's test_title, which left the shared istitle checks shadowed by
    # this override and ran the title() checks a second time.
    # NOTE(review): assumes MixinStrUnicodeUserStringTest defines
    # test_istitle, as it does for the other is*() predicates -- confirm.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    # U+1FFC on its own, and as the first letter of a word, is titlecased.
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
Walter Dörwald28256f22003-01-19 16:59:20 +0000253
def test_isspace(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    # Each row: expected isspace() result, single-character string.
    for expected, char in (
            (True, u'\u2000'),
            (True, u'\u200a'),
            (False, u'\u2014')):
        self.checkequalnofix(expected, char, 'isspace')
Walter Dörwald28256f22003-01-19 16:59:20 +0000259
def test_isalpha(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    # U+1FFC is expected to count as alphabetic.
    sample = u'\u1FFc'
    self.checkequalnofix(True, sample, 'isalpha')
Walter Dörwald28256f22003-01-19 16:59:20 +0000263
def test_isdecimal(self):
    # Each row: expected isdecimal() result, receiver.
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'),  # CIRCLED DIGIT ONE
        (False, u'\xbc'),  # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'),  # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in cases:
        self.checkequalnofix(expected, text, 'isdecimal')

    # isdecimal() takes no arguments
    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
Walter Dörwald28256f22003-01-19 16:59:20 +0000275
def test_isdigit(self):
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    # Each row: expected isdigit() result, single-character string.
    for expected, char in (
            (True, u'\u2460'),
            (False, u'\xbc'),
            (True, u'\u0660')):
        self.checkequalnofix(expected, char, 'isdigit')
Walter Dörwald28256f22003-01-19 16:59:20 +0000281
def test_isnumeric(self):
    # Each row: expected isnumeric() result, receiver.
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, text in cases:
        self.checkequalnofix(expected, text, 'isnumeric')

    # isnumeric() takes no arguments
    self.assertRaises(TypeError, u"abc".isnumeric, 42)
293
def test_contains(self):
    # Testing Unicode contains method
    def check_membership(cases):
        # Each row: item, container, whether "item in container" must hold.
        for item, container, present in cases:
            if present:
                self.assert_(item in container)
            else:
                self.assert_(item not in container)

    check_membership((
        ('a', u'abdb', True),
        ('a', u'bdab', True),
        ('a', u'bdaba', True),
        ('a', u'bdba', True),
        ('a', u'bdba', True),
        (u'a', u'bdba', True),
        (u'a', u'bdb', False),
        (u'a', 'bdb', False),
        (u'a', 'bdba', True),
        (u'a', ('a',1,None), True),
        (u'a', (1,None,'a'), True),
        (u'a', (1,None,u'a'), True),
        ('a', ('a',1,None), True),
        ('a', (1,None,'a'), True),
        ('a', (1,None,u'a'), True),
        ('a', ('x',1,u'y'), False),
        ('a', ('x',1,None), False),
        (u'abcd', u'abcxxxx', False),
        (u'ab', u'abcd', True),
        ('ab', u'abc', True),
        (u'ab', 'abc', True),
        (u'ab', (1,None,u'ab'), True),
        (u'', u'abc', True),
        ('', u'abc', True),
    ))

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')

    check_membership((
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    ))

    # __contains__ needs exactly one argument
    self.assertRaises(TypeError, u"abc".__contains__)
355
def test_formatting(self):
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # %5.2f has a minimum field width of five, so a four-character value
    # such as 3.00 is left-padded with one blank (two blanks follow the
    # comma); 1003.57 already exceeds the width and gets no padding.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    # The %r check is skipped on Jython.
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    # %c accepts an ordinal; one past sys.maxunicode overflows
    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # A '*' width is taken from the argument tuple: the field is padded
    # with blanks to that width (right-justified, or left-justified for a
    # negative width); a '*' precision truncates the string first.
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
Walter Dörwald28256f22003-01-19 16:59:20 +0000390
Walter Dörwald28256f22003-01-19 16:59:20 +0000391
392 def test_constructor(self):
393 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
394
395 self.assertEqual(
396 unicode(u'unicode remains unicode'),
397 u'unicode remains unicode'
398 )
399
400 class UnicodeSubclass(unicode):
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000401 pass
Guido van Rossuma831cac2000-03-10 23:23:21 +0000402
Walter Dörwald28256f22003-01-19 16:59:20 +0000403 self.assertEqual(
404 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
405 u'unicode subclass becomes unicode'
406 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000407
Walter Dörwald28256f22003-01-19 16:59:20 +0000408 self.assertEqual(
409 unicode('strings are converted to unicode'),
410 u'strings are converted to unicode'
411 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000412
Walter Dörwald28256f22003-01-19 16:59:20 +0000413 class UnicodeCompat:
414 def __init__(self, x):
415 self.x = x
416 def __unicode__(self):
417 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000418
Walter Dörwald28256f22003-01-19 16:59:20 +0000419 self.assertEqual(
420 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
421 u'__unicode__ compatible objects are recognized')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000422
Walter Dörwald28256f22003-01-19 16:59:20 +0000423 class StringCompat:
424 def __init__(self, x):
425 self.x = x
426 def __str__(self):
427 return self.x
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000428
Walter Dörwald28256f22003-01-19 16:59:20 +0000429 self.assertEqual(
430 unicode(StringCompat('__str__ compatible objects are recognized')),
431 u'__str__ compatible objects are recognized'
432 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000433
Walter Dörwald28256f22003-01-19 16:59:20 +0000434 # unicode(obj) is compatible to str():
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000435
Walter Dörwald28256f22003-01-19 16:59:20 +0000436 o = StringCompat('unicode(obj) is compatible to str()')
437 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
438 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000439
Walter Dörwald28256f22003-01-19 16:59:20 +0000440 for obj in (123, 123.45, 123L):
441 self.assertEqual(unicode(obj), unicode(str(obj)))
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000442
Walter Dörwald28256f22003-01-19 16:59:20 +0000443 # unicode(obj, encoding, error) tests (this maps to
444 # PyUnicode_FromEncodedObject() at C level)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000445
Walter Dörwald28256f22003-01-19 16:59:20 +0000446 if not sys.platform.startswith('java'):
447 self.assertRaises(
448 TypeError,
449 unicode,
450 u'decoding unicode is not supported',
451 'utf-8',
452 'strict'
453 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000454
Walter Dörwald28256f22003-01-19 16:59:20 +0000455 self.assertEqual(
456 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
457 u'strings are decoded to unicode'
458 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000459
Walter Dörwald28256f22003-01-19 16:59:20 +0000460 if not sys.platform.startswith('java'):
461 self.assertEqual(
462 unicode(
463 buffer('character buffers are decoded to unicode'),
464 'utf-8',
465 'strict'
466 ),
467 u'character buffers are decoded to unicode'
468 )
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000469
Walter Dörwald28256f22003-01-19 16:59:20 +0000470 self.assertRaises(TypeError, unicode, 42, 42, 42)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000471
Walter Dörwald28256f22003-01-19 16:59:20 +0000472 def test_codecs_utf7(self):
473 utfTests = [
474 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
475 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
476 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
477 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
478 (u'+', '+-'),
479 (u'+-', '+--'),
480 (u'+?', '+-?'),
481 (u'\?', '+AFw?'),
482 (u'+?', '+-?'),
483 (ur'\\?', '+AFwAXA?'),
484 (ur'\\\?', '+AFwAXABc?'),
485 (ur'++--', '+-+---')
486 ]
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000487
Walter Dörwald28256f22003-01-19 16:59:20 +0000488 for (x, y) in utfTests:
489 self.assertEqual(x.encode('utf-7'), y)
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000490
Walter Dörwald28256f22003-01-19 16:59:20 +0000491 # surrogates not supported
492 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000493
Walter Dörwald28256f22003-01-19 16:59:20 +0000494 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000495
def test_codecs_utf8(self):
    # Each row: unicode text, its expected UTF-8 encoding.
    encode_cases = (
        (u'', ''),
        (u'\u20ac', '\xe2\x82\xac'),
        (u'\ud800\udc02', '\xf0\x90\x80\x82'),
        (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
        (u'\ud800', '\xed\xa0\x80'),
        (u'\udc00', '\xed\xb0\x80'),
        (u'\ud800\udc02'*1000, '\xf0\x90\x80\x82'*1000),
    )
    for text, encoded in encode_cases:
        self.assertEqual(text.encode('utf-8'), encoded)

    # A longer text mixing non-ASCII and ASCII characters.
    long_text = (
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und')
    long_encoded = (
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und')
    self.assertEqual(long_text.encode('utf-8'), long_encoded)

    # UTF-8 specific decoding tests
    for encoded, text in (
            ('\xf0\xa3\x91\x96', u'\U00023456'),
            ('\xf0\x90\x80\x82', u'\U00010002'),
            ('\xe2\x82\xac', u'\u20ac')):
        self.assertEqual(unicode(encoded, 'utf-8'), text)

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000534
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    hostname = u"www.python.org."
    self.assertEqual(hostname.encode("idna"), "www.python.org.")
538
def test_codecs_errors(self):
    # Error handling (encoding)
    text = u'Andr\202 x'
    self.assertRaises(UnicodeError, text.encode, 'ascii')
    self.assertRaises(UnicodeError, text.encode, 'ascii','strict')
    self.assertEqual(text.encode('ascii','ignore'), "Andr x")
    self.assertEqual(text.encode('ascii','replace'), "Andr? x")

    # Error handling (decoding)
    raw = 'Andr\202 x'
    self.assertRaises(UnicodeError, unicode, raw, 'ascii')
    self.assertRaises(UnicodeError, unicode, raw, 'ascii','strict')
    self.assertEqual(unicode(raw,'ascii','ignore'), u"Andr x")
    self.assertEqual(unicode(raw,'ascii','replace'), u'Andr\uFFFD x')

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # Error handling (bad decoder return)
    def search_function(encoding):
        def decode1(input, errors="strict"):
            return 42 # not a tuple
        def encode1(input, errors="strict"):
            return 42 # not a tuple
        def encode2(input, errors="strict"):
            return (42, 42) # no unicode
        def decode2(input, errors="strict"):
            return (42, 42) # no unicode
        broken_codecs = {
            "test.unicode1": (encode1, decode1, None, None),
            "test.unicode2": (encode2, decode2, None, None),
        }
        # any other encoding name yields None
        return broken_codecs.get(encoding)
    codecs.register(search_function)
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
    # executes PyUnicode_Encode()
    import imp
    search_path = [u"non-existing dir"]
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        search_path
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
Guido van Rossum97064862000-04-10 13:52:48 +0000593
def test_codecs(self):
    # Encoding
    for encoding, expected in (
            ('ascii', 'hello'),
            ('utf-7', 'hello'),
            ('utf-8', 'hello'),
            ('utf8', 'hello'),
            ('utf-16-le', 'h\000e\000l\000l\000o\000'),
            ('utf-16-be', '\000h\000e\000l\000l\000o'),
            ('latin-1', 'hello')):
        self.assertEqual(u'hello'.encode(encoding), expected)

    def check_roundtrip(text, encodings):
        # Encoding then decoding with each codec must give the text back.
        for encoding in encodings:
            self.assertEqual(unicode(text.encode(encoding),encoding), text)

    # Roundtrip safety for BMP (just the first 1024 chars)
    check_roundtrip(
        u''.join(map(unichr, xrange(1024))),
        ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         'raw_unicode_escape', 'unicode_escape', 'unicode_internal'))

    # Roundtrip safety for BMP (just the first 256 chars)
    check_roundtrip(u''.join(map(unichr, xrange(256))), ('latin-1',))

    # Roundtrip safety for BMP (just the first 128 chars)
    check_roundtrip(u''.join(map(unichr, xrange(128))), ('ascii',))

    # Roundtrip safety for non-BMP (just a few chars)
    check_roundtrip(
        u'\U00010001\U00020002\U00030003\U00040004\U00050005',
        ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
         #'raw_unicode_escape',
         'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    ucs2_without_surrogates = range(0,0xd800)+range(0xe000,0x10000)
    check_roundtrip(u''.join(map(unichr, ucs2_without_surrogates)), ('utf-8',))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000634
def test_codecs_charmap(self):
    """Round-trip 8-bit strings through the charmap-based codecs.

    Each half of the byte range (0-127 and 128-255) is decoded with
    every codec listed for that half and then encoded again with the
    same codec; the result must equal the original byte string.
    """
    def check_roundtrip(raw, codec_names):
        # Decode, re-encode, and compare against the original bytes.
        for codec_name in codec_names:
            decoded = unicode(raw, codec_name)
            self.assertEqual(decoded.encode(codec_name), raw)

    # 0-127
    #
    # Left out of this list:
    #   undefined mappings:  'cp424'
    #   fails the round-trip: 'cp875'
    lower_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',
    )
    check_roundtrip(''.join(map(chr, xrange(128))), lower_half_codecs)

    # 128-255
    #
    # Left out of this list:
    #   undefined mappings:
    #     'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #     'cp1256', 'cp1257', 'cp1258',
    #     'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #     'iso8859_3', 'iso8859_6', 'iso8859_7',
    #     'mac_greek', 'mac_iceland', 'mac_roman', 'mac_turkish'
    #   fail the round-trip:
    #     'cp1006', 'cp875', 'iso8859_8'
    upper_half_codecs = (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',
    )
    check_roundtrip(''.join(map(chr, xrange(128, 256))), upper_half_codecs)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000688
def test_concatenation(self):
    """Adjacent string literals mixing str and unicode join to unicode.

    Each pair below is (juxtaposed literals, expected unicode value).
    """
    cases = (
        (u"abc" u"def", u"abcdef"),
        ("abc" u"def", u"abcdef"),
        (u"abc" "def", u"abcdef"),
        (u"abc" u"def" "ghi", u"abcdefghi"),
        ("abc" "def" u"ghi", u"abcdefghi"),
    )
    for joined, expected in cases:
        self.assertEqual(joined, expected)
Fred Drake004d5e62000-10-23 17:22:08 +0000695
def test_printing(self):
    """Print unicode and mixed str/unicode values to a file-like object.

    Nothing is asserted here; the test succeeds as long as none of the
    print statements raises.
    """
    class NullWriter:
        """Minimal file-like sink whose write() discards its argument."""
        def write(self, data):
            pass

    sink = NullWriter()

    # Single and multiple arguments, unicode-only and mixed with str.
    print >>sink, u'abc'
    print >>sink, u'abc', u'def'
    print >>sink, u'abc', 'def'
    print >>sink, 'abc', u'def'

    # Values ending in a newline, with and without a trailing comma;
    # the repeated statements are intentional and kept in this order.
    print >>sink, u'abc\n'
    print >>sink, u'abc\n',
    print >>sink, u'abc\n',
    print >>sink, u'def\n'
    print >>sink, u'def\n'
Fred Drake004d5e62000-10-23 17:22:08 +0000711
def test_ucs4(self):
    """Round-trip a non-BMP character through raw-unicode-escape.

    Nothing is checked when sys.maxunicode equals 0xFFFF.
    """
    if sys.maxunicode != 0xFFFF:
        original = u'\U00100000'
        escaped = original.encode("raw-unicode-escape")
        restored = escaped.decode("raw-unicode-escape")
        self.assertEqual(original, restored)
718
def test_main():
    """Run the UnicodeTest case via test_support.run_unittest()."""
    test_support.run_unittest(UnicodeTest)
Barry Warsaw817918c2002-08-06 16:58:21 +0000721
# Script entry point: run the tests when this file is executed directly.
if __name__ == "__main__":
    test_main()