""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import string
import sys

from test_support import verify, verbose, TestFailed

# repr() of unicode objects; the whole section is skipped when
# sys.platform starts with 'java'.
if not sys.platform.startswith('java'):
    # Test basic sanity of repr()
    verify(repr(u'abc') == "u'abc'")
    verify(repr(u'ab\\c') == "u'ab\\\\c'")
    verify(repr(u'ab\\') == "u'ab\\\\'")
    verify(repr(u'\\c') == "u'\\\\c'")
    verify(repr(u'\\') == "u'\\\\'")
    verify(repr(u'\n') == "u'\\n'")
    verify(repr(u'\r') == "u'\\r'")
    verify(repr(u'\t') == "u'\\t'")
    verify(repr(u'\b') == "u'\\x08'")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'") == '''u"'"''')
    verify(repr(u'"') == """u'"'""")
    # Expected repr() of the 256 code points 0x00-0xff joined into one string.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    testrepr = repr(u''.join(map(unichr, range(256))))
    verify(testrepr == latin1repr)

Guido van Rossuma831cac2000-03-10 23:23:21 +000044def test(method, input, output, *args):
45 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000046 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000047 try:
48 f = getattr(input, method)
49 value = apply(f, args)
50 except:
51 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000052 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000053 else:
54 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000055 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000056 if verbose:
57 print 'no'
58 print '*',f, `input`, `output`, `value`
59 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000060 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000061 else:
62 if verbose:
63 print 'yes'
64
# capitalize()
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')
test('capitalize', u'aaaa', u'Aaaa')
test('capitalize', u'AaAa', u'Aaaa')

# count(), mixing str and unicode receivers and arguments
test('count', u'aaa', 3, u'a')
test('count', u'aaa', 0, u'b')
test('count', 'aaa', 3, u'a')
test('count', 'aaa', 0, u'b')
test('count', u'aaa', 3, 'a')
test('count', u'aaa', 0, 'b')

# title()
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find() / rfind()
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

# lower() / upper()
test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')

# Disabled: 'if 0' keeps these from running.  transtable is also
# referenced by the disabled translate test further down the file.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')

# split(): whitespace splitting, explicit separators, maxsplit, and
# mixed str/unicode receiver and separator.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'endcase test', [u'endcase ', u''], u'test')
test('split', u'endcase test', [u'endcase ', u''], 'test')
test('split', 'endcase test', [u'endcase ', u''], u'test')


# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper exposing only __len__ and __getitem__.

    Used by the join tests to pass something that is neither a list nor
    a tuple.
    """
    def __init__(self, seq): self.seq = seq
    def __len__(self): return len(self.seq)
    def __getitem__(self, i): return self.seq[i]

126test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000127test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000128test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000129test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000130test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000131test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
132test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
133test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
134test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
135test('join', ' ', u'w x y z', Sequence(u'wxyz'))
136test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000137
138result = u''
139for i in range(10):
140 if i > 0:
141 result = result + u':'
142 result = result + u'x'*10
143test('join', u':', result, [u'x' * 10] * 10)
144test('join', u':', result, (u'x' * 10,) * 10)
145
# strip() / lstrip() / rstrip()
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: depends on transtable from the disabled maketrans section.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(), with and without a maximum replacement count
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)

# startswith(prefix[, start[, end]])
test('startswith', u'hello', True, u'he')
test('startswith', u'hello', True, u'hello')
test('startswith', u'hello', False, u'hello world')
test('startswith', u'hello', True, u'')
test('startswith', u'hello', False, u'ello')
test('startswith', u'hello', True, u'ello', 1)
test('startswith', u'hello', True, u'o', 4)
test('startswith', u'hello', False, u'o', 5)
test('startswith', u'hello', True, u'', 5)
test('startswith', u'hello', False, u'lo', 6)
test('startswith', u'helloworld', True, u'lowo', 3)
test('startswith', u'helloworld', True, u'lowo', 3, 7)
test('startswith', u'helloworld', False, u'lowo', 3, 6)

# endswith(suffix[, start[, end]])
test('endswith', u'hello', True, u'lo')
test('endswith', u'hello', False, u'he')
test('endswith', u'hello', True, u'')
test('endswith', u'hello', False, u'hello world')
test('endswith', u'helloworld', False, u'worl')
test('endswith', u'helloworld', True, u'worl', 3, 9)
test('endswith', u'helloworld', True, u'world', 3, 12)
test('endswith', u'helloworld', True, u'lowo', 1, 7)
test('endswith', u'helloworld', True, u'lowo', 2, 7)
test('endswith', u'helloworld', True, u'lowo', 3, 7)
test('endswith', u'helloworld', False, u'lowo', 4, 7)
test('endswith', u'helloworld', False, u'lowo', 3, 8)
test('endswith', u'ab', False, u'ab', 0, 1)
test('endswith', u'ab', False, u'ab', 0, 0)

# expandtabs(): the expected strings carry the exact padding up to the
# next tab stop (default 8, then 4); the column restarts after \r and \n.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)

# Disabled capwords tests
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')

# string.zfill() applied to unicode objects
verify(string.zfill(u'34', 1) == u'34')
verify(string.zfill(u'34', 5) == u'00034')

Guido van Rossuma831cac2000-03-10 23:23:21 +0000212# Comparisons:
213print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000214verify(u'abc' == 'abc')
215verify('abc' == u'abc')
216verify(u'abc' == u'abc')
217verify(u'abcd' > 'abc')
218verify('abcd' > u'abc')
219verify(u'abcd' > u'abc')
220verify(u'abc' < 'abcd')
221verify('abc' < u'abcd')
222verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000223print 'done.'
224
Marc-André Lemburge5034372000-08-08 08:04:29 +0000225if 0:
226 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000227
Marc-André Lemburge5034372000-08-08 08:04:29 +0000228 print 'Testing UTF-16 code point order comparisons...',
229 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000230 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000231 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000232 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000233
Marc-André Lemburge5034372000-08-08 08:04:29 +0000234 # Non surrogate above surrogate value, fixup required
235 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000236 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000237
Marc-André Lemburge5034372000-08-08 08:04:29 +0000238 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000239 s2 = u'\ud800\udc01'
240 test_lecmp(s, s2)
241 s2 = u'\ud900\udc01'
242 test_lecmp(s, s2)
243 s2 = u'\uda00\udc01'
244 test_lecmp(s, s2)
245 s2 = u'\udb00\udc01'
246 test_lecmp(s, s2)
247 s2 = u'\ud800\udd01'
248 test_lecmp(s, s2)
249 s2 = u'\ud900\udd01'
250 test_lecmp(s, s2)
251 s2 = u'\uda00\udd01'
252 test_lecmp(s, s2)
253 s2 = u'\udb00\udd01'
254 test_lecmp(s, s2)
255 s2 = u'\ud800\ude01'
256 test_lecmp(s, s2)
257 s2 = u'\ud900\ude01'
258 test_lecmp(s, s2)
259 s2 = u'\uda00\ude01'
260 test_lecmp(s, s2)
261 s2 = u'\udb00\ude01'
262 test_lecmp(s, s2)
263 s2 = u'\ud800\udfff'
264 test_lecmp(s, s2)
265 s2 = u'\ud900\udfff'
266 test_lecmp(s, s2)
267 s2 = u'\uda00\udfff'
268 test_lecmp(s, s2)
269 s2 = u'\udb00\udfff'
270 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000271
272 test_fixup(u'\ue000')
273 test_fixup(u'\uff61')
274
275 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000276 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000277 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000278
# ljust() / rjust() / center(): expected strings are padded to the
# requested width (10, then 6); a width below len() returns the string
# unchanged.
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

# islower()
test('islower', u'a', True)
test('islower', u'A', False)
test('islower', u'\n', False)
test('islower', u'\u1FFc', False)
test('islower', u'abc', True)
test('islower', u'aBc', False)
test('islower', u'abc\n', True)

# isupper(); the \u1FFc case is skipped when sys.platform starts with 'java'
test('isupper', u'a', False)
test('isupper', u'A', True)
test('isupper', u'\n', False)
if not sys.platform.startswith('java'):
    test('isupper', u'\u1FFc', False)
test('isupper', u'ABC', True)
test('isupper', u'AbC', False)
test('isupper', u'ABC\n', True)

# istitle()
test('istitle', u'a', False)
test('istitle', u'A', True)
test('istitle', u'\n', False)
test('istitle', u'\u1FFc', True)
test('istitle', u'A Titlecased Line', True)
test('istitle', u'A\nTitlecased Line', True)
test('istitle', u'A Titlecased, Line', True)
test('istitle', u'Greek \u1FFcitlecases ...', True)
test('istitle', u'Not a capitalized String', False)
test('istitle', u'Not\ta Titlecase String', False)
test('istitle', u'Not--a Titlecase String', False)

# isalpha()
test('isalpha', u'a', True)
test('isalpha', u'A', True)
test('isalpha', u'\n', False)
test('isalpha', u'\u1FFc', True)
test('isalpha', u'abc', True)
test('isalpha', u'aBc123', False)
test('isalpha', u'abc\n', False)

# isalnum()
test('isalnum', u'a', True)
test('isalnum', u'A', True)
test('isalnum', u'\n', False)
test('isalnum', u'123abc456', True)
test('isalnum', u'a1b3c', True)
test('isalnum', u'aBc000 ', False)
test('isalnum', u'abc\n', False)

# splitlines(); the last case passes True to keep the line endings
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)

# translate() with a dict mapping ordinals to None, an ordinal or a string
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})

Guido van Rossumd4d26842000-03-13 23:21:48 +0000346# Contains:
347print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000348verify(('a' in u'abdb') == 1)
349verify(('a' in u'bdab') == 1)
350verify(('a' in u'bdaba') == 1)
351verify(('a' in u'bdba') == 1)
352verify(('a' in u'bdba') == 1)
353verify((u'a' in u'bdba') == 1)
354verify((u'a' in u'bdb') == 0)
355verify((u'a' in 'bdb') == 0)
356verify((u'a' in 'bdba') == 1)
357verify((u'a' in ('a',1,None)) == 1)
358verify((u'a' in (1,None,'a')) == 1)
359verify((u'a' in (1,None,u'a')) == 1)
360verify(('a' in ('a',1,None)) == 1)
361verify(('a' in (1,None,'a')) == 1)
362verify(('a' in (1,None,u'a')) == 1)
363verify(('a' in ('x',1,u'y')) == 0)
364verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000365print 'done.'
366
Guido van Rossuma831cac2000-03-10 23:23:21 +0000367# Formatting:
368print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000369verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
370verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
371verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
372verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
373verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
374verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
375verify(u"%c" % (u"a",) == u'a')
376verify(u"%c" % ("a",) == u'a')
377verify(u"%c" % (34,) == u'"')
378verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000379if sys.platform[:4] != 'java':
380 value = u"%r, %r" % (u"abc", "abc")
381 if value != u"u'abc', 'abc'":
382 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000383
Marc-André Lemburg36619082001-01-17 19:11:13 +0000384verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000385try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000386 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000387except KeyError:
388 print '*** formatting failed for "%s"' % "u'abc, def'"
389else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000390 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000391
Guido van Rossum97064862000-04-10 13:52:48 +0000392# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000393verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
394verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
395verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
396verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
397verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
398verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
399verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
400verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
401verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000402verify('%*s' % (5,u'abc',) == u' abc')
403verify('%*s' % (-5,u'abc',) == u'abc ')
404verify('%*.*s' % (5,2,u'abc',) == u' ab')
405verify('%*.*s' % (5,3,u'abc',) == u' abc')
406verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
407verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000408print 'done.'
409
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000410print 'Testing builtin unicode()...',
411
412# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
413
414verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
415
class UnicodeSubclass(unicode):
    """Trivial unicode subclass, used to test unicode() on subclass instances."""
    pass

# unicode() of a unicode subclass instance and of a plain str
verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
       == u'unicode subclass becomes unicode')

verify(unicode('strings are converted to unicode')
       == u'strings are converted to unicode')

class UnicodeCompat:
    """Object whose __unicode__() returns the value given to the constructor."""
    def __init__(self, x):
        self.x = x
    def __unicode__(self):
        return self.x

# unicode() of an object that defines __unicode__
verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
       == u'__unicode__ compatible objects are recognized')

class StringCompat:
    """Object whose __str__() returns the value given to the constructor."""
    def __init__(self, x):
        self.x = x
    def __str__(self):
        return self.x

440verify(unicode(StringCompat('__str__ compatible objects are recognized'))
441 == u'__str__ compatible objects are recognized')
442
443# unicode(obj) is compatible to str():
444
445o = StringCompat('unicode(obj) is compatible to str()')
446verify(unicode(o) == u'unicode(obj) is compatible to str()')
447verify(str(o) == 'unicode(obj) is compatible to str()')
448
449for obj in (123, 123.45, 123L):
450 verify(unicode(obj) == unicode(str(obj)))
451
452# unicode(obj, encoding, error) tests (this maps to
453# PyUnicode_FromEncodedObject() at C level)
454
Finn Bock2b29cb22001-12-10 20:57:34 +0000455if not sys.platform.startswith('java'):
456 try:
457 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
458 except TypeError:
459 pass
460 else:
461 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000462
463verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
464 == u'strings are decoded to unicode')
465
Finn Bock2b29cb22001-12-10 20:57:34 +0000466if not sys.platform.startswith('java'):
467 verify(unicode(buffer('character buffers are decoded to unicode'),
468 'utf-8', 'strict')
469 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000470
471print 'done.'
472
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000473# Test builtin codecs
474print 'Testing builtin codecs...',
475
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000476# UTF-7 specific encoding tests:
477utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
478 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
479 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
480 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
481 (u'+', '+-'),
482 (u'+-', '+--'),
483 (u'+?', '+-?'),
484 (u'\?', '+AFw?'),
485 (u'+?', '+-?'),
486 (ur'\\?', '+AFwAXA?'),
487 (ur'\\\?', '+AFwAXABc?'),
488 (ur'++--', '+-+---')]
489
490for x,y in utfTests:
491 verify( x.encode('utf-7') == y )
492
Tim Peters527e64f2001-10-04 05:36:56 +0000493try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000494 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
495except UnicodeError:
496 pass
497else:
498 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
499
500verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
501
# UTF-8 specific encoding tests:
verify(u''.encode('utf-8') == '')
verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
verify((u'\ud800\udc02'*1000).encode('utf-8') ==
       '\xf0\x90\x80\x82'*1000)

# UTF-8 specific decoding tests
verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

verify(unicode('hello','ascii') == u'hello')
verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello')

Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000526# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000527try:
528 u'Andr\202 x'.encode('ascii')
529 u'Andr\202 x'.encode('ascii','strict')
530except ValueError:
531 pass
532else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000533 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000534verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
535verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000536
537try:
538 unicode('Andr\202 x','ascii')
539 unicode('Andr\202 x','ascii','strict')
540except ValueError:
541 pass
542else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000543 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000544verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
545verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000546
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000547verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
548try:
549 "\\".decode("unicode-escape")
550except ValueError:
551 pass
552else:
553 raise TestFailed, '"\\".decode("unicode-escape") should fail'
554
Marc-André Lemburg36619082001-01-17 19:11:13 +0000555verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000556verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000557verify(u'hello'.encode('utf-8') == 'hello')
558verify(u'hello'.encode('utf8') == 'hello')
559verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
560verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
561verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000562
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000563# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000564u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000565for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000566 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000567 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000568
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000569# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000570u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000571for encoding in (
572 'latin-1',
573 ):
574 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000575 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000576 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000577 print '*** codec "%s" failed round-trip' % encoding
578 except ValueError,why:
579 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000580
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000581# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000582u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000583for encoding in (
584 'ascii',
585 ):
586 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000587 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000588 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000589 print '*** codec "%s" failed round-trip' % encoding
590 except ValueError,why:
591 print '*** codec for "%s" failed: %s' % (encoding, why)
592
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000593# Roundtrip safety for non-BMP (just a few chars)
594u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
595for encoding in ('utf-8',
596 'utf-16', 'utf-16-le', 'utf-16-be',
597 #'raw_unicode_escape',
598 'unicode_escape', 'unicode_internal'):
599 verify(unicode(u.encode(encoding),encoding) == u)
600
601# UTF-8 must be roundtrip safe for all UCS-2 code points
602u = u''.join(map(unichr, range(0x10000)))
603for encoding in ('utf-8',):
604 verify(unicode(u.encode(encoding),encoding) == u)
605
Guido van Rossum9e896b32000-04-05 20:11:21 +0000606print 'done.'
607
608print 'Testing standard mapping codecs...',
609
610print '0-127...',
611s = ''.join(map(chr, range(128)))
612for encoding in (
613 'cp037', 'cp1026',
614 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
615 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000616 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000617 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
618 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
619 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
620 'mac_cyrillic', 'mac_latin2',
621
622 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
623 'cp1256', 'cp1257', 'cp1258',
624 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
625
626 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000627 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000628
Guido van Rossum9e896b32000-04-05 20:11:21 +0000629 ### These have undefined mappings:
630 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000631
Tim Peters2f228e72001-05-13 00:19:31 +0000632 ### These fail the round-trip:
633 #'cp875'
634
Guido van Rossum9e896b32000-04-05 20:11:21 +0000635 ):
636 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000637 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000638 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000639 print '*** codec "%s" failed round-trip' % encoding
640 except ValueError,why:
641 print '*** codec for "%s" failed: %s' % (encoding, why)
642
643print '128-255...',
644s = ''.join(map(chr, range(128,256)))
645for encoding in (
646 'cp037', 'cp1026',
647 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
648 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000649 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000650 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000651 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000652 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000653 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000654
Guido van Rossum9e896b32000-04-05 20:11:21 +0000655 ### These have undefined mappings:
656 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
657 #'cp1256', 'cp1257', 'cp1258',
658 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000659 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000660 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000661
Guido van Rossum9e896b32000-04-05 20:11:21 +0000662 ### These fail the round-trip:
663 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000664
Guido van Rossum9e896b32000-04-05 20:11:21 +0000665 ):
666 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000667 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000668 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000669 print '*** codec "%s" failed round-trip' % encoding
670 except ValueError,why:
671 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000672
673print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000674
675print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000676verify((u"abc" u"def") == u"abcdef")
677verify(("abc" u"def") == u"abcdef")
678verify((u"abc" "def") == u"abcdef")
679verify((u"abc" u"def" "ghi") == u"abcdefghi")
680verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000681print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000682
683print 'Testing Unicode printing...',
684print u'abc'
685print u'abc', u'def'
686print u'abc', 'def'
687print 'abc', u'def'
688print u'abc\n'
689print u'abc\n',
690print u'abc\n',
691print u'def\n'
692print u'def\n'
693print 'done.'