""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
from test_support import verify, verbose, TestFailed
import sys

# The repr() checks only run when sys.platform does not start with 'java'.
if not sys.platform.startswith('java'):
    # Test basic sanity of repr()
    verify(repr(u'abc') == "u'abc'")
    verify(repr(u'ab\\c') == "u'ab\\\\c'")
    verify(repr(u'ab\\') == "u'ab\\\\'")
    verify(repr(u'\\c') == "u'\\\\c'")
    verify(repr(u'\\') == "u'\\\\'")
    verify(repr(u'\n') == "u'\\n'")
    verify(repr(u'\r') == "u'\\r'")
    verify(repr(u'\t') == "u'\\t'")
    verify(repr(u'\b') == "u'\\x08'")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'") == '''u"'"''')
    verify(repr(u'"') == """u'"'""")
    # repr() of all 256 Latin-1 code points in one string.
    verify(repr(u''.join(map(unichr, range(256)))) ==
       "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
       "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
       "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
       "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
       "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
       "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
       "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
       "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
       "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
       "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
       "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
       "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
       "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
       "\\xfe\\xff'")

Guido van Rossuma831cac2000-03-10 23:23:21 +000042def test(method, input, output, *args):
43 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000044 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000045 try:
46 f = getattr(input, method)
47 value = apply(f, args)
48 except:
49 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000050 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000051 else:
52 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000053 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 if verbose:
55 print 'no'
56 print '*',f, `input`, `output`, `value`
57 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000058 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000059 else:
60 if verbose:
61 print 'yes'
62
# capitalize / count / title / find / rfind / lower / upper / split,
# each checked through the test() helper.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')
test('capitalize', u'aaaa', u'Aaaa')
test('capitalize', u'AaAa', u'Aaaa')

# count() with every mix of str and unicode receiver/argument.
test('count', u'aaa', 3, u'a')
test('count', u'aaa', 0, u'b')
test('count', 'aaa', 3, u'a')
test('count', 'aaa', 0, u'b')
test('count', u'aaa', 3, 'a')
test('count', u'aaa', 0, 'b')

test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')

# Disabled: never executed.  transtable is also referenced by the disabled
# translate tests further down.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')

test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
# Multi-character separators, mixing str and unicode operands.
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'endcase test', [u'endcase ', u''], u'test')
test('split', u'endcase test', [u'endcase ', u''], 'test')
test('split', 'endcase test', [u'endcase ', u''], u'test')


# join now works with any sequence type
class Sequence:
    """Wrapper exposing only __len__ and __getitem__ of the wrapped object.

    Used to feed join() something that is neither a list nor a tuple.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]

124test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000125test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000126test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000127test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000128test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000129test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
130test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
131test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
132test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
133test('join', ' ', u'w x y z', Sequence(u'wxyz'))
134test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000135
136result = u''
137for i in range(10):
138 if i > 0:
139 result = result + u':'
140 result = result + u'x'*10
141test('join', u':', result, [u'x' * 10] * 10)
142test('join', u':', result, (u'x' * 10,) * 10)
143
# strip / lstrip / rstrip / swapcase / replace, checked through test().
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: never executed.  Depends on transtable from the disabled
# maketrans block and on a `string` module that this file does not import.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace() with and without a maximum replacement count.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)

# startswith() / endswith(), including the optional start and end
# arguments.  Expected results are the ints 1 and 0.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)

# expandtabs(): the column restarts after '\r' and '\n', so 'ab\t' pads to
# the next tab stop (6 spaces at width 8, 2 at width 4) and 'g\t' pads
# with 7 or 3 spaces respectively.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)

# Disabled: never executed.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')

207# Comparisons:
208print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000209verify(u'abc' == 'abc')
210verify('abc' == u'abc')
211verify(u'abc' == u'abc')
212verify(u'abcd' > 'abc')
213verify('abcd' > u'abc')
214verify(u'abcd' > u'abc')
215verify(u'abc' < 'abcd')
216verify('abc' < u'abcd')
217verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000218print 'done.'
219
Marc-André Lemburge5034372000-08-08 08:04:29 +0000220if 0:
221 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000222
Marc-André Lemburge5034372000-08-08 08:04:29 +0000223 print 'Testing UTF-16 code point order comparisons...',
224 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000225 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000226 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000227 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000228
Marc-André Lemburge5034372000-08-08 08:04:29 +0000229 # Non surrogate above surrogate value, fixup required
230 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000231 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000232
Marc-André Lemburge5034372000-08-08 08:04:29 +0000233 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000234 s2 = u'\ud800\udc01'
235 test_lecmp(s, s2)
236 s2 = u'\ud900\udc01'
237 test_lecmp(s, s2)
238 s2 = u'\uda00\udc01'
239 test_lecmp(s, s2)
240 s2 = u'\udb00\udc01'
241 test_lecmp(s, s2)
242 s2 = u'\ud800\udd01'
243 test_lecmp(s, s2)
244 s2 = u'\ud900\udd01'
245 test_lecmp(s, s2)
246 s2 = u'\uda00\udd01'
247 test_lecmp(s, s2)
248 s2 = u'\udb00\udd01'
249 test_lecmp(s, s2)
250 s2 = u'\ud800\ude01'
251 test_lecmp(s, s2)
252 s2 = u'\ud900\ude01'
253 test_lecmp(s, s2)
254 s2 = u'\uda00\ude01'
255 test_lecmp(s, s2)
256 s2 = u'\udb00\ude01'
257 test_lecmp(s, s2)
258 s2 = u'\ud800\udfff'
259 test_lecmp(s, s2)
260 s2 = u'\ud900\udfff'
261 test_lecmp(s, s2)
262 s2 = u'\uda00\udfff'
263 test_lecmp(s, s2)
264 s2 = u'\udb00\udfff'
265 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000266
267 test_fixup(u'\ue000')
268 test_fixup(u'\uff61')
269
270 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000271 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000272 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000273
# ljust / rjust / center pad with spaces up to the requested width and
# return the string unchanged when it is already at least that wide.
# Width 10 adds 7 spaces in total (center: 3 left, 4 right);
# width 6 adds 3 (center: 1 left, 2 right).
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

# islower / isupper / istitle / isalpha / isalnum.  Expected results are
# the ints 1 and 0; u'\u1FFc' is used as a non-ASCII probe character.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
# This one check is skipped when sys.platform starts with 'java'.
if not sys.platform.startswith('java'):
    test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)

# splitlines(): '\n', '\r' and '\r\n' each end a line; the final call
# passes 1 as keepends so the line endings stay in the result.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate() with a dict keyed by ordinal: None deletes the character,
# an int or a unicode string replaces it.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})

Guido van Rossumd4d26842000-03-13 23:21:48 +0000341# Contains:
342print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000343verify(('a' in u'abdb') == 1)
344verify(('a' in u'bdab') == 1)
345verify(('a' in u'bdaba') == 1)
346verify(('a' in u'bdba') == 1)
347verify(('a' in u'bdba') == 1)
348verify((u'a' in u'bdba') == 1)
349verify((u'a' in u'bdb') == 0)
350verify((u'a' in 'bdb') == 0)
351verify((u'a' in 'bdba') == 1)
352verify((u'a' in ('a',1,None)) == 1)
353verify((u'a' in (1,None,'a')) == 1)
354verify((u'a' in (1,None,u'a')) == 1)
355verify(('a' in ('a',1,None)) == 1)
356verify(('a' in (1,None,'a')) == 1)
357verify(('a' in (1,None,u'a')) == 1)
358verify(('a' in ('x',1,u'y')) == 0)
359verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000360print 'done.'
361
Guido van Rossuma831cac2000-03-10 23:23:21 +0000362# Formatting:
363print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000364verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
365verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
366verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
367verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
368verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
369verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
370verify(u"%c" % (u"a",) == u'a')
371verify(u"%c" % ("a",) == u'a')
372verify(u"%c" % (34,) == u'"')
373verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000374if sys.platform[:4] != 'java':
375 value = u"%r, %r" % (u"abc", "abc")
376 if value != u"u'abc', 'abc'":
377 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000378
Marc-André Lemburg36619082001-01-17 19:11:13 +0000379verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000380try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000381 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000382except KeyError:
383 print '*** formatting failed for "%s"' % "u'abc, def'"
384else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000385 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000386
Guido van Rossum97064862000-04-10 13:52:48 +0000387# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000388verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
389verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
390verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
391verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
392verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
393verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
394verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
395verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
396verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000397verify('%*s' % (5,u'abc',) == u' abc')
398verify('%*s' % (-5,u'abc',) == u'abc ')
399verify('%*.*s' % (5,2,u'abc',) == u' ab')
400verify('%*.*s' % (5,3,u'abc',) == u' abc')
401verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
402verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000403print 'done.'
404
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000405print 'Testing builtin unicode()...',
406
407# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
408
409verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
410
411class UnicodeSubclass(unicode):
412 pass
413
414verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
415 == u'unicode subclass becomes unicode')
416
417verify(unicode('strings are converted to unicode')
418 == u'strings are converted to unicode')
419
class UnicodeCompat:
    """Object whose __unicode__() returns the value given at construction."""

    def __init__(self, x):
        self.x = x

    def __unicode__(self):
        return self.x

# unicode() must pick up the object's __unicode__() result.
verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
       == u'__unicode__ compatible objects are recognized')

class StringCompat:
    """Object whose __str__() returns the value given at construction."""

    def __init__(self, x):
        self.x = x

    def __str__(self):
        return self.x

435verify(unicode(StringCompat('__str__ compatible objects are recognized'))
436 == u'__str__ compatible objects are recognized')
437
438# unicode(obj) is compatible to str():
439
440o = StringCompat('unicode(obj) is compatible to str()')
441verify(unicode(o) == u'unicode(obj) is compatible to str()')
442verify(str(o) == 'unicode(obj) is compatible to str()')
443
444for obj in (123, 123.45, 123L):
445 verify(unicode(obj) == unicode(str(obj)))
446
447# unicode(obj, encoding, error) tests (this maps to
448# PyUnicode_FromEncodedObject() at C level)
449
Finn Bock2b29cb22001-12-10 20:57:34 +0000450if not sys.platform.startswith('java'):
451 try:
452 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
453 except TypeError:
454 pass
455 else:
456 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000457
458verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
459 == u'strings are decoded to unicode')
460
Finn Bock2b29cb22001-12-10 20:57:34 +0000461if not sys.platform.startswith('java'):
462 verify(unicode(buffer('character buffers are decoded to unicode'),
463 'utf-8', 'strict')
464 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000465
466print 'done.'
467
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000468# Test builtin codecs
469print 'Testing builtin codecs...',
470
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000471# UTF-7 specific encoding tests:
472utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
473 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
474 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
475 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
476 (u'+', '+-'),
477 (u'+-', '+--'),
478 (u'+?', '+-?'),
479 (u'\?', '+AFw?'),
480 (u'+?', '+-?'),
481 (ur'\\?', '+AFwAXA?'),
482 (ur'\\\?', '+AFwAXABc?'),
483 (ur'++--', '+-+---')]
484
485for x,y in utfTests:
486 verify( x.encode('utf-7') == y )
487
Tim Peters527e64f2001-10-04 05:36:56 +0000488try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000489 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
490except UnicodeError:
491 pass
492else:
493 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
494
495verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
496
# UTF-8 specific encoding tests:
verify(u'\u20ac'.encode('utf-8') == \
       ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
# Surrogate pairs must encode as a single 4-byte sequence.
verify(u'\ud800\udc02'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
verify(u'\ud84d\udc56'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
# UTF-8 specific decoding tests
verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
               'utf-8') == u'\U00023456' )
verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
               'utf-8') == u'\U00010002' )
verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
               'utf-8') == u'\u20ac' )

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

verify(unicode('hello','ascii') == u'hello')
verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello')

Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000521# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000522try:
523 u'Andr\202 x'.encode('ascii')
524 u'Andr\202 x'.encode('ascii','strict')
525except ValueError:
526 pass
527else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000528 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000529verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
530verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000531
532try:
533 unicode('Andr\202 x','ascii')
534 unicode('Andr\202 x','ascii','strict')
535except ValueError:
536 pass
537else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000538 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000539verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
540verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000541
Marc-André Lemburg36619082001-01-17 19:11:13 +0000542verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000543verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000544verify(u'hello'.encode('utf-8') == 'hello')
545verify(u'hello'.encode('utf8') == 'hello')
546verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
547verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
548verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000549
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000550# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000551u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000552for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000553 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000554 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000555
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000556# Roundtrip safety for non-BMP (just a few chars)
557u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
558for encoding in ('utf-8',
559 'utf-16', 'utf-16-le', 'utf-16-be',
Marc-André Lemburg80d1dd52001-07-25 16:05:59 +0000560 #'raw_unicode_escape',
561 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000562 verify(unicode(u.encode(encoding),encoding) == u)
563
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000564u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000565for encoding in (
566 'latin-1',
567 ):
568 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000569 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000570 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000571 print '*** codec "%s" failed round-trip' % encoding
572 except ValueError,why:
573 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000574
575u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000576for encoding in (
577 'ascii',
578 ):
579 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000580 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000581 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000582 print '*** codec "%s" failed round-trip' % encoding
583 except ValueError,why:
584 print '*** codec for "%s" failed: %s' % (encoding, why)
585
586print 'done.'
587
588print 'Testing standard mapping codecs...',
589
590print '0-127...',
591s = ''.join(map(chr, range(128)))
592for encoding in (
593 'cp037', 'cp1026',
594 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
595 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000596 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000597 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
598 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
599 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
600 'mac_cyrillic', 'mac_latin2',
601
602 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
603 'cp1256', 'cp1257', 'cp1258',
604 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
605
606 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000607 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000608
Guido van Rossum9e896b32000-04-05 20:11:21 +0000609 ### These have undefined mappings:
610 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000611
Tim Peters2f228e72001-05-13 00:19:31 +0000612 ### These fail the round-trip:
613 #'cp875'
614
Guido van Rossum9e896b32000-04-05 20:11:21 +0000615 ):
616 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000617 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000618 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000619 print '*** codec "%s" failed round-trip' % encoding
620 except ValueError,why:
621 print '*** codec for "%s" failed: %s' % (encoding, why)
622
623print '128-255...',
624s = ''.join(map(chr, range(128,256)))
625for encoding in (
626 'cp037', 'cp1026',
627 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
628 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000629 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000630 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000631 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000632 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000633 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000634
Guido van Rossum9e896b32000-04-05 20:11:21 +0000635 ### These have undefined mappings:
636 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
637 #'cp1256', 'cp1257', 'cp1258',
638 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000639 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000640 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000641
Guido van Rossum9e896b32000-04-05 20:11:21 +0000642 ### These fail the round-trip:
643 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000644
Guido van Rossum9e896b32000-04-05 20:11:21 +0000645 ):
646 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000647 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000648 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000649 print '*** codec "%s" failed round-trip' % encoding
650 except ValueError,why:
651 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000652
653print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000654
655print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000656verify((u"abc" u"def") == u"abcdef")
657verify(("abc" u"def") == u"abcdef")
658verify((u"abc" "def") == u"abcdef")
659verify((u"abc" u"def" "ghi") == u"abcdefghi")
660verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000661print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000662
663print 'Testing Unicode printing...',
664print u'abc'
665print u'abc', u'def'
666print u'abc', 'def'
667print 'abc', u'def'
668print u'abc\n'
669print u'abc\n',
670print u'abc\n',
671print u'def\n'
672print u'def\n'
673print 'done.'