blob: 77d978b333ac06b08ecdaf9e45875a70c3d4e185 [file] [log] [blame]
Guido van Rossuma831cac2000-03-10 23:23:21 +00001""" Test script for the Unicode implementation.
2
Guido van Rossuma831cac2000-03-10 23:23:21 +00003Written by Marc-Andre Lemburg (mal@lemburg.com).
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
Marc-André Lemburg36619082001-01-17 19:11:13 +00007"""#"
Tim Peters2f228e72001-05-13 00:19:31 +00008from test_support import verify, verbose, TestFailed
Guido van Rossuma831cac2000-03-10 23:23:21 +00009import sys
10
Guido van Rossume4874ae2001-09-21 15:36:41 +000011# Test basic sanity of repr()
12verify(repr(u'abc') == "u'abc'")
13verify(repr(u'ab\\c') == "u'ab\\\\c'")
14verify(repr(u'ab\\') == "u'ab\\\\'")
15verify(repr(u'\\c') == "u'\\\\c'")
16verify(repr(u'\\') == "u'\\\\'")
17verify(repr(u'\n') == "u'\\n'")
18verify(repr(u'\r') == "u'\\r'")
19verify(repr(u'\t') == "u'\\t'")
20verify(repr(u'\b') == "u'\\x08'")
Guido van Rossum11310bf2001-09-21 15:46:41 +000021verify(repr(u"'\"") == """u'\\'"'""")
22verify(repr(u"'\"") == """u'\\'"'""")
23verify(repr(u"'") == '''u"'"''')
24verify(repr(u'"') == """u'"'""")
Marc-André Lemburg41f01992001-11-28 14:03:14 +000025verify(repr(u''.join(map(unichr, range(256)))) ==
26 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
27 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
28 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
29 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
30 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
31 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
32 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
33 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
34 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
35 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
36 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
37 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
38 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
39 "\\xfe\\xff'")
Guido van Rossume4874ae2001-09-21 15:36:41 +000040
Guido van Rossuma831cac2000-03-10 23:23:21 +000041def test(method, input, output, *args):
42 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000043 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000044 try:
45 f = getattr(input, method)
46 value = apply(f, args)
47 except:
48 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000049 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000050 else:
51 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000052 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000053 if verbose:
54 print 'no'
55 print '*',f, `input`, `output`, `value`
56 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000057 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000058 else:
59 if verbose:
60 print 'yes'
61
62test('capitalize', u' hello ', u' hello ')
63test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000064test('capitalize', u'aaaa', u'Aaaa')
65test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000066
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000067test('count', u'aaa', 3, u'a')
68test('count', u'aaa', 0, u'b')
69test('count', 'aaa', 3, u'a')
70test('count', 'aaa', 0, u'b')
71test('count', u'aaa', 3, 'a')
72test('count', u'aaa', 0, 'b')
73
Guido van Rossuma831cac2000-03-10 23:23:21 +000074test('title', u' hello ', u' Hello ')
75test('title', u'hello ', u'Hello ')
76test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
77test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
78test('title', u"getInt", u'Getint')
79
80test('find', u'abcdefghiabc', 0, u'abc')
81test('find', u'abcdefghiabc', 9, u'abc', 1)
82test('find', u'abcdefghiabc', -1, u'def', 4)
83
84test('rfind', u'abcdefghiabc', 9, u'abc')
85
86test('lower', u'HeLLo', u'hello')
87test('lower', u'hello', u'hello')
88
89test('upper', u'HeLLo', u'HELLO')
90test('upper', u'HELLO', u'HELLO')
91
92if 0:
93 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
94
95 test('maketrans', u'abc', transtable, u'xyz')
96 test('maketrans', u'abc', ValueError, u'xyzq')
97
98test('split', u'this is the split function',
99 [u'this', u'is', u'the', u'split', u'function'])
100test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
101test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
102test('split', u'a b c d', [u'a', u'b c d'], None, 1)
103test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
104test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
105test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
106test('split', u'a b c d', [u'a b c d'], None, 0)
107test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
108test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000109test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
110test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
111test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
112test('split', u'endcase test', [u'endcase ', u''], u'test')
113test('split', u'endcase test', [u'endcase ', u''], 'test')
114test('split', 'endcase test', [u'endcase ', u''], u'test')
115
Guido van Rossuma831cac2000-03-10 23:23:21 +0000116
117# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper exposing only __len__ and __getitem__.

    Used below to check that join() accepts sequence objects other than
    lists and tuples.
    """

    def __init__(self, seq):
        # Keep a reference to the wrapped sequence; no copy is made.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Indexing errors from the wrapped sequence propagate unchanged.
        return self.seq[i]
122
123test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000124test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000125test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000126test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000127test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000128test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
129test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
130test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
131test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
132test('join', ' ', u'w x y z', Sequence(u'wxyz'))
133test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000134
135result = u''
136for i in range(10):
137 if i > 0:
138 result = result + u':'
139 result = result + u'x'*10
140test('join', u':', result, [u'x' * 10] * 10)
141test('join', u':', result, (u'x' * 10,) * 10)
142
143test('strip', u' hello ', u'hello')
144test('lstrip', u' hello ', u'hello ')
145test('rstrip', u' hello ', u' hello')
146test('strip', u'hello', u'hello')
147
148test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
149
150if 0:
151 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
152
153 table = string.maketrans('a', u'A')
154 test('translate', u'abc', u'Abc', table)
155 test('translate', u'xyz', u'xyz', table)
156
157test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000158test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000159test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
160test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
161test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
162test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
163test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
164test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
165test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
166
167test('startswith', u'hello', 1, u'he')
168test('startswith', u'hello', 1, u'hello')
169test('startswith', u'hello', 0, u'hello world')
170test('startswith', u'hello', 1, u'')
171test('startswith', u'hello', 0, u'ello')
172test('startswith', u'hello', 1, u'ello', 1)
173test('startswith', u'hello', 1, u'o', 4)
174test('startswith', u'hello', 0, u'o', 5)
175test('startswith', u'hello', 1, u'', 5)
176test('startswith', u'hello', 0, u'lo', 6)
177test('startswith', u'helloworld', 1, u'lowo', 3)
178test('startswith', u'helloworld', 1, u'lowo', 3, 7)
179test('startswith', u'helloworld', 0, u'lowo', 3, 6)
180
181test('endswith', u'hello', 1, u'lo')
182test('endswith', u'hello', 0, u'he')
183test('endswith', u'hello', 1, u'')
184test('endswith', u'hello', 0, u'hello world')
185test('endswith', u'helloworld', 0, u'worl')
186test('endswith', u'helloworld', 1, u'worl', 3, 9)
187test('endswith', u'helloworld', 1, u'world', 3, 12)
188test('endswith', u'helloworld', 1, u'lowo', 1, 7)
189test('endswith', u'helloworld', 1, u'lowo', 2, 7)
190test('endswith', u'helloworld', 1, u'lowo', 3, 7)
191test('endswith', u'helloworld', 0, u'lowo', 4, 7)
192test('endswith', u'helloworld', 0, u'lowo', 3, 8)
193test('endswith', u'ab', 0, u'ab', 0, 1)
194test('endswith', u'ab', 0, u'ab', 0, 0)
195
# Tab stops are every 8 columns by default; both \r and \n reset the
# column, so 'ab\t' pads to the next stop and 'g\t' does the same on the
# following line.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
200
201if 0:
202 test('capwords', u'abc def ghi', u'Abc Def Ghi')
203 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
204 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
205
206# Comparisons:
207print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000208verify(u'abc' == 'abc')
209verify('abc' == u'abc')
210verify(u'abc' == u'abc')
211verify(u'abcd' > 'abc')
212verify('abcd' > u'abc')
213verify(u'abcd' > u'abc')
214verify(u'abc' < 'abcd')
215verify('abc' < u'abcd')
216verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000217print 'done.'
218
Marc-André Lemburge5034372000-08-08 08:04:29 +0000219if 0:
220 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000221
Marc-André Lemburge5034372000-08-08 08:04:29 +0000222 print 'Testing UTF-16 code point order comparisons...',
223 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000224 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000225 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000226 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000227
Marc-André Lemburge5034372000-08-08 08:04:29 +0000228 # Non surrogate above surrogate value, fixup required
229 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000230 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000231
Marc-André Lemburge5034372000-08-08 08:04:29 +0000232 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000233 s2 = u'\ud800\udc01'
234 test_lecmp(s, s2)
235 s2 = u'\ud900\udc01'
236 test_lecmp(s, s2)
237 s2 = u'\uda00\udc01'
238 test_lecmp(s, s2)
239 s2 = u'\udb00\udc01'
240 test_lecmp(s, s2)
241 s2 = u'\ud800\udd01'
242 test_lecmp(s, s2)
243 s2 = u'\ud900\udd01'
244 test_lecmp(s, s2)
245 s2 = u'\uda00\udd01'
246 test_lecmp(s, s2)
247 s2 = u'\udb00\udd01'
248 test_lecmp(s, s2)
249 s2 = u'\ud800\ude01'
250 test_lecmp(s, s2)
251 s2 = u'\ud900\ude01'
252 test_lecmp(s, s2)
253 s2 = u'\uda00\ude01'
254 test_lecmp(s, s2)
255 s2 = u'\udb00\ude01'
256 test_lecmp(s, s2)
257 s2 = u'\ud800\udfff'
258 test_lecmp(s, s2)
259 s2 = u'\ud900\udfff'
260 test_lecmp(s, s2)
261 s2 = u'\uda00\udfff'
262 test_lecmp(s, s2)
263 s2 = u'\udb00\udfff'
264 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000265
266 test_fixup(u'\ue000')
267 test_fixup(u'\uff61')
268
269 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000270 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000271 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000272
# Expected values are padded with spaces to the full requested width;
# center() puts the odd extra space on the right for these inputs.
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

282
283test('islower', u'a', 1)
284test('islower', u'A', 0)
285test('islower', u'\n', 0)
286test('islower', u'\u1FFc', 0)
287test('islower', u'abc', 1)
288test('islower', u'aBc', 0)
289test('islower', u'abc\n', 1)
290
291test('isupper', u'a', 0)
292test('isupper', u'A', 1)
293test('isupper', u'\n', 0)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000294if sys.platform[:4] != 'java':
295 test('isupper', u'\u1FFc', 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000296test('isupper', u'ABC', 1)
297test('isupper', u'AbC', 0)
298test('isupper', u'ABC\n', 1)
299
300test('istitle', u'a', 0)
301test('istitle', u'A', 1)
302test('istitle', u'\n', 0)
303test('istitle', u'\u1FFc', 1)
304test('istitle', u'A Titlecased Line', 1)
305test('istitle', u'A\nTitlecased Line', 1)
306test('istitle', u'A Titlecased, Line', 1)
307test('istitle', u'Greek \u1FFcitlecases ...', 1)
308test('istitle', u'Not a capitalized String', 0)
309test('istitle', u'Not\ta Titlecase String', 0)
310test('istitle', u'Not--a Titlecase String', 0)
311
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000312test('isalpha', u'a', 1)
313test('isalpha', u'A', 1)
314test('isalpha', u'\n', 0)
315test('isalpha', u'\u1FFc', 1)
316test('isalpha', u'abc', 1)
317test('isalpha', u'aBc123', 0)
318test('isalpha', u'abc\n', 0)
319
320test('isalnum', u'a', 1)
321test('isalnum', u'A', 1)
322test('isalnum', u'\n', 0)
323test('isalnum', u'123abc456', 1)
324test('isalnum', u'a1b3c', 1)
325test('isalnum', u'aBc000 ', 0)
326test('isalnum', u'abc\n', 0)
327
Guido van Rossuma831cac2000-03-10 23:23:21 +0000328test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
329test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
330test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
331test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
332test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
333test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum7ee801d2000-04-11 15:37:02 +0000334test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000335
336test('translate', u"abababc", u'bbbc', {ord('a'):None})
337test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
338test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
339
Guido van Rossumd4d26842000-03-13 23:21:48 +0000340# Contains:
341print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000342verify(('a' in u'abdb') == 1)
343verify(('a' in u'bdab') == 1)
344verify(('a' in u'bdaba') == 1)
345verify(('a' in u'bdba') == 1)
346verify(('a' in u'bdba') == 1)
347verify((u'a' in u'bdba') == 1)
348verify((u'a' in u'bdb') == 0)
349verify((u'a' in 'bdb') == 0)
350verify((u'a' in 'bdba') == 1)
351verify((u'a' in ('a',1,None)) == 1)
352verify((u'a' in (1,None,'a')) == 1)
353verify((u'a' in (1,None,u'a')) == 1)
354verify(('a' in ('a',1,None)) == 1)
355verify(('a' in (1,None,'a')) == 1)
356verify(('a' in (1,None,u'a')) == 1)
357verify(('a' in ('x',1,u'y')) == 0)
358verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000359print 'done.'
360
Guido van Rossuma831cac2000-03-10 23:23:21 +0000361# Formatting:
362print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000363verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
# %5.2f right-justifies in a 5-character field, so '3.00' gains one pad
# space in addition to the literal space after the comma.
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57')
368verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
369verify(u"%c" % (u"a",) == u'a')
370verify(u"%c" % ("a",) == u'a')
371verify(u"%c" % (34,) == u'"')
372verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000373if sys.platform[:4] != 'java':
374 value = u"%r, %r" % (u"abc", "abc")
375 if value != u"u'abc', 'abc'":
376 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000377
Marc-André Lemburg36619082001-01-17 19:11:13 +0000378verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000379try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000380 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000381except KeyError:
382 print '*** formatting failed for "%s"' % "u'abc, def'"
383else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000384 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000385
Guido van Rossum97064862000-04-10 13:52:48 +0000386# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000387verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
388verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
389verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
390verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
391verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
392verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
393verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
394verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
395verify('...%s...' % u"abc" == u'...abc...')
# '*' takes the field width (and precision) from the argument tuple; a
# negative width left-justifies.  Results are padded to 5 characters.
verify('%*s' % (5,u'abc',) == u'  abc')
verify('%*s' % (-5,u'abc',) == u'abc  ')
verify('%*.*s' % (5,2,u'abc',) == u'   ab')
verify('%*.*s' % (5,3,u'abc',) == u'  abc')
verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10   abc')
verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103   abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000402print 'done.'
403
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000404print 'Testing builtin unicode()...',
405
406# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
407
408verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
409
class UnicodeSubclass(unicode):
    """Trivial unicode subclass; adds no behaviour of its own."""
412
413verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
414 == u'unicode subclass becomes unicode')
415
416verify(unicode('strings are converted to unicode')
417 == u'strings are converted to unicode')
418
class UnicodeCompat:
    """Object that supplies its own __unicode__ conversion."""

    def __init__(self, x):
        # Value handed back verbatim by __unicode__().
        self.x = x

    def __unicode__(self):
        return self.x
424
425verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
426 == u'__unicode__ compatible objects are recognized')
427
class StringCompat:
    """Object that only supplies a __str__ conversion."""

    def __init__(self, x):
        # Value handed back verbatim by __str__().
        self.x = x

    def __str__(self):
        return self.x
433
434verify(unicode(StringCompat('__str__ compatible objects are recognized'))
435 == u'__str__ compatible objects are recognized')
436
# unicode(obj) is compatible with str():
438
439o = StringCompat('unicode(obj) is compatible to str()')
440verify(unicode(o) == u'unicode(obj) is compatible to str()')
441verify(str(o) == 'unicode(obj) is compatible to str()')
442
443for obj in (123, 123.45, 123L):
444 verify(unicode(obj) == unicode(str(obj)))
445
446# unicode(obj, encoding, error) tests (this maps to
447# PyUnicode_FromEncodedObject() at C level)
448
449try:
450 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
451except TypeError:
452 pass
453else:
454 raise TestFailed, "decoding unicode should NOT be supported"
455
456verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
457 == u'strings are decoded to unicode')
458
459verify(unicode(buffer('character buffers are decoded to unicode'),
460 'utf-8', 'strict')
461 == u'character buffers are decoded to unicode')
462
463print 'done.'
464
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000465# Test builtin codecs
466print 'Testing builtin codecs...',
467
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000468# UTF-7 specific encoding tests:
469utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
470 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
471 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
472 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
473 (u'+', '+-'),
474 (u'+-', '+--'),
475 (u'+?', '+-?'),
476 (u'\?', '+AFw?'),
477 (u'+?', '+-?'),
478 (ur'\\?', '+AFwAXA?'),
479 (ur'\\\?', '+AFwAXABc?'),
480 (ur'++--', '+-+---')]
481
482for x,y in utfTests:
483 verify( x.encode('utf-7') == y )
484
Tim Peters527e64f2001-10-04 05:36:56 +0000485try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000486 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
487except UnicodeError:
488 pass
489else:
490 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
491
492verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
493
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000494# UTF-8 specific encoding tests:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000495verify(u'\u20ac'.encode('utf-8') == \
496 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
497verify(u'\ud800\udc02'.encode('utf-8') == \
498 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
499verify(u'\ud84d\udc56'.encode('utf-8') == \
500 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000501# UTF-8 specific decoding tests
Tim Petersd2bf3b72001-01-18 02:22:22 +0000502verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000503 'utf-8') == u'\U00023456' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000504verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000505 'utf-8') == u'\U00010002' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000506verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000507 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000508
509# Other possible utf-8 test cases:
510# * strict decoding testing for all of the
511# UTF8_ERROR cases in PyUnicode_DecodeUTF8
512
Marc-André Lemburg36619082001-01-17 19:11:13 +0000513verify(unicode('hello','ascii') == u'hello')
514verify(unicode('hello','utf-8') == u'hello')
515verify(unicode('hello','utf8') == u'hello')
516verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000517
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000518# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000519try:
520 u'Andr\202 x'.encode('ascii')
521 u'Andr\202 x'.encode('ascii','strict')
522except ValueError:
523 pass
524else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000525 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000526verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
527verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000528
529try:
530 unicode('Andr\202 x','ascii')
531 unicode('Andr\202 x','ascii','strict')
532except ValueError:
533 pass
534else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000535 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000536verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
537verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000538
Marc-André Lemburg36619082001-01-17 19:11:13 +0000539verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000540verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000541verify(u'hello'.encode('utf-8') == 'hello')
542verify(u'hello'.encode('utf8') == 'hello')
543verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
544verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
545verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000546
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000547# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000548u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000549for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000550 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000551 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000552
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000553# Roundtrip safety for non-BMP (just a few chars)
554u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
555for encoding in ('utf-8',
556 'utf-16', 'utf-16-le', 'utf-16-be',
Marc-André Lemburg80d1dd52001-07-25 16:05:59 +0000557 #'raw_unicode_escape',
558 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000559 verify(unicode(u.encode(encoding),encoding) == u)
560
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000561u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000562for encoding in (
563 'latin-1',
564 ):
565 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000566 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000567 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000568 print '*** codec "%s" failed round-trip' % encoding
569 except ValueError,why:
570 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000571
572u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000573for encoding in (
574 'ascii',
575 ):
576 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000577 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000578 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000579 print '*** codec "%s" failed round-trip' % encoding
580 except ValueError,why:
581 print '*** codec for "%s" failed: %s' % (encoding, why)
582
583print 'done.'
584
585print 'Testing standard mapping codecs...',
586
587print '0-127...',
588s = ''.join(map(chr, range(128)))
589for encoding in (
590 'cp037', 'cp1026',
591 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
592 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000593 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000594 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
595 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
596 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
597 'mac_cyrillic', 'mac_latin2',
598
599 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
600 'cp1256', 'cp1257', 'cp1258',
601 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
602
603 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000604 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000605
Guido van Rossum9e896b32000-04-05 20:11:21 +0000606 ### These have undefined mappings:
607 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000608
Tim Peters2f228e72001-05-13 00:19:31 +0000609 ### These fail the round-trip:
610 #'cp875'
611
Guido van Rossum9e896b32000-04-05 20:11:21 +0000612 ):
613 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000614 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000615 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000616 print '*** codec "%s" failed round-trip' % encoding
617 except ValueError,why:
618 print '*** codec for "%s" failed: %s' % (encoding, why)
619
620print '128-255...',
621s = ''.join(map(chr, range(128,256)))
622for encoding in (
623 'cp037', 'cp1026',
624 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
625 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000626 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000627 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000628 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000629 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000630 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000631
Guido van Rossum9e896b32000-04-05 20:11:21 +0000632 ### These have undefined mappings:
633 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
634 #'cp1256', 'cp1257', 'cp1258',
635 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000636 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000637 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000638
Guido van Rossum9e896b32000-04-05 20:11:21 +0000639 ### These fail the round-trip:
640 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000641
Guido van Rossum9e896b32000-04-05 20:11:21 +0000642 ):
643 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000644 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000645 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000646 print '*** codec "%s" failed round-trip' % encoding
647 except ValueError,why:
648 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000649
650print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000651
652print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000653verify((u"abc" u"def") == u"abcdef")
654verify(("abc" u"def") == u"abcdef")
655verify((u"abc" "def") == u"abcdef")
656verify((u"abc" u"def" "ghi") == u"abcdefghi")
657verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000658print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000659
660print 'Testing Unicode printing...',
661print u'abc'
662print u'abc', u'def'
663print u'abc', 'def'
664print 'abc', u'def'
665print u'abc\n'
666print u'abc\n',
667print u'abc\n',
668print u'def\n'
669print u'def\n'
670print 'done.'