""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import sys

from test_support import verify, verbose, TestFailed

# Test basic sanity of repr()
# Backslashes and control characters must come back escaped.
verify(repr(u'abc') == "u'abc'")
verify(repr(u'ab\\c') == "u'ab\\\\c'")
verify(repr(u'ab\\') == "u'ab\\\\'")
verify(repr(u'\\c') == "u'\\\\c'")
verify(repr(u'\\') == "u'\\\\'")
verify(repr(u'\n') == "u'\\n'")
verify(repr(u'\r') == "u'\\r'")
verify(repr(u'\t') == "u'\\t'")
verify(repr(u'\b') == "u'\\x08'")
# Quote selection: the expected strings show double quotes being used
# only when the value holds a single quote and no double quote.
verify(repr(u"'\"") == """u'\\'"'""")
verify(repr(u"'") == '''u"'"''')
verify(repr(u'"') == """u'"'""")

Guido van Rossuma831cac2000-03-10 23:23:21 +000026def test(method, input, output, *args):
27 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000028 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000029 try:
30 f = getattr(input, method)
31 value = apply(f, args)
32 except:
33 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000034 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000035 else:
36 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000037 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000038 if verbose:
39 print 'no'
40 print '*',f, `input`, `output`, `value`
41 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000042 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000043 else:
44 if verbose:
45 print 'yes'
46
# capitalize(): the expected values show the first character upper-cased
# and the rest lower-cased.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')
test('capitalize', u'aaaa', u'Aaaa')
test('capitalize', u'AaAa', u'Aaaa')

# count(): every mix of Unicode and 8-bit strings for object and argument.
test('count', u'aaa', 3, u'a')
test('count', u'aaa', 0, u'b')
test('count', 'aaa', 3, u'a')
test('count', 'aaa', 0, u'b')
test('count', u'aaa', 3, 'a')
test('count', u'aaa', 0, 'b')

test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')

# Disabled: kept for reference only, this branch never runs.
if 0:
    # 256-character identity table, except that 'abc' maps to 'xyz'.
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')

# split(): whitespace splitting, explicit separators and maxsplit.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
# Multi-character separators, mixing Unicode and 8-bit strings; the
# result is expected to be Unicode in every combination.
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'endcase test', [u'endcase ', u''], u'test')
test('split', u'endcase test', [u'endcase ', u''], 'test')
test('split', 'endcase test', [u'endcase ', u''], u'test')


# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper that is neither a list nor a tuple.

    It forwards only len() and indexing to the wrapped object, which is
    all the join() tests below need from a generic sequence.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]

108test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000109test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000110test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000111test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000112test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000113test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
114test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
115test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
116test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
117test('join', ' ', u'w x y z', Sequence(u'wxyz'))
118test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000119
120result = u''
121for i in range(10):
122 if i > 0:
123 result = result + u':'
124 result = result + u'x'*10
125test('join', u':', result, [u'x' * 10] * 10)
126test('join', u':', result, (u'x' * 10,) * 10)
127
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: this branch never runs.
# NOTE(review): it refers to `string` and `transtable`, neither of which
# is available here -- fix both before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(): with and without a maximum replacement count.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)

# startswith(): the optional trailing arguments are start and end.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(): same optional start and end arguments.
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)

# expandtabs(): each tab pads with spaces up to the next tab stop, and
# the column restarts after \r and \n.  With the default tab size of 8,
# 'ab' is followed by 6 spaces and 'g' by 7; with a tab size of 4 they
# are followed by 2 and 3 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)

# Disabled: this branch never runs.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')

191# Comparisons:
192print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000193verify(u'abc' == 'abc')
194verify('abc' == u'abc')
195verify(u'abc' == u'abc')
196verify(u'abcd' > 'abc')
197verify('abcd' > u'abc')
198verify(u'abcd' > u'abc')
199verify(u'abc' < 'abcd')
200verify('abc' < u'abcd')
201verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202print 'done.'
203
Marc-André Lemburge5034372000-08-08 08:04:29 +0000204if 0:
205 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000206
Marc-André Lemburge5034372000-08-08 08:04:29 +0000207 print 'Testing UTF-16 code point order comparisons...',
208 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000209 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000210 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000211 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000212
Marc-André Lemburge5034372000-08-08 08:04:29 +0000213 # Non surrogate above surrogate value, fixup required
214 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000215 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000216
Marc-André Lemburge5034372000-08-08 08:04:29 +0000217 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000218 s2 = u'\ud800\udc01'
219 test_lecmp(s, s2)
220 s2 = u'\ud900\udc01'
221 test_lecmp(s, s2)
222 s2 = u'\uda00\udc01'
223 test_lecmp(s, s2)
224 s2 = u'\udb00\udc01'
225 test_lecmp(s, s2)
226 s2 = u'\ud800\udd01'
227 test_lecmp(s, s2)
228 s2 = u'\ud900\udd01'
229 test_lecmp(s, s2)
230 s2 = u'\uda00\udd01'
231 test_lecmp(s, s2)
232 s2 = u'\udb00\udd01'
233 test_lecmp(s, s2)
234 s2 = u'\ud800\ude01'
235 test_lecmp(s, s2)
236 s2 = u'\ud900\ude01'
237 test_lecmp(s, s2)
238 s2 = u'\uda00\ude01'
239 test_lecmp(s, s2)
240 s2 = u'\udb00\ude01'
241 test_lecmp(s, s2)
242 s2 = u'\ud800\udfff'
243 test_lecmp(s, s2)
244 s2 = u'\ud900\udfff'
245 test_lecmp(s, s2)
246 s2 = u'\uda00\udfff'
247 test_lecmp(s, s2)
248 s2 = u'\udb00\udfff'
249 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000250
251 test_fixup(u'\ue000')
252 test_fixup(u'\uff61')
253
254 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000255 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000256 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000257
# ljust()/rjust()/center(): the result is padded with spaces to the
# requested width, and is returned unchanged when the width is not larger
# than the string.  For center() the extra space goes on the right when
# the padding cannot be split evenly.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

# Character-class predicates.  u'\u1FFc' is used as the titlecase
# sample (see the 'Greek' istitle case below).
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
# This case is skipped on platforms whose name starts with 'java'.
if sys.platform[:4] != 'java':
    test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)

# splitlines(): \n, \r and \r\n each end a line; the last case passes a
# true keepends argument, so the line ends stay in the result.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate(): mapping values are None (delete), an ordinal, or a
# Unicode string.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})

Guido van Rossumd4d26842000-03-13 23:21:48 +0000325# Contains:
326print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000327verify(('a' in u'abdb') == 1)
328verify(('a' in u'bdab') == 1)
329verify(('a' in u'bdaba') == 1)
330verify(('a' in u'bdba') == 1)
331verify(('a' in u'bdba') == 1)
332verify((u'a' in u'bdba') == 1)
333verify((u'a' in u'bdb') == 0)
334verify((u'a' in 'bdb') == 0)
335verify((u'a' in 'bdba') == 1)
336verify((u'a' in ('a',1,None)) == 1)
337verify((u'a' in (1,None,'a')) == 1)
338verify((u'a' in (1,None,u'a')) == 1)
339verify(('a' in ('a',1,None)) == 1)
340verify(('a' in (1,None,'a')) == 1)
341verify(('a' in (1,None,u'a')) == 1)
342verify(('a' in ('x',1,u'y')) == 0)
343verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000344print 'done.'
345
Guido van Rossuma831cac2000-03-10 23:23:21 +0000346# Formatting:
347print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000348verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
349verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
350verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
351verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
352verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
353verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
354verify(u"%c" % (u"a",) == u'a')
355verify(u"%c" % ("a",) == u'a')
356verify(u"%c" % (34,) == u'"')
357verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000358if sys.platform[:4] != 'java':
359 value = u"%r, %r" % (u"abc", "abc")
360 if value != u"u'abc', 'abc'":
361 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000362
Marc-André Lemburg36619082001-01-17 19:11:13 +0000363verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000364try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000365 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000366except KeyError:
367 print '*** formatting failed for "%s"' % "u'abc, def'"
368else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000369 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000370
Guido van Rossum97064862000-04-10 13:52:48 +0000371# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000372verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
373verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
374verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
375verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
376verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
377verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
378verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
379verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
380verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000381verify('%*s' % (5,u'abc',) == u' abc')
382verify('%*s' % (-5,u'abc',) == u'abc ')
383verify('%*.*s' % (5,2,u'abc',) == u' ab')
384verify('%*.*s' % (5,3,u'abc',) == u' abc')
385verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
386verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000387print 'done.'
388
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000389print 'Testing builtin unicode()...',
390
391# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
392
393verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
394
395class UnicodeSubclass(unicode):
396 pass
397
398verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
399 == u'unicode subclass becomes unicode')
400
401verify(unicode('strings are converted to unicode')
402 == u'strings are converted to unicode')
403
class UnicodeCompat:
    """Object whose __unicode__() returns the value given at creation."""

    def __init__(self, x):
        self.x = x

    def __unicode__(self):
        return self.x

# unicode() has to pick up the __unicode__() hook.
verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
       == u'__unicode__ compatible objects are recognized')

class StringCompat:
    """Object whose __str__() returns the value given at creation."""

    def __init__(self, x):
        self.x = x

    def __str__(self):
        return self.x

419verify(unicode(StringCompat('__str__ compatible objects are recognized'))
420 == u'__str__ compatible objects are recognized')
421
422# unicode(obj) is compatible to str():
423
424o = StringCompat('unicode(obj) is compatible to str()')
425verify(unicode(o) == u'unicode(obj) is compatible to str()')
426verify(str(o) == 'unicode(obj) is compatible to str()')
427
428for obj in (123, 123.45, 123L):
429 verify(unicode(obj) == unicode(str(obj)))
430
431# unicode(obj, encoding, error) tests (this maps to
432# PyUnicode_FromEncodedObject() at C level)
433
434try:
435 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
436except TypeError:
437 pass
438else:
439 raise TestFailed, "decoding unicode should NOT be supported"
440
441verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
442 == u'strings are decoded to unicode')
443
444verify(unicode(buffer('character buffers are decoded to unicode'),
445 'utf-8', 'strict')
446 == u'character buffers are decoded to unicode')
447
448print 'done.'
449
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000450# Test builtin codecs
451print 'Testing builtin codecs...',
452
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000453# UTF-7 specific encoding tests:
454utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
455 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
456 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
457 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
458 (u'+', '+-'),
459 (u'+-', '+--'),
460 (u'+?', '+-?'),
461 (u'\?', '+AFw?'),
462 (u'+?', '+-?'),
463 (ur'\\?', '+AFwAXA?'),
464 (ur'\\\?', '+AFwAXABc?'),
465 (ur'++--', '+-+---')]
466
467for x,y in utfTests:
468 verify( x.encode('utf-7') == y )
469
Tim Peters527e64f2001-10-04 05:36:56 +0000470try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000471 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
472except UnicodeError:
473 pass
474else:
475 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
476
477verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
478
# UTF-8 specific encoding tests:
# The surrogate pairs below are expected to encode as single 4-byte
# sequences.
verify(u'\u20ac'.encode('utf-8') == \
       ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
verify(u'\ud800\udc02'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
verify(u'\ud84d\udc56'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
# UTF-8 specific decoding tests
verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
               'utf-8') == u'\U00023456' )
verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
               'utf-8') == u'\U00010002' )
verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
               'utf-8') == u'\u20ac' )

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

verify(unicode('hello','ascii') == u'hello')
verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello')

Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000503# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000504try:
505 u'Andr\202 x'.encode('ascii')
506 u'Andr\202 x'.encode('ascii','strict')
507except ValueError:
508 pass
509else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000510 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000511verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
512verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000513
514try:
515 unicode('Andr\202 x','ascii')
516 unicode('Andr\202 x','ascii','strict')
517except ValueError:
518 pass
519else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000520 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000521verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
522verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000523
Marc-André Lemburg36619082001-01-17 19:11:13 +0000524verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000525verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000526verify(u'hello'.encode('utf-8') == 'hello')
527verify(u'hello'.encode('utf8') == 'hello')
528verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
529verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
530verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000531
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000532# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000533u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000534for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000535 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000536 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000537
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000538# Roundtrip safety for non-BMP (just a few chars)
539u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
540for encoding in ('utf-8',
541 'utf-16', 'utf-16-le', 'utf-16-be',
Marc-André Lemburg80d1dd52001-07-25 16:05:59 +0000542 #'raw_unicode_escape',
543 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000544 verify(unicode(u.encode(encoding),encoding) == u)
545
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000546u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000547for encoding in (
548 'latin-1',
549 ):
550 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000551 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000552 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000553 print '*** codec "%s" failed round-trip' % encoding
554 except ValueError,why:
555 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000556
557u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000558for encoding in (
559 'ascii',
560 ):
561 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000562 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000563 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000564 print '*** codec "%s" failed round-trip' % encoding
565 except ValueError,why:
566 print '*** codec for "%s" failed: %s' % (encoding, why)
567
568print 'done.'
569
570print 'Testing standard mapping codecs...',
571
572print '0-127...',
573s = ''.join(map(chr, range(128)))
574for encoding in (
575 'cp037', 'cp1026',
576 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
577 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000578 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000579 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
580 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
581 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
582 'mac_cyrillic', 'mac_latin2',
583
584 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
585 'cp1256', 'cp1257', 'cp1258',
586 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
587
588 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000589 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000590
Guido van Rossum9e896b32000-04-05 20:11:21 +0000591 ### These have undefined mappings:
592 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000593
Tim Peters2f228e72001-05-13 00:19:31 +0000594 ### These fail the round-trip:
595 #'cp875'
596
Guido van Rossum9e896b32000-04-05 20:11:21 +0000597 ):
598 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000599 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000600 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000601 print '*** codec "%s" failed round-trip' % encoding
602 except ValueError,why:
603 print '*** codec for "%s" failed: %s' % (encoding, why)
604
605print '128-255...',
606s = ''.join(map(chr, range(128,256)))
607for encoding in (
608 'cp037', 'cp1026',
609 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
610 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000611 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000612 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000613 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000614 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000615 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000616
Guido van Rossum9e896b32000-04-05 20:11:21 +0000617 ### These have undefined mappings:
618 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
619 #'cp1256', 'cp1257', 'cp1258',
620 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000621 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000622 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000623
Guido van Rossum9e896b32000-04-05 20:11:21 +0000624 ### These fail the round-trip:
625 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000626
Guido van Rossum9e896b32000-04-05 20:11:21 +0000627 ):
628 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000629 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000630 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000631 print '*** codec "%s" failed round-trip' % encoding
632 except ValueError,why:
633 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000634
635print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000636
637print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000638verify((u"abc" u"def") == u"abcdef")
639verify(("abc" u"def") == u"abcdef")
640verify((u"abc" "def") == u"abcdef")
641verify((u"abc" u"def" "ghi") == u"abcdefghi")
642verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000643print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000644
645print 'Testing Unicode printing...',
646print u'abc'
647print u'abc', u'def'
648print u'abc', 'def'
649print 'abc', u'def'
650print u'abc\n'
651print u'abc\n',
652print u'abc\n',
653print u'def\n'
654print u'def\n'
655print 'done.'
656