blob: 4f4196c8213cd7834133c56f614eb5f8574e13dc [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
19 exc = sys.exc_info()
20 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
27 print ' value == %s: %s' % (exc[:2])
28 else:
29 if verbose:
30 print 'yes'
31
32test('capitalize', u' hello ', u' hello ')
33test('capitalize', u'hello ', u'Hello ')
34
35test('title', u' hello ', u' Hello ')
36test('title', u'hello ', u'Hello ')
37test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
38test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
39test('title', u"getInt", u'Getint')
40
41test('find', u'abcdefghiabc', 0, u'abc')
42test('find', u'abcdefghiabc', 9, u'abc', 1)
43test('find', u'abcdefghiabc', -1, u'def', 4)
44
45test('rfind', u'abcdefghiabc', 9, u'abc')
46
47test('lower', u'HeLLo', u'hello')
48test('lower', u'hello', u'hello')
49
50test('upper', u'HeLLo', u'HELLO')
51test('upper', u'HELLO', u'HELLO')
52
53if 0:
54 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
55
56 test('maketrans', u'abc', transtable, u'xyz')
57 test('maketrans', u'abc', ValueError, u'xyzq')
58
59test('split', u'this is the split function',
60 [u'this', u'is', u'the', u'split', u'function'])
61test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
62test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
63test('split', u'a b c d', [u'a', u'b c d'], None, 1)
64test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
65test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
66test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
67test('split', u'a b c d', [u'a b c d'], None, 0)
68test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
69test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
71# join now works with any sequence type
class Sequence:
    """Minimal sequence exposing only __len__ and __getitem__.

    Wraps the string 'wxyz'; it is passed to join to check that join
    accepts sequence types other than lists and tuples.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Indexing past the end raises IndexError from the wrapped
        # string, which is what ends old-style sequence iteration.
        return self.seq[i]
76
77test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
78test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
79test('join', u' ', u'w x y z', Sequence())
80test('join', u' ', TypeError, 7)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
85test('join', u' ', TypeError, BadSeq())
86
87result = u''
88for i in range(10):
89 if i > 0:
90 result = result + u':'
91 result = result + u'x'*10
92test('join', u':', result, [u'x' * 10] * 10)
93test('join', u':', result, (u'x' * 10,) * 10)
94
95test('strip', u' hello ', u'hello')
96test('lstrip', u' hello ', u'hello ')
97test('rstrip', u' hello ', u' hello')
98test('strip', u'hello', u'hello')
99
100test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
101
102if 0:
103 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
104
105 table = string.maketrans('a', u'A')
106 test('translate', u'abc', u'Abc', table)
107 test('translate', u'xyz', u'xyz', table)
108
109test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000110test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000111test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
112test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
113test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
114test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
115test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
116test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
117test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
118
119test('startswith', u'hello', 1, u'he')
120test('startswith', u'hello', 1, u'hello')
121test('startswith', u'hello', 0, u'hello world')
122test('startswith', u'hello', 1, u'')
123test('startswith', u'hello', 0, u'ello')
124test('startswith', u'hello', 1, u'ello', 1)
125test('startswith', u'hello', 1, u'o', 4)
126test('startswith', u'hello', 0, u'o', 5)
127test('startswith', u'hello', 1, u'', 5)
128test('startswith', u'hello', 0, u'lo', 6)
129test('startswith', u'helloworld', 1, u'lowo', 3)
130test('startswith', u'helloworld', 1, u'lowo', 3, 7)
131test('startswith', u'helloworld', 0, u'lowo', 3, 6)
132
133test('endswith', u'hello', 1, u'lo')
134test('endswith', u'hello', 0, u'he')
135test('endswith', u'hello', 1, u'')
136test('endswith', u'hello', 0, u'hello world')
137test('endswith', u'helloworld', 0, u'worl')
138test('endswith', u'helloworld', 1, u'worl', 3, 9)
139test('endswith', u'helloworld', 1, u'world', 3, 12)
140test('endswith', u'helloworld', 1, u'lowo', 1, 7)
141test('endswith', u'helloworld', 1, u'lowo', 2, 7)
142test('endswith', u'helloworld', 1, u'lowo', 3, 7)
143test('endswith', u'helloworld', 0, u'lowo', 4, 7)
144test('endswith', u'helloworld', 0, u'lowo', 3, 8)
145test('endswith', u'ab', 0, u'ab', 0, 1)
146test('endswith', u'ab', 0, u'ab', 0, 0)
147
# expandtabs pads each tab up to the next multiple of the tab size and
# restarts the column count after '\r' or '\n'.  The padding is spelled
# as an explicit repeat count so the expected widths stay unambiguous:
#   tab size 8: 'ab' -> 6 spaces, 'g' -> 7 spaces
#   tab size 4: 'ab' -> 2 spaces, 'g' -> 3 spaces
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
153if 0:
154 test('capwords', u'abc def ghi', u'Abc Def Ghi')
155 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
156 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# ljust/rjust/center pad with spaces up to the requested width, so the
# expected strings must be exactly that wide.  The padding is spelled as
# an explicit repeat count to keep the widths unambiguous.  When the
# padding is odd, center puts the extra space on the right here.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
181test('islower', u'a', 1)
182test('islower', u'A', 0)
183test('islower', u'\n', 0)
184test('islower', u'\u1FFc', 0)
185test('islower', u'abc', 1)
186test('islower', u'aBc', 0)
187test('islower', u'abc\n', 1)
188
189test('isupper', u'a', 0)
190test('isupper', u'A', 1)
191test('isupper', u'\n', 0)
192test('isupper', u'\u1FFc', 0)
193test('isupper', u'ABC', 1)
194test('isupper', u'AbC', 0)
195test('isupper', u'ABC\n', 1)
196
197test('istitle', u'a', 0)
198test('istitle', u'A', 1)
199test('istitle', u'\n', 0)
200test('istitle', u'\u1FFc', 1)
201test('istitle', u'A Titlecased Line', 1)
202test('istitle', u'A\nTitlecased Line', 1)
203test('istitle', u'A Titlecased, Line', 1)
204test('istitle', u'Greek \u1FFcitlecases ...', 1)
205test('istitle', u'Not a capitalized String', 0)
206test('istitle', u'Not\ta Titlecase String', 0)
207test('istitle', u'Not--a Titlecase String', 0)
208
209test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
210test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
211test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
212test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
213test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
214test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
215test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc\012def\015\012ghi\012\015'], 1)
216test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def\015\012ghi\012\015'], 2)
217
218test('translate', u"abababc", u'bbbc', {ord('a'):None})
219test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
220test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
221
Guido van Rossumd4d26842000-03-13 23:21:48 +0000222# Contains:
223print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000224assert ('a' in u'abdb') == 1
225assert ('a' in u'bdab') == 1
226assert ('a' in u'bdaba') == 1
227assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000228assert ('a' in u'bdba') == 1
229assert (u'a' in u'bdba') == 1
230assert (u'a' in u'bdb') == 0
231assert (u'a' in 'bdb') == 0
232assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000233assert (u'a' in ('a',1,None)) == 1
234assert (u'a' in (1,None,'a')) == 1
235assert (u'a' in (1,None,u'a')) == 1
236assert ('a' in ('a',1,None)) == 1
237assert ('a' in (1,None,'a')) == 1
238assert ('a' in (1,None,u'a')) == 1
239assert ('a' in ('x',1,u'y')) == 0
240assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000241print 'done.'
242
Guido van Rossuma831cac2000-03-10 23:23:21 +0000243# Formatting:
244print 'Testing Unicode formatting strings...',
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
# %5.2f has a minimum field width of 5, so a value such as 3.00 (four
# characters) is left-padded with one space.  That pad is kept in its
# own literal so it cannot be confused with the separator space.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, ' + u' 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, ' + u' 3.57'
# 1003.57 is already wider than the minimum field width: no padding.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, ' + u'1003.57'
251assert u"%c" % (u"abc",) == u'a'
252assert u"%c" % ("abc",) == u'a'
253assert u"%c" % (34,) == u'"'
254assert u"%c" % (36,) == u'$'
255assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
256assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
257assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
Guido van Rossum97064862000-04-10 13:52:48 +0000258# formatting jobs delegated from the string implementation:
259assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
260assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
261assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
262assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
263assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
264assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
265assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
266assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000267print 'done.'
268
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000269# Test builtin codecs
270print 'Testing builtin codecs...',
271
272assert unicode('hello','ascii') == u'hello'
273assert unicode('hello','utf-8') == u'hello'
274assert unicode('hello','utf8') == u'hello'
275assert unicode('hello','latin-1') == u'hello'
276
Guido van Rossum97064862000-04-10 13:52:48 +0000277try:
278 u'Andr\202 x'.encode('ascii')
279 u'Andr\202 x'.encode('ascii','strict')
280except ValueError:
281 pass
282else:
283 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
284assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
285assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
286
287try:
288 unicode('Andr\202 x','ascii')
289 unicode('Andr\202 x','ascii','strict')
290except ValueError:
291 pass
292else:
293 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
294assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
295assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
296
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000297assert u'hello'.encode('ascii') == 'hello'
298assert u'hello'.encode('utf-8') == 'hello'
299assert u'hello'.encode('utf8') == 'hello'
300assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
301assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
302assert u'hello'.encode('latin-1') == 'hello'
303
304u = u''.join(map(unichr, range(1024)))
305for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
306 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
307 assert unicode(u.encode(encoding),encoding) == u
308
309u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000310for encoding in (
311 'latin-1',
312 ):
313 try:
314 assert unicode(u.encode(encoding),encoding) == u
315 except AssertionError:
316 print '*** codec "%s" failed round-trip' % encoding
317 except ValueError,why:
318 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000319
320u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000321for encoding in (
322 'ascii',
323 ):
324 try:
325 assert unicode(u.encode(encoding),encoding) == u
326 except AssertionError:
327 print '*** codec "%s" failed round-trip' % encoding
328 except ValueError,why:
329 print '*** codec for "%s" failed: %s' % (encoding, why)
330
331print 'done.'
332
333print 'Testing standard mapping codecs...',
334
335print '0-127...',
336s = ''.join(map(chr, range(128)))
337for encoding in (
338 'cp037', 'cp1026',
339 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
340 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
341 'cp863', 'cp865', 'cp866',
342 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
343 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
344 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
345 'mac_cyrillic', 'mac_latin2',
346
347 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
348 'cp1256', 'cp1257', 'cp1258',
349 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
350
351 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
352 'cp1006', 'cp875', 'iso8859_8',
353
354 ### These have undefined mappings:
355 #'cp424',
356
357 ):
358 try:
359 assert unicode(s,encoding).encode(encoding) == s
360 except AssertionError:
361 print '*** codec "%s" failed round-trip' % encoding
362 except ValueError,why:
363 print '*** codec for "%s" failed: %s' % (encoding, why)
364
365print '128-255...',
366s = ''.join(map(chr, range(128,256)))
367for encoding in (
368 'cp037', 'cp1026',
369 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
370 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
371 'cp863', 'cp865', 'cp866',
372 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
373 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
374 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
375 'mac_cyrillic', 'mac_latin2',
376
377 ### These have undefined mappings:
378 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
379 #'cp1256', 'cp1257', 'cp1258',
380 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
381 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
382
383 ### These fail the round-trip:
384 #'cp1006', 'cp875', 'iso8859_8',
385
386 ):
387 try:
388 assert unicode(s,encoding).encode(encoding) == s
389 except AssertionError:
390 print '*** codec "%s" failed round-trip' % encoding
391 except ValueError,why:
392 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000393
394print 'done.'