blob: 58d6e840dad2d4f6be10ace43014f55aaf9ecac1 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
19 exc = sys.exc_info()
20 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
27 print ' value == %s: %s' % (exc[:2])
28 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search.
# Each entry is (method, input, expected[, method arguments...]).
for _case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    test(*_case)
52
# Disabled with `if 0`: nothing in this branch runs.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    # (expected result, replacement characters) for the source set u'abc'.
    for _expected, _replacement in (
        (transtable, u'xyz'),
        (ValueError, u'xyzq'),
        ):
        test('maketrans', u'abc', _expected, _replacement)
58
# split: each entry is (input, expected list[, separator[, maxsplit]]).
for _case in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
    ):
    test('split', *_case)
70
# join now works with any sequence type
class Sequence:
    """Minimal sequence object: length and indexing over the string 'wxyz'."""

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join: each entry is (separator, expected, argument passed to join).
for _separator, _expected, _items in (
    (u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    (u'', u'abcd', (u'a', u'b', u'c', u'd')),
    (u' ', u'w x y z', Sequence()),
    (u' ', TypeError, 7),
    ):
    test('join', _separator, _expected, _items)
81
class BadSeq(Sequence):
    """Sequence whose items are an int, a Unicode string and a long."""

    def __init__(self):
        # long(123) is the same value as the literal 123L.
        self.seq = [7, u'hello', long(123)]
84
test('join', u' ', TypeError, BadSeq())

# Expected value: ten runs of ten u'x' separated by u':', built by plain
# concatenation so that it does not depend on join itself.
result = u'x'*10
for i in range(1, 10):
    result = result + u':' + u'x'*10
for _items in ([u'x' * 10] * 10, (u'x' * 10,) * 10):
    test('join', u':', result, _items)
94
# Whitespace stripping and case swapping: (method, input, expected).
for _method, _input, _expected in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    test(_method, _input, _expected)
101
# Disabled with `if 0`: nothing in this branch runs.
# NOTE(review): it refers to `transtable` and to the `string` module; the
# former is only assigned inside another `if 0` branch and no import of the
# latter is visible in this file -- confirm before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for _input, _expected in (
        (u'abc', u'Abc'),
        (u'xyz', u'xyz'),
        ):
        test('translate', _input, _expected, table)
108
# replace on u'one!two!three!': each entry is (expected, old, new[, maxcount]).
for _case in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
    ):
    test('replace', u'one!two!three!', *_case)
118
# startswith / endswith: each case is (input, expected, affix[, start[, end]]).
for _method, _cases in (
    ('startswith', (
        (u'hello', 1, u'he'),
        (u'hello', 1, u'hello'),
        (u'hello', 0, u'hello world'),
        (u'hello', 1, u''),
        (u'hello', 0, u'ello'),
        (u'hello', 1, u'ello', 1),
        (u'hello', 1, u'o', 4),
        (u'hello', 0, u'o', 5),
        (u'hello', 1, u'', 5),
        (u'hello', 0, u'lo', 6),
        (u'helloworld', 1, u'lowo', 3),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 3, 6),
        )),
    ('endswith', (
        (u'hello', 1, u'lo'),
        (u'hello', 0, u'he'),
        (u'hello', 1, u''),
        (u'hello', 0, u'hello world'),
        (u'helloworld', 0, u'worl'),
        (u'helloworld', 1, u'worl', 3, 9),
        (u'helloworld', 1, u'world', 3, 12),
        (u'helloworld', 1, u'lowo', 1, 7),
        (u'helloworld', 1, u'lowo', 2, 7),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 4, 7),
        (u'helloworld', 0, u'lowo', 3, 8),
        (u'ab', 0, u'ab', 0, 1),
        (u'ab', 0, u'ab', 0, 0),
        )),
    ):
    for _case in _cases:
        test(_method, *_case)
147
# expandtabs: a tab pads to the next multiple of the tab size, and the
# column count restarts after '\r' or '\n'.  The padding is spelled as an
# explicit repeat count so the expected width cannot be lost to whitespace
# mangling: with tab size 8, u'ab' is followed by 6 spaces and u'g' by 7;
# with tab size 4, by 2 and 3.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
# Disabled with `if 0`: nothing in this branch runs.
if 0:
    for _input in (
        u'abc def ghi',
        u'abc\tdef\nghi',
        u'abc\t def \nghi',
        ):
        test('capwords', _input, u'Abc Def Ghi')
157
# Comparisons:
# Equality and ordering are checked for Unicode/8-bit, 8-bit/Unicode and
# Unicode/Unicode operand pairs.
sys.stdout.write('Testing Unicode comparisons... ')
for _left, _right in ((u'abc', 'abc'), ('abc', u'abc'), (u'abc', u'abc')):
    assert _left == _right
for _longer, _shorter in ((u'abcd', 'abc'), ('abcd', u'abc'), (u'abcd', u'abc')):
    assert _longer > _shorter
for _shorter, _longer in ((u'abc', 'abcd'), ('abc', u'abcd'), (u'abc', u'abcd')):
    assert _shorter < _longer
print('done.')
170
# ljust / rjust / center on u'abc'.  Padding is written as an explicit
# repeat count so the expected width cannot be lost to whitespace mangling.
# For center, an odd amount of padding puts the extra space on the right.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# islower / isupper / istitle: each case is (input, expected truth value).
for _method, _cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
        )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
        )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
        )),
    ):
    for _input, _expected in _cases:
        test(_method, _input, _expected)
208
# splitlines: each entry is (input, expected list[, keepends flag]).
for _case in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
    ):
    test('splitlines', *_case)

# translate with a dict keyed by ordinal; values used here are None, an
# ordinal, and a Unicode string.  Each entry is (expected, mapping).
for _expected, _mapping in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ):
    test('translate', u"abababc", _expected, _mapping)
220
# Contains:
# Each entry is (item, container, expected result of `item in container`).
sys.stdout.write('Testing Unicode contains method... ')
for _item, _container, _expected in (
    ('a', u'abdb', 1),
    ('a', u'bdab', 1),
    ('a', u'bdaba', 1),
    ('a', u'bdba', 1),
    ('a', u'bdba', 1),
    (u'a', u'bdba', 1),
    (u'a', u'bdb', 0),
    (u'a', 'bdb', 0),
    (u'a', 'bdba', 1),
    (u'a', ('a',1,None), 1),
    (u'a', (1,None,'a'), 1),
    (u'a', (1,None,u'a'), 1),
    ('a', ('a',1,None), 1),
    ('a', (1,None,'a'), 1),
    ('a', (1,None,u'a'), 1),
    ('a', ('x',1,u'y'), 0),
    ('a', ('x',1,None), 0),
    ):
    assert (_item in _container) == _expected
print('done.')
241
# Formatting:
sys.stdout.write('Testing Unicode formatting strings... ')
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
# %5.2f pads to five characters, so 3.00 is rendered as u' 3.00'.  The padded
# field is kept as its own literal so the leading space stays visible.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, ' + u' 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, ' + u' 3.57'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
# NOTE(review): a multi-character operand for %c may be rejected by other
# interpreter versions -- confirm against the targeted one.
assert u"%c" % (u"abc",) == u'a'
assert u"%c" % ("abc",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
# \xe4 is a-umlaut, escaped so the source stays pure ASCII.
assert u"%(x)s, %(\xe4)s" % {'x':u"abc", u'\xe4'.encode('utf-8'):"def"} == u'abc, def'
# formatting jobs delegated from the string implementation:
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
assert '...%s...' % u"abc" == u'...abc...'
print('done.')
267
# Test builtin codecs
sys.stdout.write('Testing builtin codecs... ')

assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'

# Encoding a non-ASCII character to ASCII must raise ValueError, both without
# an errors argument and with an explicit 'strict'.  Each call gets its own
# try block; sharing one would skip the second call once the first raised.
for _errors in ((), ('strict',)):
    try:
        u'Andr\202 x'.encode('ascii', *_errors)
    except ValueError:
        pass
    else:
        raise AssertionError("u'Andr\202'.encode('ascii') failed to raise an exception")
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"

# Same check for decoding a non-ASCII byte as ASCII.
for _errors in ((), ('strict',)):
    try:
        unicode('Andr\202 x', 'ascii', *_errors)
    except ValueError:
        pass
    else:
        raise AssertionError("unicode('Andr\202') failed to raise an exception")
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'

assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'

# Round-trip the first 1024 code points through each of these codecs.
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    assert unicode(u.encode(encoding),encoding) == u

# Round-trip the first 256 code points; failures are reported on stdout
# instead of aborting the script.
u = u''.join(map(unichr, range(256)))
for encoding in (
    'latin-1',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print('*** codec "%s" failed round-trip' % encoding)
    except ValueError:
        why = sys.exc_info()[1]
        print('*** codec for "%s" failed: %s' % (encoding, why))

# Round-trip the first 128 code points, reported the same way.
u = u''.join(map(unichr, range(128)))
for encoding in (
    'ascii',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print('*** codec "%s" failed round-trip' % encoding)
    except ValueError:
        why = sys.exc_info()[1]
        print('*** codec for "%s" failed: %s' % (encoding, why))

print('done.')
331
sys.stdout.write('Testing standard mapping codecs... ')

# Codecs round-tripped over both byte ranges (0-127 and 128-255).
_both_ranges = (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',
    )

# Codecs round-tripped over 0-127 only.  For 128-255 they are left out:
#   undefined mappings: the cp125x, cp85x/cp86x/cp874 and mac_* entries
#   fail the round-trip: 'cp1006', 'cp875', 'iso8859_8'
_low_range_only = (
    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

    'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    'cp1006', 'cp875', 'iso8859_8',

    ### These have undefined mappings (skipped for both ranges):
    #'cp424',
    )

for _label, _byte_values, _encodings in (
    ('0-127... ', range(128), _both_ranges + _low_range_only),
    ('128-255... ', range(128,256), _both_ranges),
    ):
    sys.stdout.write(_label)
    s = ''.join(map(chr, _byte_values))
    for encoding in _encodings:
        # Decode then re-encode; a mismatch or codec error is reported on
        # stdout and the loop moves on to the next codec.
        try:
            assert unicode(s,encoding).encode(encoding) == s
        except AssertionError:
            print('*** codec "%s" failed round-trip' % encoding)
        except ValueError:
            why = sys.exc_info()[1]
            print('*** codec for "%s" failed: %s' % (encoding, why))

print('done.')