blob: 15d87d6680989136da70c77bed3480c991374f2d [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search.  Every entry is laid out as
# (method, input, expected result, method arguments...).
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    test(*case)
52
# Disabled: the `if 0` guard means nothing below is ever executed.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    # (expected result, replacement characters handed to maketrans)
    for expected, to_chars in ((transtable, u'xyz'), (ValueError, u'xyzq')):
        test('maketrans', u'abc', expected, to_chars)
58
# split(): each entry is (input, expected list, arguments passed to split).
for text, expected, split_args in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'], ()),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], (u'|',)),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], (u'|', 2)),
    (u'a b c d', [u'a', u'b c d'], (None, 1)),
    (u'a b c d', [u'a', u'b', u'c d'], (None, 2)),
    (u'a b c d', [u'a', u'b', u'c', u'd'], (None, 3)),
    (u'a b c d', [u'a', u'b', u'c', u'd'], (None, 4)),
    (u'a b c d', [u'a b c d'], (None, 0)),
    (u'a b c d', [u'a', u'b', u'c d'], (None, 2)),
    (u'a b c d ', [u'a', u'b', u'c', u'd'], ()),
    ):
    test('split', text, expected, *split_args)
70
# join now works with any sequence type
class Sequence:
    """Bare-bones sequence exposing the characters of 'wxyz'.

    Only __len__ and __getitem__ are provided; both delegate to the
    underlying ``seq`` attribute.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join(): (separator, expected result or exception type, argument to join)
for separator, expected, items in (
    (u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    (u'', u'abcd', (u'a', u'b', u'c', u'd')),
    (u' ', u'w x y z', Sequence()),
    (u' ', TypeError, 7),
    ):
    test('join', separator, expected, items)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
test('join', u' ', TypeError, BadSeq())

# Build the expected text by plain concatenation, without relying on the
# join method that is being checked: ten runs of ten 'x' separated by ':'.
chunk = u'x' * 10
result = chunk
for i in range(1, 10):
    result = result + u':' + chunk
for pieces in ([u'x' * 10] * 10, (u'x' * 10,) * 10):
    test('join', u':', result, pieces)
94
# Whitespace stripping and case swapping: (method, input, expected result)
for method, text, expected in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    test(method, text, expected)
101
# Disabled: the `if 0` guard means nothing below is ever executed.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for text, expected in ((u'abc', u'Abc'), (u'xyz', u'xyz')):
        test('translate', text, expected, table)
108
# replace() on one fixed input: (expected result, arguments to replace)
subject = u'one!two!three!'
for expected, replace_args in (
    (u'one@two!three!', (u'!', u'@', 1)),
    (u'onetwothree', ('!', '')),
    (u'one@two@three!', (u'!', u'@', 2)),
    (u'one@two@three@', (u'!', u'@', 3)),
    (u'one@two@three@', (u'!', u'@', 4)),
    (u'one!two!three!', (u'!', u'@', 0)),
    (u'one@two@three@', (u'!', u'@')),
    (u'one!two!three!', (u'x', u'@')),
    (u'one!two!three!', (u'x', u'@', 2)),
    ):
    test('replace', subject, expected, *replace_args)
118
# Prefix and suffix checks, with and without start/end positions.
# Layout: (method, input, expected result, method arguments...)
for case in (
    ('startswith', u'hello', 1, u'he'),
    ('startswith', u'hello', 1, u'hello'),
    ('startswith', u'hello', 0, u'hello world'),
    ('startswith', u'hello', 1, u''),
    ('startswith', u'hello', 0, u'ello'),
    ('startswith', u'hello', 1, u'ello', 1),
    ('startswith', u'hello', 1, u'o', 4),
    ('startswith', u'hello', 0, u'o', 5),
    ('startswith', u'hello', 1, u'', 5),
    ('startswith', u'hello', 0, u'lo', 6),
    ('startswith', u'helloworld', 1, u'lowo', 3),
    ('startswith', u'helloworld', 1, u'lowo', 3, 7),
    ('startswith', u'helloworld', 0, u'lowo', 3, 6),

    ('endswith', u'hello', 1, u'lo'),
    ('endswith', u'hello', 0, u'he'),
    ('endswith', u'hello', 1, u''),
    ('endswith', u'hello', 0, u'hello world'),
    ('endswith', u'helloworld', 0, u'worl'),
    ('endswith', u'helloworld', 1, u'worl', 3, 9),
    ('endswith', u'helloworld', 1, u'world', 3, 12),
    ('endswith', u'helloworld', 1, u'lowo', 1, 7),
    ('endswith', u'helloworld', 1, u'lowo', 2, 7),
    ('endswith', u'helloworld', 1, u'lowo', 3, 7),
    ('endswith', u'helloworld', 0, u'lowo', 4, 7),
    ('endswith', u'helloworld', 0, u'lowo', 3, 8),
    ('endswith', u'ab', 0, u'ab', 0, 1),
    ('endswith', u'ab', 0, u'ab', 0, 0),
    ):
    test(*case)
147
# Tabs expand to the next multiple of the tab size (8 unless given), and the
# column count restarts after '\r' and '\n'.  The padding is written as
# explicit repeat counts so the number of spaces cannot be misread:
# 'ab' + tab -> 6 (size 8) or 2 (size 4) spaces; 'g' + tab -> 7 or 3 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
# Disabled: the `if 0` guard means nothing below is ever executed.
if 0:
    for text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', text, u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# Padding to a field width.  The fill is written as explicit repeat counts so
# the expected number of spaces is unambiguous.  center() puts the smaller
# half of an odd fill on the left here (3 + 4 for width 10, 1 + 2 for width 6).
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# Case predicates, grouped per method as (input, expected result) pairs.
for method, cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
        )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
        )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
        )),
    ):
    for text, expected in cases:
        test(method, text, expected)
208
# splitlines(): (input, expected list, arguments passed to splitlines).
# The last entry passes 1 and expects the line ends to be kept.
for text, expected, split_args in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'], ()),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'], ()),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'], ()),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'], ()),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''], ()),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''], ()),
    (u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], (1,)),
    ):
    test('splitlines', text, expected, *split_args)

# translate() with a mapping keyed by ordinal; the values used are None,
# another ordinal, and a Unicode string.
for expected, mapping in (
    (u'bbbc', {ord('a'): None}),
    (u'iiic', {ord('a'): None, ord('b'): ord('i')}),
    (u'iiix', {ord('a'): None, ord('b'): ord('i'), ord('c'): u'x'}),
    ):
    test('translate', u"abababc", expected, mapping)
220
Guido van Rossumd4d26842000-03-13 23:21:48 +0000221# Contains:
222print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000223assert ('a' in u'abdb') == 1
224assert ('a' in u'bdab') == 1
225assert ('a' in u'bdaba') == 1
226assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000227assert ('a' in u'bdba') == 1
228assert (u'a' in u'bdba') == 1
229assert (u'a' in u'bdb') == 0
230assert (u'a' in 'bdb') == 0
231assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000232assert (u'a' in ('a',1,None)) == 1
233assert (u'a' in (1,None,'a')) == 1
234assert (u'a' in (1,None,u'a')) == 1
235assert ('a' in ('a',1,None)) == 1
236assert ('a' in (1,None,'a')) == 1
237assert ('a' in (1,None,u'a')) == 1
238assert ('a' in ('x',1,u'y')) == 0
239assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000240print 'done.'
241
Guido van Rossuma831cac2000-03-10 23:23:21 +0000242# Formatting:
243print 'Testing Unicode formatting strings...',
244assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
245assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
246assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
247assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
248assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
249assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000250assert u"%c" % (u"a",) == u'a'
251assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000252assert u"%c" % (34,) == u'"'
253assert u"%c" % (36,) == u'$'
254assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
255assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
256assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
Guido van Rossum97064862000-04-10 13:52:48 +0000257# formatting jobs delegated from the string implementation:
258assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
259assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
260assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
261assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
262assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
263assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
264assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
265assert '...%s...' % u"abc" == u'...abc...'
Fred Drake774c9312000-05-09 19:57:46 +0000266try:
267 '...%s...äöü...' % u"abc"
268except ValueError:
269 pass
270else:
271 raise AssertionError, "'...%s...äöü...' % u'abc' failed to raise an exception"
Guido van Rossuma831cac2000-03-10 23:23:21 +0000272print 'done.'
273
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000274# Test builtin codecs
275print 'Testing builtin codecs...',
276
277assert unicode('hello','ascii') == u'hello'
278assert unicode('hello','utf-8') == u'hello'
279assert unicode('hello','utf8') == u'hello'
280assert unicode('hello','latin-1') == u'hello'
281
Guido van Rossum97064862000-04-10 13:52:48 +0000282try:
283 u'Andr\202 x'.encode('ascii')
284 u'Andr\202 x'.encode('ascii','strict')
285except ValueError:
286 pass
287else:
288 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
289assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
290assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
291
292try:
293 unicode('Andr\202 x','ascii')
294 unicode('Andr\202 x','ascii','strict')
295except ValueError:
296 pass
297else:
298 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
299assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
300assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
301
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000302assert u'hello'.encode('ascii') == 'hello'
303assert u'hello'.encode('utf-8') == 'hello'
304assert u'hello'.encode('utf8') == 'hello'
305assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
306assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
307assert u'hello'.encode('latin-1') == 'hello'
308
309u = u''.join(map(unichr, range(1024)))
310for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
311 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
312 assert unicode(u.encode(encoding),encoding) == u
313
314u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000315for encoding in (
316 'latin-1',
317 ):
318 try:
319 assert unicode(u.encode(encoding),encoding) == u
320 except AssertionError:
321 print '*** codec "%s" failed round-trip' % encoding
322 except ValueError,why:
323 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000324
325u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000326for encoding in (
327 'ascii',
328 ):
329 try:
330 assert unicode(u.encode(encoding),encoding) == u
331 except AssertionError:
332 print '*** codec "%s" failed round-trip' % encoding
333 except ValueError,why:
334 print '*** codec for "%s" failed: %s' % (encoding, why)
335
336print 'done.'
337
338print 'Testing standard mapping codecs...',
339
340print '0-127...',
341s = ''.join(map(chr, range(128)))
342for encoding in (
343 'cp037', 'cp1026',
344 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
345 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
346 'cp863', 'cp865', 'cp866',
347 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
348 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
349 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
350 'mac_cyrillic', 'mac_latin2',
351
352 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
353 'cp1256', 'cp1257', 'cp1258',
354 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
355
356 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
357 'cp1006', 'cp875', 'iso8859_8',
358
359 ### These have undefined mappings:
360 #'cp424',
361
362 ):
363 try:
364 assert unicode(s,encoding).encode(encoding) == s
365 except AssertionError:
366 print '*** codec "%s" failed round-trip' % encoding
367 except ValueError,why:
368 print '*** codec for "%s" failed: %s' % (encoding, why)
369
370print '128-255...',
371s = ''.join(map(chr, range(128,256)))
372for encoding in (
373 'cp037', 'cp1026',
374 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
375 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
376 'cp863', 'cp865', 'cp866',
377 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
378 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
379 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
380 'mac_cyrillic', 'mac_latin2',
381
382 ### These have undefined mappings:
383 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
384 #'cp1256', 'cp1257', 'cp1258',
385 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
386 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
387
388 ### These fail the round-trip:
389 #'cp1006', 'cp875', 'iso8859_8',
390
391 ):
392 try:
393 assert unicode(s,encoding).encode(encoding) == s
394 except AssertionError:
395 print '*** codec "%s" failed round-trip' % encoding
396 except ValueError,why:
397 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000398
399print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000400
# Adjacent string literals form a single string.  These checks cover several
# mixes of plain and Unicode literals and compare each against the joined
# Unicode text.  The literals must stay adjacent (no operator between them):
# that adjacency is what is being checked.
print 'Testing Unicode string concatenation...',
assert (u"abc" u"def") == u"abcdef"
assert ("abc" u"def") == u"abcdef"
assert (u"abc" "def") == u"abcdef"
assert (u"abc" u"def" "ghi") == u"abcdefghi"
assert ("abc" "def" u"ghi") == u"abcdefghi"
print 'done.'