blob: 237583e8dadbbf22207e3b7d0fd2cd072bf5b914 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Argument order for test(): method name, object, expected result, then the
# arguments passed to the method.

# capitalize: a leading space means the string comes back unchanged.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

# title: every run of letters is capitalized and the rest is lower-cased.
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find / rfind: expected index, then substring and optional start offset.
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
52
# Disabled with `if 0:` -- this body is never executed.  transtable is a
# 256-character table in which 'abc' is replaced by 'xyz'.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split: default whitespace splitting, an explicit separator, and the
# maxsplit argument (None as separator selects whitespace splitting).
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
# join now works with any sequence type
class Sequence:
    """Minimal user-defined sequence over the characters of 'wxyz'.

    Only __len__ and __getitem__ are provided; both delegate to the
    wrapped ``seq`` attribute.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join: list, tuple and user-defined sequence arguments; a non-sequence
# argument is expected to produce TypeError.
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
test('join', u' ', TypeError, 7)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
# A sequence containing non-string items is expected to raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected text by plain concatenation so the check does not
# depend on join itself: ten runs of ten 'x' separated by ':'.
result = u''
for i in range(10):
    if i > 0:
        result = result + u':'
    result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
94
# strip / lstrip / rstrip remove whitespace from both ends, the left end
# and the right end respectively.
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
101
# Disabled with `if 0:` -- this body is never executed.
# NOTE(review): it refers to `transtable` (only assigned inside another
# disabled block) and to the `string` module, which is not imported in the
# visible part of this file; both would be needed before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
108
# replace: the optional last argument limits the number of replacements;
# 0 leaves the string untouched and a count above the number of matches
# replaces all of them.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
118
# startswith: expected result is 1 or 0; the optional trailing arguments
# are the start and end offsets of the slice that is examined.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)
132
# endswith: expected result is 1 or 0; the optional trailing arguments
# are the start and end offsets of the slice that is examined.
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
147
# expandtabs: each tab is padded with spaces up to the next tab stop
# (default 8), and the column count restarts after '\r' or '\n'.
# With tab size 8: 'ab' + 6 spaces, 'g' + 7 spaces.
# With tab size 4: 'ab' + 2 spaces, 'g' + 3 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
152
# Disabled with `if 0:` -- this body is never executed.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# ljust / rjust / center pad with spaces up to the requested width; a width
# smaller than the string leaves it unchanged.  When the padding cannot be
# split evenly, center puts the extra space on the right.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# islower / isupper / istitle.  Going by the expectations below, u'\u1FFc'
# counts as neither lower nor upper case but as title case, and a string
# without any cased character (u'\n') fails all three predicates.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)
208
# splitlines: '\n', '\r' and '\r\n' each end one line; a trailing line end
# does not add an empty final item.  The last case passes 1 as the
# argument, which keeps the line ends in the result.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate with a dictionary keyed by ordinal: None deletes the character,
# an ordinal or a Unicode string replaces it.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
220
Guido van Rossumd4d26842000-03-13 23:21:48 +0000221# Contains:
222print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000223assert ('a' in u'abdb') == 1
224assert ('a' in u'bdab') == 1
225assert ('a' in u'bdaba') == 1
226assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000227assert ('a' in u'bdba') == 1
228assert (u'a' in u'bdba') == 1
229assert (u'a' in u'bdb') == 0
230assert (u'a' in 'bdb') == 0
231assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000232assert (u'a' in ('a',1,None)) == 1
233assert (u'a' in (1,None,'a')) == 1
234assert (u'a' in (1,None,u'a')) == 1
235assert ('a' in ('a',1,None)) == 1
236assert ('a' in (1,None,'a')) == 1
237assert ('a' in (1,None,u'a')) == 1
238assert ('a' in ('x',1,u'y')) == 0
239assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000240print 'done.'
241
Guido van Rossuma831cac2000-03-10 23:23:21 +0000242# Formatting:
243print 'Testing Unicode formatting strings...',
244assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
245assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
246assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
247assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
248assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
249assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000250assert u"%c" % (u"a",) == u'a'
251assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000252assert u"%c" % (34,) == u'"'
253assert u"%c" % (36,) == u'$'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000254value = u"%r, %r" % (u"abc", "abc")
255if value != u"u'abc', 'abc'":
256 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
257
Guido van Rossuma831cac2000-03-10 23:23:21 +0000258assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000259try:
260 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
261except KeyError:
262 print '*** formatting failed for "%s"' % "u'abc, def'"
263else:
264 assert value == u'abc, def'
265
Guido van Rossum97064862000-04-10 13:52:48 +0000266# formatting jobs delegated from the string implementation:
267assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
268assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
269assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
270assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
271assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
272assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
273assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
274assert '...%s...' % u"abc" == u'...abc...'
Fred Drake774c9312000-05-09 19:57:46 +0000275try:
276 '...%s...äöü...' % u"abc"
277except ValueError:
278 pass
279else:
Marc-André Lemburg84625732000-06-13 12:05:36 +0000280 print "*** formatting failed ...%s...äöü...' % u'abc' failed to raise an exception"
Guido van Rossuma831cac2000-03-10 23:23:21 +0000281print 'done.'
282
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000283# Test builtin codecs
284print 'Testing builtin codecs...',
285
286assert unicode('hello','ascii') == u'hello'
287assert unicode('hello','utf-8') == u'hello'
288assert unicode('hello','utf8') == u'hello'
289assert unicode('hello','latin-1') == u'hello'
290
Guido van Rossum97064862000-04-10 13:52:48 +0000291try:
292 u'Andr\202 x'.encode('ascii')
293 u'Andr\202 x'.encode('ascii','strict')
294except ValueError:
295 pass
296else:
297 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
298assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
299assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
300
301try:
302 unicode('Andr\202 x','ascii')
303 unicode('Andr\202 x','ascii','strict')
304except ValueError:
305 pass
306else:
307 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
308assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
309assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
310
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000311assert u'hello'.encode('ascii') == 'hello'
312assert u'hello'.encode('utf-8') == 'hello'
313assert u'hello'.encode('utf8') == 'hello'
314assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
315assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
316assert u'hello'.encode('latin-1') == 'hello'
317
318u = u''.join(map(unichr, range(1024)))
319for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
320 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
321 assert unicode(u.encode(encoding),encoding) == u
322
323u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000324for encoding in (
325 'latin-1',
326 ):
327 try:
328 assert unicode(u.encode(encoding),encoding) == u
329 except AssertionError:
330 print '*** codec "%s" failed round-trip' % encoding
331 except ValueError,why:
332 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000333
334u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000335for encoding in (
336 'ascii',
337 ):
338 try:
339 assert unicode(u.encode(encoding),encoding) == u
340 except AssertionError:
341 print '*** codec "%s" failed round-trip' % encoding
342 except ValueError,why:
343 print '*** codec for "%s" failed: %s' % (encoding, why)
344
345print 'done.'
346
347print 'Testing standard mapping codecs...',
348
349print '0-127...',
350s = ''.join(map(chr, range(128)))
351for encoding in (
352 'cp037', 'cp1026',
353 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
354 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
355 'cp863', 'cp865', 'cp866',
356 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
357 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
358 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
359 'mac_cyrillic', 'mac_latin2',
360
361 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
362 'cp1256', 'cp1257', 'cp1258',
363 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
364
365 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
366 'cp1006', 'cp875', 'iso8859_8',
367
368 ### These have undefined mappings:
369 #'cp424',
370
371 ):
372 try:
373 assert unicode(s,encoding).encode(encoding) == s
374 except AssertionError:
375 print '*** codec "%s" failed round-trip' % encoding
376 except ValueError,why:
377 print '*** codec for "%s" failed: %s' % (encoding, why)
378
379print '128-255...',
380s = ''.join(map(chr, range(128,256)))
381for encoding in (
382 'cp037', 'cp1026',
383 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
384 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
385 'cp863', 'cp865', 'cp866',
386 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
387 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
388 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
389 'mac_cyrillic', 'mac_latin2',
390
391 ### These have undefined mappings:
392 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
393 #'cp1256', 'cp1257', 'cp1258',
394 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
395 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
396
397 ### These fail the round-trip:
398 #'cp1006', 'cp875', 'iso8859_8',
399
400 ):
401 try:
402 assert unicode(s,encoding).encode(encoding) == s
403 except AssertionError:
404 print '*** codec "%s" failed round-trip' % encoding
405 except ValueError,why:
406 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000407
408print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000409
410print 'Testing Unicode string concatenation...',
411assert (u"abc" u"def") == u"abcdef"
412assert ("abc" u"def") == u"abcdef"
413assert (u"abc" "def") == u"abcdef"
414assert (u"abc" u"def" "ghi") == u"abcdefghi"
415assert ("abc" "def" u"ghi") == u"abcdefghi"
416print 'done.'