blob: ec894ed41807b67f396a7a1bd97848378e9c1b8c [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search.  Each row holds the arguments of
# one test() call: method, input, expected result, then method arguments.
for call in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
):
    apply(test, call)
52
# Switched off with `if 0:`; nothing in this branch runs.
if 0:
    # 256-character table: identity except that 'a', 'b', 'c' map to
    # 'x', 'y', 'z'.
    transtable = (
        '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017'
        '\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037'
        ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
        'xyzdefghijklmnopqrstuvwxyz{|}~\177'
        '\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217'
        '\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237'
        '\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257'
        '\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277'
        '\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317'
        '\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337'
        '\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357'
        '\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
    )

    for expected, replacement in ((transtable, u'xyz'), (ValueError, u'xyzq')):
        test('maketrans', u'abc', expected, replacement)
58
# split(): each row is (input, expected list, optional separator and
# maxsplit arguments).
for row in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
):
    apply(test, ('split',) + row)
70
# join now works with any sequence type
class Sequence:
    """Bare sequence that forwards len() and indexing to self.seq."""

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join(): (separator, expected result or exception class, argument).
for separator, expected, items in (
    (u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    (u'', u'abcd', (u'a', u'b', u'c', u'd')),
    (u' ', u'w x y z', Sequence()),
    (u' ', TypeError, 7),
):
    test('join', separator, expected, items)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
test('join', u' ', TypeError, BadSeq())

# Expected text: ten runs of ten 'x' with ':' between them, assembled by
# plain concatenation rather than with join.
chunk = u'x'*10
result = chunk
for i in range(1, 10):
    result = result + u':' + chunk
for container in ([chunk] * 10, (chunk,) * 10):
    test('join', u':', result, container)
94
# Whitespace stripping and case swapping: (method, input, expected).
for method, text, expected in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
):
    test(method, text, expected)
101
# Switched off with `if 0:`; nothing in this branch runs.
# NOTE(review): `string` is not imported in the visible part of the file and
# `transtable` is only assigned inside another `if 0:` branch -- confirm
# both before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for text, expected in ((u'abc', u'Abc'), (u'xyz', u'xyz')):
        test('translate', text, expected, table)
108
# replace() on one fixed input: (expected, old, new, optional max count).
for row in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
):
    apply(test, ('replace', u'one!two!three!') + row)
118
# startswith()/endswith(): rows are (input, expected truth value, affix,
# optional start, optional end), grouped under the method they exercise.
for method, rows in (
    ('startswith', (
        (u'hello', 1, u'he'),
        (u'hello', 1, u'hello'),
        (u'hello', 0, u'hello world'),
        (u'hello', 1, u''),
        (u'hello', 0, u'ello'),
        (u'hello', 1, u'ello', 1),
        (u'hello', 1, u'o', 4),
        (u'hello', 0, u'o', 5),
        (u'hello', 1, u'', 5),
        (u'hello', 0, u'lo', 6),
        (u'helloworld', 1, u'lowo', 3),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 3, 6),
    )),
    ('endswith', (
        (u'hello', 1, u'lo'),
        (u'hello', 0, u'he'),
        (u'hello', 1, u''),
        (u'hello', 0, u'hello world'),
        (u'helloworld', 0, u'worl'),
        (u'helloworld', 1, u'worl', 3, 9),
        (u'helloworld', 1, u'world', 3, 12),
        (u'helloworld', 1, u'lowo', 1, 7),
        (u'helloworld', 1, u'lowo', 2, 7),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 4, 7),
        (u'helloworld', 0, u'lowo', 3, 8),
        (u'ab', 0, u'ab', 0, 1),
        (u'ab', 0, u'ab', 0, 0),
    )),
):
    for row in rows:
        apply(test, (method,) + row)
147
# expandtabs(): a tab advances to the next multiple of the tab size and the
# column count restarts after '\r' or '\n'.  With size 8, "ab" is followed
# by 6 blanks and "g" by 7; with size 4, by 2 and 3.  The padding is spelled
# as an explicit repeat count so the expected widths cannot be lost to
# whitespace normalisation.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
# Switched off with `if 0:`; nothing in this branch runs.
if 0:
    for text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', text, u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# ljust()/rjust()/center(): the result is padded with blanks up to the
# requested width and returned unchanged when the width is not larger than
# the string.  Padding is written as a repeat count so that the expected
# length is explicit; for center() the odd blank goes on the right here.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# Character-class predicates: for each method, (input, expected truth value).
for method, cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
    )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
    )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
    )),
    ('isalpha', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
    )),
    ('isalnum', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
    )),
):
    for text, expected in cases:
        test(method, text, expected)
224
# splitlines(): (input, expected list, optional keepends flag).
for row in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
):
    apply(test, ('splitlines',) + row)

# translate() with a dict keyed by ordinal; the values used are None, an
# ordinal and a Unicode string.
for expected, mapping in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
):
    test('translate', u"abababc", expected, mapping)
236
Guido van Rossumd4d26842000-03-13 23:21:48 +0000237# Contains:
238print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000239assert ('a' in u'abdb') == 1
240assert ('a' in u'bdab') == 1
241assert ('a' in u'bdaba') == 1
242assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000243assert ('a' in u'bdba') == 1
244assert (u'a' in u'bdba') == 1
245assert (u'a' in u'bdb') == 0
246assert (u'a' in 'bdb') == 0
247assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000248assert (u'a' in ('a',1,None)) == 1
249assert (u'a' in (1,None,'a')) == 1
250assert (u'a' in (1,None,u'a')) == 1
251assert ('a' in ('a',1,None)) == 1
252assert ('a' in (1,None,'a')) == 1
253assert ('a' in (1,None,u'a')) == 1
254assert ('a' in ('x',1,u'y')) == 0
255assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000256print 'done.'
257
Guido van Rossuma831cac2000-03-10 23:23:21 +0000258# Formatting:
259print 'Testing Unicode formatting strings...',
# %5.2f pads to a minimum width of five characters, so a value that formats
# to four characters (such as 3.00) gains one leading blank on top of the
# blank that follows the comma in the format string.  The padded field is
# written as its own literal so that the two adjacent blanks stay visible.
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, ' + u' 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, ' + u' 3.57'
# Seven characters already exceed the minimum width: no padding is added.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000266assert u"%c" % (u"a",) == u'a'
267assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000268assert u"%c" % (34,) == u'"'
269assert u"%c" % (36,) == u'$'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000270value = u"%r, %r" % (u"abc", "abc")
271if value != u"u'abc', 'abc'":
272 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
273
Guido van Rossuma831cac2000-03-10 23:23:21 +0000274assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000275try:
276 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
277except KeyError:
278 print '*** formatting failed for "%s"' % "u'abc, def'"
279else:
280 assert value == u'abc, def'
281
Guido van Rossum97064862000-04-10 13:52:48 +0000282# formatting jobs delegated from the string implementation:
283assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
284assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
285assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
286assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
287assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
288assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
289assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
290assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000291print 'done.'
292
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000293# Test builtin codecs
294print 'Testing builtin codecs...',
295
296assert unicode('hello','ascii') == u'hello'
297assert unicode('hello','utf-8') == u'hello'
298assert unicode('hello','utf8') == u'hello'
299assert unicode('hello','latin-1') == u'hello'
300
Guido van Rossum97064862000-04-10 13:52:48 +0000301try:
302 u'Andr\202 x'.encode('ascii')
303 u'Andr\202 x'.encode('ascii','strict')
304except ValueError:
305 pass
306else:
307 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
308assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
309assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
310
311try:
312 unicode('Andr\202 x','ascii')
313 unicode('Andr\202 x','ascii','strict')
314except ValueError:
315 pass
316else:
317 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
318assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
319assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
320
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000321assert u'hello'.encode('ascii') == 'hello'
322assert u'hello'.encode('utf-8') == 'hello'
323assert u'hello'.encode('utf8') == 'hello'
324assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
325assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
326assert u'hello'.encode('latin-1') == 'hello'
327
328u = u''.join(map(unichr, range(1024)))
329for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
330 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
331 assert unicode(u.encode(encoding),encoding) == u
332
333u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000334for encoding in (
335 'latin-1',
336 ):
337 try:
338 assert unicode(u.encode(encoding),encoding) == u
339 except AssertionError:
340 print '*** codec "%s" failed round-trip' % encoding
341 except ValueError,why:
342 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000343
344u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000345for encoding in (
346 'ascii',
347 ):
348 try:
349 assert unicode(u.encode(encoding),encoding) == u
350 except AssertionError:
351 print '*** codec "%s" failed round-trip' % encoding
352 except ValueError,why:
353 print '*** codec for "%s" failed: %s' % (encoding, why)
354
355print 'done.'
356
357print 'Testing standard mapping codecs...',
358
359print '0-127...',
360s = ''.join(map(chr, range(128)))
361for encoding in (
362 'cp037', 'cp1026',
363 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
364 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
365 'cp863', 'cp865', 'cp866',
366 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
367 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
368 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
369 'mac_cyrillic', 'mac_latin2',
370
371 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
372 'cp1256', 'cp1257', 'cp1258',
373 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
374
375 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
376 'cp1006', 'cp875', 'iso8859_8',
377
378 ### These have undefined mappings:
379 #'cp424',
380
381 ):
382 try:
383 assert unicode(s,encoding).encode(encoding) == s
384 except AssertionError:
385 print '*** codec "%s" failed round-trip' % encoding
386 except ValueError,why:
387 print '*** codec for "%s" failed: %s' % (encoding, why)
388
389print '128-255...',
390s = ''.join(map(chr, range(128,256)))
391for encoding in (
392 'cp037', 'cp1026',
393 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
394 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
395 'cp863', 'cp865', 'cp866',
396 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
397 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
398 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
399 'mac_cyrillic', 'mac_latin2',
400
401 ### These have undefined mappings:
402 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
403 #'cp1256', 'cp1257', 'cp1258',
404 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
405 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
406
407 ### These fail the round-trip:
408 #'cp1006', 'cp875', 'iso8859_8',
409
410 ):
411 try:
412 assert unicode(s,encoding).encode(encoding) == s
413 except AssertionError:
414 print '*** codec "%s" failed round-trip' % encoding
415 except ValueError,why:
416 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000417
418print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000419
420print 'Testing Unicode string concatenation...',
421assert (u"abc" u"def") == u"abcdef"
422assert ("abc" u"def") == u"abcdef"
423assert (u"abc" "def") == u"abcdef"
424assert (u"abc" u"def" "ghi") == u"abcdefghi"
425assert ("abc" "def" u"ghi") == u"abcdefghi"
426print 'done.'
Marc-André Lemburga6f73d62000-06-28 16:41:23 +0000427