blob: ef8bd82b83b7fdc0f2cb8e6728228f53fa5ef075 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search.  Each row is laid out as
# (method, input, expected[, method arguments...]) and handed to test().
for _case in [
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
]:
    apply(test, _case)
52
if 0:
    # Identity table over all 256 byte values, except that 'a', 'b' and
    # 'c' map to 'x', 'y' and 'z'.
    _table_chars = map(chr, range(256))
    _table_chars[ord('a'):ord('c') + 1] = ['x', 'y', 'z']
    transtable = ''.join(_table_chars)

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): rows are (input, expected list[, separator[, maxsplit]]).
for _case in [
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
]:
    apply(test, ('split',) + _case)
70
71# join now works with any sequence type
class Sequence:
    """Object offering only len() and integer indexing over 'wxyz'."""

    def __init__(self):
        # Subclasses replace this attribute to change the items served.
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join(): rows are (separator, expected, argument); a non-sequence
# argument is expected to raise TypeError.
for _separator, _expected, _items in [
    (u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    (u'', u'abcd', (u'a', u'b', u'c', u'd')),
    (u' ', u'w x y z', Sequence()),
    (u' ', TypeError, 7),
]:
    test('join', _separator, _expected, _items)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
test('join', u' ', TypeError, BadSeq())

# Expected value: ten runs of ten 'x' with a colon between neighbours,
# assembled by plain concatenation rather than by join itself.
_run = u'x' * 10
result = _run
for i in range(1, 10):
    result = result + u':' + _run
for _items in ([_run] * 10, (_run,) * 10):
    test('join', u':', result, _items)
94
# Whitespace stripping and case swapping: (method, input, expected).
for _method, _input, _expected in [
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
]:
    test(_method, _input, _expected)
101
if 0:
    # Disabled block.  The string module is imported here because the
    # file does not import it at the top; without this the block would
    # fail with a NameError as soon as it is switched on.
    import string

    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
108
# replace() on u'one!two!three!': rows are
# (expected, old, new[, maxcount]).
for _case in [
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
]:
    apply(test, ('replace', u'one!two!three!') + _case)
118
# Prefix and suffix checks.  Rows are
# (method, input, expected, affix[, start[, end]]).
for _case in [
    ('startswith', u'hello', 1, u'he'),
    ('startswith', u'hello', 1, u'hello'),
    ('startswith', u'hello', 0, u'hello world'),
    ('startswith', u'hello', 1, u''),
    ('startswith', u'hello', 0, u'ello'),
    ('startswith', u'hello', 1, u'ello', 1),
    ('startswith', u'hello', 1, u'o', 4),
    ('startswith', u'hello', 0, u'o', 5),
    ('startswith', u'hello', 1, u'', 5),
    ('startswith', u'hello', 0, u'lo', 6),
    ('startswith', u'helloworld', 1, u'lowo', 3),
    ('startswith', u'helloworld', 1, u'lowo', 3, 7),
    ('startswith', u'helloworld', 0, u'lowo', 3, 6),

    ('endswith', u'hello', 1, u'lo'),
    ('endswith', u'hello', 0, u'he'),
    ('endswith', u'hello', 1, u''),
    ('endswith', u'hello', 0, u'hello world'),
    ('endswith', u'helloworld', 0, u'worl'),
    ('endswith', u'helloworld', 1, u'worl', 3, 9),
    ('endswith', u'helloworld', 1, u'world', 3, 12),
    ('endswith', u'helloworld', 1, u'lowo', 1, 7),
    ('endswith', u'helloworld', 1, u'lowo', 2, 7),
    ('endswith', u'helloworld', 1, u'lowo', 3, 7),
    ('endswith', u'helloworld', 0, u'lowo', 4, 7),
    ('endswith', u'helloworld', 0, u'lowo', 3, 8),
    ('endswith', u'ab', 0, u'ab', 0, 1),
    ('endswith', u'ab', 0, u'ab', 0, 0),
]:
    apply(test, _case)
147
# expandtabs(): a tab pads to the next multiple of the tab size, and the
# column count restarts after '\r' or '\n'.  With the default size of 8
# the tab after 'ab' becomes 6 spaces and the one after 'g' becomes 7;
# with size 4 they become 2 and 3.  The padding is spelled with
# multiplication so the exact number of spaces is unambiguous.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)

if 0:
    # Disabled block, kept as found.
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# ljust/rjust/center pad u'abc' with spaces up to the requested width
# and leave it alone when the width is not larger than the string.  The
# padding is spelled with multiplication so each expected value visibly
# adds up to the width; center puts the odd leftover space on the right
# for these widths (3 + 4 for width 10, 1 + 2 for width 6).
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# Character-class predicates.  Each entry pairs a method name with its
# (input, expected) rows; the rows run in the order listed.
for _method, _rows in [
    ('islower', [
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
    ]),
    ('isupper', [
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
    ]),
    ('istitle', [
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
    ]),
    ('isalpha', [
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
    ]),
    ('isalnum', [
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
    ]),
]:
    for _input, _expected in _rows:
        test(_method, _input, _expected)
224
# splitlines(): rows are (input, expected list[, keepends]).
for _case in [
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
]:
    apply(test, ('splitlines',) + _case)

# translate() on u"abababc" with a mapping keyed by ordinal; the mapped
# values used are None, another ordinal, and a Unicode string.
for _expected, _mapping in [
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
]:
    test('translate', u"abababc", _expected, _mapping)
236
Guido van Rossumd4d26842000-03-13 23:21:48 +0000237# Contains:
238print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000239assert ('a' in u'abdb') == 1
240assert ('a' in u'bdab') == 1
241assert ('a' in u'bdaba') == 1
242assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000243assert ('a' in u'bdba') == 1
244assert (u'a' in u'bdba') == 1
245assert (u'a' in u'bdb') == 0
246assert (u'a' in 'bdb') == 0
247assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000248assert (u'a' in ('a',1,None)) == 1
249assert (u'a' in (1,None,'a')) == 1
250assert (u'a' in (1,None,u'a')) == 1
251assert ('a' in ('a',1,None)) == 1
252assert ('a' in (1,None,'a')) == 1
253assert ('a' in (1,None,u'a')) == 1
254assert ('a' in ('x',1,u'y')) == 0
255assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000256print 'done.'
257
Guido van Rossuma831cac2000-03-10 23:23:21 +0000258# Formatting:
259print 'Testing Unicode formatting strings...',
260assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
261assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
262assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
263assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
264assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
265assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000266assert u"%c" % (u"a",) == u'a'
267assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000268assert u"%c" % (34,) == u'"'
269assert u"%c" % (36,) == u'$'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000270value = u"%r, %r" % (u"abc", "abc")
271if value != u"u'abc', 'abc'":
272 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
273
Guido van Rossuma831cac2000-03-10 23:23:21 +0000274assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000275try:
276 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
277except KeyError:
278 print '*** formatting failed for "%s"' % "u'abc, def'"
279else:
280 assert value == u'abc, def'
281
Guido van Rossum97064862000-04-10 13:52:48 +0000282# formatting jobs delegated from the string implementation:
283assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
284assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
285assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
286assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
287assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
288assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
289assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
290assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000291print 'done.'
292
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000293# Test builtin codecs
294print 'Testing builtin codecs...',
295
296assert unicode('hello','ascii') == u'hello'
297assert unicode('hello','utf-8') == u'hello'
298assert unicode('hello','utf8') == u'hello'
299assert unicode('hello','latin-1') == u'hello'
300
class String:
    """Object whose __str__ hands back whatever is stored in x."""

    # Class-level default; the script assigns x on the instance.
    x = ''

    def __str__(self):
        return self.x
305
# unicode() and str() applied to an object whose __str__ returns first
# an 8-bit string, then a Unicode string.
o = String()
for _text in ('abc', u'abc'):
    o.x = _text
    assert unicode(o) == u'abc'
    assert str(o) == 'abc'
315
Guido van Rossum97064862000-04-10 13:52:48 +0000316try:
317 u'Andr\202 x'.encode('ascii')
318 u'Andr\202 x'.encode('ascii','strict')
319except ValueError:
320 pass
321else:
322 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
323assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
324assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
325
326try:
327 unicode('Andr\202 x','ascii')
328 unicode('Andr\202 x','ascii','strict')
329except ValueError:
330 pass
331else:
332 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
333assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
334assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
335
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000336assert u'hello'.encode('ascii') == 'hello'
337assert u'hello'.encode('utf-8') == 'hello'
338assert u'hello'.encode('utf8') == 'hello'
339assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
340assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
341assert u'hello'.encode('latin-1') == 'hello'
342
343u = u''.join(map(unichr, range(1024)))
344for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
345 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
346 assert unicode(u.encode(encoding),encoding) == u
347
348u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000349for encoding in (
350 'latin-1',
351 ):
352 try:
353 assert unicode(u.encode(encoding),encoding) == u
354 except AssertionError:
355 print '*** codec "%s" failed round-trip' % encoding
356 except ValueError,why:
357 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000358
359u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000360for encoding in (
361 'ascii',
362 ):
363 try:
364 assert unicode(u.encode(encoding),encoding) == u
365 except AssertionError:
366 print '*** codec "%s" failed round-trip' % encoding
367 except ValueError,why:
368 print '*** codec for "%s" failed: %s' % (encoding, why)
369
370print 'done.'
371
372print 'Testing standard mapping codecs...',
373
374print '0-127...',
375s = ''.join(map(chr, range(128)))
376for encoding in (
377 'cp037', 'cp1026',
378 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
379 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
380 'cp863', 'cp865', 'cp866',
381 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
382 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
383 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
384 'mac_cyrillic', 'mac_latin2',
385
386 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
387 'cp1256', 'cp1257', 'cp1258',
388 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
389
390 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
391 'cp1006', 'cp875', 'iso8859_8',
392
393 ### These have undefined mappings:
394 #'cp424',
395
396 ):
397 try:
398 assert unicode(s,encoding).encode(encoding) == s
399 except AssertionError:
400 print '*** codec "%s" failed round-trip' % encoding
401 except ValueError,why:
402 print '*** codec for "%s" failed: %s' % (encoding, why)
403
404print '128-255...',
405s = ''.join(map(chr, range(128,256)))
406for encoding in (
407 'cp037', 'cp1026',
408 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
409 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
410 'cp863', 'cp865', 'cp866',
411 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
412 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
413 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
414 'mac_cyrillic', 'mac_latin2',
415
416 ### These have undefined mappings:
417 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
418 #'cp1256', 'cp1257', 'cp1258',
419 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
420 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
421
422 ### These fail the round-trip:
423 #'cp1006', 'cp875', 'iso8859_8',
424
425 ):
426 try:
427 assert unicode(s,encoding).encode(encoding) == s
428 except AssertionError:
429 print '*** codec "%s" failed round-trip' % encoding
430 except ValueError,why:
431 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000432
433print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000434
435print 'Testing Unicode string concatenation...',
436assert (u"abc" u"def") == u"abcdef"
437assert ("abc" u"def") == u"abcdef"
438assert (u"abc" "def") == u"abcdef"
439assert (u"abc" u"def" "ghi") == u"abcdefghi"
440assert ("abc" "def" u"ghi") == u"abcdefghi"
441print 'done.'
Marc-André Lemburga6f73d62000-06-28 16:41:23 +0000442