blob: 45449ee67b7c15f49f1beb23bd47c70f0f61605e [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
def test(method, input, output, *args):
    """Call ``input.<method>(*args)`` and report whether it yields *output*.

    method -- name of the method to look up on *input*
    input  -- object under test
    output -- expected return value, or the exception class the call is
              expected to raise
    args   -- positional arguments passed to the method

    Nothing is returned.  A mismatch is reported on stdout with a line
    starting with '*'; when the mismatching value came from an exception,
    the exception type and value are printed as well.  With ``verbose``
    set, the call and a yes/no verdict are printed for every test.
    """
    if verbose:
        # Trailing comma: the yes/no verdict below continues this line.
        print('%s.%s%s =? %s... ' % (repr(input), method, args, output)),
    # Bound up front so the failure report still works when getattr()
    # itself raises (e.g. the method does not exist).
    f = None
    try:
        f = getattr(input, method)
        value = f(*args)
    except:
        # Deliberately broad: any exception type may be the expected
        # "output" of a test, so the raised type becomes the value.
        exc = sys.exc_info()[:2]
        value = exc[0]
    else:
        exc = None
    if value != output:
        if verbose:
            print('no')
        print('* %s %s %s %s' % (f, repr(input), repr(output), repr(value)))
        if exc:
            print(' value == %s: %s' % exc)
    else:
        if verbose:
            print('yes')
31
# Case conversion and substring search.
# Each row is (method, input, expected[, call arguments...]).
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
):
    test(*case)
52
if 0:
    # Disabled.  The table maps every byte value to itself except that
    # 'a', 'b' and 'c' map to 'x', 'y' and 'z'; it is written as sixteen
    # rows of sixteen bytes.
    transtable = (
        '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017'
        '\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037'
        ' !"#$%&\'()*+,-./'
        '0123456789:;<=>?'
        '@ABCDEFGHIJKLMNO'
        'PQRSTUVWXYZ[\\]^_'
        '`xyzdefghijklmno'
        'pqrstuvwxyz{|}~\177'
        '\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217'
        '\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237'
        '\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257'
        '\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277'
        '\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317'
        '\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337'
        '\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357'
        '\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
    )

    for case in (
        ('maketrans', u'abc', transtable, u'xyz'),
        ('maketrans', u'abc', ValueError, u'xyzq'),
    ):
        test(*case)
58
# split: default whitespace splitting, explicit separator, and maxsplit.
# Each row is (method, input, expected[, call arguments...]).
for case in (
    ('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    ('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    ('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    ('split', u'a b c d', [u'a', u'b c d'], None, 1),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    ('split', u'a b c d', [u'a b c d'], None, 0),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d ', [u'a', u'b', u'c', u'd']),
):
    test(*case)
70
# join now works with any sequence type
class Sequence:
    """Minimal sequence: only __len__ and __getitem__, backed by ``self.seq``."""

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Indexing is delegated, so an out-of-range index raises whatever
        # the underlying container raises.
        return self.seq[i]
76
# join over a list, a tuple, a user-defined sequence and a non-sequence.
for case in (
    ('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    ('join', u'', u'abcd', (u'a', u'b', u'c', u'd')),
    ('join', u' ', u'w x y z', Sequence()),
    ('join', u' ', TypeError, 7),
):
    test(*case)
81
class BadSeq(Sequence):
    """Sequence whose items are an int, a unicode string and a long."""

    def __init__(self):
        self.seq = [7, u'hello', long(123)]
84
test('join', u' ', TypeError, BadSeq())

# The expected text is built by plain concatenation so that it does not
# depend on join, the method being tested.
chunk = u'x' * 10
result = u''
for i in range(10):
    if i:
        result = result + u':'
    result = result + chunk
for parts in ([chunk] * 10, (chunk,) * 10):
    test('join', u':', result, parts)
94
# Whitespace stripping and case swapping.
for case in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
):
    test(*case)
101
if 0:
    # Disabled.  NOTE(review): this uses `transtable`, which is only
    # assigned inside the disabled maketrans section, and the `string`
    # module, which the visible imports do not bring in -- both would
    # have to be provided before enabling this.
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for text, expected in ((u'abc', u'Abc'), (u'xyz', u'xyz')):
        test('translate', text, expected, table)
107 test('translate', u'xyz', u'xyz', table)
108
# replace, with and without a maximum replacement count.
# Each row is (expected, call arguments...); the input is the same throughout.
for expected_and_args in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
):
    test('replace', u'one!two!three!', *expected_and_args)
118
# startswith / endswith, including the optional start and end offsets.
# Each row is (method, input, expected, prefix-or-suffix[, start[, end]]).
for case in (
    ('startswith', u'hello', 1, u'he'),
    ('startswith', u'hello', 1, u'hello'),
    ('startswith', u'hello', 0, u'hello world'),
    ('startswith', u'hello', 1, u''),
    ('startswith', u'hello', 0, u'ello'),
    ('startswith', u'hello', 1, u'ello', 1),
    ('startswith', u'hello', 1, u'o', 4),
    ('startswith', u'hello', 0, u'o', 5),
    ('startswith', u'hello', 1, u'', 5),
    ('startswith', u'hello', 0, u'lo', 6),
    ('startswith', u'helloworld', 1, u'lowo', 3),
    ('startswith', u'helloworld', 1, u'lowo', 3, 7),
    ('startswith', u'helloworld', 0, u'lowo', 3, 6),

    ('endswith', u'hello', 1, u'lo'),
    ('endswith', u'hello', 0, u'he'),
    ('endswith', u'hello', 1, u''),
    ('endswith', u'hello', 0, u'hello world'),
    ('endswith', u'helloworld', 0, u'worl'),
    ('endswith', u'helloworld', 1, u'worl', 3, 9),
    ('endswith', u'helloworld', 1, u'world', 3, 12),
    ('endswith', u'helloworld', 1, u'lowo', 1, 7),
    ('endswith', u'helloworld', 1, u'lowo', 2, 7),
    ('endswith', u'helloworld', 1, u'lowo', 3, 7),
    ('endswith', u'helloworld', 0, u'lowo', 4, 7),
    ('endswith', u'helloworld', 0, u'lowo', 3, 8),
    ('endswith', u'ab', 0, u'ab', 0, 1),
    ('endswith', u'ab', 0, u'ab', 0, 0),
):
    test(*case)
147
# expandtabs: a tab advances to the next multiple of the tab size, and the
# column count restarts after '\r' or '\n'.  The padding is written as
# explicit space counts so the expected width of each expansion is visible
# and cannot be lost to whitespace normalisation.
#   tab size 8: 'ab' + 6 spaces, 'g' + 7 spaces
#   tab size 4: 'ab' + 2 spaces, 'g' + 3 spaces
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
if 0:
    # Disabled: every input is expected to come back as u'Abc Def Ghi'.
    for text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', text, u'Abc Def Ghi')
157
# Comparisons:
print('Testing Unicode comparisons...'),
# Each relation is checked for unicode/str, str/unicode and unicode/unicode.
for left, right in ((u'abc', 'abc'), ('abc', u'abc'), (u'abc', u'abc')):
    assert left == right
for longer, shorter in ((u'abcd', 'abc'), ('abcd', u'abc'), (u'abcd', u'abc')):
    assert longer > shorter
for shorter, longer in ((u'abc', 'abcd'), ('abc', u'abcd'), (u'abc', u'abcd')):
    assert shorter < longer
print('done.')
170
# ljust / rjust / center.  The padding is written as explicit space counts
# so that each expected value really has the requested width:
#   width 10 -> 7 pad characters (center: 3 left, 4 right)
#   width 6  -> 3 pad characters (center: 1 left, 2 right)
#   width 2  -> shorter than the text, which is returned unchanged
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# islower / isupper / istitle.  Each row is (method, input, expected).
for case in (
    ('islower', u'a', 1),
    ('islower', u'A', 0),
    ('islower', u'\n', 0),
    ('islower', u'\u1FFc', 0),
    ('islower', u'abc', 1),
    ('islower', u'aBc', 0),
    ('islower', u'abc\n', 1),

    ('isupper', u'a', 0),
    ('isupper', u'A', 1),
    ('isupper', u'\n', 0),
    ('isupper', u'\u1FFc', 0),
    ('isupper', u'ABC', 1),
    ('isupper', u'AbC', 0),
    ('isupper', u'ABC\n', 1),

    ('istitle', u'a', 0),
    ('istitle', u'A', 1),
    ('istitle', u'\n', 0),
    ('istitle', u'\u1FFc', 1),
    ('istitle', u'A Titlecased Line', 1),
    ('istitle', u'A\nTitlecased Line', 1),
    ('istitle', u'A Titlecased, Line', 1),
    ('istitle', u'Greek \u1FFcitlecases ...', 1),
    ('istitle', u'Not a capitalized String', 0),
    ('istitle', u'Not\ta Titlecase String', 0),
    ('istitle', u'Not--a Titlecase String', 0),
):
    test(*case)
208
# splitlines (the final row passes 1 so the line ends are kept), then
# translate with a mapping from ordinals to None, an ordinal or a string.
for case in (
    ('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),

    ('translate', u"abababc", u'bbbc', {ord('a'):None}),
    ('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    ('translate', u"abababc", u'iiix',
     {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
):
    test(*case)
220
# Contains:
print('Testing Unicode contains method...'),
# Each row is (item, container, expected result of `item in container`).
for item, container, expected in (
    ('a', u'abdb', 1),
    ('a', u'bdab', 1),
    ('a', u'bdaba', 1),
    ('a', u'bdba', 1),
    ('a', u'bdba', 1),
    (u'a', u'bdba', 1),
    (u'a', u'bdb', 0),
    (u'a', 'bdb', 0),
    (u'a', 'bdba', 1),
    (u'a', ('a',1,None), 1),
    (u'a', (1,None,'a'), 1),
    (u'a', (1,None,u'a'), 1),
    ('a', ('a',1,None), 1),
    ('a', (1,None,'a'), 1),
    ('a', (1,None,u'a'), 1),
    ('a', ('x',1,u'y'), 0),
    ('a', ('x',1,None), 0),
):
    assert (item in container) == expected
print('done.')
241
# Formatting:
print('Testing Unicode formatting strings...'),
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
# %5.2f right-justifies in a field five characters wide, so the
# four-character results " 3.00", " 3.50" and " 3.57" carry one pad space
# in addition to the separator space after the comma; "1003.57" is already
# wider than the field and gets none.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
assert u"%c" % (u"abc",) == u'a'
assert u"%c" % ("abc",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
# Non-ASCII mapping key (a-umlaut, U+00E4), supplied in the dictionary as
# its UTF-8 encoding.  It is spelled as an escape so the test does not
# depend on the encoding this source file is stored in.
assert u"%(x)s, %(\xe4)s" % {'x':u"abc", u'\xe4'.encode('utf-8'):"def"} == u'abc, def'
# formatting jobs delegated from the string implementation:
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
assert '...%s...' % u"abc" == u'...abc...'
print('done.')
267
# Test builtin codecs
print('Testing builtin codecs...'),

assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'

# Encoding a non-ASCII character to ASCII must fail both with the default
# error handling and with an explicit 'strict'.  Each call gets its own
# try block: when both share one, the first exception skips the second
# call, which is then never exercised.
for codec_args in (('ascii',), ('ascii', 'strict')):
    try:
        u'Andr\202 x'.encode(*codec_args)
    except ValueError:
        pass
    else:
        raise AssertionError("u'Andr\202'.encode('ascii') failed to raise an exception")
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"

# Same check for decoding a non-ASCII byte as ASCII.
for codec_args in (('ascii',), ('ascii', 'strict')):
    try:
        unicode('Andr\202 x', *codec_args)
    except ValueError:
        pass
    else:
        raise AssertionError("unicode('Andr\202') failed to raise an exception")
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'

assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'
302
# Encode-then-decode round trips.  The first 1024 code points must survive
# every codec listed here; a failure is a hard assertion error.
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    encoded = u.encode(encoding)
    assert unicode(encoded, encoding) == u

# latin-1 is checked over the first 256 code points and ascii over the
# first 128; failures here are reported on stdout instead of aborting.
for limit, encoding in ((256, 'latin-1'), (128, 'ascii')):
    u = u''.join(map(unichr, range(limit)))
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print('*** codec "%s" failed round-trip' % encoding)
    except ValueError:
        why = sys.exc_info()[1]
        print('*** codec for "%s" failed: %s' % (encoding, why))

print('done.')
331
print('Testing standard mapping codecs...'),

# Codecs checked on both halves of the byte range.
both_halves = (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',
)

# Codecs checked on 0-127 only.
low_half_only = (
    ### These have undefined mappings in 128-255:
    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

    'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

    ### These fail the round-trip in 128-255:
    'cp1006', 'cp875', 'iso8859_8',
)

### 'cp424' has undefined mappings and is left out of both runs.

for label, codes, encodings in (
    ('0-127...', range(128), both_halves + low_half_only),
    ('128-255...', range(128,256), both_halves),
):
    print(label),
    s = ''.join(map(chr, codes))
    for encoding in encodings:
        try:
            assert unicode(s,encoding).encode(encoding) == s
        except AssertionError:
            print('*** codec "%s" failed round-trip' % encoding)
        except ValueError:
            why = sys.exc_info()[1]
            print('*** codec for "%s" failed: %s' % (encoding, why))

print('done.')
394
print('Testing Unicode string concatenation...'),
# Adjacent string literals are joined by the compiler; any mix of str and
# unicode literals must produce the unicode result.
for joined, expected in (
    (u"abc" u"def", u"abcdef"),
    ("abc" u"def", u"abcdef"),
    (u"abc" "def", u"abcdef"),
    (u"abc" u"def" "ghi", u"abcdefghi"),
    ("abc" "def" u"ghi", u"abcdefghi"),
):
    assert joined == expected
print('done.')