blob: 5c0a06328c13d1cda4a94078a7a9eb73894be6d0 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
19 exc = sys.exc_info()
20 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
27 print ' value == %s: %s' % (exc[:2])
28 else:
29 if verbose:
30 print 'yes'
31
# capitalize(): only the very first character is considered, so a string
# starting with a space comes back unchanged.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

# title(): every run of letters gets an upper-case first letter and
# lower-case remainder; punctuation separates words.
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find()/rfind(): expected value is the index of the match, -1 if none.
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
52
# Disabled: these calls are guarded by "if 0" and never run.
if 0:
    # 256-character table that is the identity except 'abc' -> 'xyz'.
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): no separator (or None) splits on whitespace; the optional last
# argument caps the number of splits performed.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
# join now works with any sequence type
class Sequence:
    """Minimal user-defined sequence wrapping the string 'wxyz'.

    Only __len__ and __getitem__ are provided, which is enough for code
    that walks a sequence by index.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Indexing past the end raises IndexError from the wrapped object,
        # which is what terminates index-based iteration.
        return self.seq[i]
76
# join(): the separator is the object the method is called on; the
# argument may be a list, a tuple or a user-defined sequence.
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
# A non-sequence argument is expected to raise TypeError.
test('join', u' ', TypeError, 7)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
# Joining a sequence that contains non-string items must raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected value by hand: ten blocks of ten 'x', colon-separated.
result = u''
for i in range(10):
    if i > 0:
        result = result + u':'
    result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
94
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: never runs.  NOTE(review): this relies on `transtable` from the
# other disabled block and on a `string` module that is not imported here.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(): the optional last argument limits the number of replacements.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
118
# startswith(prefix[, start[, end]]): expected value is 1 or 0.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(suffix[, start[, end]]): expected value is 1 or 0.
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
147
# expandtabs(): the column counter restarts after '\r' and '\n', so with
# the default tab size of 8 'ab\t' pads with 6 spaces and 'g\t' with 7;
# with a tab size of 4 the paddings are 2 and 3 spaces respectively.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)

# Disabled: never runs.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
# ljust/rjust/center pad with spaces up to the requested width; when the
# padding is odd, center() puts the extra space on the right in these cases.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# A width smaller than the string leaves it untouched.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
180
# Case predicates: expected value is 1 or 0.  u'\u1FFc' is used as a
# character that is neither lower nor upper but counts as titlecase.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

# splitlines(): '\n', '\r' and '\r\n' each end a line; the optional
# argument limits how many lines are split off.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc\012def\015\012ghi\012\015'], 1)
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def\015\012ghi\012\015'], 2)

# translate(): the table maps ordinals to None (delete), an ordinal, or a
# Unicode string.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
221
Guido van Rossumd4d26842000-03-13 23:21:48 +0000222# Contains:
223print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000224assert ('a' in u'abdb') == 1
225assert ('a' in u'bdab') == 1
226assert ('a' in u'bdaba') == 1
227assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000228assert ('a' in u'bdba') == 1
229assert (u'a' in u'bdba') == 1
230assert (u'a' in u'bdb') == 0
231assert (u'a' in 'bdb') == 0
232assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000233assert (u'a' in ('a',1,None)) == 1
234assert (u'a' in (1,None,'a')) == 1
235assert (u'a' in (1,None,u'a')) == 1
236assert ('a' in ('a',1,None)) == 1
237assert ('a' in (1,None,'a')) == 1
238assert ('a' in (1,None,u'a')) == 1
239assert ('a' in ('x',1,u'y')) == 0
240assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000241print 'done.'
242
Guido van Rossuma831cac2000-03-10 23:23:21 +0000243# Formatting:
244print 'Testing Unicode formatting strings...',
245assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
246assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
247assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
248assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
249assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
250assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
251assert u"%c" % (u"abc",) == u'a'
252assert u"%c" % ("abc",) == u'a'
253assert u"%c" % (34,) == u'"'
254assert u"%c" % (36,) == u'$'
255assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
256assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
257assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
258print 'done.'
259
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000260# Test builtin codecs
261print 'Testing builtin codecs...',
262
263assert unicode('hello','ascii') == u'hello'
264assert unicode('hello','utf-8') == u'hello'
265assert unicode('hello','utf8') == u'hello'
266assert unicode('hello','latin-1') == u'hello'
267
268assert u'hello'.encode('ascii') == 'hello'
269assert u'hello'.encode('utf-8') == 'hello'
270assert u'hello'.encode('utf8') == 'hello'
271assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
272assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
273assert u'hello'.encode('latin-1') == 'hello'
274
275u = u''.join(map(unichr, range(1024)))
276for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
277 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
278 assert unicode(u.encode(encoding),encoding) == u
279
280u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000281for encoding in (
282 'latin-1',
283 ):
284 try:
285 assert unicode(u.encode(encoding),encoding) == u
286 except AssertionError:
287 print '*** codec "%s" failed round-trip' % encoding
288 except ValueError,why:
289 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000290
291u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000292for encoding in (
293 'ascii',
294 ):
295 try:
296 assert unicode(u.encode(encoding),encoding) == u
297 except AssertionError:
298 print '*** codec "%s" failed round-trip' % encoding
299 except ValueError,why:
300 print '*** codec for "%s" failed: %s' % (encoding, why)
301
302print 'done.'
303
304print 'Testing standard mapping codecs...',
305
306print '0-127...',
307s = ''.join(map(chr, range(128)))
308for encoding in (
309 'cp037', 'cp1026',
310 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
311 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
312 'cp863', 'cp865', 'cp866',
313 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
314 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
315 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
316 'mac_cyrillic', 'mac_latin2',
317
318 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
319 'cp1256', 'cp1257', 'cp1258',
320 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
321
322 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
323 'cp1006', 'cp875', 'iso8859_8',
324
325 ### These have undefined mappings:
326 #'cp424',
327
328 ):
329 try:
330 assert unicode(s,encoding).encode(encoding) == s
331 except AssertionError:
332 print '*** codec "%s" failed round-trip' % encoding
333 except ValueError,why:
334 print '*** codec for "%s" failed: %s' % (encoding, why)
335
336print '128-255...',
337s = ''.join(map(chr, range(128,256)))
338for encoding in (
339 'cp037', 'cp1026',
340 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
341 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
342 'cp863', 'cp865', 'cp866',
343 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
344 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
345 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
346 'mac_cyrillic', 'mac_latin2',
347
348 ### These have undefined mappings:
349 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
350 #'cp1256', 'cp1257', 'cp1258',
351 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
352 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
353
354 ### These fail the round-trip:
355 #'cp1006', 'cp875', 'iso8859_8',
356
357 ):
358 try:
359 assert unicode(s,encoding).encode(encoding) == s
360 except AssertionError:
361 print '*** codec "%s" failed round-trip' % encoding
362 except ValueError,why:
363 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000364
365print 'done.'