blob: 3d15f22a4efefd936be6c67504fccf01d5fb22a1 [file] [log] [blame]
""" Test script for the Unicode implementation.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
9from test_support import verbose
10import sys
11
12def test(method, input, output, *args):
13 if verbose:
14 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
15 try:
16 f = getattr(input, method)
17 value = apply(f, args)
18 except:
19 value = sys.exc_type
20 exc = sys.exc_info()
21 else:
22 exc = None
23 if value != output:
24 if verbose:
25 print 'no'
26 print '*',f, `input`, `output`, `value`
27 if exc:
28 print ' value == %s: %s' % (exc[:2])
29 else:
30 if verbose:
31 print 'yes'
32
# Case conversion and substring search.
# Every call has the form test(method, object, expected, *method_args).
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find/rfind: expected value is the index of the match, -1 when absent.
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
53
# Disabled: never executed because of the constant-false guard.
# transtable is a 256-character 8-bit table in which 'abc' maps to 'xyz'.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
59
# split(): default whitespace splitting, an explicit separator, and the
# optional maximum number of splits (None selects whitespace splitting).
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
71
# join now works with any sequence type
class Sequence:
    """Minimal user-defined sequence wrapping the string 'wxyz'.

    Only __len__ and __getitem__ are provided; both delegate to
    ``self.seq``, so a subclass can swap in different contents by
    assigning ``self.seq`` in its own __init__.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
77
# join(): the separator is the object under test; the argument may be a
# list, a tuple or a user-defined sequence.  A non-sequence argument is
# expected to raise TypeError.
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
test('join', u' ', TypeError, 7)
82
83class BadSeq(Sequence):
84 def __init__(self): self.seq = [7, u'hello', 123L]
85
# Joining a sequence that contains non-string items must raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected value with plain concatenation rather than join(),
# so that the check below does not depend on the method being tested.
result = u''
for i in range(10):
    if i > 0:
        result = result + u':'
    result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
95
# Whitespace stripping on both sides, left only, right only.
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
102
# Disabled: never executed because of the constant-false guard.
# NOTE(review): enabling this needs ``import string`` (not imported in the
# visible part of this file) and the ``transtable`` defined in the other
# disabled block above.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
109
# replace(old, new[, count]): count limits the number of replacements;
# 0 replaces nothing and a count above the number of matches replaces all.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
119
# startswith/endswith: expected value is 1 (match) or 0 (no match).
# Trailing integers are the optional start and end offsets.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
148
# expandtabs([tabsize]): each tab is padded with spaces up to the next
# multiple of tabsize (8 when omitted); the column count restarts after
# '\r' and '\n'.  The expected strings spell the padding out in full:
# 'ab' + 6 spaces and 'g' + 7 spaces for tabsize 8, 2 and 3 for tabsize 4.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
153
# Disabled: never executed because of the constant-false guard.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
158
159# Comparisons:
160print 'Testing Unicode comparisons...',
161assert u'abc' == 'abc'
162assert 'abc' == u'abc'
163assert u'abc' == u'abc'
164assert u'abcd' > 'abc'
165assert 'abcd' > u'abc'
166assert u'abcd' > u'abc'
167assert u'abc' < 'abcd'
168assert 'abc' < u'abcd'
169assert u'abc' < u'abcd'
170print 'done.'
171
# ljust/rjust/center(width): the result is padded with spaces to exactly
# `width` characters, and returned unchanged when width is smaller than
# the string.  For center() the odd leftover space goes on the right in
# both cases below (3 left + 4 right for width 10, 1 + 2 for width 6).
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
181
# Case predicates: expected value is 1 (true) or 0 (false).
# u'\u1FFc' is used as a character that is neither lower nor upper case
# but does count as titlecase, as the expected values below show.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)
209
# splitlines(): '\n', '\r' and '\r\n' each end a line.  In the last two
# cases the integer argument caps the number of splits and the remainder
# is returned unsplit, line endings included.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc\012def\015\012ghi\012\015'], 1)
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def\015\012ghi\012\015'], 2)
218
# translate(mapping): keys are character ordinals; a value of None deletes
# the character, and a value may be another ordinal or a Unicode string.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
222
Guido van Rossumd4d26842000-03-13 23:21:48 +0000223# Contains:
224print 'Testing Unicode contains method...',
225assert ('a' in 'abdb') == 1
226assert ('a' in 'bdab') == 1
227assert ('a' in 'bdaba') == 1
228assert ('a' in 'bdba') == 1
229assert ('a' in u'bdba') == 1
230assert (u'a' in u'bdba') == 1
231assert (u'a' in u'bdb') == 0
232assert (u'a' in 'bdb') == 0
233assert (u'a' in 'bdba') == 1
234print 'done.'
235
Guido van Rossuma831cac2000-03-10 23:23:21 +0000236# Formatting:
237print 'Testing Unicode formatting strings...',
238assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
239assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
240assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
241assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
242assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
243assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
244assert u"%c" % (u"abc",) == u'a'
245assert u"%c" % ("abc",) == u'a'
246assert u"%c" % (34,) == u'"'
247assert u"%c" % (36,) == u'$'
248assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
249assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
250assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
251print 'done.'
252
253# Test Unicode database APIs
254try:
255 import unicodedata
256except ImportError:
257 pass
258else:
259 print 'Testing unicodedata module...',
260
261 assert unicodedata.digit(u'A',None) is None
262 assert unicodedata.digit(u'9') == 9
263 assert unicodedata.digit(u'\u215b',None) is None
264 assert unicodedata.digit(u'\u2468') == 9
265
266 assert unicodedata.numeric(u'A',None) is None
267 assert unicodedata.numeric(u'9') == 9
268 assert unicodedata.numeric(u'\u215b') == 0.125
269 assert unicodedata.numeric(u'\u2468') == 9.0
270
271 assert unicodedata.decimal(u'A',None) is None
272 assert unicodedata.decimal(u'9') == 9
273 assert unicodedata.decimal(u'\u215b',None) is None
274 assert unicodedata.decimal(u'\u2468',None) is None
275
276 assert unicodedata.category(u'\uFFFE') == 'Cn'
277 assert unicodedata.category(u'a') == 'Ll'
278 assert unicodedata.category(u'A') == 'Lu'
279
280 assert unicodedata.bidirectional(u'\uFFFE') == ''
281 assert unicodedata.bidirectional(u' ') == 'WS'
282 assert unicodedata.bidirectional(u'A') == 'L'
283
284 assert unicodedata.decomposition(u'\uFFFE') == ''
285 assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
286
287 assert unicodedata.mirrored(u'\uFFFE') == 0
288 assert unicodedata.mirrored(u'a') == 0
289 assert unicodedata.mirrored(u'\u2201') == 1
290
291 assert unicodedata.combining(u'\uFFFE') == 0
292 assert unicodedata.combining(u'a') == 0
293 assert unicodedata.combining(u'\u20e1') == 230
294
295 print 'done.'
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000296
297# Test builtin codecs
298print 'Testing builtin codecs...',
299
300assert unicode('hello','ascii') == u'hello'
301assert unicode('hello','utf-8') == u'hello'
302assert unicode('hello','utf8') == u'hello'
303assert unicode('hello','latin-1') == u'hello'
304
305assert u'hello'.encode('ascii') == 'hello'
306assert u'hello'.encode('utf-8') == 'hello'
307assert u'hello'.encode('utf8') == 'hello'
308assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
309assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
310assert u'hello'.encode('latin-1') == 'hello'
311
312u = u''.join(map(unichr, range(1024)))
313for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
314 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
315 assert unicode(u.encode(encoding),encoding) == u
316
317u = u''.join(map(unichr, range(256)))
318for encoding in ('latin-1',):
319 assert unicode(u.encode(encoding),encoding) == u
320
321u = u''.join(map(unichr, range(128)))
322for encoding in ('ascii',):
323 assert unicode(u.encode(encoding),encoding) == u
324
325print 'done.'