blob: 2a94586cb9cecece916b6533cb615431128f5297 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
Guido van Rossum76afbd92002-08-20 17:29:29 +0000110test('rfind', 'abcdefghiabc', 9, u'abc')
111test('rfind', 'abcdefghiabc', 12, u'')
112test('rfind', u'abcdefghiabc', 12, '')
113test('rfind', u'abcdefghiabc', 12, u'')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000114
115test('lower', u'HeLLo', u'hello')
116test('lower', u'hello', u'hello')
117
118test('upper', u'HeLLo', u'HELLO')
119test('upper', u'HELLO', u'HELLO')
120
121if 0:
122 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
123
124 test('maketrans', u'abc', transtable, u'xyz')
125 test('maketrans', u'abc', ValueError, u'xyzq')
126
127test('split', u'this is the split function',
128 [u'this', u'is', u'the', u'split', u'function'])
129test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
130test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
131test('split', u'a b c d', [u'a', u'b c d'], None, 1)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
134test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
135test('split', u'a b c d', [u'a b c d'], None, 0)
136test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
137test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000138test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
139test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
140test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
141test('split', u'endcase test', [u'endcase ', u''], u'test')
142test('split', u'endcase test', [u'endcase ', u''], 'test')
143test('split', 'endcase test', [u'endcase ', u''], u'test')
144
Guido van Rossuma831cac2000-03-10 23:23:21 +0000145
146# join now works with any sequence type
class Sequence:
    """Wrapper exposing only len() and item access of the wrapped *seq*."""

    def __init__(self, seq):
        # keep a reference to the wrapped object; it is not copied
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
151
152test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000154test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000155test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000156test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000157test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
158test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
159test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
160test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
161test('join', ' ', u'w x y z', Sequence(u'wxyz'))
162test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000163
164result = u''
165for i in range(10):
166 if i > 0:
167 result = result + u':'
168 result = result + u'x'*10
169test('join', u':', result, [u'x' * 10] * 10)
170test('join', u':', result, (u'x' * 10,) * 10)
171
172test('strip', u' hello ', u'hello')
173test('lstrip', u' hello ', u'hello ')
174test('rstrip', u' hello ', u' hello')
175test('strip', u'hello', u'hello')
176
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000177# strip/lstrip/rstrip with None arg
178test('strip', u' hello ', u'hello', None)
179test('lstrip', u' hello ', u'hello ', None)
180test('rstrip', u' hello ', u' hello', None)
181test('strip', u'hello', u'hello', None)
182
183# strip/lstrip/rstrip with unicode arg
184test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
185test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
186test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
187test('strip', u'hello', u'hello', u'xyz')
188
189# strip/lstrip/rstrip with str arg
190test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
191test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
192test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
193test('strip', u'hello', u'hello', 'xyz')
194
Guido van Rossuma831cac2000-03-10 23:23:21 +0000195test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
196
197if 0:
198 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
199
200 table = string.maketrans('a', u'A')
201 test('translate', u'abc', u'Abc', table)
202 test('translate', u'xyz', u'xyz', table)
203
204test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000205test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000206test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
207test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
208test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
209test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
210test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
211test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
212test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000213test('replace', u'abc', u'-a-b-c-', u'', u'-')
214test('replace', u'abc', u'-a-b-c', u'', u'-', 3)
215test('replace', u'abc', u'abc', u'', u'-', 0)
Guido van Rossum2023c9b2002-08-23 18:50:21 +0000216test('replace', u'abc', u'abc', u'ab', u'--', 0)
217test('replace', u'abc', u'abc', u'xy', u'--')
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000218test('replace', u'', u'', u'', u'')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000219
Guido van Rossum77f6a652002-04-03 22:41:51 +0000220test('startswith', u'hello', True, u'he')
221test('startswith', u'hello', True, u'hello')
222test('startswith', u'hello', False, u'hello world')
223test('startswith', u'hello', True, u'')
224test('startswith', u'hello', False, u'ello')
225test('startswith', u'hello', True, u'ello', 1)
226test('startswith', u'hello', True, u'o', 4)
227test('startswith', u'hello', False, u'o', 5)
228test('startswith', u'hello', True, u'', 5)
229test('startswith', u'hello', False, u'lo', 6)
230test('startswith', u'helloworld', True, u'lowo', 3)
231test('startswith', u'helloworld', True, u'lowo', 3, 7)
232test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000233
Guido van Rossum77f6a652002-04-03 22:41:51 +0000234test('endswith', u'hello', True, u'lo')
235test('endswith', u'hello', False, u'he')
236test('endswith', u'hello', True, u'')
237test('endswith', u'hello', False, u'hello world')
238test('endswith', u'helloworld', False, u'worl')
239test('endswith', u'helloworld', True, u'worl', 3, 9)
240test('endswith', u'helloworld', True, u'world', 3, 12)
241test('endswith', u'helloworld', True, u'lowo', 1, 7)
242test('endswith', u'helloworld', True, u'lowo', 2, 7)
243test('endswith', u'helloworld', True, u'lowo', 3, 7)
244test('endswith', u'helloworld', False, u'lowo', 4, 7)
245test('endswith', u'helloworld', False, u'lowo', 3, 8)
246test('endswith', u'ab', False, u'ab', 0, 1)
247test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossum76afbd92002-08-20 17:29:29 +0000248test('endswith', 'helloworld', True, u'd')
249test('endswith', 'helloworld', False, u'l')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
# The gaps in the expected values are runs of literal spaces whose exact
# length matters.  The column restarts after '\r' and '\n', so with the
# default tab size of 8 "ab\t" pads with 6 spaces and "g\t" with 7; with
# a tab size of 4 they pad with 2 and 3 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
# no tabs at all: the string must come back unchanged
test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000256
257if 0:
258 test('capwords', u'abc def ghi', u'Abc Def Ghi')
259 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
260 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
261
Walter Dörwald068325e2002-04-15 13:36:47 +0000262test('zfill', u'123', u'123', 2)
263test('zfill', u'123', u'123', 3)
264test('zfill', u'123', u'0123', 4)
265test('zfill', u'+123', u'+123', 3)
266test('zfill', u'+123', u'+123', 4)
267test('zfill', u'+123', u'+0123', 5)
268test('zfill', u'-123', u'-123', 3)
269test('zfill', u'-123', u'-123', 4)
270test('zfill', u'-123', u'-0123', 5)
271test('zfill', u'', u'000', 3)
272test('zfill', u'34', u'34', 1)
273test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000274
Guido van Rossuma831cac2000-03-10 23:23:21 +0000275# Comparisons:
276print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000277verify(u'abc' == 'abc')
278verify('abc' == u'abc')
279verify(u'abc' == u'abc')
280verify(u'abcd' > 'abc')
281verify('abcd' > u'abc')
282verify(u'abcd' > u'abc')
283verify(u'abc' < 'abcd')
284verify('abc' < u'abcd')
285verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000286print 'done.'
287
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288if 0:
289 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 print 'Testing UTF-16 code point order comparisons...',
292 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000293 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000294 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000295 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000296
Marc-André Lemburge5034372000-08-08 08:04:29 +0000297 # Non surrogate above surrogate value, fixup required
298 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000299 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000300
Marc-André Lemburge5034372000-08-08 08:04:29 +0000301 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000302 s2 = u'\ud800\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud900\udc01'
305 test_lecmp(s, s2)
306 s2 = u'\uda00\udc01'
307 test_lecmp(s, s2)
308 s2 = u'\udb00\udc01'
309 test_lecmp(s, s2)
310 s2 = u'\ud800\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud900\udd01'
313 test_lecmp(s, s2)
314 s2 = u'\uda00\udd01'
315 test_lecmp(s, s2)
316 s2 = u'\udb00\udd01'
317 test_lecmp(s, s2)
318 s2 = u'\ud800\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud900\ude01'
321 test_lecmp(s, s2)
322 s2 = u'\uda00\ude01'
323 test_lecmp(s, s2)
324 s2 = u'\udb00\ude01'
325 test_lecmp(s, s2)
326 s2 = u'\ud800\udfff'
327 test_lecmp(s, s2)
328 s2 = u'\ud900\udfff'
329 test_lecmp(s, s2)
330 s2 = u'\uda00\udfff'
331 test_lecmp(s, s2)
332 s2 = u'\udb00\udfff'
333 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334
335 test_fixup(u'\ue000')
336 test_fixup(u'\uff61')
337
338 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000339 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000340 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000341
# Expected values carry the full padding: width 10 needs 7 fill characters
# and width 6 needs 3.  center() puts the odd extra space on the right in
# both cases here.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# a width smaller than the string leaves it unchanged
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
351
Guido van Rossum77f6a652002-04-03 22:41:51 +0000352test('islower', u'a', True)
353test('islower', u'A', False)
354test('islower', u'\n', False)
355test('islower', u'\u1FFc', False)
356test('islower', u'abc', True)
357test('islower', u'aBc', False)
358test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000359
Guido van Rossum77f6a652002-04-03 22:41:51 +0000360test('isupper', u'a', False)
361test('isupper', u'A', True)
362test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000363if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000364 test('isupper', u'\u1FFc', False)
365test('isupper', u'ABC', True)
366test('isupper', u'AbC', False)
367test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000368
Guido van Rossum77f6a652002-04-03 22:41:51 +0000369test('istitle', u'a', False)
370test('istitle', u'A', True)
371test('istitle', u'\n', False)
372test('istitle', u'\u1FFc', True)
373test('istitle', u'A Titlecased Line', True)
374test('istitle', u'A\nTitlecased Line', True)
375test('istitle', u'A Titlecased, Line', True)
376test('istitle', u'Greek \u1FFcitlecases ...', True)
377test('istitle', u'Not a capitalized String', False)
378test('istitle', u'Not\ta Titlecase String', False)
379test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000380
Guido van Rossum77f6a652002-04-03 22:41:51 +0000381test('isalpha', u'a', True)
382test('isalpha', u'A', True)
383test('isalpha', u'\n', False)
384test('isalpha', u'\u1FFc', True)
385test('isalpha', u'abc', True)
386test('isalpha', u'aBc123', False)
387test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000388
Guido van Rossum77f6a652002-04-03 22:41:51 +0000389test('isalnum', u'a', True)
390test('isalnum', u'A', True)
391test('isalnum', u'\n', False)
392test('isalnum', u'123abc456', True)
393test('isalnum', u'a1b3c', True)
394test('isalnum', u'aBc000 ', False)
395test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000396
Guido van Rossuma831cac2000-03-10 23:23:21 +0000397test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
398test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
399test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
400test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
401test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
402test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000403test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000404
405test('translate', u"abababc", u'bbbc', {ord('a'):None})
406test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
407test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
Walter Dörwald5c1ee172002-09-04 20:31:32 +0000408test('translate', u"abababc", u'<i><i><i>c', {ord('a'):None, ord('b'):u'<i>'})
409test('translate', u"abababc", u'c', {ord('a'):None, ord('b'):u''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000410
Guido van Rossumd4d26842000-03-13 23:21:48 +0000411# Contains:
412print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000413vereq(('a' in u'abdb'), True)
414vereq(('a' in u'bdab'), True)
415vereq(('a' in u'bdaba'), True)
416vereq(('a' in u'bdba'), True)
417vereq(('a' in u'bdba'), True)
418vereq((u'a' in u'bdba'), True)
419vereq((u'a' in u'bdb'), False)
420vereq((u'a' in 'bdb'), False)
421vereq((u'a' in 'bdba'), True)
422vereq((u'a' in ('a',1,None)), True)
423vereq((u'a' in (1,None,'a')), True)
424vereq((u'a' in (1,None,u'a')), True)
425vereq(('a' in ('a',1,None)), True)
426vereq(('a' in (1,None,'a')), True)
427vereq(('a' in (1,None,u'a')), True)
428vereq(('a' in ('x',1,u'y')), False)
429vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000430vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000431vereq((u'ab' in u'abcd'), True)
432vereq(('ab' in u'abc'), True)
433vereq((u'ab' in 'abc'), True)
434vereq((u'ab' in (1,None,u'ab')), True)
435vereq((u'' in u'abc'), True)
436vereq(('' in u'abc'), True)
Marc-André Lemburg9cd87aa2002-10-23 09:02:46 +0000437try:
438 u'\xe2' in 'g\xe2teau'
439except UnicodeError:
440 pass
441else:
442 print '*** contains operator does not propagate UnicodeErrors'
Guido van Rossumd4d26842000-03-13 23:21:48 +0000443print 'done.'
444
Guido van Rossuma831cac2000-03-10 23:23:21 +0000445# Formatting:
446print 'Testing Unicode formatting strings...',
verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
# %5.2f pads to a minimum width of 5, so "3.00", "3.50" and "3.57" each get
# one leading space in addition to the space after the comma; "1003.57" is
# already wider than 5 and is not padded.
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
# %c accepts a one-character string (unicode or str) or an integer ordinal
verify(u"%c" % (u"a",) == u'a')
verify(u"%c" % ("a",) == u'a')
verify(u"%c" % (34,) == u'"')
verify(u"%c" % (36,) == u'$')
verify(u"%d".__mod__(10) == u'10')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000458if sys.platform[:4] != 'java':
459 value = u"%r, %r" % (u"abc", "abc")
460 if value != u"u'abc', 'abc'":
461 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000462
Marc-André Lemburg36619082001-01-17 19:11:13 +0000463verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000464try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000465 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000466except KeyError:
467 print '*** formatting failed for "%s"' % "u'abc, def'"
468else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000469 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000470
Martin v. Löwis766e3002002-09-14 09:10:04 +0000471for ordinal in (-100, 0x200000):
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000472 try:
473 u"%c" % ordinal
474 except ValueError:
475 pass
476 else:
Martin v. Löwis766e3002002-09-14 09:10:04 +0000477 print '*** formatting u"%%c" %% %i should give a ValueError' % ordinal
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000478
Marc-André Lemburg79f57832002-12-29 19:44:06 +0000479# float formatting
480for prec in range(100):
481 formatstring = u'%%.%if' % prec
482 value = 0.01
483 for x in range(60):
484 value = value * 3.141592655 / 3.0 * 10.0
485 #print 'Overflow check for x=%i and prec=%i:' % \
486 # (x, prec),
487 try:
488 result = formatstring % value
489 except OverflowError:
490 # The formatfloat() code in stringobject.c and
491 # unicodeobject.c uses a 120 byte buffer and switches from
492 # 'f' formatting to 'g' at precision 50, so we expect
493 # OverflowErrors for the ranges x < 50 and prec >= 67.
494 if x >= 50 or \
495 prec < 67:
496 print '*** unexpected OverflowError for x=%i and prec=%i' % (x, prec)
497 else:
498 #print 'OverflowError'
499 pass
500 else:
501 #print result
502 pass
503
Guido van Rossum97064862000-04-10 13:52:48 +0000504# formatting jobs delegated from the string implementation:
# A str format string with a unicode argument yields a unicode result.
verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
verify('...%s...' % u"abc" == u'...abc...')
# '*' takes the field width (and precision) from the argument tuple.  The
# expected values are padded to the full width of 5; a negative width
# left-justifies.
verify('%*s' % (5,u'abc',) == u'  abc')
verify('%*s' % (-5,u'abc',) == u'abc  ')
verify('%*.*s' % (5,2,u'abc',) == u'   ab')
verify('%*.*s' % (5,3,u'abc',) == u'  abc')
verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10   abc')
verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103   abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000520print 'done.'
521
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000522print 'Testing builtin unicode()...',
523
524# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
525
526verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
527
class UnicodeSubclass(unicode):
    """Plain subclass of unicode that adds and overrides nothing."""
530
531verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
532 == u'unicode subclass becomes unicode')
533
534verify(unicode('strings are converted to unicode')
535 == u'strings are converted to unicode')
536
class UnicodeCompat:
    """Object whose __unicode__() hands back the value it was built with."""

    def __init__(self, x):
        # stored as-is; returned unchanged by __unicode__
        self.x = x

    def __unicode__(self):
        return self.x
542
543verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
544 == u'__unicode__ compatible objects are recognized')
545
class StringCompat:
    """Object whose __str__() hands back the value it was built with."""

    def __init__(self, x):
        # stored as-is; returned unchanged by __str__
        self.x = x

    def __str__(self):
        return self.x
551
552verify(unicode(StringCompat('__str__ compatible objects are recognized'))
553 == u'__str__ compatible objects are recognized')
554
555# unicode(obj) is compatible to str():
556
557o = StringCompat('unicode(obj) is compatible to str()')
558verify(unicode(o) == u'unicode(obj) is compatible to str()')
559verify(str(o) == 'unicode(obj) is compatible to str()')
560
561for obj in (123, 123.45, 123L):
562 verify(unicode(obj) == unicode(str(obj)))
563
564# unicode(obj, encoding, error) tests (this maps to
565# PyUnicode_FromEncodedObject() at C level)
566
Finn Bock2b29cb22001-12-10 20:57:34 +0000567if not sys.platform.startswith('java'):
568 try:
569 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
570 except TypeError:
571 pass
572 else:
573 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000574
575verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
576 == u'strings are decoded to unicode')
577
Finn Bock2b29cb22001-12-10 20:57:34 +0000578if not sys.platform.startswith('java'):
579 verify(unicode(buffer('character buffers are decoded to unicode'),
580 'utf-8', 'strict')
581 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000582
583print 'done.'
584
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000585# Test builtin codecs
586print 'Testing builtin codecs...',
587
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000588# UTF-7 specific encoding tests:
589utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
590 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
591 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
592 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
593 (u'+', '+-'),
594 (u'+-', '+--'),
595 (u'+?', '+-?'),
596 (u'\?', '+AFw?'),
597 (u'+?', '+-?'),
598 (ur'\\?', '+AFwAXA?'),
599 (ur'\\\?', '+AFwAXABc?'),
600 (ur'++--', '+-+---')]
601
602for x,y in utfTests:
603 verify( x.encode('utf-7') == y )
604
Tim Peters527e64f2001-10-04 05:36:56 +0000605try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000606 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
607except UnicodeError:
608 pass
609else:
610 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
611
612verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
613
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000614# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000615verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000616verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
617verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
618verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
619verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
620verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
621verify((u'\ud800\udc02'*1000).encode('utf-8') ==
622 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000623verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
624 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
625 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
626 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
627 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000628 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000629 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
630 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
631 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
632 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
633 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
634 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
635 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
636 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
637 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
638 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000639
# UTF-8 specific decoding tests: byte string -> expected Unicode string
for raw, expected in (
    ('\xf0\xa3\x91\x96', u'\U00023456'),
    ('\xf0\x90\x80\x82', u'\U00010002'),
    ('\xe2\x82\xac', u'\u20ac'),
    ):
    verify(unicode(raw, 'utf-8') == expected)

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

# Plain ASCII input must decode to the same text under each of these
# codec names.
for codec in ('ascii', 'utf-8', 'utf8', 'latin-1'):
    verify(unicode('hello', codec) == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000653
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000654# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000655try:
656 u'Andr\202 x'.encode('ascii')
657 u'Andr\202 x'.encode('ascii','strict')
658except ValueError:
659 pass
660else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000661 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000662verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
663verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000664
665try:
666 unicode('Andr\202 x','ascii')
667 unicode('Andr\202 x','ascii','strict')
668except ValueError:
669 pass
670else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000671 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000672verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
673verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000674
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000675verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
676try:
677 "\\".decode("unicode-escape")
678except ValueError:
679 pass
680else:
681 raise TestFailed, '"\\".decode("unicode-escape") should fail'
682
# Encoding plain ASCII text: codec name -> expected byte string
for codec, expected in (
    ('ascii', 'hello'),
    ('utf-7', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
    ):
    verify(u'hello'.encode(codec) == expected)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000690
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000691# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000692u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000693for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000694 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000695 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000696
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000697# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000698u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000699for encoding in (
700 'latin-1',
701 ):
702 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000703 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000704 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000705 print '*** codec "%s" failed round-trip' % encoding
706 except ValueError,why:
707 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000708
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000709# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000710u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000711for encoding in (
712 'ascii',
713 ):
714 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000715 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000716 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000717 print '*** codec "%s" failed round-trip' % encoding
718 except ValueError,why:
719 print '*** codec for "%s" failed: %s' % (encoding, why)
720
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000721# Roundtrip safety for non-BMP (just a few chars)
722u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
723for encoding in ('utf-8',
724 'utf-16', 'utf-16-le', 'utf-16-be',
725 #'raw_unicode_escape',
726 'unicode_escape', 'unicode_internal'):
727 verify(unicode(u.encode(encoding),encoding) == u)
728
729# UTF-8 must be roundtrip safe for all UCS-2 code points
Martin v. Löwis1ce4ae32002-09-14 09:19:53 +0000730# This excludes surrogates: in the full range, there would be
731# a surrogate pair (\udbff\udc00), which gets converted back
732# to a non-BMP character (\U0010fc00)
733u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000734for encoding in ('utf-8',):
735 verify(unicode(u.encode(encoding),encoding) == u)
736
Guido van Rossum9e896b32000-04-05 20:11:21 +0000737print 'done.'
738
739print 'Testing standard mapping codecs...',
740
741print '0-127...',
742s = ''.join(map(chr, range(128)))
743for encoding in (
744 'cp037', 'cp1026',
745 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
746 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000747 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000748 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
749 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
750 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
751 'mac_cyrillic', 'mac_latin2',
752
753 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
754 'cp1256', 'cp1257', 'cp1258',
755 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
756
757 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000758 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000759
Guido van Rossum9e896b32000-04-05 20:11:21 +0000760 ### These have undefined mappings:
761 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000762
Tim Peters2f228e72001-05-13 00:19:31 +0000763 ### These fail the round-trip:
764 #'cp875'
765
Guido van Rossum9e896b32000-04-05 20:11:21 +0000766 ):
767 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000768 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000769 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000770 print '*** codec "%s" failed round-trip' % encoding
771 except ValueError,why:
772 print '*** codec for "%s" failed: %s' % (encoding, why)
773
774print '128-255...',
775s = ''.join(map(chr, range(128,256)))
776for encoding in (
777 'cp037', 'cp1026',
778 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
779 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000780 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000781 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000782 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000783 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000784 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000785
Guido van Rossum9e896b32000-04-05 20:11:21 +0000786 ### These have undefined mappings:
787 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
788 #'cp1256', 'cp1257', 'cp1258',
789 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000790 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000791 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000792
Guido van Rossum9e896b32000-04-05 20:11:21 +0000793 ### These fail the round-trip:
794 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000795
Guido van Rossum9e896b32000-04-05 20:11:21 +0000796 ):
797 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000798 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000799 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000800 print '*** codec "%s" failed round-trip' % encoding
801 except ValueError,why:
802 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000803
804print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000805
806print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000807verify((u"abc" u"def") == u"abcdef")
808verify(("abc" u"def") == u"abcdef")
809verify((u"abc" "def") == u"abcdef")
810verify((u"abc" u"def" "ghi") == u"abcdefghi")
811verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000812print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000813
# Exercise the print statement with unicode arguments: a single unicode
# string, unicode mixed with plain strings in either order, strings that
# end in a newline, and statements ending in a trailing comma (which
# suppresses print's own newline).
# NOTE(review): nothing here is asserted; presumably the emitted text is
# compared against a reference output by the test driver -- confirm before
# reordering or removing any of these statements.
print 'Testing Unicode printing...',
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000825
826def test_exception(lhs, rhs, msg):
827 try:
828 lhs in rhs
829 except TypeError:
830 pass
831 else:
832 raise TestFailed, msg
833
def run_contains_tests():
    """Check the ``in`` operator across str/unicode operand combinations.

    Every (needle, haystack, expected) case is checked three ways:
    unicode in str, str in unicode, and unicode in unicode.
    """
    cases = (
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    )
    for needle, haystack, expected in cases:
        # All case strings are pure ASCII, so decoding them as ASCII gives
        # the unicode counterpart of each operand.
        uneedle = unicode(needle, 'ascii')
        uhaystack = unicode(haystack, 'ascii')
        vereq(uneedle in haystack, expected)
        vereq(needle in uhaystack, expected)
        vereq(uneedle in uhaystack, expected)

run_contains_tests()