blob: 90147eb5feea906b4716bd2024ce2b62b1529919 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
if not sys.platform.startswith('java'):
    # Basic sanity of repr(): every pair is (unicode value, expected repr).
    for _sample, _wanted in [
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
        ]:
        verify(repr(_sample) == _wanted)

    # Expected repr() of the string holding code points 0 through 255.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    testrepr = repr(u''.join([unichr(_code) for _code in range(256)]))
    verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
# Each entry is the argument tuple handed to test():
# (method name, input, expected result, method arguments...).
for _case in [
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'Hello ', u'Hello '),
    ('capitalize', u'hello ', u'Hello '),
    ('capitalize', u'aaaa', u'Aaaa'),
    ('capitalize', u'AaAa', u'Aaaa'),

    ('count', u'aaa', 3, u'a'),
    ('count', u'aaa', 0, u'b'),
    ('count', 'aaa', 3, u'a'),
    ('count', 'aaa', 0, u'b'),
    ('count', u'aaa', 3, 'a'),
    ('count', u'aaa', 0, 'b'),

    ('title', u' hello ', u' Hello '),
    ('title', u'Hello ', u'Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),
    ('rfind', 'abcdefghiabc', 9, u'abc'),
    ('rfind', 'abcdefghiabc', 12, u''),
    ('rfind', u'abcdefghiabc', 12, ''),
    ('rfind', u'abcdefghiabc', 12, u''),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ]:
    test(*_case)
120
if 0:
    # Disabled tests.  The table is the identity mapping over all 256 byte
    # values, except that 'abc' is replaced by 'xyz'.
    transtable = (''.join(map(chr, range(ord('a')))) + 'xyz' +
                  ''.join(map(chr, range(ord('d'), 256))))

    for _case in [
        ('maketrans', u'abc', transtable, u'xyz'),
        ('maketrans', u'abc', ValueError, u'xyzq'),
        ]:
        test(*_case)
126
# split(): default whitespace splitting, explicit separators, maxsplit
# limits, and mixed str/unicode operands.
for _case in [
    ('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    ('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    ('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    ('split', u'a b c d', [u'a', u'b c d'], None, 1),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    ('split', u'a b c d', [u'a b c d'], None, 0),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d ', [u'a', u'b', u'c', u'd']),
    ('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    ('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//'),
    ('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    ('split', u'endcase test', [u'endcase ', u''], u'test'),
    ('split', u'endcase test', [u'endcase ', u''], 'test'),
    ('split', 'endcase test', [u'endcase ', u''], u'test'),
    ]:
    test(*_case)
144
Guido van Rossuma831cac2000-03-10 23:23:21 +0000145
146# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper around `seq`.

    Only __len__ and __getitem__ are provided; both delegate to the
    wrapped object stored in self.seq.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
151
152test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000154test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000155test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000156test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000157test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
158test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
159test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
160test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
161test('join', ' ', u'w x y z', Sequence(u'wxyz'))
162test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000163
164result = u''
165for i in range(10):
166 if i > 0:
167 result = result + u':'
168 result = result + u'x'*10
169test('join', u':', result, [u'x' * 10] * 10)
170test('join', u':', result, (u'x' * 10,) * 10)
171
for _case in [
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    # strip/lstrip/rstrip with None arg
    ('strip', u' hello ', u'hello', None),
    ('lstrip', u' hello ', u'hello ', None),
    ('rstrip', u' hello ', u' hello', None),
    ('strip', u'hello', u'hello', None),

    # strip/lstrip/rstrip with unicode arg
    ('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz'),
    ('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz'),
    ('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz'),
    ('strip', u'hello', u'hello', u'xyz'),

    # strip/lstrip/rstrip with str arg
    ('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz'),
    ('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz'),
    ('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz'),
    ('strip', u'hello', u'hello', 'xyz'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ]:
    test(*_case)

if 0:
    # Disabled tests; they rely on the `transtable` built in the earlier
    # disabled block.
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for _word, _wanted in [(u'abc', u'Abc'), (u'xyz', u'xyz')]:
        test('translate', _word, _wanted, table)
203
# replace(), startswith() and endswith(); entries are argument tuples for
# test(): (method name, input, expected result, method arguments...).
for _case in [
    ('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1),
    ('replace', u'one!two!three!', u'onetwothree', '!', ''),
    ('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4),
    ('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2),
    ('replace', u'abc', u'-a-b-c-', u'', u'-'),
    ('replace', u'abc', u'-a-b-c', u'', u'-', 3),
    ('replace', u'abc', u'abc', u'', u'-', 0),
    ('replace', u'', u'', u'', u''),

    ('startswith', u'hello', True, u'he'),
    ('startswith', u'hello', True, u'hello'),
    ('startswith', u'hello', False, u'hello world'),
    ('startswith', u'hello', True, u''),
    ('startswith', u'hello', False, u'ello'),
    ('startswith', u'hello', True, u'ello', 1),
    ('startswith', u'hello', True, u'o', 4),
    ('startswith', u'hello', False, u'o', 5),
    ('startswith', u'hello', True, u'', 5),
    ('startswith', u'hello', False, u'lo', 6),
    ('startswith', u'helloworld', True, u'lowo', 3),
    ('startswith', u'helloworld', True, u'lowo', 3, 7),
    ('startswith', u'helloworld', False, u'lowo', 3, 6),

    ('endswith', u'hello', True, u'lo'),
    ('endswith', u'hello', False, u'he'),
    ('endswith', u'hello', True, u''),
    ('endswith', u'hello', False, u'hello world'),
    ('endswith', u'helloworld', False, u'worl'),
    ('endswith', u'helloworld', True, u'worl', 3, 9),
    ('endswith', u'helloworld', True, u'world', 3, 12),
    ('endswith', u'helloworld', True, u'lowo', 1, 7),
    ('endswith', u'helloworld', True, u'lowo', 2, 7),
    ('endswith', u'helloworld', True, u'lowo', 3, 7),
    ('endswith', u'helloworld', False, u'lowo', 4, 7),
    ('endswith', u'helloworld', False, u'lowo', 3, 8),
    ('endswith', u'ab', False, u'ab', 0, 1),
    ('endswith', u'ab', False, u'ab', 0, 0),
    ('endswith', 'helloworld', True, u'd'),
    ('endswith', 'helloworld', False, u'l'),
    ]:
    test(*_case)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000248
# expandtabs(): a tab pads with spaces up to the next multiple of the tab
# size, and the column counter restarts after '\r' and '\n'.  The padding
# is spelled as an explicit repeat count so that the expected values
# cannot be silently damaged by whitespace normalisation.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
# No tabs at all: the string must come back unchanged.
test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000254
if 0:
    # Disabled tests.
    for _case in [
        ('capwords', u'abc def ghi', u'Abc Def Ghi'),
        ('capwords', u'abc\tdef\nghi', u'Abc Def Ghi'),
        ('capwords', u'abc\t def \nghi', u'Abc Def Ghi'),
        ]:
        test(*_case)

# zfill(): entries are (input, expected result, width).
for _text, _wanted, _width in [
    (u'123', u'123', 2),
    (u'123', u'123', 3),
    (u'123', u'0123', 4),
    (u'+123', u'+123', 3),
    (u'+123', u'+123', 4),
    (u'+123', u'+0123', 5),
    (u'-123', u'-123', 3),
    (u'-123', u'-123', 4),
    (u'-123', u'-0123', 5),
    (u'', u'000', 3),
    (u'34', u'34', 1),
    (u'34', u'00034', 5),
    ]:
    test('zfill', _text, _wanted, _width)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000272
Guido van Rossuma831cac2000-03-10 23:23:21 +0000273# Comparisons:
274print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000275verify(u'abc' == 'abc')
276verify('abc' == u'abc')
277verify(u'abc' == u'abc')
278verify(u'abcd' > 'abc')
279verify('abcd' > u'abc')
280verify(u'abcd' > u'abc')
281verify(u'abc' < 'abcd')
282verify('abc' < u'abcd')
283verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000284print 'done.'
285
Marc-André Lemburge5034372000-08-08 08:04:29 +0000286if 0:
287 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000288
Marc-André Lemburge5034372000-08-08 08:04:29 +0000289 print 'Testing UTF-16 code point order comparisons...',
290 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000291 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000292 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000293 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000294
Marc-André Lemburge5034372000-08-08 08:04:29 +0000295 # Non surrogate above surrogate value, fixup required
296 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000297 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000298
Marc-André Lemburge5034372000-08-08 08:04:29 +0000299 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000300 s2 = u'\ud800\udc01'
301 test_lecmp(s, s2)
302 s2 = u'\ud900\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\uda00\udc01'
305 test_lecmp(s, s2)
306 s2 = u'\udb00\udc01'
307 test_lecmp(s, s2)
308 s2 = u'\ud800\udd01'
309 test_lecmp(s, s2)
310 s2 = u'\ud900\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\uda00\udd01'
313 test_lecmp(s, s2)
314 s2 = u'\udb00\udd01'
315 test_lecmp(s, s2)
316 s2 = u'\ud800\ude01'
317 test_lecmp(s, s2)
318 s2 = u'\ud900\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\uda00\ude01'
321 test_lecmp(s, s2)
322 s2 = u'\udb00\ude01'
323 test_lecmp(s, s2)
324 s2 = u'\ud800\udfff'
325 test_lecmp(s, s2)
326 s2 = u'\ud900\udfff'
327 test_lecmp(s, s2)
328 s2 = u'\uda00\udfff'
329 test_lecmp(s, s2)
330 s2 = u'\udb00\udfff'
331 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000332
333 test_fixup(u'\ue000')
334 test_fixup(u'\uff61')
335
336 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000337 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000338 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000339
# ljust/rjust/center pad with spaces up to the requested width and return
# the string unchanged when it is already at least that wide.  Padding is
# written as an explicit repeat count so the expected field width is
# unambiguous.  When the padding is odd, center() puts the extra space on
# the right for these even widths.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
349
for _case in [
    ('islower', u'a', True),
    ('islower', u'A', False),
    ('islower', u'\n', False),
    ('islower', u'\u1FFc', False),
    ('islower', u'abc', True),
    ('islower', u'aBc', False),
    ('islower', u'abc\n', True),

    ('isupper', u'a', False),
    ('isupper', u'A', True),
    ('isupper', u'\n', False),
    ]:
    test(*_case)

# This one case is skipped on Java platforms.
if sys.platform[:4] != 'java':
    test('isupper', u'\u1FFc', False)

for _case in [
    ('isupper', u'ABC', True),
    ('isupper', u'AbC', False),
    ('isupper', u'ABC\n', True),

    ('istitle', u'a', False),
    ('istitle', u'A', True),
    ('istitle', u'\n', False),
    ('istitle', u'\u1FFc', True),
    ('istitle', u'A Titlecased Line', True),
    ('istitle', u'A\nTitlecased Line', True),
    ('istitle', u'A Titlecased, Line', True),
    ('istitle', u'Greek \u1FFcitlecases ...', True),
    ('istitle', u'Not a capitalized String', False),
    ('istitle', u'Not\ta Titlecase String', False),
    ('istitle', u'Not--a Titlecase String', False),

    ('isalpha', u'a', True),
    ('isalpha', u'A', True),
    ('isalpha', u'\n', False),
    ('isalpha', u'\u1FFc', True),
    ('isalpha', u'abc', True),
    ('isalpha', u'aBc123', False),
    ('isalpha', u'abc\n', False),

    ('isalnum', u'a', True),
    ('isalnum', u'A', True),
    ('isalnum', u'\n', False),
    ('isalnum', u'123abc456', True),
    ('isalnum', u'a1b3c', True),
    ('isalnum', u'aBc000 ', False),
    ('isalnum', u'abc\n', False),

    ('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True),

    # translate() with a dict: None deletes, an int or a unicode string
    # replaces.
    ('translate', u"abababc", u'bbbc', {ord('a'):None}),
    ('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    ('translate', u"abababc", u'iiix',
     {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ]:
    test(*_case)
406
Guido van Rossumd4d26842000-03-13 23:21:48 +0000407# Contains:
408print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000409vereq(('a' in u'abdb'), True)
410vereq(('a' in u'bdab'), True)
411vereq(('a' in u'bdaba'), True)
412vereq(('a' in u'bdba'), True)
413vereq(('a' in u'bdba'), True)
414vereq((u'a' in u'bdba'), True)
415vereq((u'a' in u'bdb'), False)
416vereq((u'a' in 'bdb'), False)
417vereq((u'a' in 'bdba'), True)
418vereq((u'a' in ('a',1,None)), True)
419vereq((u'a' in (1,None,'a')), True)
420vereq((u'a' in (1,None,u'a')), True)
421vereq(('a' in ('a',1,None)), True)
422vereq(('a' in (1,None,'a')), True)
423vereq(('a' in (1,None,u'a')), True)
424vereq(('a' in ('x',1,u'y')), False)
425vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000426vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000427vereq((u'ab' in u'abcd'), True)
428vereq(('ab' in u'abc'), True)
429vereq((u'ab' in 'abc'), True)
430vereq((u'ab' in (1,None,u'ab')), True)
431vereq((u'' in u'abc'), True)
432vereq(('' in u'abc'), True)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000433print 'done.'
434
Guido van Rossuma831cac2000-03-10 23:23:21 +0000435# Formatting:
436print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000437verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
438verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
439verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
440verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
441verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
442verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
443verify(u"%c" % (u"a",) == u'a')
444verify(u"%c" % ("a",) == u'a')
445verify(u"%c" % (34,) == u'"')
446verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000447if sys.platform[:4] != 'java':
448 value = u"%r, %r" % (u"abc", "abc")
449 if value != u"u'abc', 'abc'":
450 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000451
Marc-André Lemburg36619082001-01-17 19:11:13 +0000452verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000453try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000454 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000455except KeyError:
456 print '*** formatting failed for "%s"' % "u'abc, def'"
457else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000458 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000459
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000460for ordinal in (-100, 0x20000):
461 try:
462 u"%c" % ordinal
463 except ValueError:
464 pass
465 else:
466 print '*** formatting u"%%c" % %i should give a ValueError' % ordinal
467
Guido van Rossum97064862000-04-10 13:52:48 +0000468# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000469verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
470verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
471verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
472verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
473verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
474verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
475verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
476verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
477verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000478verify('%*s' % (5,u'abc',) == u' abc')
479verify('%*s' % (-5,u'abc',) == u'abc ')
480verify('%*.*s' % (5,2,u'abc',) == u' ab')
481verify('%*.*s' % (5,3,u'abc',) == u' abc')
482verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
483verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000484print 'done.'
485
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000486print 'Testing builtin unicode()...',
487
488# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
489
490verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
491
492class UnicodeSubclass(unicode):
493 pass
494
495verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
496 == u'unicode subclass becomes unicode')
497
498verify(unicode('strings are converted to unicode')
499 == u'strings are converted to unicode')
500
501class UnicodeCompat:
502 def __init__(self, x):
503 self.x = x
504 def __unicode__(self):
505 return self.x
506
507verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
508 == u'__unicode__ compatible objects are recognized')
509
510class StringCompat:
511 def __init__(self, x):
512 self.x = x
513 def __str__(self):
514 return self.x
515
516verify(unicode(StringCompat('__str__ compatible objects are recognized'))
517 == u'__str__ compatible objects are recognized')
518
519# unicode(obj) is compatible to str():
520
521o = StringCompat('unicode(obj) is compatible to str()')
522verify(unicode(o) == u'unicode(obj) is compatible to str()')
523verify(str(o) == 'unicode(obj) is compatible to str()')
524
525for obj in (123, 123.45, 123L):
526 verify(unicode(obj) == unicode(str(obj)))
527
528# unicode(obj, encoding, error) tests (this maps to
529# PyUnicode_FromEncodedObject() at C level)
530
Finn Bock2b29cb22001-12-10 20:57:34 +0000531if not sys.platform.startswith('java'):
532 try:
533 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
534 except TypeError:
535 pass
536 else:
537 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000538
539verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
540 == u'strings are decoded to unicode')
541
Finn Bock2b29cb22001-12-10 20:57:34 +0000542if not sys.platform.startswith('java'):
543 verify(unicode(buffer('character buffers are decoded to unicode'),
544 'utf-8', 'strict')
545 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000546
547print 'done.'
548
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000549# Test builtin codecs
550print 'Testing builtin codecs...',
551
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000552# UTF-7 specific encoding tests:
553utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
554 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
555 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
556 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
557 (u'+', '+-'),
558 (u'+-', '+--'),
559 (u'+?', '+-?'),
560 (u'\?', '+AFw?'),
561 (u'+?', '+-?'),
562 (ur'\\?', '+AFwAXA?'),
563 (ur'\\\?', '+AFwAXABc?'),
564 (ur'++--', '+-+---')]
565
566for x,y in utfTests:
567 verify( x.encode('utf-7') == y )
568
Tim Peters527e64f2001-10-04 05:36:56 +0000569try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000570 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
571except UnicodeError:
572 pass
573else:
574 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
575
576verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
577
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000578# UTF-8 specific encoding tests:
# Each pair is (unicode text, expected UTF-8 encoding).
for _text, _encoded in [
    (u'', ''),
    (u'\u20ac', '\xe2\x82\xac'),
    (u'\ud800\udc02', '\xf0\x90\x80\x82'),
    (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
    (u'\ud800', '\xed\xa0\x80'),
    (u'\udc00', '\xed\xb0\x80'),
    ((u'\ud800\udc02'*1000), '\xf0\x90\x80\x82'*1000),
    (u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
     u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
     u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
     u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
     u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
     u' Nunstuck git und',
     '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
     '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
     '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
     '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
     '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
     '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
     '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
     '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
     '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
     '\xe3\x80\x8cWenn ist das Nunstuck git und'),
    ]:
    verify(_text.encode('utf-8') == _encoded)

# UTF-8 specific decoding tests
for _encoded, _text in [
    ('\xf0\xa3\x91\x96', u'\U00023456'),
    ('\xf0\x90\x80\x82', u'\U00010002'),
    ('\xe2\x82\xac', u'\u20ac'),
    ]:
    verify(unicode(_encoded, 'utf-8') == _text)

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

# Plain ASCII text decodes identically under each of these codecs.
for _codec in ('ascii', 'utf-8', 'utf8', 'latin-1'):
    verify(unicode('hello', _codec) == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000617
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000618# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000619try:
620 u'Andr\202 x'.encode('ascii')
621 u'Andr\202 x'.encode('ascii','strict')
622except ValueError:
623 pass
624else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000625 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000626verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
627verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000628
629try:
630 unicode('Andr\202 x','ascii')
631 unicode('Andr\202 x','ascii','strict')
632except ValueError:
633 pass
634else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000635 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000636verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
637verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000638
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000639verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
640try:
641 "\\".decode("unicode-escape")
642except ValueError:
643 pass
644else:
645 raise TestFailed, '"\\".decode("unicode-escape") should fail'
646
# Encoding pure-ASCII text: the ascii, utf-7, utf-8/utf8 and latin-1 codecs
# must return the text unchanged, while utf-16-le / utf-16-be must return
# two bytes per character (low byte first / high byte first) with nothing
# prepended.
verify(u'hello'.encode('ascii') == 'hello')
verify(u'hello'.encode('utf-7') == 'hello')
verify(u'hello'.encode('utf-8') == 'hello')
verify(u'hello'.encode('utf8') == 'hello')
verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000654
# Roundtrip safety for BMP (just the first 1024 chars)
# u holds the code points 0 through 1023 in order; every codec listed must
# decode its own encoding of u back to exactly u.  A mismatch is not caught
# here, so it propagates out of verify().
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000660
# Roundtrip safety for BMP (just the first 256 chars)
# Here a failure is reported on stdout instead of being propagated: both
# the TestFailed raised by verify() and a ValueError raised by the codec
# are caught and turned into a '***' line.
u = u''.join(map(unichr, range(256)))
for encoding in (
    'latin-1',
    ):
    try:
        verify(unicode(u.encode(encoding),encoding) == u)
    except TestFailed:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

# Roundtrip safety for BMP (just the first 128 chars)
# Same reporting scheme as for the 256-character check: failures are
# printed, not raised.
u = u''.join(map(unichr, range(128)))
for encoding in (
    'ascii',
    ):
    try:
        verify(unicode(u.encode(encoding),encoding) == u)
    except TestFailed:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
684
# Roundtrip safety for non-BMP (just a few chars)
# Five code points above U+FFFF.  'raw_unicode_escape' is left disabled in
# the list below; the reason is not recorded here.
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8',
                 'utf-16', 'utf-16-le', 'utf-16-be',
                 #'raw_unicode_escape',
                 'unicode_escape', 'unicode_internal'):
    verify(unicode(u.encode(encoding),encoding) == u)

# UTF-8 must be roundtrip safe for all UCS-2 code points
# range(0x10000) covers every value from U+0000 through U+FFFF, so the
# surrogate range is part of the test string as well.
u = u''.join(map(unichr, range(0x10000)))
for encoding in ('utf-8',):
    verify(unicode(u.encode(encoding),encoding) == u)
697
Guido van Rossum9e896b32000-04-05 20:11:21 +0000698print 'done.'
699
print 'Testing standard mapping codecs...',

# s holds the byte values 0 through 127.  Each codec must decode s and
# re-encode the result to the identical byte string.  Failures do not abort
# the run: a mismatch (TestFailed) or a codec error (ValueError) is printed
# as a '***' line.
print '0-127...',
s = ''.join(map(chr, range(128)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',

    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

    'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    'cp1006', 'iso8859_8',

    ### These have undefined mappings:
    #'cp424',

    ### These fail the round-trip:
    #'cp875'

    ):
    try:
        verify(unicode(s,encoding).encode(encoding) == s)
    except TestFailed:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
734
# s holds the byte values 128 through 255.  Only the codecs still enabled
# in the list are required to decode s and re-encode it to the identical
# byte string; the commented-out names are excluded for the reasons given
# in the '###' notes.  Failures are printed as '***' lines, not raised.
print '128-255...',
s = ''.join(map(chr, range(128,256)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_4', 'iso8859_5',
    'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',

    ### These have undefined mappings:
    #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #'cp1256', 'cp1257', 'cp1258',
    #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #'iso8859_3', 'iso8859_6', 'iso8859_7',
    #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

    ### These fail the round-trip:
    #'cp1006', 'cp875', 'iso8859_8',

    ):
    try:
        verify(unicode(s,encoding).encode(encoding) == s)
    except TestFailed:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000766
print 'Testing Unicode string concatenation...',
# Adjacent string literals (no operator between them) are joined into one
# string.  Every mix of plain and Unicode literals below must compare equal
# to the corresponding single Unicode literal.
verify((u"abc" u"def") == u"abcdef")
verify(("abc" u"def") == u"abcdef")
verify((u"abc" "def") == u"abcdef")
verify((u"abc" u"def" "ghi") == u"abcdefghi")
verify(("abc" "def" u"ghi") == u"abcdefghi")
print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000774
print 'Testing Unicode printing...',
# Exercises the print statement with Unicode operands, mixed str/unicode
# operands, embedded newlines and trailing commas.  Nothing is checked in
# code here.
# NOTE(review): presumably the test runner compares this output with an
# expected-output file, so the statements must stay exactly as they are --
# confirm before editing.
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000786
787def test_exception(lhs, rhs, msg):
788 try:
789 lhs in rhs
790 except TypeError:
791 pass
792 else:
793 raise TestFailed, msg
794
def run_contains_tests():
    """Check the ``in`` operator across str/unicode operand pairings.

    Each (needle, haystack, expected) row is checked three ways, in this
    order: unicode in str, str in unicode, unicode in unicode.  vereq()
    reports any result that differs from the expected boolean.
    """
    table = (
        ('',     '',       True),
        ('',     'abc',    True),
        ('\0',   'abc',    False),
        ('\0',   '\0abc',  True),
        ('\0',   'abc\0',  True),
        ('a',    '\0abc',  True),
        ('asdf', 'asdf',   True),
        ('asdf', 'asd',    False),
        ('asdf', '',       False),
    )
    for needle, haystack, expected in table:
        # All rows are pure ASCII, so unicode() gives the same values the
        # corresponding u'...' literals would.
        uneedle = unicode(needle)
        uhaystack = unicode(haystack)
        vereq(uneedle in haystack, expected)
        vereq(needle in uhaystack, expected)
        vereq(uneedle in uhaystack, expected)
822 vereq(u'asdf' in u'', False)
823
824run_contains_tests()