blob: 66a7f916c431b7518a18fbf8ef4dd549bd04c63d [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
if not sys.platform.startswith('java'):
    # Test basic sanity of repr(): every (value, expected text) pair below
    # must round through repr() exactly.
    for _value, _expected in [
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    ]:
        verify(repr(_value) == _expected)

    # Expected repr() of the string holding code points 0..255 in order.
    latin1repr = ''.join([
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r",
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a",
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI",
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f",
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d",
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b",
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9",
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7",
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5",
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3",
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1",
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef",
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd",
        "\\xfe\\xff'",
    ])
    testrepr = repr(u''.join(map(unichr, range(256))))
    verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
# Each entry is (method, input, expected result, *method arguments) and is
# handed to test() in the order listed.
for _case in [
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'Hello ', u'Hello '),
    ('capitalize', u'hello ', u'Hello '),
    ('capitalize', u'aaaa', u'Aaaa'),
    ('capitalize', u'AaAa', u'Aaaa'),

    ('count', u'aaa', 3, u'a'),
    ('count', u'aaa', 0, u'b'),
    ('count', 'aaa', 3, u'a'),
    ('count', 'aaa', 0, u'b'),
    ('count', u'aaa', 3, 'a'),
    ('count', u'aaa', 0, 'b'),

    ('title', u' hello ', u' Hello '),
    ('title', u'Hello ', u'Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),
    ('rfind', 'abcdefghiabc', 9, u'abc'),
    ('rfind', 'abcdefghiabc', 12, u''),
    ('rfind', u'abcdefghiabc', 12, ''),
    ('rfind', u'abcdefghiabc', 12, u''),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
]:
    test(*_case)
120
if 0:
    # Disabled block.  The table is the identity mapping over all 256
    # byte values, except that 'a', 'b', 'c' are replaced by 'x', 'y', 'z'.
    transtable = ''.join(map(chr, range(256)))
    transtable = transtable[:ord('a')] + 'xyz' + transtable[ord('d'):]

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
126
# split(): whitespace splitting, explicit separators, maxsplit limits and
# mixed str/unicode operands.  Entries are (method, input, expected, *args).
for _case in [
    ('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    ('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    ('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    ('split', u'a b c d', [u'a', u'b c d'], None, 1),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    ('split', u'a b c d', [u'a b c d'], None, 0),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d ', [u'a', u'b', u'c', u'd']),
    ('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    ('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//'),
    ('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    ('split', u'endcase test', [u'endcase ', u''], u'test'),
    ('split', u'endcase test', [u'endcase ', u''], 'test'),
    ('split', 'endcase test', [u'endcase ', u''], u'test'),
]:
    test(*_case)
144
Guido van Rossuma831cac2000-03-10 23:23:21 +0000145
146# join now works with any sequence type
class Sequence:
    # Thin wrapper that forwards only len() and indexing to the wrapped
    # object, so join() is exercised with a non-builtin sequence type.

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
151
152test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000154test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000155test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000156test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000157test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
158test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
159test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
160test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
161test('join', ' ', u'w x y z', Sequence(u'wxyz'))
162test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000163
164result = u''
165for i in range(10):
166 if i > 0:
167 result = result + u':'
168 result = result + u'x'*10
169test('join', u':', result, [u'x' * 10] * 10)
170test('join', u':', result, (u'x' * 10,) * 10)
171
for _case in [
    # strip/lstrip/rstrip without an argument
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    # strip/lstrip/rstrip with None arg
    ('strip', u' hello ', u'hello', None),
    ('lstrip', u' hello ', u'hello ', None),
    ('rstrip', u' hello ', u' hello', None),
    ('strip', u'hello', u'hello', None),

    # strip/lstrip/rstrip with unicode arg
    ('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz'),
    ('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz'),
    ('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz'),
    ('strip', u'hello', u'hello', u'xyz'),

    # strip/lstrip/rstrip with str arg
    ('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz'),
    ('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz'),
    ('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz'),
    ('strip', u'hello', u'hello', 'xyz'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
]:
    test(*_case)

if 0:
    # Disabled block: translate() driven by string-style tables.
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for _text, _translated in [(u'abc', u'Abc'), (u'xyz', u'xyz')]:
        test('translate', _text, _translated, table)
203
# replace(): count limits, str operands, absent patterns and the empty
# pattern.  Entries are (method, input, expected, old, new[, count]).
for _case in [
    ('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1),
    ('replace', u'one!two!three!', u'onetwothree', '!', ''),
    ('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4),
    ('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2),
    ('replace', u'abc', u'-a-b-c-', u'', u'-'),
    ('replace', u'abc', u'-a-b-c', u'', u'-', 3),
    ('replace', u'abc', u'abc', u'', u'-', 0),
    ('replace', u'abc', u'abc', u'ab', u'--', 0),
    ('replace', u'abc', u'abc', u'xy', u'--'),
    ('replace', u'', u'', u'', u''),
]:
    test(*_case)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000219
# startswith()/endswith(): entries are
# (method, input, expected bool, affix[, start[, end]]).
for _case in [
    ('startswith', u'hello', True, u'he'),
    ('startswith', u'hello', True, u'hello'),
    ('startswith', u'hello', False, u'hello world'),
    ('startswith', u'hello', True, u''),
    ('startswith', u'hello', False, u'ello'),
    ('startswith', u'hello', True, u'ello', 1),
    ('startswith', u'hello', True, u'o', 4),
    ('startswith', u'hello', False, u'o', 5),
    ('startswith', u'hello', True, u'', 5),
    ('startswith', u'hello', False, u'lo', 6),
    ('startswith', u'helloworld', True, u'lowo', 3),
    ('startswith', u'helloworld', True, u'lowo', 3, 7),
    ('startswith', u'helloworld', False, u'lowo', 3, 6),

    ('endswith', u'hello', True, u'lo'),
    ('endswith', u'hello', False, u'he'),
    ('endswith', u'hello', True, u''),
    ('endswith', u'hello', False, u'hello world'),
    ('endswith', u'helloworld', False, u'worl'),
    ('endswith', u'helloworld', True, u'worl', 3, 9),
    ('endswith', u'helloworld', True, u'world', 3, 12),
    ('endswith', u'helloworld', True, u'lowo', 1, 7),
    ('endswith', u'helloworld', True, u'lowo', 2, 7),
    ('endswith', u'helloworld', True, u'lowo', 3, 7),
    ('endswith', u'helloworld', False, u'lowo', 4, 7),
    ('endswith', u'helloworld', False, u'lowo', 3, 8),
    ('endswith', u'ab', False, u'ab', 0, 1),
    ('endswith', u'ab', False, u'ab', 0, 0),
    ('endswith', 'helloworld', True, u'd'),
    ('endswith', 'helloworld', False, u'l'),
]:
    test(*_case)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
# expandtabs() replaces each tab with enough spaces to reach the next tab
# stop (every 8 columns unless a tab size is given) and restarts the column
# count after '\r' or '\n'.  The padding is written as an explicit repeat
# count so the expected width cannot be lost to whitespace mangling:
#   'ab' ends in column 2 -> 6 spaces (tab size 8) or 2 spaces (tab size 4)
#   'g'  ends in column 1 -> 7 spaces (tab size 8) or 3 spaces (tab size 4)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
# No tabs at all: the text must come back unchanged.
test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000256
if 0:
    # Disabled block: capwords checks.
    for _text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', _text, u'Abc Def Ghi')

# zfill(): entries are (method, input, expected, width).
for _case in [
    ('zfill', u'123', u'123', 2),
    ('zfill', u'123', u'123', 3),
    ('zfill', u'123', u'0123', 4),
    ('zfill', u'+123', u'+123', 3),
    ('zfill', u'+123', u'+123', 4),
    ('zfill', u'+123', u'+0123', 5),
    ('zfill', u'-123', u'-123', 3),
    ('zfill', u'-123', u'-123', 4),
    ('zfill', u'-123', u'-0123', 5),
    ('zfill', u'', u'000', 3),
    ('zfill', u'34', u'34', 1),
    ('zfill', u'34', u'00034', 5),
]:
    test(*_case)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000274
Guido van Rossuma831cac2000-03-10 23:23:21 +0000275# Comparisons:
276print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000277verify(u'abc' == 'abc')
278verify('abc' == u'abc')
279verify(u'abc' == u'abc')
280verify(u'abcd' > 'abc')
281verify('abcd' > u'abc')
282verify(u'abcd' > u'abc')
283verify(u'abc' < 'abcd')
284verify('abc' < u'abcd')
285verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000286print 'done.'
287
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288if 0:
289 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 print 'Testing UTF-16 code point order comparisons...',
292 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000293 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000294 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000295 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000296
Marc-André Lemburge5034372000-08-08 08:04:29 +0000297 # Non surrogate above surrogate value, fixup required
298 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000299 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000300
Marc-André Lemburge5034372000-08-08 08:04:29 +0000301 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000302 s2 = u'\ud800\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud900\udc01'
305 test_lecmp(s, s2)
306 s2 = u'\uda00\udc01'
307 test_lecmp(s, s2)
308 s2 = u'\udb00\udc01'
309 test_lecmp(s, s2)
310 s2 = u'\ud800\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud900\udd01'
313 test_lecmp(s, s2)
314 s2 = u'\uda00\udd01'
315 test_lecmp(s, s2)
316 s2 = u'\udb00\udd01'
317 test_lecmp(s, s2)
318 s2 = u'\ud800\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud900\ude01'
321 test_lecmp(s, s2)
322 s2 = u'\uda00\ude01'
323 test_lecmp(s, s2)
324 s2 = u'\udb00\ude01'
325 test_lecmp(s, s2)
326 s2 = u'\ud800\udfff'
327 test_lecmp(s, s2)
328 s2 = u'\ud900\udfff'
329 test_lecmp(s, s2)
330 s2 = u'\uda00\udfff'
331 test_lecmp(s, s2)
332 s2 = u'\udb00\udfff'
333 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334
335 test_fixup(u'\ue000')
336 test_fixup(u'\uff61')
337
338 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000339 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000340 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000341
# ljust/rjust/center pad with spaces up to the requested width and leave the
# text unchanged when it is already at least that wide.  The padding is
# written as an explicit repeat count so the expected width cannot be lost
# to whitespace mangling.  center() puts the odd extra space on the right
# for these inputs (7 -> 3 + 4, 3 -> 1 + 2).
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
351
# Character-class predicates.  Entries are (method, input, expected bool).
for _case in [
    ('islower', u'a', True),
    ('islower', u'A', False),
    ('islower', u'\n', False),
    ('islower', u'\u1FFc', False),
    ('islower', u'abc', True),
    ('islower', u'aBc', False),
    ('islower', u'abc\n', True),

    ('isupper', u'a', False),
    ('isupper', u'A', True),
    ('isupper', u'\n', False),
]:
    test(*_case)
# This one isupper() check is skipped on Java platforms.
if not sys.platform.startswith('java'):
    test('isupper', u'\u1FFc', False)
for _case in [
    ('isupper', u'ABC', True),
    ('isupper', u'AbC', False),
    ('isupper', u'ABC\n', True),

    ('istitle', u'a', False),
    ('istitle', u'A', True),
    ('istitle', u'\n', False),
    ('istitle', u'\u1FFc', True),
    ('istitle', u'A Titlecased Line', True),
    ('istitle', u'A\nTitlecased Line', True),
    ('istitle', u'A Titlecased, Line', True),
    ('istitle', u'Greek \u1FFcitlecases ...', True),
    ('istitle', u'Not a capitalized String', False),
    ('istitle', u'Not\ta Titlecase String', False),
    ('istitle', u'Not--a Titlecase String', False),

    ('isalpha', u'a', True),
    ('isalpha', u'A', True),
    ('isalpha', u'\n', False),
    ('isalpha', u'\u1FFc', True),
    ('isalpha', u'abc', True),
    ('isalpha', u'aBc123', False),
    ('isalpha', u'abc\n', False),

    ('isalnum', u'a', True),
    ('isalnum', u'A', True),
    ('isalnum', u'\n', False),
    ('isalnum', u'123abc456', True),
    ('isalnum', u'a1b3c', True),
    ('isalnum', u'aBc000 ', False),
    ('isalnum', u'abc\n', False),
]:
    test(*_case)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000396
# splitlines() with mixed line endings (the last entry keeps the line
# ends), then translate() with ordinal-keyed mapping tables.
for _case in [
    ('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True),

    ('translate', u"abababc", u'bbbc', {ord('a'):None}),
    ('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    ('translate', u"abababc", u'iiix',
     {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ('translate', u"abababc", u'<i><i><i>c', {ord('a'):None, ord('b'):u'<i>'}),
    ('translate', u"abababc", u'c', {ord('a'):None, ord('b'):u''}),
]:
    test(*_case)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000410
Guido van Rossumd4d26842000-03-13 23:21:48 +0000411# Contains:
412print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000413vereq(('a' in u'abdb'), True)
414vereq(('a' in u'bdab'), True)
415vereq(('a' in u'bdaba'), True)
416vereq(('a' in u'bdba'), True)
417vereq(('a' in u'bdba'), True)
418vereq((u'a' in u'bdba'), True)
419vereq((u'a' in u'bdb'), False)
420vereq((u'a' in 'bdb'), False)
421vereq((u'a' in 'bdba'), True)
422vereq((u'a' in ('a',1,None)), True)
423vereq((u'a' in (1,None,'a')), True)
424vereq((u'a' in (1,None,u'a')), True)
425vereq(('a' in ('a',1,None)), True)
426vereq(('a' in (1,None,'a')), True)
427vereq(('a' in (1,None,u'a')), True)
428vereq(('a' in ('x',1,u'y')), False)
429vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000430vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000431vereq((u'ab' in u'abcd'), True)
432vereq(('ab' in u'abc'), True)
433vereq((u'ab' in 'abc'), True)
434vereq((u'ab' in (1,None,u'ab')), True)
435vereq((u'' in u'abc'), True)
436vereq(('' in u'abc'), True)
Marc-André Lemburg9cd87aa2002-10-23 09:02:46 +0000437try:
438 u'\xe2' in 'g\xe2teau'
439except UnicodeError:
440 pass
441else:
442 print '*** contains operator does not propagate UnicodeErrors'
Guido van Rossumd4d26842000-03-13 23:21:48 +0000443print 'done.'
444
Guido van Rossuma831cac2000-03-10 23:23:21 +0000445# Formatting:
446print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000447verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
448verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
449verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
450verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
451verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
452verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
453verify(u"%c" % (u"a",) == u'a')
454verify(u"%c" % ("a",) == u'a')
455verify(u"%c" % (34,) == u'"')
456verify(u"%c" % (36,) == u'$')
Neil Schemenauerab9e4b72002-11-18 16:11:34 +0000457verify(u"%d".__mod__(10) == u'10')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000458if sys.platform[:4] != 'java':
459 value = u"%r, %r" % (u"abc", "abc")
460 if value != u"u'abc', 'abc'":
461 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000462
Marc-André Lemburg36619082001-01-17 19:11:13 +0000463verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000464try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000465 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000466except KeyError:
467 print '*** formatting failed for "%s"' % "u'abc, def'"
468else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000469 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000470
Martin v. Löwis766e3002002-09-14 09:10:04 +0000471for ordinal in (-100, 0x200000):
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000472 try:
473 u"%c" % ordinal
474 except ValueError:
475 pass
476 else:
Martin v. Löwis766e3002002-09-14 09:10:04 +0000477 print '*** formatting u"%%c" %% %i should give a ValueError' % ordinal
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000478
Guido van Rossum97064862000-04-10 13:52:48 +0000479# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000480verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
481verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
482verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
483verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
484verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
485verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
486verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
487verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
488verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000489verify('%*s' % (5,u'abc',) == u' abc')
490verify('%*s' % (-5,u'abc',) == u'abc ')
491verify('%*.*s' % (5,2,u'abc',) == u' ab')
492verify('%*.*s' % (5,3,u'abc',) == u' abc')
493verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
494verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000495print 'done.'
496
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000497print 'Testing builtin unicode()...',
498
499# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
500
501verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
502
503class UnicodeSubclass(unicode):
504 pass
505
506verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
507 == u'unicode subclass becomes unicode')
508
509verify(unicode('strings are converted to unicode')
510 == u'strings are converted to unicode')
511
512class UnicodeCompat:
513 def __init__(self, x):
514 self.x = x
515 def __unicode__(self):
516 return self.x
517
518verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
519 == u'__unicode__ compatible objects are recognized')
520
521class StringCompat:
522 def __init__(self, x):
523 self.x = x
524 def __str__(self):
525 return self.x
526
527verify(unicode(StringCompat('__str__ compatible objects are recognized'))
528 == u'__str__ compatible objects are recognized')
529
530# unicode(obj) is compatible to str():
531
532o = StringCompat('unicode(obj) is compatible to str()')
533verify(unicode(o) == u'unicode(obj) is compatible to str()')
534verify(str(o) == 'unicode(obj) is compatible to str()')
535
536for obj in (123, 123.45, 123L):
537 verify(unicode(obj) == unicode(str(obj)))
538
539# unicode(obj, encoding, error) tests (this maps to
540# PyUnicode_FromEncodedObject() at C level)
541
Finn Bock2b29cb22001-12-10 20:57:34 +0000542if not sys.platform.startswith('java'):
543 try:
544 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
545 except TypeError:
546 pass
547 else:
548 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000549
550verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
551 == u'strings are decoded to unicode')
552
Finn Bock2b29cb22001-12-10 20:57:34 +0000553if not sys.platform.startswith('java'):
554 verify(unicode(buffer('character buffers are decoded to unicode'),
555 'utf-8', 'strict')
556 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000557
558print 'done.'
559
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000560# Test builtin codecs
561print 'Testing builtin codecs...',
562
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000563# UTF-7 specific encoding tests:
564utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
565 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
566 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
567 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
568 (u'+', '+-'),
569 (u'+-', '+--'),
570 (u'+?', '+-?'),
571 (u'\?', '+AFw?'),
572 (u'+?', '+-?'),
573 (ur'\\?', '+AFwAXA?'),
574 (ur'\\\?', '+AFwAXABc?'),
575 (ur'++--', '+-+---')]
576
577for x,y in utfTests:
578 verify( x.encode('utf-7') == y )
579
Tim Peters527e64f2001-10-04 05:36:56 +0000580try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000581 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
582except UnicodeError:
583 pass
584else:
585 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
586
587verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
588
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000589# UTF-8 specific encoding tests:
# (unicode text, expected UTF-8 bytes)
for _text, _utf8 in [
    (u'', ''),
    (u'\u20ac', '\xe2\x82\xac'),
    (u'\ud800\udc02', '\xf0\x90\x80\x82'),
    (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
    (u'\ud800', '\xed\xa0\x80'),
    (u'\udc00', '\xed\xb0\x80'),
    (u'\ud800\udc02'*1000, '\xf0\x90\x80\x82'*1000),
    (u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
     u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
     u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
     u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
     u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
     u' Nunstuck git und',
     '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
     '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
     '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
     '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
     '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
     '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
     '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
     '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
     '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
     '\xe3\x80\x8cWenn ist das Nunstuck git und'),
]:
    verify(_text.encode('utf-8') == _utf8)

# UTF-8 specific decoding tests: (UTF-8 bytes, expected unicode text)
for _utf8, _text in [
    ('\xf0\xa3\x91\x96', u'\U00023456'),
    ('\xf0\x90\x80\x82', u'\U00010002'),
    ('\xe2\x82\xac', u'\u20ac'),
]:
    verify(unicode(_utf8, 'utf-8') == _text)

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

# Pure ASCII text decodes identically under each of these codec names.
for _codec in ('ascii', 'utf-8', 'utf8', 'latin-1'):
    verify(unicode('hello', _codec) == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000628
# Error handling

# Encoding a non-ASCII character with the ASCII codec must raise, both with
# the default error handling and with an explicit 'strict'.  Each variant
# gets its own try block: when both calls share one try body, the first
# call raising means the second one is never executed.
for errors in ((), ('strict',)):
    try:
        u'Andr\202 x'.encode('ascii', *errors)
    except ValueError:
        pass
    else:
        raise TestFailed(
            "u'Andr\202'.encode('ascii') failed to raise an exception")
# 'ignore' drops the offending character, 'replace' substitutes '?'.
verify(u'Andr\202 x'.encode('ascii', 'ignore') == "Andr x")
verify(u'Andr\202 x'.encode('ascii', 'replace') == "Andr? x")

# Same checks for decoding a non-ASCII byte with the ASCII codec.
for errors in ((), ('strict',)):
    try:
        unicode('Andr\202 x', 'ascii', *errors)
    except ValueError:
        pass
    else:
        raise TestFailed("unicode('Andr\202') failed to raise an exception")
# When decoding, 'replace' substitutes U+FFFD instead of '?'.
verify(unicode('Andr\202 x', 'ascii', 'ignore') == u"Andr x")
verify(unicode('Andr\202 x', 'ascii', 'replace') == u'Andr\uFFFD x')

# With 'ignore' the unknown \N{foo} escape is skipped; a lone backslash
# must be rejected under the default error handling.
verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
try:
    "\\".decode("unicode-escape")
except ValueError:
    pass
else:
    raise TestFailed('"\\".decode("unicode-escape") should fail')
657
# Encoding plain ASCII text: the byte-oriented codecs return it unchanged,
# while the UTF-16 variants add a zero byte after (LE) or before (BE) each
# character.
verify(u'hello'.encode('ascii') == 'hello')
verify(u'hello'.encode('utf-7') == 'hello')
verify(u'hello'.encode('utf-8') == 'hello')
verify(u'hello'.encode('utf8') == 'hello')
verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000665
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000666# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000667u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000668for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000669 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000670 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000671
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000672# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000673u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000674for encoding in (
675 'latin-1',
676 ):
677 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000678 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000679 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000680 print '*** codec "%s" failed round-trip' % encoding
681 except ValueError,why:
682 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000683
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000684# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000685u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000686for encoding in (
687 'ascii',
688 ):
689 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000690 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000691 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000692 print '*** codec "%s" failed round-trip' % encoding
693 except ValueError,why:
694 print '*** codec for "%s" failed: %s' % (encoding, why)
695
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000696# Roundtrip safety for non-BMP (just a few chars)
697u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
698for encoding in ('utf-8',
699 'utf-16', 'utf-16-le', 'utf-16-be',
700 #'raw_unicode_escape',
701 'unicode_escape', 'unicode_internal'):
702 verify(unicode(u.encode(encoding),encoding) == u)
703
704# UTF-8 must be roundtrip safe for all UCS-2 code points
Martin v. Löwis1ce4ae32002-09-14 09:19:53 +0000705# This excludes surrogates: in the full range, there would be
706# a surrogate pair (\udbff\udc00), which gets converted back
707# to a non-BMP character (\U0010fc00)
708u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000709for encoding in ('utf-8',):
710 verify(unicode(u.encode(encoding),encoding) == u)
711
Guido van Rossum9e896b32000-04-05 20:11:21 +0000712print 'done.'
713
714print 'Testing standard mapping codecs...',
715
716print '0-127...',
717s = ''.join(map(chr, range(128)))
718for encoding in (
719 'cp037', 'cp1026',
720 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
721 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000722 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000723 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
724 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
725 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
726 'mac_cyrillic', 'mac_latin2',
727
728 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
729 'cp1256', 'cp1257', 'cp1258',
730 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
731
732 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000733 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000734
Guido van Rossum9e896b32000-04-05 20:11:21 +0000735 ### These have undefined mappings:
736 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000737
Tim Peters2f228e72001-05-13 00:19:31 +0000738 ### These fail the round-trip:
739 #'cp875'
740
Guido van Rossum9e896b32000-04-05 20:11:21 +0000741 ):
742 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000743 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000744 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000745 print '*** codec "%s" failed round-trip' % encoding
746 except ValueError,why:
747 print '*** codec for "%s" failed: %s' % (encoding, why)
748
749print '128-255...',
750s = ''.join(map(chr, range(128,256)))
751for encoding in (
752 'cp037', 'cp1026',
753 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
754 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000755 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000756 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000757 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000758 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000759 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000760
Guido van Rossum9e896b32000-04-05 20:11:21 +0000761 ### These have undefined mappings:
762 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
763 #'cp1256', 'cp1257', 'cp1258',
764 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000765 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000766 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000767
Guido van Rossum9e896b32000-04-05 20:11:21 +0000768 ### These fail the round-trip:
769 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000770
Guido van Rossum9e896b32000-04-05 20:11:21 +0000771 ):
772 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000773 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000774 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000775 print '*** codec "%s" failed round-trip' % encoding
776 except ValueError,why:
777 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000778
779print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000780
781print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000782verify((u"abc" u"def") == u"abcdef")
783verify(("abc" u"def") == u"abcdef")
784verify((u"abc" "def") == u"abcdef")
785verify((u"abc" u"def" "ghi") == u"abcdefghi")
786verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000787print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000788
789print 'Testing Unicode printing...',
790print u'abc'
791print u'abc', u'def'
792print u'abc', 'def'
793print 'abc', u'def'
794print u'abc\n'
795print u'abc\n',
796print u'abc\n',
797print u'def\n'
798print u'def\n'
799print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000800
801def test_exception(lhs, rhs, msg):
802 try:
803 lhs in rhs
804 except TypeError:
805 pass
806 else:
807 raise TestFailed, msg
808
def run_contains_tests():
    """Check the ``in`` operator for mixed str/unicode operands.

    Each case is run in three flavours: unicode in str, str in unicode
    and unicode in unicode.  vereq() reports the first mismatch.
    """
    # The empty string is contained in every string, including itself.
    vereq(u'' in '', True)
    vereq('' in u'', True)
    vereq(u'' in u'', True)
    vereq(u'' in 'abc', True)
    vereq('' in u'abc', True)
    vereq(u'' in u'abc', True)
    # NUL characters: absent, leading and trailing.
    vereq(u'\0' in 'abc', False)
    vereq('\0' in u'abc', False)
    vereq(u'\0' in u'abc', False)
    vereq(u'\0' in '\0abc', True)
    vereq('\0' in u'\0abc', True)
    vereq(u'\0' in u'\0abc', True)
    vereq(u'\0' in 'abc\0', True)
    vereq('\0' in u'abc\0', True)
    vereq(u'\0' in u'abc\0', True)
    # A character located after an embedded NUL must still be found.
    vereq(u'a' in '\0abc', True)
    vereq('a' in u'\0abc', True)
    vereq(u'a' in u'\0abc', True)
    # Multi-character operands: equal, longer than the container, and
    # against an empty container.
    vereq(u'asdf' in 'asdf', True)
    vereq('asdf' in u'asdf', True)
    vereq(u'asdf' in u'asdf', True)
    vereq(u'asdf' in 'asd', False)
    vereq('asdf' in u'asd', False)
    vereq(u'asdf' in u'asd', False)
    vereq(u'asdf' in '', False)
    vereq('asdf' in u'', False)
    vereq(u'asdf' in u'', False)
837
# Run the containment checks as part of the script.
run_contains_tests()