# blob: cd40a4bbf1c938f72ae740f8d7f4e31b6355d9d7 [file] [log] [blame]
# -*- coding: iso-8859-1 -*-
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
if not sys.platform.startswith('java'):
    # Test basic sanity of repr(); the whole group is skipped on Java
    # platforms.
    verify(repr(u'abc') == "u'abc'")
    verify(repr(u'ab\\c') == "u'ab\\\\c'")
    verify(repr(u'ab\\') == "u'ab\\\\'")
    verify(repr(u'\\c') == "u'\\\\c'")
    verify(repr(u'\\') == "u'\\\\'")
    verify(repr(u'\n') == "u'\\n'")
    verify(repr(u'\r') == "u'\\r'")
    verify(repr(u'\t') == "u'\\t'")
    verify(repr(u'\b') == "u'\\x08'")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'\"") == """u'\\'"'""")
    verify(repr(u"'") == '''u"'"''')
    verify(repr(u'"') == """u'"'""")
    # Expected repr() of the first 256 code points joined into one string.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    testrepr = repr(u''.join(map(unichr, range(256))))
    verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
# capitalize()
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'Hello ', u'Hello ')
test('capitalize', u'hello ', u'Hello ')
test('capitalize', u'aaaa', u'Aaaa')
test('capitalize', u'AaAa', u'Aaaa')

# count(), mixing str and unicode receivers/arguments
test('count', u'aaa', 3, u'a')
test('count', u'aaa', 0, u'b')
test('count', 'aaa', 3, u'a')
test('count', 'aaa', 0, u'b')
test('count', u'aaa', 3, 'a')
test('count', u'aaa', 0, 'b')

# title()
test('title', u' hello ', u' Hello ')
test('title', u'Hello ', u'Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find() / rfind()
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')
test('rfind', 'abcdefghiabc', 9, u'abc')
test('rfind', 'abcdefghiabc', 12, u'')
test('rfind', u'abcdefghiabc', 12, '')
test('rfind', u'abcdefghiabc', 12, u'')

# lower() / upper()
test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')

# Disabled: these maketrans checks are never executed.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')

# split()
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'endcase test', [u'endcase ', u''], u'test')
test('split', u'endcase test', [u'endcase ', u''], 'test')
test('split', 'endcase test', [u'endcase ', u''], u'test')
144
Guido van Rossuma831cac2000-03-10 23:23:21 +0000145
# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper exposing only __len__ and __getitem__.

    Used below as the argument of join() to check that join accepts
    sequence objects other than lists and tuples.
    """

    def __init__(self, seq):
        # seq: the underlying indexable object all calls are delegated to
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # IndexError from the wrapped object propagates unchanged.
        return self.seq[i]
151
152test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000154test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000155test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000156test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000157test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
158test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
159test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
160test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
161test('join', ' ', u'w x y z', Sequence(u'wxyz'))
162test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000163
164result = u''
165for i in range(10):
166 if i > 0:
167 result = result + u':'
168 result = result + u'x'*10
169test('join', u':', result, [u'x' * 10] * 10)
170test('join', u':', result, (u'x' * 10,) * 10)
171
172test('strip', u' hello ', u'hello')
173test('lstrip', u' hello ', u'hello ')
174test('rstrip', u' hello ', u' hello')
175test('strip', u'hello', u'hello')
176
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000177# strip/lstrip/rstrip with None arg
178test('strip', u' hello ', u'hello', None)
179test('lstrip', u' hello ', u'hello ', None)
180test('rstrip', u' hello ', u' hello', None)
181test('strip', u'hello', u'hello', None)
182
183# strip/lstrip/rstrip with unicode arg
184test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
185test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
186test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
187test('strip', u'hello', u'hello', u'xyz')
188
189# strip/lstrip/rstrip with str arg
190test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
191test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
192test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
193test('strip', u'hello', u'hello', 'xyz')
194
Guido van Rossuma831cac2000-03-10 23:23:21 +0000195test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
196
197if 0:
198 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
199
200 table = string.maketrans('a', u'A')
201 test('translate', u'abc', u'Abc', table)
202 test('translate', u'xyz', u'xyz', table)
203
204test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000205test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000206test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
207test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
208test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
209test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
210test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
211test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
212test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000213test('replace', u'abc', u'-a-b-c-', u'', u'-')
214test('replace', u'abc', u'-a-b-c', u'', u'-', 3)
215test('replace', u'abc', u'abc', u'', u'-', 0)
Guido van Rossum2023c9b2002-08-23 18:50:21 +0000216test('replace', u'abc', u'abc', u'ab', u'--', 0)
217test('replace', u'abc', u'abc', u'xy', u'--')
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000218test('replace', u'', u'', u'', u'')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000219
Guido van Rossum77f6a652002-04-03 22:41:51 +0000220test('startswith', u'hello', True, u'he')
221test('startswith', u'hello', True, u'hello')
222test('startswith', u'hello', False, u'hello world')
223test('startswith', u'hello', True, u'')
224test('startswith', u'hello', False, u'ello')
225test('startswith', u'hello', True, u'ello', 1)
226test('startswith', u'hello', True, u'o', 4)
227test('startswith', u'hello', False, u'o', 5)
228test('startswith', u'hello', True, u'', 5)
229test('startswith', u'hello', False, u'lo', 6)
230test('startswith', u'helloworld', True, u'lowo', 3)
231test('startswith', u'helloworld', True, u'lowo', 3, 7)
232test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000233
Guido van Rossum77f6a652002-04-03 22:41:51 +0000234test('endswith', u'hello', True, u'lo')
235test('endswith', u'hello', False, u'he')
236test('endswith', u'hello', True, u'')
237test('endswith', u'hello', False, u'hello world')
238test('endswith', u'helloworld', False, u'worl')
239test('endswith', u'helloworld', True, u'worl', 3, 9)
240test('endswith', u'helloworld', True, u'world', 3, 12)
241test('endswith', u'helloworld', True, u'lowo', 1, 7)
242test('endswith', u'helloworld', True, u'lowo', 2, 7)
243test('endswith', u'helloworld', True, u'lowo', 3, 7)
244test('endswith', u'helloworld', False, u'lowo', 4, 7)
245test('endswith', u'helloworld', False, u'lowo', 3, 8)
246test('endswith', u'ab', False, u'ab', 0, 1)
247test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossum76afbd92002-08-20 17:29:29 +0000248test('endswith', 'helloworld', True, u'd')
249test('endswith', 'helloworld', False, u'l')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
251test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
252test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
253test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
254test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
Walter Dörwald2ee4be02002-04-17 21:34:05 +0000255test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000256
257if 0:
258 test('capwords', u'abc def ghi', u'Abc Def Ghi')
259 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
260 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
261
Walter Dörwald068325e2002-04-15 13:36:47 +0000262test('zfill', u'123', u'123', 2)
263test('zfill', u'123', u'123', 3)
264test('zfill', u'123', u'0123', 4)
265test('zfill', u'+123', u'+123', 3)
266test('zfill', u'+123', u'+123', 4)
267test('zfill', u'+123', u'+0123', 5)
268test('zfill', u'-123', u'-123', 3)
269test('zfill', u'-123', u'-123', 4)
270test('zfill', u'-123', u'-0123', 5)
271test('zfill', u'', u'000', 3)
272test('zfill', u'34', u'34', 1)
273test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000274
Guido van Rossuma831cac2000-03-10 23:23:21 +0000275# Comparisons:
276print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000277verify(u'abc' == 'abc')
278verify('abc' == u'abc')
279verify(u'abc' == u'abc')
280verify(u'abcd' > 'abc')
281verify('abcd' > u'abc')
282verify(u'abcd' > u'abc')
283verify(u'abc' < 'abcd')
284verify('abc' < u'abcd')
285verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000286print 'done.'
287
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288if 0:
289 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 print 'Testing UTF-16 code point order comparisons...',
292 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000293 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000294 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000295 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000296
Marc-André Lemburge5034372000-08-08 08:04:29 +0000297 # Non surrogate above surrogate value, fixup required
298 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000299 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000300
Marc-André Lemburge5034372000-08-08 08:04:29 +0000301 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000302 s2 = u'\ud800\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud900\udc01'
305 test_lecmp(s, s2)
306 s2 = u'\uda00\udc01'
307 test_lecmp(s, s2)
308 s2 = u'\udb00\udc01'
309 test_lecmp(s, s2)
310 s2 = u'\ud800\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud900\udd01'
313 test_lecmp(s, s2)
314 s2 = u'\uda00\udd01'
315 test_lecmp(s, s2)
316 s2 = u'\udb00\udd01'
317 test_lecmp(s, s2)
318 s2 = u'\ud800\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud900\ude01'
321 test_lecmp(s, s2)
322 s2 = u'\uda00\ude01'
323 test_lecmp(s, s2)
324 s2 = u'\udb00\ude01'
325 test_lecmp(s, s2)
326 s2 = u'\ud800\udfff'
327 test_lecmp(s, s2)
328 s2 = u'\ud900\udfff'
329 test_lecmp(s, s2)
330 s2 = u'\uda00\udfff'
331 test_lecmp(s, s2)
332 s2 = u'\udb00\udfff'
333 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334
335 test_fixup(u'\ue000')
336 test_fixup(u'\uff61')
337
338 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000339 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000340 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000341
# ljust() / rjust() / center(): pad with spaces up to the requested width;
# a width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

test('islower', u'a', True)
test('islower', u'A', False)
test('islower', u'\n', False)
test('islower', u'\u1FFc', False)
test('islower', u'abc', True)
test('islower', u'aBc', False)
test('islower', u'abc\n', True)

test('isupper', u'a', False)
test('isupper', u'A', True)
test('isupper', u'\n', False)
# This one check is skipped on Java platforms.
if not sys.platform.startswith('java'):
    test('isupper', u'\u1FFc', False)
test('isupper', u'ABC', True)
test('isupper', u'AbC', False)
test('isupper', u'ABC\n', True)

test('istitle', u'a', False)
test('istitle', u'A', True)
test('istitle', u'\n', False)
test('istitle', u'\u1FFc', True)
test('istitle', u'A Titlecased Line', True)
test('istitle', u'A\nTitlecased Line', True)
test('istitle', u'A Titlecased, Line', True)
test('istitle', u'Greek \u1FFcitlecases ...', True)
test('istitle', u'Not a capitalized String', False)
test('istitle', u'Not\ta Titlecase String', False)
test('istitle', u'Not--a Titlecase String', False)

test('isalpha', u'a', True)
test('isalpha', u'A', True)
test('isalpha', u'\n', False)
test('isalpha', u'\u1FFc', True)
test('isalpha', u'abc', True)
test('isalpha', u'aBc123', False)
test('isalpha', u'abc\n', False)

test('isalnum', u'a', True)
test('isalnum', u'A', True)
test('isalnum', u'\n', False)
test('isalnum', u'123abc456', True)
test('isalnum', u'a1b3c', True)
test('isalnum', u'aBc000 ', False)
test('isalnum', u'abc\n', False)

# splitlines(); the trailing True keeps the line ends in the result
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)

# translate() with a dict: None deletes, an ordinal or a unicode string
# replaces
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
test('translate', u"abababc", u'<i><i><i>c', {ord('a'):None, ord('b'):u'<i>'})
test('translate', u"abababc", u'c', {ord('a'):None, ord('b'):u''})
Guido van Rossuma831cac2000-03-10 23:23:21 +0000410
Guido van Rossumd4d26842000-03-13 23:21:48 +0000411# Contains:
412print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000413vereq(('a' in u'abdb'), True)
414vereq(('a' in u'bdab'), True)
415vereq(('a' in u'bdaba'), True)
416vereq(('a' in u'bdba'), True)
417vereq(('a' in u'bdba'), True)
418vereq((u'a' in u'bdba'), True)
419vereq((u'a' in u'bdb'), False)
420vereq((u'a' in 'bdb'), False)
421vereq((u'a' in 'bdba'), True)
422vereq((u'a' in ('a',1,None)), True)
423vereq((u'a' in (1,None,'a')), True)
424vereq((u'a' in (1,None,u'a')), True)
425vereq(('a' in ('a',1,None)), True)
426vereq(('a' in (1,None,'a')), True)
427vereq(('a' in (1,None,u'a')), True)
428vereq(('a' in ('x',1,u'y')), False)
429vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000430vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000431vereq((u'ab' in u'abcd'), True)
432vereq(('ab' in u'abc'), True)
433vereq((u'ab' in 'abc'), True)
434vereq((u'ab' in (1,None,u'ab')), True)
435vereq((u'' in u'abc'), True)
436vereq(('' in u'abc'), True)
Marc-André Lemburg9cd87aa2002-10-23 09:02:46 +0000437try:
438 u'\xe2' in 'g\xe2teau'
439except UnicodeError:
440 pass
441else:
442 print '*** contains operator does not propagate UnicodeErrors'
Guido van Rossumd4d26842000-03-13 23:21:48 +0000443print 'done.'
444
Guido van Rossuma831cac2000-03-10 23:23:21 +0000445# Formatting:
446print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000447verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
448verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
449verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
450verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
451verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
452verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
453verify(u"%c" % (u"a",) == u'a')
454verify(u"%c" % ("a",) == u'a')
455verify(u"%c" % (34,) == u'"')
456verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000457if sys.platform[:4] != 'java':
458 value = u"%r, %r" % (u"abc", "abc")
459 if value != u"u'abc', 'abc'":
460 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000461
Marc-André Lemburg36619082001-01-17 19:11:13 +0000462verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000463try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000464 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000465except KeyError:
466 print '*** formatting failed for "%s"' % "u'abc, def'"
467else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000468 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000469
Martin v. Löwis766e3002002-09-14 09:10:04 +0000470for ordinal in (-100, 0x200000):
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000471 try:
472 u"%c" % ordinal
473 except ValueError:
474 pass
475 else:
Martin v. Löwis766e3002002-09-14 09:10:04 +0000476 print '*** formatting u"%%c" %% %i should give a ValueError' % ordinal
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000477
Guido van Rossum97064862000-04-10 13:52:48 +0000478# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000479verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
480verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
481verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
482verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
483verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
484verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
485verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
486verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
487verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000488verify('%*s' % (5,u'abc',) == u' abc')
489verify('%*s' % (-5,u'abc',) == u'abc ')
490verify('%*.*s' % (5,2,u'abc',) == u' ab')
491verify('%*.*s' % (5,3,u'abc',) == u' abc')
492verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
493verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000494print 'done.'
495
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000496print 'Testing builtin unicode()...',
497
498# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
499
500verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
501
502class UnicodeSubclass(unicode):
503 pass
504
505verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
506 == u'unicode subclass becomes unicode')
507
508verify(unicode('strings are converted to unicode')
509 == u'strings are converted to unicode')
510
class UnicodeCompat:
    """Object whose only conversion hook is __unicode__.

    The value given to the constructor is stored as ``x`` and handed back
    unchanged by __unicode__().
    """

    def __init__(self, x):
        self.x = x

    def __unicode__(self):
        return self.x
516
# unicode() must pick up the object's __unicode__ method.
verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
       == u'__unicode__ compatible objects are recognized')
519
class StringCompat:
    """Object whose only conversion hook is __str__.

    The value given to the constructor is stored as ``x`` and handed back
    unchanged by __str__().
    """

    def __init__(self, x):
        self.x = x

    def __str__(self):
        return self.x
525
526verify(unicode(StringCompat('__str__ compatible objects are recognized'))
527 == u'__str__ compatible objects are recognized')
528
529# unicode(obj) is compatible to str():
530
531o = StringCompat('unicode(obj) is compatible to str()')
532verify(unicode(o) == u'unicode(obj) is compatible to str()')
533verify(str(o) == 'unicode(obj) is compatible to str()')
534
535for obj in (123, 123.45, 123L):
536 verify(unicode(obj) == unicode(str(obj)))
537
538# unicode(obj, encoding, error) tests (this maps to
539# PyUnicode_FromEncodedObject() at C level)
540
Finn Bock2b29cb22001-12-10 20:57:34 +0000541if not sys.platform.startswith('java'):
542 try:
543 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
544 except TypeError:
545 pass
546 else:
547 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000548
549verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
550 == u'strings are decoded to unicode')
551
Finn Bock2b29cb22001-12-10 20:57:34 +0000552if not sys.platform.startswith('java'):
553 verify(unicode(buffer('character buffers are decoded to unicode'),
554 'utf-8', 'strict')
555 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000556
557print 'done.'
558
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000559# Test builtin codecs
560print 'Testing builtin codecs...',
561
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000562# UTF-7 specific encoding tests:
563utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
564 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
565 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
566 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
567 (u'+', '+-'),
568 (u'+-', '+--'),
569 (u'+?', '+-?'),
570 (u'\?', '+AFw?'),
571 (u'+?', '+-?'),
572 (ur'\\?', '+AFwAXA?'),
573 (ur'\\\?', '+AFwAXABc?'),
574 (ur'++--', '+-+---')]
575
576for x,y in utfTests:
577 verify( x.encode('utf-7') == y )
578
Tim Peters527e64f2001-10-04 05:36:56 +0000579try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000580 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
581except UnicodeError:
582 pass
583else:
584 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
585
586verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
587
# UTF-8 specific encoding tests:
verify(u''.encode('utf-8') == '')
verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
verify((u'\ud800\udc02'*1000).encode('utf-8') ==
       '\xf0\x90\x80\x82'*1000)
verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
       u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
       u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
       u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
       u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
       u' Nunstuck git und'.encode('utf-8') ==
       '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
       '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
       '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
       '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
       '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
       '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
       '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
       '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
       '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
       '\xe3\x80\x8cWenn ist das Nunstuck git und')

# UTF-8 specific decoding tests
verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8

verify(unicode('hello','ascii') == u'hello')
verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000627
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000628# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000629try:
630 u'Andr\202 x'.encode('ascii')
631 u'Andr\202 x'.encode('ascii','strict')
632except ValueError:
633 pass
634else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000635 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000636verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
637verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000638
639try:
640 unicode('Andr\202 x','ascii')
641 unicode('Andr\202 x','ascii','strict')
642except ValueError:
643 pass
644else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000645 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000646verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
647verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000648
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000649verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
650try:
651 "\\".decode("unicode-escape")
652except ValueError:
653 pass
654else:
655 raise TestFailed, '"\\".decode("unicode-escape") should fail'
656
# Encoding the ASCII-only text u'hello': the byte-oriented codecs return it
# unchanged, while the UTF-16 variants produce two bytes per character in
# the byte order named by the codec.
for encoding, encoded in (
    ('ascii', 'hello'),
    ('utf-7', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
):
    verify(u'hello'.encode(encoding) == encoded)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000664
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000665# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000666u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000667for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000668 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000669 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000670
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000671# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000672u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000673for encoding in (
674 'latin-1',
675 ):
676 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000677 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000678 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000679 print '*** codec "%s" failed round-trip' % encoding
680 except ValueError,why:
681 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000682
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000683# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000684u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000685for encoding in (
686 'ascii',
687 ):
688 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000689 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000690 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000691 print '*** codec "%s" failed round-trip' % encoding
692 except ValueError,why:
693 print '*** codec for "%s" failed: %s' % (encoding, why)
694
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000695# Roundtrip safety for non-BMP (just a few chars)
696u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
697for encoding in ('utf-8',
698 'utf-16', 'utf-16-le', 'utf-16-be',
699 #'raw_unicode_escape',
700 'unicode_escape', 'unicode_internal'):
701 verify(unicode(u.encode(encoding),encoding) == u)
702
703# UTF-8 must be roundtrip safe for all UCS-2 code points
Martin v. Löwis1ce4ae32002-09-14 09:19:53 +0000704# This excludes surrogates: in the full range, there would be
705# a surrogate pair (\udbff\udc00), which gets converted back
706# to a non-BMP character (\U0010fc00)
707u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000708for encoding in ('utf-8',):
709 verify(unicode(u.encode(encoding),encoding) == u)
710
Guido van Rossum9e896b32000-04-05 20:11:21 +0000711print 'done.'
712
713print 'Testing standard mapping codecs...',
714
715print '0-127...',
716s = ''.join(map(chr, range(128)))
717for encoding in (
718 'cp037', 'cp1026',
719 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
720 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000721 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000722 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
723 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
724 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
725 'mac_cyrillic', 'mac_latin2',
726
727 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
728 'cp1256', 'cp1257', 'cp1258',
729 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
730
731 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000732 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000733
Guido van Rossum9e896b32000-04-05 20:11:21 +0000734 ### These have undefined mappings:
735 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000736
Tim Peters2f228e72001-05-13 00:19:31 +0000737 ### These fail the round-trip:
738 #'cp875'
739
Guido van Rossum9e896b32000-04-05 20:11:21 +0000740 ):
741 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000742 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000743 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000744 print '*** codec "%s" failed round-trip' % encoding
745 except ValueError,why:
746 print '*** codec for "%s" failed: %s' % (encoding, why)
747
748print '128-255...',
749s = ''.join(map(chr, range(128,256)))
750for encoding in (
751 'cp037', 'cp1026',
752 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
753 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000754 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000755 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000756 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000757 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000758 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000759
Guido van Rossum9e896b32000-04-05 20:11:21 +0000760 ### These have undefined mappings:
761 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
762 #'cp1256', 'cp1257', 'cp1258',
763 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000764 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000765 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000766
Guido van Rossum9e896b32000-04-05 20:11:21 +0000767 ### These fail the round-trip:
768 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000769
Guido van Rossum9e896b32000-04-05 20:11:21 +0000770 ):
771 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000772 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000773 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000774 print '*** codec "%s" failed round-trip' % encoding
775 except ValueError,why:
776 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000777
778print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000779
780print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000781verify((u"abc" u"def") == u"abcdef")
782verify(("abc" u"def") == u"abcdef")
783verify((u"abc" "def") == u"abcdef")
784verify((u"abc" u"def" "ghi") == u"abcdefghi")
785verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000786print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000787
# Exercise the print statement with unicode operands: a lone unicode string,
# unicode mixed with plain strings in either order, and newline-terminated
# strings both with and without a trailing comma.
# NOTE(review): the exact statement sequence, including the repeated lines,
# presumably matters because this script's output is compared against a
# recorded expected output -- confirm before removing apparent duplicates.
print 'Testing Unicode printing...',
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000799
800def test_exception(lhs, rhs, msg):
801 try:
802 lhs in rhs
803 except TypeError:
804 pass
805 else:
806 raise TestFailed, msg
807
def run_contains_tests():
    """Check substring containment across str/unicode operand mixes.

    Each scenario is tried three ways: unicode in str, str in unicode and
    unicode in unicode.  Every result is compared with the expected
    boolean through vereq, in the order listed.
    """
    cases = (
        # (needle, haystack, expected result of ``needle in haystack``)
        # Empty needle in an empty haystack.
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        # Empty needle in a non-empty haystack.
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        # NUL character that is absent.
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        # NUL character at the start.
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        # NUL character at the end.
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        # Ordinary character following a NUL.
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        # Needle equal to the whole haystack.
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        # Needle longer than the haystack.
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        # Non-empty needle in an empty haystack.
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    )
    for needle, haystack, expected in cases:
        vereq(needle in haystack, expected)
836
837run_contains_tests()