blob: 9e36316d79f5ddcd15429e6fde32950e80021f85 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
Guido van Rossum76afbd92002-08-20 17:29:29 +0000110test('rfind', 'abcdefghiabc', 9, u'abc')
111test('rfind', 'abcdefghiabc', 12, u'')
112test('rfind', u'abcdefghiabc', 12, '')
113test('rfind', u'abcdefghiabc', 12, u'')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000114
115test('lower', u'HeLLo', u'hello')
116test('lower', u'hello', u'hello')
117
118test('upper', u'HeLLo', u'HELLO')
119test('upper', u'HELLO', u'HELLO')
120
121if 0:
122 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
123
124 test('maketrans', u'abc', transtable, u'xyz')
125 test('maketrans', u'abc', ValueError, u'xyzq')
126
127test('split', u'this is the split function',
128 [u'this', u'is', u'the', u'split', u'function'])
129test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
130test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
131test('split', u'a b c d', [u'a', u'b c d'], None, 1)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
134test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
135test('split', u'a b c d', [u'a b c d'], None, 0)
136test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
137test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000138test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
139test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
140test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
141test('split', u'endcase test', [u'endcase ', u''], u'test')
142test('split', u'endcase test', [u'endcase ', u''], 'test')
143test('split', 'endcase test', [u'endcase ', u''], u'test')
144
Guido van Rossuma831cac2000-03-10 23:23:21 +0000145
146# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper offering only __len__ and __getitem__.

    Used to show that join accepts any object following the sequence
    protocol, not just lists and tuples.
    """

    def __init__(self, seq):
        # Underlying sequence that all operations delegate to.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
151
152test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000154test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000155test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000156test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000157test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
158test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
159test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
160test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
161test('join', ' ', u'w x y z', Sequence(u'wxyz'))
162test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000163
164result = u''
165for i in range(10):
166 if i > 0:
167 result = result + u':'
168 result = result + u'x'*10
169test('join', u':', result, [u'x' * 10] * 10)
170test('join', u':', result, (u'x' * 10,) * 10)
171
172test('strip', u' hello ', u'hello')
173test('lstrip', u' hello ', u'hello ')
174test('rstrip', u' hello ', u' hello')
175test('strip', u'hello', u'hello')
176
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000177# strip/lstrip/rstrip with None arg
178test('strip', u' hello ', u'hello', None)
179test('lstrip', u' hello ', u'hello ', None)
180test('rstrip', u' hello ', u' hello', None)
181test('strip', u'hello', u'hello', None)
182
183# strip/lstrip/rstrip with unicode arg
184test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
185test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
186test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
187test('strip', u'hello', u'hello', u'xyz')
188
189# strip/lstrip/rstrip with str arg
190test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
191test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
192test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
193test('strip', u'hello', u'hello', 'xyz')
194
Guido van Rossuma831cac2000-03-10 23:23:21 +0000195test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
196
197if 0:
198 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
199
200 table = string.maketrans('a', u'A')
201 test('translate', u'abc', u'Abc', table)
202 test('translate', u'xyz', u'xyz', table)
203
204test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000205test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000206test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
207test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
208test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
209test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
210test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
211test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
212test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000213test('replace', u'abc', u'-a-b-c-', u'', u'-')
214test('replace', u'abc', u'-a-b-c', u'', u'-', 3)
215test('replace', u'abc', u'abc', u'', u'-', 0)
Guido van Rossum2023c9b2002-08-23 18:50:21 +0000216test('replace', u'abc', u'abc', u'ab', u'--', 0)
217test('replace', u'abc', u'abc', u'xy', u'--')
Guido van Rossum8b1a6d62002-08-23 18:21:28 +0000218test('replace', u'', u'', u'', u'')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000219
Guido van Rossum77f6a652002-04-03 22:41:51 +0000220test('startswith', u'hello', True, u'he')
221test('startswith', u'hello', True, u'hello')
222test('startswith', u'hello', False, u'hello world')
223test('startswith', u'hello', True, u'')
224test('startswith', u'hello', False, u'ello')
225test('startswith', u'hello', True, u'ello', 1)
226test('startswith', u'hello', True, u'o', 4)
227test('startswith', u'hello', False, u'o', 5)
228test('startswith', u'hello', True, u'', 5)
229test('startswith', u'hello', False, u'lo', 6)
230test('startswith', u'helloworld', True, u'lowo', 3)
231test('startswith', u'helloworld', True, u'lowo', 3, 7)
232test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000233
Guido van Rossum77f6a652002-04-03 22:41:51 +0000234test('endswith', u'hello', True, u'lo')
235test('endswith', u'hello', False, u'he')
236test('endswith', u'hello', True, u'')
237test('endswith', u'hello', False, u'hello world')
238test('endswith', u'helloworld', False, u'worl')
239test('endswith', u'helloworld', True, u'worl', 3, 9)
240test('endswith', u'helloworld', True, u'world', 3, 12)
241test('endswith', u'helloworld', True, u'lowo', 1, 7)
242test('endswith', u'helloworld', True, u'lowo', 2, 7)
243test('endswith', u'helloworld', True, u'lowo', 3, 7)
244test('endswith', u'helloworld', False, u'lowo', 4, 7)
245test('endswith', u'helloworld', False, u'lowo', 3, 8)
246test('endswith', u'ab', False, u'ab', 0, 1)
247test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossum76afbd92002-08-20 17:29:29 +0000248test('endswith', 'helloworld', True, u'd')
249test('endswith', 'helloworld', False, u'l')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
251test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
252test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
253test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
254test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
Walter Dörwald2ee4be02002-04-17 21:34:05 +0000255test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000256
257if 0:
258 test('capwords', u'abc def ghi', u'Abc Def Ghi')
259 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
260 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
261
Walter Dörwald068325e2002-04-15 13:36:47 +0000262test('zfill', u'123', u'123', 2)
263test('zfill', u'123', u'123', 3)
264test('zfill', u'123', u'0123', 4)
265test('zfill', u'+123', u'+123', 3)
266test('zfill', u'+123', u'+123', 4)
267test('zfill', u'+123', u'+0123', 5)
268test('zfill', u'-123', u'-123', 3)
269test('zfill', u'-123', u'-123', 4)
270test('zfill', u'-123', u'-0123', 5)
271test('zfill', u'', u'000', 3)
272test('zfill', u'34', u'34', 1)
273test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000274
Guido van Rossuma831cac2000-03-10 23:23:21 +0000275# Comparisons:
276print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000277verify(u'abc' == 'abc')
278verify('abc' == u'abc')
279verify(u'abc' == u'abc')
280verify(u'abcd' > 'abc')
281verify('abcd' > u'abc')
282verify(u'abcd' > u'abc')
283verify(u'abc' < 'abcd')
284verify('abc' < u'abcd')
285verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000286print 'done.'
287
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288if 0:
289 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 print 'Testing UTF-16 code point order comparisons...',
292 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000293 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000294 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000295 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000296
Marc-André Lemburge5034372000-08-08 08:04:29 +0000297 # Non surrogate above surrogate value, fixup required
298 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000299 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000300
Marc-André Lemburge5034372000-08-08 08:04:29 +0000301 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000302 s2 = u'\ud800\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud900\udc01'
305 test_lecmp(s, s2)
306 s2 = u'\uda00\udc01'
307 test_lecmp(s, s2)
308 s2 = u'\udb00\udc01'
309 test_lecmp(s, s2)
310 s2 = u'\ud800\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud900\udd01'
313 test_lecmp(s, s2)
314 s2 = u'\uda00\udd01'
315 test_lecmp(s, s2)
316 s2 = u'\udb00\udd01'
317 test_lecmp(s, s2)
318 s2 = u'\ud800\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud900\ude01'
321 test_lecmp(s, s2)
322 s2 = u'\uda00\ude01'
323 test_lecmp(s, s2)
324 s2 = u'\udb00\ude01'
325 test_lecmp(s, s2)
326 s2 = u'\ud800\udfff'
327 test_lecmp(s, s2)
328 s2 = u'\ud900\udfff'
329 test_lecmp(s, s2)
330 s2 = u'\uda00\udfff'
331 test_lecmp(s, s2)
332 s2 = u'\udb00\udfff'
333 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334
335 test_fixup(u'\ue000')
336 test_fixup(u'\uff61')
337
338 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000339 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000340 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000341
Guido van Rossuma831cac2000-03-10 23:23:21 +0000342test('ljust', u'abc', u'abc ', 10)
343test('rjust', u'abc', u' abc', 10)
344test('center', u'abc', u' abc ', 10)
345test('ljust', u'abc', u'abc ', 6)
346test('rjust', u'abc', u' abc', 6)
347test('center', u'abc', u' abc ', 6)
348test('ljust', u'abc', u'abc', 2)
349test('rjust', u'abc', u'abc', 2)
350test('center', u'abc', u'abc', 2)
351
Guido van Rossum77f6a652002-04-03 22:41:51 +0000352test('islower', u'a', True)
353test('islower', u'A', False)
354test('islower', u'\n', False)
355test('islower', u'\u1FFc', False)
356test('islower', u'abc', True)
357test('islower', u'aBc', False)
358test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000359
Guido van Rossum77f6a652002-04-03 22:41:51 +0000360test('isupper', u'a', False)
361test('isupper', u'A', True)
362test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000363if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000364 test('isupper', u'\u1FFc', False)
365test('isupper', u'ABC', True)
366test('isupper', u'AbC', False)
367test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000368
Guido van Rossum77f6a652002-04-03 22:41:51 +0000369test('istitle', u'a', False)
370test('istitle', u'A', True)
371test('istitle', u'\n', False)
372test('istitle', u'\u1FFc', True)
373test('istitle', u'A Titlecased Line', True)
374test('istitle', u'A\nTitlecased Line', True)
375test('istitle', u'A Titlecased, Line', True)
376test('istitle', u'Greek \u1FFcitlecases ...', True)
377test('istitle', u'Not a capitalized String', False)
378test('istitle', u'Not\ta Titlecase String', False)
379test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000380
Guido van Rossum77f6a652002-04-03 22:41:51 +0000381test('isalpha', u'a', True)
382test('isalpha', u'A', True)
383test('isalpha', u'\n', False)
384test('isalpha', u'\u1FFc', True)
385test('isalpha', u'abc', True)
386test('isalpha', u'aBc123', False)
387test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000388
Guido van Rossum77f6a652002-04-03 22:41:51 +0000389test('isalnum', u'a', True)
390test('isalnum', u'A', True)
391test('isalnum', u'\n', False)
392test('isalnum', u'123abc456', True)
393test('isalnum', u'a1b3c', True)
394test('isalnum', u'aBc000 ', False)
395test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000396
Guido van Rossuma831cac2000-03-10 23:23:21 +0000397test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
398test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
399test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
400test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
401test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
402test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000403test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000404
405test('translate', u"abababc", u'bbbc', {ord('a'):None})
406test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
407test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
408
Guido van Rossumd4d26842000-03-13 23:21:48 +0000409# Contains:
410print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000411vereq(('a' in u'abdb'), True)
412vereq(('a' in u'bdab'), True)
413vereq(('a' in u'bdaba'), True)
414vereq(('a' in u'bdba'), True)
415vereq(('a' in u'bdba'), True)
416vereq((u'a' in u'bdba'), True)
417vereq((u'a' in u'bdb'), False)
418vereq((u'a' in 'bdb'), False)
419vereq((u'a' in 'bdba'), True)
420vereq((u'a' in ('a',1,None)), True)
421vereq((u'a' in (1,None,'a')), True)
422vereq((u'a' in (1,None,u'a')), True)
423vereq(('a' in ('a',1,None)), True)
424vereq(('a' in (1,None,'a')), True)
425vereq(('a' in (1,None,u'a')), True)
426vereq(('a' in ('x',1,u'y')), False)
427vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000428vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000429vereq((u'ab' in u'abcd'), True)
430vereq(('ab' in u'abc'), True)
431vereq((u'ab' in 'abc'), True)
432vereq((u'ab' in (1,None,u'ab')), True)
433vereq((u'' in u'abc'), True)
434vereq(('' in u'abc'), True)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000435print 'done.'
436
Guido van Rossuma831cac2000-03-10 23:23:21 +0000437# Formatting:
438print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000439verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
440verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
441verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
442verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
443verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
444verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
445verify(u"%c" % (u"a",) == u'a')
446verify(u"%c" % ("a",) == u'a')
447verify(u"%c" % (34,) == u'"')
448verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000449if sys.platform[:4] != 'java':
450 value = u"%r, %r" % (u"abc", "abc")
451 if value != u"u'abc', 'abc'":
452 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000453
Marc-André Lemburg36619082001-01-17 19:11:13 +0000454verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000455try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000456 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000457except KeyError:
458 print '*** formatting failed for "%s"' % "u'abc, def'"
459else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000460 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000461
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000462for ordinal in (-100, 0x20000):
463 try:
464 u"%c" % ordinal
465 except ValueError:
466 pass
467 else:
468 print '*** formatting u"%%c" % %i should give a ValueError' % ordinal
469
Guido van Rossum97064862000-04-10 13:52:48 +0000470# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000471verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
472verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
473verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
474verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
475verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
476verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
477verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
478verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
479verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000480verify('%*s' % (5,u'abc',) == u' abc')
481verify('%*s' % (-5,u'abc',) == u'abc ')
482verify('%*.*s' % (5,2,u'abc',) == u' ab')
483verify('%*.*s' % (5,3,u'abc',) == u' abc')
484verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
485verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000486print 'done.'
487
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000488print 'Testing builtin unicode()...',
489
490# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
491
492verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
493
class UnicodeSubclass(unicode):
    """Trivial unicode subclass; adds nothing to the base type."""
    pass
496
497verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
498 == u'unicode subclass becomes unicode')
499
500verify(unicode('strings are converted to unicode')
501 == u'strings are converted to unicode')
502
class UnicodeCompat:
    """Object whose only conversion hook is __unicode__."""

    def __init__(self, x):
        # Value handed back unchanged by __unicode__.
        self.x = x

    def __unicode__(self):
        return self.x
508
509verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
510 == u'__unicode__ compatible objects are recognized')
511
class StringCompat:
    """Object whose only conversion hook is __str__."""

    def __init__(self, x):
        # Value handed back unchanged by __str__.
        self.x = x

    def __str__(self):
        return self.x
517
518verify(unicode(StringCompat('__str__ compatible objects are recognized'))
519 == u'__str__ compatible objects are recognized')
520
521# unicode(obj) is compatible to str():
522
523o = StringCompat('unicode(obj) is compatible to str()')
524verify(unicode(o) == u'unicode(obj) is compatible to str()')
525verify(str(o) == 'unicode(obj) is compatible to str()')
526
527for obj in (123, 123.45, 123L):
528 verify(unicode(obj) == unicode(str(obj)))
529
530# unicode(obj, encoding, error) tests (this maps to
531# PyUnicode_FromEncodedObject() at C level)
532
Finn Bock2b29cb22001-12-10 20:57:34 +0000533if not sys.platform.startswith('java'):
534 try:
535 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
536 except TypeError:
537 pass
538 else:
539 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000540
541verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
542 == u'strings are decoded to unicode')
543
Finn Bock2b29cb22001-12-10 20:57:34 +0000544if not sys.platform.startswith('java'):
545 verify(unicode(buffer('character buffers are decoded to unicode'),
546 'utf-8', 'strict')
547 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000548
549print 'done.'
550
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000551# Test builtin codecs
552print 'Testing builtin codecs...',
553
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000554# UTF-7 specific encoding tests:
555utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
556 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
557 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
558 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
559 (u'+', '+-'),
560 (u'+-', '+--'),
561 (u'+?', '+-?'),
562 (u'\?', '+AFw?'),
563 (u'+?', '+-?'),
564 (ur'\\?', '+AFwAXA?'),
565 (ur'\\\?', '+AFwAXABc?'),
566 (ur'++--', '+-+---')]
567
568for x,y in utfTests:
569 verify( x.encode('utf-7') == y )
570
Tim Peters527e64f2001-10-04 05:36:56 +0000571try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000572 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
573except UnicodeError:
574 pass
575else:
576 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
577
578verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
579
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000580# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000581verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000582verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
583verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
584verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
585verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
586verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
587verify((u'\ud800\udc02'*1000).encode('utf-8') ==
588 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000589verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
590 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
591 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
592 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
593 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000594 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000595 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
596 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
597 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
598 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
599 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
600 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
601 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
602 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
603 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
604 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000605
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000606# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000607verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
608verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
609verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000610
611# Other possible utf-8 test cases:
612# * strict decoding testing for all of the
613# UTF8_ERROR cases in PyUnicode_DecodeUTF8
614
Marc-André Lemburg36619082001-01-17 19:11:13 +0000615verify(unicode('hello','ascii') == u'hello')
616verify(unicode('hello','utf-8') == u'hello')
617verify(unicode('hello','utf8') == u'hello')
618verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000619
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000620# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000621try:
622 u'Andr\202 x'.encode('ascii')
623 u'Andr\202 x'.encode('ascii','strict')
624except ValueError:
625 pass
626else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000627 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000628verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
629verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000630
631try:
632 unicode('Andr\202 x','ascii')
633 unicode('Andr\202 x','ascii','strict')
634except ValueError:
635 pass
636else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000637 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000638verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
639verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000640
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000641verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
642try:
643 "\\".decode("unicode-escape")
644except ValueError:
645 pass
646else:
647 raise TestFailed, '"\\".decode("unicode-escape") should fail'
648
# Encoding plain ASCII text: each pair is (codec name, expected byte string).
# Only the UTF-16 variants produce something other than the text itself.
for codec_name, encoded in (
    ('ascii', 'hello'),
    ('utf-7', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
    ):
    verify(u'hello'.encode(codec_name) == encoded)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000656
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000657# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000658u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000659for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000660 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000661 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000662
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000663# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000664u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000665for encoding in (
666 'latin-1',
667 ):
668 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000669 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000670 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000671 print '*** codec "%s" failed round-trip' % encoding
672 except ValueError,why:
673 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000674
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000675# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000676u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000677for encoding in (
678 'ascii',
679 ):
680 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000681 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000682 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000683 print '*** codec "%s" failed round-trip' % encoding
684 except ValueError,why:
685 print '*** codec for "%s" failed: %s' % (encoding, why)
686
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000687# Roundtrip safety for non-BMP (just a few chars)
688u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
689for encoding in ('utf-8',
690 'utf-16', 'utf-16-le', 'utf-16-be',
691 #'raw_unicode_escape',
692 'unicode_escape', 'unicode_internal'):
693 verify(unicode(u.encode(encoding),encoding) == u)
694
695# UTF-8 must be roundtrip safe for all UCS-2 code points
696u = u''.join(map(unichr, range(0x10000)))
697for encoding in ('utf-8',):
698 verify(unicode(u.encode(encoding),encoding) == u)
699
Guido van Rossum9e896b32000-04-05 20:11:21 +0000700print 'done.'
701
702print 'Testing standard mapping codecs...',
703
704print '0-127...',
705s = ''.join(map(chr, range(128)))
706for encoding in (
707 'cp037', 'cp1026',
708 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
709 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000710 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000711 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
712 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
713 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
714 'mac_cyrillic', 'mac_latin2',
715
716 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
717 'cp1256', 'cp1257', 'cp1258',
718 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
719
720 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000721 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000722
Guido van Rossum9e896b32000-04-05 20:11:21 +0000723 ### These have undefined mappings:
724 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000725
Tim Peters2f228e72001-05-13 00:19:31 +0000726 ### These fail the round-trip:
727 #'cp875'
728
Guido van Rossum9e896b32000-04-05 20:11:21 +0000729 ):
730 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000731 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000732 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000733 print '*** codec "%s" failed round-trip' % encoding
734 except ValueError,why:
735 print '*** codec for "%s" failed: %s' % (encoding, why)
736
737print '128-255...',
738s = ''.join(map(chr, range(128,256)))
739for encoding in (
740 'cp037', 'cp1026',
741 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
742 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000743 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000744 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000745 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000746 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000747 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000748
Guido van Rossum9e896b32000-04-05 20:11:21 +0000749 ### These have undefined mappings:
750 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
751 #'cp1256', 'cp1257', 'cp1258',
752 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000753 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000754 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000755
Guido van Rossum9e896b32000-04-05 20:11:21 +0000756 ### These fail the round-trip:
757 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000758
Guido van Rossum9e896b32000-04-05 20:11:21 +0000759 ):
760 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000761 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000762 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000763 print '*** codec "%s" failed round-trip' % encoding
764 except ValueError,why:
765 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000766
767print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000768
769print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000770verify((u"abc" u"def") == u"abcdef")
771verify(("abc" u"def") == u"abcdef")
772verify((u"abc" "def") == u"abcdef")
773verify((u"abc" u"def" "ghi") == u"abcdefghi")
774verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000775print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000776
# Exercise the print statement with unicode operands, alone and mixed with
# byte strings.  Nothing is asserted here; the statements only write to
# stdout.
# NOTE(review): presumably the emitted text is checked against recorded
# expected output by the test driver -- confirm before changing any line.
print 'Testing Unicode printing...',
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
# Strings that already end in a newline, printed both with and without the
# trailing comma that suppresses print's own newline.
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000788
789def test_exception(lhs, rhs, msg):
790 try:
791 lhs in rhs
792 except TypeError:
793 pass
794 else:
795 raise TestFailed, msg
796
def run_contains_tests():
    """Check the ``in`` operator across byte-string/unicode operand mixes.

    Every (needle, haystack, expected) case is evaluated three times, in
    this order: unicode in str, str in unicode, unicode in unicode.  Each
    result is compared with *expected* through vereq().
    """
    cases = (
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
        )
    for needle, haystack, expected in cases:
        # All operands are pure ASCII, so unicode() yields the same text.
        uneedle = unicode(needle)
        uhaystack = unicode(haystack)
        vereq(uneedle in haystack, expected)
        vereq(needle in uhaystack, expected)
        vereq(uneedle in uhaystack, expected)

run_contains_tests()