blob: f5f4245ca21a336821a79fe588a11fc728db52ed [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
110
111test('lower', u'HeLLo', u'hello')
112test('lower', u'hello', u'hello')
113
114test('upper', u'HeLLo', u'HELLO')
115test('upper', u'HELLO', u'HELLO')
116
117if 0:
118 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
119
120 test('maketrans', u'abc', transtable, u'xyz')
121 test('maketrans', u'abc', ValueError, u'xyzq')
122
123test('split', u'this is the split function',
124 [u'this', u'is', u'the', u'split', u'function'])
125test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
126test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
127test('split', u'a b c d', [u'a', u'b c d'], None, 1)
128test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
129test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
130test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
131test('split', u'a b c d', [u'a b c d'], None, 0)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000134test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
135test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
136test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
137test('split', u'endcase test', [u'endcase ', u''], u'test')
138test('split', u'endcase test', [u'endcase ', u''], 'test')
139test('split', 'endcase test', [u'endcase ', u''], u'test')
140
Guido van Rossuma831cac2000-03-10 23:23:21 +0000141
142# join now works with any sequence type
class Sequence:
    """Minimal user-defined sequence wrapper used as a join() argument.

    Only __len__ and __getitem__ are provided; both simply delegate to
    the wrapped object kept in ``self.seq``.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
147
148test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000149test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000150test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000151test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000152test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
154test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
155test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
156test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
157test('join', ' ', u'w x y z', Sequence(u'wxyz'))
158test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000159
# Build the expected value by plain concatenation (join itself is the
# method under test): ten runs of ten 'x' separated by single colons.
chunk = u'x' * 10
result = chunk
for i in range(1, 10):
    result = result + u':' + chunk
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
167
168test('strip', u' hello ', u'hello')
169test('lstrip', u' hello ', u'hello ')
170test('rstrip', u' hello ', u' hello')
171test('strip', u'hello', u'hello')
172
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000173# strip/lstrip/rstrip with None arg
174test('strip', u' hello ', u'hello', None)
175test('lstrip', u' hello ', u'hello ', None)
176test('rstrip', u' hello ', u' hello', None)
177test('strip', u'hello', u'hello', None)
178
179# strip/lstrip/rstrip with unicode arg
180test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
181test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
182test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
183test('strip', u'hello', u'hello', u'xyz')
184
185# strip/lstrip/rstrip with str arg
186test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
187test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
188test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
189test('strip', u'hello', u'hello', 'xyz')
190
Guido van Rossuma831cac2000-03-10 23:23:21 +0000191test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
192
193if 0:
194 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
195
196 table = string.maketrans('a', u'A')
197 test('translate', u'abc', u'Abc', table)
198 test('translate', u'xyz', u'xyz', table)
199
200test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000201test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
203test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
204test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
205test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
206test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
207test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
208test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
Guido van Rossumf36921c2002-08-09 15:36:48 +0000209try:
210 u"abc".replace(u"", u"x")
211except ValueError:
212 pass
213else:
214 raise TestFailed, "u.replace('', ...) should raise ValueError"
Guido van Rossuma831cac2000-03-10 23:23:21 +0000215
Guido van Rossum77f6a652002-04-03 22:41:51 +0000216test('startswith', u'hello', True, u'he')
217test('startswith', u'hello', True, u'hello')
218test('startswith', u'hello', False, u'hello world')
219test('startswith', u'hello', True, u'')
220test('startswith', u'hello', False, u'ello')
221test('startswith', u'hello', True, u'ello', 1)
222test('startswith', u'hello', True, u'o', 4)
223test('startswith', u'hello', False, u'o', 5)
224test('startswith', u'hello', True, u'', 5)
225test('startswith', u'hello', False, u'lo', 6)
226test('startswith', u'helloworld', True, u'lowo', 3)
227test('startswith', u'helloworld', True, u'lowo', 3, 7)
228test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000229
Guido van Rossum77f6a652002-04-03 22:41:51 +0000230test('endswith', u'hello', True, u'lo')
231test('endswith', u'hello', False, u'he')
232test('endswith', u'hello', True, u'')
233test('endswith', u'hello', False, u'hello world')
234test('endswith', u'helloworld', False, u'worl')
235test('endswith', u'helloworld', True, u'worl', 3, 9)
236test('endswith', u'helloworld', True, u'world', 3, 12)
237test('endswith', u'helloworld', True, u'lowo', 1, 7)
238test('endswith', u'helloworld', True, u'lowo', 2, 7)
239test('endswith', u'helloworld', True, u'lowo', 3, 7)
240test('endswith', u'helloworld', False, u'lowo', 4, 7)
241test('endswith', u'helloworld', False, u'lowo', 3, 8)
242test('endswith', u'ab', False, u'ab', 0, 1)
243test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000244
245test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
246test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
247test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
248test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
Walter Dörwald2ee4be02002-04-17 21:34:05 +0000249test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
251if 0:
252 test('capwords', u'abc def ghi', u'Abc Def Ghi')
253 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
254 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
255
Walter Dörwald068325e2002-04-15 13:36:47 +0000256test('zfill', u'123', u'123', 2)
257test('zfill', u'123', u'123', 3)
258test('zfill', u'123', u'0123', 4)
259test('zfill', u'+123', u'+123', 3)
260test('zfill', u'+123', u'+123', 4)
261test('zfill', u'+123', u'+0123', 5)
262test('zfill', u'-123', u'-123', 3)
263test('zfill', u'-123', u'-123', 4)
264test('zfill', u'-123', u'-0123', 5)
265test('zfill', u'', u'000', 3)
266test('zfill', u'34', u'34', 1)
267test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000268
Guido van Rossuma831cac2000-03-10 23:23:21 +0000269# Comparisons:
270print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000271verify(u'abc' == 'abc')
272verify('abc' == u'abc')
273verify(u'abc' == u'abc')
274verify(u'abcd' > 'abc')
275verify('abcd' > u'abc')
276verify(u'abcd' > u'abc')
277verify(u'abc' < 'abcd')
278verify('abc' < u'abcd')
279verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000280print 'done.'
281
Marc-André Lemburge5034372000-08-08 08:04:29 +0000282if 0:
283 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000284
Marc-André Lemburge5034372000-08-08 08:04:29 +0000285 print 'Testing UTF-16 code point order comparisons...',
286 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000287 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000289 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 # Non surrogate above surrogate value, fixup required
292 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000293 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000294
Marc-André Lemburge5034372000-08-08 08:04:29 +0000295 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000296 s2 = u'\ud800\udc01'
297 test_lecmp(s, s2)
298 s2 = u'\ud900\udc01'
299 test_lecmp(s, s2)
300 s2 = u'\uda00\udc01'
301 test_lecmp(s, s2)
302 s2 = u'\udb00\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud800\udd01'
305 test_lecmp(s, s2)
306 s2 = u'\ud900\udd01'
307 test_lecmp(s, s2)
308 s2 = u'\uda00\udd01'
309 test_lecmp(s, s2)
310 s2 = u'\udb00\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud800\ude01'
313 test_lecmp(s, s2)
314 s2 = u'\ud900\ude01'
315 test_lecmp(s, s2)
316 s2 = u'\uda00\ude01'
317 test_lecmp(s, s2)
318 s2 = u'\udb00\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud800\udfff'
321 test_lecmp(s, s2)
322 s2 = u'\ud900\udfff'
323 test_lecmp(s, s2)
324 s2 = u'\uda00\udfff'
325 test_lecmp(s, s2)
326 s2 = u'\udb00\udfff'
327 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000328
329 test_fixup(u'\ue000')
330 test_fixup(u'\uff61')
331
332 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000333 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000335
Guido van Rossuma831cac2000-03-10 23:23:21 +0000336test('ljust', u'abc', u'abc ', 10)
337test('rjust', u'abc', u' abc', 10)
338test('center', u'abc', u' abc ', 10)
339test('ljust', u'abc', u'abc ', 6)
340test('rjust', u'abc', u' abc', 6)
341test('center', u'abc', u' abc ', 6)
342test('ljust', u'abc', u'abc', 2)
343test('rjust', u'abc', u'abc', 2)
344test('center', u'abc', u'abc', 2)
345
Guido van Rossum77f6a652002-04-03 22:41:51 +0000346test('islower', u'a', True)
347test('islower', u'A', False)
348test('islower', u'\n', False)
349test('islower', u'\u1FFc', False)
350test('islower', u'abc', True)
351test('islower', u'aBc', False)
352test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000353
# isupper() expectations, run in order; the U+1FFC case is skipped on
# platforms whose sys.platform starts with 'java'.
isupper_cases = [(u'a', False), (u'A', True), (u'\n', False)]
if not sys.platform.startswith('java'):
    isupper_cases.append((u'\u1FFc', False))
isupper_cases.extend([(u'ABC', True), (u'AbC', False), (u'ABC\n', True)])
for text, expected in isupper_cases:
    test('isupper', text, expected)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000362
Guido van Rossum77f6a652002-04-03 22:41:51 +0000363test('istitle', u'a', False)
364test('istitle', u'A', True)
365test('istitle', u'\n', False)
366test('istitle', u'\u1FFc', True)
367test('istitle', u'A Titlecased Line', True)
368test('istitle', u'A\nTitlecased Line', True)
369test('istitle', u'A Titlecased, Line', True)
370test('istitle', u'Greek \u1FFcitlecases ...', True)
371test('istitle', u'Not a capitalized String', False)
372test('istitle', u'Not\ta Titlecase String', False)
373test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000374
Guido van Rossum77f6a652002-04-03 22:41:51 +0000375test('isalpha', u'a', True)
376test('isalpha', u'A', True)
377test('isalpha', u'\n', False)
378test('isalpha', u'\u1FFc', True)
379test('isalpha', u'abc', True)
380test('isalpha', u'aBc123', False)
381test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000382
Guido van Rossum77f6a652002-04-03 22:41:51 +0000383test('isalnum', u'a', True)
384test('isalnum', u'A', True)
385test('isalnum', u'\n', False)
386test('isalnum', u'123abc456', True)
387test('isalnum', u'a1b3c', True)
388test('isalnum', u'aBc000 ', False)
389test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000390
Guido van Rossuma831cac2000-03-10 23:23:21 +0000391test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
392test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
393test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
394test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
395test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
396test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000397test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000398
399test('translate', u"abababc", u'bbbc', {ord('a'):None})
400test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
401test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
402
Guido van Rossumd4d26842000-03-13 23:21:48 +0000403# Contains:
404print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000405vereq(('a' in u'abdb'), True)
406vereq(('a' in u'bdab'), True)
407vereq(('a' in u'bdaba'), True)
408vereq(('a' in u'bdba'), True)
409vereq(('a' in u'bdba'), True)
410vereq((u'a' in u'bdba'), True)
411vereq((u'a' in u'bdb'), False)
412vereq((u'a' in 'bdb'), False)
413vereq((u'a' in 'bdba'), True)
414vereq((u'a' in ('a',1,None)), True)
415vereq((u'a' in (1,None,'a')), True)
416vereq((u'a' in (1,None,u'a')), True)
417vereq(('a' in ('a',1,None)), True)
418vereq(('a' in (1,None,'a')), True)
419vereq(('a' in (1,None,u'a')), True)
420vereq(('a' in ('x',1,u'y')), False)
421vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000422vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000423vereq((u'ab' in u'abcd'), True)
424vereq(('ab' in u'abc'), True)
425vereq((u'ab' in 'abc'), True)
426vereq((u'ab' in (1,None,u'ab')), True)
427vereq((u'' in u'abc'), True)
428vereq(('' in u'abc'), True)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000429print 'done.'
430
Guido van Rossuma831cac2000-03-10 23:23:21 +0000431# Formatting:
432print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000433verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
434verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
435verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
436verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
437verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
438verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
439verify(u"%c" % (u"a",) == u'a')
440verify(u"%c" % ("a",) == u'a')
441verify(u"%c" % (34,) == u'"')
442verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000443if sys.platform[:4] != 'java':
444 value = u"%r, %r" % (u"abc", "abc")
445 if value != u"u'abc', 'abc'":
446 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000447
Marc-André Lemburg36619082001-01-17 19:11:13 +0000448verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000449try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000450 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000451except KeyError:
452 print '*** formatting failed for "%s"' % "u'abc, def'"
453else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000454 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000455
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000456for ordinal in (-100, 0x20000):
457 try:
458 u"%c" % ordinal
459 except ValueError:
460 pass
461 else:
462 print '*** formatting u"%%c" % %i should give a ValueError' % ordinal
463
Guido van Rossum97064862000-04-10 13:52:48 +0000464# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000465verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
466verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
467verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
468verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
469verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
470verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
471verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
472verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
473verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000474verify('%*s' % (5,u'abc',) == u' abc')
475verify('%*s' % (-5,u'abc',) == u'abc ')
476verify('%*.*s' % (5,2,u'abc',) == u' ab')
477verify('%*.*s' % (5,3,u'abc',) == u' abc')
478verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
479verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000480print 'done.'
481
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000482print 'Testing builtin unicode()...',
483
484# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
485
486verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
487
class UnicodeSubclass(unicode):
    """Trivial unicode subclass that adds nothing to the base type."""
490
491verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
492 == u'unicode subclass becomes unicode')
493
494verify(unicode('strings are converted to unicode')
495 == u'strings are converted to unicode')
496
class UnicodeCompat:
    """Object that exposes its payload through the __unicode__ hook."""

    def __init__(self, x):
        # Stored as-is; __unicode__ returns it without conversion.
        self.x = x

    def __unicode__(self):
        return self.x
502
503verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
504 == u'__unicode__ compatible objects are recognized')
505
class StringCompat:
    """Object that exposes its payload through the __str__ hook only."""

    def __init__(self, x):
        # Stored as-is; __str__ returns it without conversion.
        self.x = x

    def __str__(self):
        return self.x
511
512verify(unicode(StringCompat('__str__ compatible objects are recognized'))
513 == u'__str__ compatible objects are recognized')
514
515# unicode(obj) is compatible to str():
516
517o = StringCompat('unicode(obj) is compatible to str()')
518verify(unicode(o) == u'unicode(obj) is compatible to str()')
519verify(str(o) == 'unicode(obj) is compatible to str()')
520
521for obj in (123, 123.45, 123L):
522 verify(unicode(obj) == unicode(str(obj)))
523
524# unicode(obj, encoding, error) tests (this maps to
525# PyUnicode_FromEncodedObject() at C level)
526
Finn Bock2b29cb22001-12-10 20:57:34 +0000527if not sys.platform.startswith('java'):
528 try:
529 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
530 except TypeError:
531 pass
532 else:
533 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000534
535verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
536 == u'strings are decoded to unicode')
537
Finn Bock2b29cb22001-12-10 20:57:34 +0000538if not sys.platform.startswith('java'):
539 verify(unicode(buffer('character buffers are decoded to unicode'),
540 'utf-8', 'strict')
541 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000542
543print 'done.'
544
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000545# Test builtin codecs
546print 'Testing builtin codecs...',
547
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000548# UTF-7 specific encoding tests:
549utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
550 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
551 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
552 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
553 (u'+', '+-'),
554 (u'+-', '+--'),
555 (u'+?', '+-?'),
556 (u'\?', '+AFw?'),
557 (u'+?', '+-?'),
558 (ur'\\?', '+AFwAXA?'),
559 (ur'\\\?', '+AFwAXABc?'),
560 (ur'++--', '+-+---')]
561
562for x,y in utfTests:
563 verify( x.encode('utf-7') == y )
564
Tim Peters527e64f2001-10-04 05:36:56 +0000565try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000566 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
567except UnicodeError:
568 pass
569else:
570 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
571
572verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
573
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000574# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000575verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000576verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
577verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
578verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
579verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
580verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
581verify((u'\ud800\udc02'*1000).encode('utf-8') ==
582 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000583verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
584 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
585 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
586 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
587 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000588 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000589 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
590 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
591 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
592 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
593 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
594 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
595 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
596 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
597 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
598 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000599
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000600# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000601verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
602verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
603verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000604
605# Other possible utf-8 test cases:
606# * strict decoding testing for all of the
607# UTF8_ERROR cases in PyUnicode_DecodeUTF8
608
Marc-André Lemburg36619082001-01-17 19:11:13 +0000609verify(unicode('hello','ascii') == u'hello')
610verify(unicode('hello','utf-8') == u'hello')
611verify(unicode('hello','utf8') == u'hello')
612verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000613
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000614# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000615try:
616 u'Andr\202 x'.encode('ascii')
617 u'Andr\202 x'.encode('ascii','strict')
618except ValueError:
619 pass
620else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000621 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000622verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
623verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000624
625try:
626 unicode('Andr\202 x','ascii')
627 unicode('Andr\202 x','ascii','strict')
628except ValueError:
629 pass
630else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000631 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000632verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
633verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000634
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000635verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
636try:
637 "\\".decode("unicode-escape")
638except ValueError:
639 pass
640else:
641 raise TestFailed, '"\\".decode("unicode-escape") should fail'
642
Marc-André Lemburg36619082001-01-17 19:11:13 +0000643verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000644verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000645verify(u'hello'.encode('utf-8') == 'hello')
646verify(u'hello'.encode('utf8') == 'hello')
647verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
648verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
649verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000650
# Roundtrip safety for BMP (just the first 1024 chars): encoding and then
# decoding with the same codec must give back the original text.
u = u''.join(map(unichr, xrange(1024)))
for encoding in (
    'utf-7',
    'utf-8',
    'utf-16',
    'utf-16-le',
    'utf-16-be',
    'raw_unicode_escape',
    'unicode_escape',
    'unicode_internal',
):
    verify(unicode(u.encode(encoding), encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000656
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000657# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000658u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000659for encoding in (
660 'latin-1',
661 ):
662 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000663 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000664 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000665 print '*** codec "%s" failed round-trip' % encoding
666 except ValueError,why:
667 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000668
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000669# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000670u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000671for encoding in (
672 'ascii',
673 ):
674 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000675 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000676 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000677 print '*** codec "%s" failed round-trip' % encoding
678 except ValueError,why:
679 print '*** codec for "%s" failed: %s' % (encoding, why)
680
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000681# Roundtrip safety for non-BMP (just a few chars)
682u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
683for encoding in ('utf-8',
684 'utf-16', 'utf-16-le', 'utf-16-be',
685 #'raw_unicode_escape',
686 'unicode_escape', 'unicode_internal'):
687 verify(unicode(u.encode(encoding),encoding) == u)
688
689# UTF-8 must be roundtrip safe for all UCS-2 code points
690u = u''.join(map(unichr, range(0x10000)))
691for encoding in ('utf-8',):
692 verify(unicode(u.encode(encoding),encoding) == u)
693
Guido van Rossum9e896b32000-04-05 20:11:21 +0000694print 'done.'
695
696print 'Testing standard mapping codecs...',
697
698print '0-127...',
699s = ''.join(map(chr, range(128)))
700for encoding in (
701 'cp037', 'cp1026',
702 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
703 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000704 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000705 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
706 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
707 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
708 'mac_cyrillic', 'mac_latin2',
709
710 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
711 'cp1256', 'cp1257', 'cp1258',
712 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
713
714 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000715 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000716
Guido van Rossum9e896b32000-04-05 20:11:21 +0000717 ### These have undefined mappings:
718 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000719
Tim Peters2f228e72001-05-13 00:19:31 +0000720 ### These fail the round-trip:
721 #'cp875'
722
Guido van Rossum9e896b32000-04-05 20:11:21 +0000723 ):
724 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000725 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000726 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000727 print '*** codec "%s" failed round-trip' % encoding
728 except ValueError,why:
729 print '*** codec for "%s" failed: %s' % (encoding, why)
730
731print '128-255...',
732s = ''.join(map(chr, range(128,256)))
733for encoding in (
734 'cp037', 'cp1026',
735 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
736 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000737 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000738 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000739 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000740 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000741 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000742
Guido van Rossum9e896b32000-04-05 20:11:21 +0000743 ### These have undefined mappings:
744 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
745 #'cp1256', 'cp1257', 'cp1258',
746 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000747 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000748 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000749
Guido van Rossum9e896b32000-04-05 20:11:21 +0000750 ### These fail the round-trip:
751 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000752
Guido van Rossum9e896b32000-04-05 20:11:21 +0000753 ):
754 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000755 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000756 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000757 print '*** codec "%s" failed round-trip' % encoding
758 except ValueError,why:
759 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000760
761print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000762
763print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000764verify((u"abc" u"def") == u"abcdef")
765verify(("abc" u"def") == u"abcdef")
766verify((u"abc" "def") == u"abcdef")
767verify((u"abc" u"def" "ghi") == u"abcdefghi")
768verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000769print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000770
# Exercise the print statement with unicode operands: alone, mixed with
# byte strings, and with or without a trailing comma.
# NOTE(review): the repeated statements below look deliberate (they probe
# how print behaves after an operand ending in "\n", with and without a
# trailing comma) -- confirm before deduplicating or reordering anything.
# NOTE(review): presumably the emitted text is compared against a stored
# expected-output file, so every statement here affects the result.
print 'Testing Unicode printing...',
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000782
783def test_exception(lhs, rhs, msg):
784 try:
785 lhs in rhs
786 except TypeError:
787 pass
788 else:
789 raise TestFailed, msg
790
def run_contains_tests():
    """Check the ``in`` operator across byte-string and unicode operands.

    Every scenario appears three times: unicode in str, str in unicode and
    unicode in unicode.  Each row is (needle, haystack, expected result);
    the rows are checked in order with vereq().
    """
    checks = (
        # empty needle, empty haystack
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        # empty needle, non-empty haystack
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        # NUL character that is absent
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        # NUL character at the start
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        # NUL character at the end
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        # ordinary character after a leading NUL
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        # needle equal to the haystack
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        # needle longer than the haystack
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        # non-empty needle, empty haystack
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    )
    for needle, haystack, expected in checks:
        vereq(needle in haystack, expected)
819
# Run the containment checks right away, as part of executing this script.
run_contains_tests()