blob: a915b2e35862210da8281a74f4b2822acbed60de [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
110
111test('lower', u'HeLLo', u'hello')
112test('lower', u'hello', u'hello')
113
114test('upper', u'HeLLo', u'HELLO')
115test('upper', u'HELLO', u'HELLO')
116
117if 0:
118 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
119
120 test('maketrans', u'abc', transtable, u'xyz')
121 test('maketrans', u'abc', ValueError, u'xyzq')
122
123test('split', u'this is the split function',
124 [u'this', u'is', u'the', u'split', u'function'])
125test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
126test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
127test('split', u'a b c d', [u'a', u'b c d'], None, 1)
128test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
129test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
130test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
131test('split', u'a b c d', [u'a b c d'], None, 0)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000134test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
135test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
136test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
137test('split', u'endcase test', [u'endcase ', u''], u'test')
138test('split', u'endcase test', [u'endcase ', u''], 'test')
139test('split', 'endcase test', [u'endcase ', u''], u'test')
140
Guido van Rossuma831cac2000-03-10 23:23:21 +0000141
142# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper exposing only __len__ and __getitem__.

    It delegates both operations to the wrapped object, so it can stand in
    for a sequence without being a list, tuple or string itself.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
147
148test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000149test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000150test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000151test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000152test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
154test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
155test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
156test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
157test('join', ' ', u'w x y z', Sequence(u'wxyz'))
158test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000159
160result = u''
161for i in range(10):
162 if i > 0:
163 result = result + u':'
164 result = result + u'x'*10
165test('join', u':', result, [u'x' * 10] * 10)
166test('join', u':', result, (u'x' * 10,) * 10)
167
168test('strip', u' hello ', u'hello')
169test('lstrip', u' hello ', u'hello ')
170test('rstrip', u' hello ', u' hello')
171test('strip', u'hello', u'hello')
172
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000173# strip/lstrip/rstrip with None arg
174test('strip', u' hello ', u'hello', None)
175test('lstrip', u' hello ', u'hello ', None)
176test('rstrip', u' hello ', u' hello', None)
177test('strip', u'hello', u'hello', None)
178
179# strip/lstrip/rstrip with unicode arg
180test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
181test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
182test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
183test('strip', u'hello', u'hello', u'xyz')
184
185# strip/lstrip/rstrip with str arg
186test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
187test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
188test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
189test('strip', u'hello', u'hello', 'xyz')
190
Guido van Rossuma831cac2000-03-10 23:23:21 +0000191test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
192
193if 0:
194 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
195
196 table = string.maketrans('a', u'A')
197 test('translate', u'abc', u'Abc', table)
198 test('translate', u'xyz', u'xyz', table)
199
200test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000201test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
203test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
204test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
205test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
206test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
207test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
208test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
Guido van Rossumf36921c2002-08-09 15:36:48 +0000209try:
210 u"abc".replace(u"", u"x")
211except ValueError:
212 pass
213else:
214 raise TestFailed, "u.replace('', ...) should raise ValueError"
Guido van Rossuma831cac2000-03-10 23:23:21 +0000215
Guido van Rossum77f6a652002-04-03 22:41:51 +0000216test('startswith', u'hello', True, u'he')
217test('startswith', u'hello', True, u'hello')
218test('startswith', u'hello', False, u'hello world')
219test('startswith', u'hello', True, u'')
220test('startswith', u'hello', False, u'ello')
221test('startswith', u'hello', True, u'ello', 1)
222test('startswith', u'hello', True, u'o', 4)
223test('startswith', u'hello', False, u'o', 5)
224test('startswith', u'hello', True, u'', 5)
225test('startswith', u'hello', False, u'lo', 6)
226test('startswith', u'helloworld', True, u'lowo', 3)
227test('startswith', u'helloworld', True, u'lowo', 3, 7)
228test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000229
Guido van Rossum77f6a652002-04-03 22:41:51 +0000230test('endswith', u'hello', True, u'lo')
231test('endswith', u'hello', False, u'he')
232test('endswith', u'hello', True, u'')
233test('endswith', u'hello', False, u'hello world')
234test('endswith', u'helloworld', False, u'worl')
235test('endswith', u'helloworld', True, u'worl', 3, 9)
236test('endswith', u'helloworld', True, u'world', 3, 12)
237test('endswith', u'helloworld', True, u'lowo', 1, 7)
238test('endswith', u'helloworld', True, u'lowo', 2, 7)
239test('endswith', u'helloworld', True, u'lowo', 3, 7)
240test('endswith', u'helloworld', False, u'lowo', 4, 7)
241test('endswith', u'helloworld', False, u'lowo', 3, 8)
242test('endswith', u'ab', False, u'ab', 0, 1)
243test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000244
# expandtabs: the column counter restarts after '\r' and '\n', so each tab
# pads from the start of its own line up to the next tab stop.
# Default tab size (8): 'ab' + 6 spaces, 'g' + 7 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
# Tab size 4: 'ab' + 2 spaces, 'g' + 3 spaces.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
# No tabs at all: the input must come back unchanged.
test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000250
251if 0:
252 test('capwords', u'abc def ghi', u'Abc Def Ghi')
253 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
254 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
255
Walter Dörwald068325e2002-04-15 13:36:47 +0000256test('zfill', u'123', u'123', 2)
257test('zfill', u'123', u'123', 3)
258test('zfill', u'123', u'0123', 4)
259test('zfill', u'+123', u'+123', 3)
260test('zfill', u'+123', u'+123', 4)
261test('zfill', u'+123', u'+0123', 5)
262test('zfill', u'-123', u'-123', 3)
263test('zfill', u'-123', u'-123', 4)
264test('zfill', u'-123', u'-0123', 5)
265test('zfill', u'', u'000', 3)
266test('zfill', u'34', u'34', 1)
267test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000268
Guido van Rossuma831cac2000-03-10 23:23:21 +0000269# Comparisons:
270print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000271verify(u'abc' == 'abc')
272verify('abc' == u'abc')
273verify(u'abc' == u'abc')
274verify(u'abcd' > 'abc')
275verify('abcd' > u'abc')
276verify(u'abcd' > u'abc')
277verify(u'abc' < 'abcd')
278verify('abc' < u'abcd')
279verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000280print 'done.'
281
Marc-André Lemburge5034372000-08-08 08:04:29 +0000282if 0:
283 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000284
Marc-André Lemburge5034372000-08-08 08:04:29 +0000285 print 'Testing UTF-16 code point order comparisons...',
286 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000287 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000289 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000290
Marc-André Lemburge5034372000-08-08 08:04:29 +0000291 # Non surrogate above surrogate value, fixup required
292 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000293 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000294
Marc-André Lemburge5034372000-08-08 08:04:29 +0000295 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000296 s2 = u'\ud800\udc01'
297 test_lecmp(s, s2)
298 s2 = u'\ud900\udc01'
299 test_lecmp(s, s2)
300 s2 = u'\uda00\udc01'
301 test_lecmp(s, s2)
302 s2 = u'\udb00\udc01'
303 test_lecmp(s, s2)
304 s2 = u'\ud800\udd01'
305 test_lecmp(s, s2)
306 s2 = u'\ud900\udd01'
307 test_lecmp(s, s2)
308 s2 = u'\uda00\udd01'
309 test_lecmp(s, s2)
310 s2 = u'\udb00\udd01'
311 test_lecmp(s, s2)
312 s2 = u'\ud800\ude01'
313 test_lecmp(s, s2)
314 s2 = u'\ud900\ude01'
315 test_lecmp(s, s2)
316 s2 = u'\uda00\ude01'
317 test_lecmp(s, s2)
318 s2 = u'\udb00\ude01'
319 test_lecmp(s, s2)
320 s2 = u'\ud800\udfff'
321 test_lecmp(s, s2)
322 s2 = u'\ud900\udfff'
323 test_lecmp(s, s2)
324 s2 = u'\uda00\udfff'
325 test_lecmp(s, s2)
326 s2 = u'\udb00\udfff'
327 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000328
329 test_fixup(u'\ue000')
330 test_fixup(u'\uff61')
331
332 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000333 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000334 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000335
# ljust/rjust/center: the pad is written as an explicit repeat count so the
# expected width cannot be lost to whitespace normalisation.
# Width 10 -> 7 fill characters (center: 3 left, 4 right).
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
# Width 6 -> 3 fill characters (center: 1 left, 2 right).
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' + u'abc' + u' ' * 2, 6)
# Width smaller than the string: the text is returned unpadded.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
345
Guido van Rossum77f6a652002-04-03 22:41:51 +0000346test('islower', u'a', True)
347test('islower', u'A', False)
348test('islower', u'\n', False)
349test('islower', u'\u1FFc', False)
350test('islower', u'abc', True)
351test('islower', u'aBc', False)
352test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000353
Guido van Rossum77f6a652002-04-03 22:41:51 +0000354test('isupper', u'a', False)
355test('isupper', u'A', True)
356test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000357if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000358 test('isupper', u'\u1FFc', False)
359test('isupper', u'ABC', True)
360test('isupper', u'AbC', False)
361test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000362
Guido van Rossum77f6a652002-04-03 22:41:51 +0000363test('istitle', u'a', False)
364test('istitle', u'A', True)
365test('istitle', u'\n', False)
366test('istitle', u'\u1FFc', True)
367test('istitle', u'A Titlecased Line', True)
368test('istitle', u'A\nTitlecased Line', True)
369test('istitle', u'A Titlecased, Line', True)
370test('istitle', u'Greek \u1FFcitlecases ...', True)
371test('istitle', u'Not a capitalized String', False)
372test('istitle', u'Not\ta Titlecase String', False)
373test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000374
Guido van Rossum77f6a652002-04-03 22:41:51 +0000375test('isalpha', u'a', True)
376test('isalpha', u'A', True)
377test('isalpha', u'\n', False)
378test('isalpha', u'\u1FFc', True)
379test('isalpha', u'abc', True)
380test('isalpha', u'aBc123', False)
381test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000382
Guido van Rossum77f6a652002-04-03 22:41:51 +0000383test('isalnum', u'a', True)
384test('isalnum', u'A', True)
385test('isalnum', u'\n', False)
386test('isalnum', u'123abc456', True)
387test('isalnum', u'a1b3c', True)
388test('isalnum', u'aBc000 ', False)
389test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000390
Guido van Rossuma831cac2000-03-10 23:23:21 +0000391test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
392test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
393test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
394test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
395test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
396test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000397test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000398
399test('translate', u"abababc", u'bbbc', {ord('a'):None})
400test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
401test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
402
Guido van Rossumd4d26842000-03-13 23:21:48 +0000403# Contains:
404print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000405vereq(('a' in u'abdb'), True)
406vereq(('a' in u'bdab'), True)
407vereq(('a' in u'bdaba'), True)
408vereq(('a' in u'bdba'), True)
409vereq(('a' in u'bdba'), True)
410vereq((u'a' in u'bdba'), True)
411vereq((u'a' in u'bdb'), False)
412vereq((u'a' in 'bdb'), False)
413vereq((u'a' in 'bdba'), True)
414vereq((u'a' in ('a',1,None)), True)
415vereq((u'a' in (1,None,'a')), True)
416vereq((u'a' in (1,None,u'a')), True)
417vereq(('a' in ('a',1,None)), True)
418vereq(('a' in (1,None,'a')), True)
419vereq(('a' in (1,None,u'a')), True)
420vereq(('a' in ('x',1,u'y')), False)
421vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000422vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000423vereq((u'ab' in u'abcd'), True)
424vereq(('ab' in u'abc'), True)
425vereq((u'ab' in 'abc'), True)
426vereq((u'ab' in (1,None,u'ab')), True)
427vereq((u'' in u'abc'), True)
428vereq(('' in u'abc'), True)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000429print 'done.'
430
Guido van Rossuma831cac2000-03-10 23:23:21 +0000431# Formatting:
432print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000433verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
# %5.2f pads to a minimum width of 5, so a four-character result such as
# '3.00' gets one leading space in addition to the separator space.
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57')
438verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
439verify(u"%c" % (u"a",) == u'a')
440verify(u"%c" % ("a",) == u'a')
441verify(u"%c" % (34,) == u'"')
442verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000443if sys.platform[:4] != 'java':
444 value = u"%r, %r" % (u"abc", "abc")
445 if value != u"u'abc', 'abc'":
446 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000447
Marc-André Lemburg36619082001-01-17 19:11:13 +0000448verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000449try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000450 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000451except KeyError:
452 print '*** formatting failed for "%s"' % "u'abc, def'"
453else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000454 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000455
Guido van Rossum97064862000-04-10 13:52:48 +0000456# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000457verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
458verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
459verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
460verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
461verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
462verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
463verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
464verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
465verify('...%s...' % u"abc" == u'...abc...')
# '*' takes the field width (and precision) from the argument tuple; a
# negative width left-justifies.  Every result below is 5 columns wide.
verify('%*s' % (5,u'abc',) == u'  abc')
verify('%*s' % (-5,u'abc',) == u'abc  ')
verify('%*.*s' % (5,2,u'abc',) == u'   ab')
verify('%*.*s' % (5,3,u'abc',) == u'  abc')
verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10   abc')
verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103   abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000472print 'done.'
473
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000474print 'Testing builtin unicode()...',
475
476# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
477
478verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
479
# Trivial unicode subclass with no added behaviour; the check that follows
# passes an instance to unicode() and compares the result with the plain text.
class UnicodeSubclass(unicode):
    pass
482
483verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
484 == u'unicode subclass becomes unicode')
485
486verify(unicode('strings are converted to unicode')
487 == u'strings are converted to unicode')
488
class UnicodeCompat:
    """Object whose only text conversion hook is __unicode__.

    The value given to the constructor is stored in ``x`` and returned
    unchanged by __unicode__.
    """

    def __init__(self, x):
        self.x = x

    def __unicode__(self):
        return self.x
494
495verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
496 == u'__unicode__ compatible objects are recognized')
497
class StringCompat:
    """Object whose only text conversion hook is __str__.

    The value given to the constructor is stored in ``x`` and returned
    unchanged by __str__.
    """

    def __init__(self, x):
        self.x = x

    def __str__(self):
        return self.x
503
504verify(unicode(StringCompat('__str__ compatible objects are recognized'))
505 == u'__str__ compatible objects are recognized')
506
507# unicode(obj) is compatible to str():
508
509o = StringCompat('unicode(obj) is compatible to str()')
510verify(unicode(o) == u'unicode(obj) is compatible to str()')
511verify(str(o) == 'unicode(obj) is compatible to str()')
512
513for obj in (123, 123.45, 123L):
514 verify(unicode(obj) == unicode(str(obj)))
515
516# unicode(obj, encoding, error) tests (this maps to
517# PyUnicode_FromEncodedObject() at C level)
518
Finn Bock2b29cb22001-12-10 20:57:34 +0000519if not sys.platform.startswith('java'):
520 try:
521 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
522 except TypeError:
523 pass
524 else:
525 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000526
527verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
528 == u'strings are decoded to unicode')
529
Finn Bock2b29cb22001-12-10 20:57:34 +0000530if not sys.platform.startswith('java'):
531 verify(unicode(buffer('character buffers are decoded to unicode'),
532 'utf-8', 'strict')
533 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000534
535print 'done.'
536
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000537# Test builtin codecs
538print 'Testing builtin codecs...',
539
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000540# UTF-7 specific encoding tests:
541utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
542 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
543 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
544 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
545 (u'+', '+-'),
546 (u'+-', '+--'),
547 (u'+?', '+-?'),
548 (u'\?', '+AFw?'),
549 (u'+?', '+-?'),
550 (ur'\\?', '+AFwAXA?'),
551 (ur'\\\?', '+AFwAXABc?'),
552 (ur'++--', '+-+---')]
553
554for x,y in utfTests:
555 verify( x.encode('utf-7') == y )
556
Tim Peters527e64f2001-10-04 05:36:56 +0000557try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000558 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
559except UnicodeError:
560 pass
561else:
562 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
563
564verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
565
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000566# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000567verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000568verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
569verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
570verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
571verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
572verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
573verify((u'\ud800\udc02'*1000).encode('utf-8') ==
574 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000575verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
576 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
577 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
578 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
579 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000580 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000581 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
582 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
583 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
584 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
585 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
586 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
587 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
588 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
589 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
590 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000591
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000592# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000593verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
594verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
595verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000596
597# Other possible utf-8 test cases:
598# * strict decoding testing for all of the
599# UTF8_ERROR cases in PyUnicode_DecodeUTF8
600
Marc-André Lemburg36619082001-01-17 19:11:13 +0000601verify(unicode('hello','ascii') == u'hello')
602verify(unicode('hello','utf-8') == u'hello')
603verify(unicode('hello','utf8') == u'hello')
604verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000605
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000606# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000607try:
608 u'Andr\202 x'.encode('ascii')
609 u'Andr\202 x'.encode('ascii','strict')
610except ValueError:
611 pass
612else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000613 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000614verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
615verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000616
617try:
618 unicode('Andr\202 x','ascii')
619 unicode('Andr\202 x','ascii','strict')
620except ValueError:
621 pass
622else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000623 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000624verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
625verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000626
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000627verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
628try:
629 "\\".decode("unicode-escape")
630except ValueError:
631 pass
632else:
633 raise TestFailed, '"\\".decode("unicode-escape") should fail'
634
Marc-André Lemburg36619082001-01-17 19:11:13 +0000635verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000636verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000637verify(u'hello'.encode('utf-8') == 'hello')
638verify(u'hello'.encode('utf8') == 'hello')
639verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
640verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
641verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000642
# Roundtrip safety for BMP (just the first 1024 chars): encoding and then
# decoding with the same codec must give back the original string.
u = u''.join(map(unichr, xrange(1024)))
for encoding in (
    'utf-7', 'utf-8',
    'utf-16', 'utf-16-le', 'utf-16-be',
    'raw_unicode_escape', 'unicode_escape', 'unicode_internal',
    ):
    encoded = u.encode(encoding)
    verify(unicode(encoded, encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000648
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000649# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000650u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000651for encoding in (
652 'latin-1',
653 ):
654 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000655 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000656 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000657 print '*** codec "%s" failed round-trip' % encoding
658 except ValueError,why:
659 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000660
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000661# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000662u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000663for encoding in (
664 'ascii',
665 ):
666 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000667 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000668 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000669 print '*** codec "%s" failed round-trip' % encoding
670 except ValueError,why:
671 print '*** codec for "%s" failed: %s' % (encoding, why)
672
# Roundtrip safety for non-BMP (just a few chars).
# NOTE(review): 'raw_unicode_escape' is deliberately left out of the codec
# list here; the reason is not recorded -- confirm before re-enabling it.
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in (
    'utf-8',
    'utf-16', 'utf-16-le', 'utf-16-be',
    'unicode_escape', 'unicode_internal',
    ):
    encoded = u.encode(encoding)
    verify(unicode(encoded, encoding) == u)
680
681# UTF-8 must be roundtrip safe for all UCS-2 code points
682u = u''.join(map(unichr, range(0x10000)))
683for encoding in ('utf-8',):
684 verify(unicode(u.encode(encoding),encoding) == u)
685
Guido van Rossum9e896b32000-04-05 20:11:21 +0000686print 'done.'
687
688print 'Testing standard mapping codecs...',
689
690print '0-127...',
691s = ''.join(map(chr, range(128)))
692for encoding in (
693 'cp037', 'cp1026',
694 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
695 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000696 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000697 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
698 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
699 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
700 'mac_cyrillic', 'mac_latin2',
701
702 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
703 'cp1256', 'cp1257', 'cp1258',
704 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
705
706 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000707 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000708
Guido van Rossum9e896b32000-04-05 20:11:21 +0000709 ### These have undefined mappings:
710 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000711
Tim Peters2f228e72001-05-13 00:19:31 +0000712 ### These fail the round-trip:
713 #'cp875'
714
Guido van Rossum9e896b32000-04-05 20:11:21 +0000715 ):
716 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000717 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000718 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000719 print '*** codec "%s" failed round-trip' % encoding
720 except ValueError,why:
721 print '*** codec for "%s" failed: %s' % (encoding, why)
722
723print '128-255...',
724s = ''.join(map(chr, range(128,256)))
725for encoding in (
726 'cp037', 'cp1026',
727 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
728 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000729 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000730 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000731 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000732 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000733 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000734
Guido van Rossum9e896b32000-04-05 20:11:21 +0000735 ### These have undefined mappings:
736 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
737 #'cp1256', 'cp1257', 'cp1258',
738 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000739 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000740 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000741
Guido van Rossum9e896b32000-04-05 20:11:21 +0000742 ### These fail the round-trip:
743 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000744
Guido van Rossum9e896b32000-04-05 20:11:21 +0000745 ):
746 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000747 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000748 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000749 print '*** codec "%s" failed round-trip' % encoding
750 except ValueError,why:
751 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000752
753print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000754
755print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000756verify((u"abc" u"def") == u"abcdef")
757verify(("abc" u"def") == u"abcdef")
758verify((u"abc" "def") == u"abcdef")
759verify((u"abc" u"def" "ghi") == u"abcdefghi")
760verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000761print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000762
# Exercise the print statement with unicode operands: alone, mixed with
# plain strings, with a trailing newline inside the string, and with a
# trailing comma after the statement.
# NOTE(review): the repeated statements below look deliberate -- presumably
# the script's output is compared against a recorded transcript, so the
# exact sequence matters.  Confirm before deduplicating or reordering.
print 'Testing Unicode printing...',
print u'abc'
# Several operands, unicode/str in every combination.
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
# Strings that already end in a newline, with and without a trailing comma.
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000774
775def test_exception(lhs, rhs, msg):
776 try:
777 lhs in rhs
778 except TypeError:
779 pass
780 else:
781 raise TestFailed, msg
782
def run_contains_tests():
    """Check substring containment across str/unicode operand mixes.

    Each (needle, haystack, expected) case is checked three times, in this
    order: unicode needle in str haystack, str needle in unicode haystack,
    and unicode needle in unicode haystack.  vereq reports a mismatch.
    """
    cases = (
        # Empty needle.
        ('', '', True),
        ('', 'abc', True),
        # NUL character: absent, leading, trailing.
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        # Ordinary character in a haystack that starts with NUL.
        ('a', '\0abc', True),
        # Multi-character needle: exact match, too long, empty haystack.
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    )
    for needle, haystack, expected in cases:
        # All samples are pure ASCII, so unicode() converts them losslessly.
        vereq(unicode(needle) in haystack, expected)
        vereq(needle in unicode(haystack), expected)
        vereq(unicode(needle) in unicode(haystack), expected)

run_contains_tests()