blob: 429b673d9f58ac1a881ef23a78b0e7578e025aca [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
110
111test('lower', u'HeLLo', u'hello')
112test('lower', u'hello', u'hello')
113
114test('upper', u'HeLLo', u'HELLO')
115test('upper', u'HELLO', u'HELLO')
116
117if 0:
118 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
119
120 test('maketrans', u'abc', transtable, u'xyz')
121 test('maketrans', u'abc', ValueError, u'xyzq')
122
123test('split', u'this is the split function',
124 [u'this', u'is', u'the', u'split', u'function'])
125test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
126test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
127test('split', u'a b c d', [u'a', u'b c d'], None, 1)
128test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
129test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
130test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
131test('split', u'a b c d', [u'a b c d'], None, 0)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000134test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
135test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
136test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
137test('split', u'endcase test', [u'endcase ', u''], u'test')
138test('split', u'endcase test', [u'endcase ', u''], 'test')
139test('split', 'endcase test', [u'endcase ', u''], u'test')
140
Guido van Rossuma831cac2000-03-10 23:23:21 +0000141
142# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper.

    Only __len__ and __getitem__ are provided; both are forwarded to the
    wrapped object stored in self.seq.
    """

    def __init__(self, seq):
        self.seq = seq

    def __getitem__(self, i):
        wrapped = self.seq
        return wrapped[i]

    def __len__(self):
        wrapped = self.seq
        return len(wrapped)
147
148test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000149test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000150test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000151test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000152test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
154test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
155test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
156test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
157test('join', ' ', u'w x y z', Sequence(u'wxyz'))
158test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000159
160result = u''
161for i in range(10):
162 if i > 0:
163 result = result + u':'
164 result = result + u'x'*10
165test('join', u':', result, [u'x' * 10] * 10)
166test('join', u':', result, (u'x' * 10,) * 10)
167
168test('strip', u' hello ', u'hello')
169test('lstrip', u' hello ', u'hello ')
170test('rstrip', u' hello ', u' hello')
171test('strip', u'hello', u'hello')
172
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000173# strip/lstrip/rstrip with None arg
174test('strip', u' hello ', u'hello', None)
175test('lstrip', u' hello ', u'hello ', None)
176test('rstrip', u' hello ', u' hello', None)
177test('strip', u'hello', u'hello', None)
178
179# strip/lstrip/rstrip with unicode arg
180test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
181test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
182test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
183test('strip', u'hello', u'hello', u'xyz')
184
185# strip/lstrip/rstrip with str arg
186test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
187test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
188test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
189test('strip', u'hello', u'hello', 'xyz')
190
Guido van Rossuma831cac2000-03-10 23:23:21 +0000191test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
192
193if 0:
194 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
195
196 table = string.maketrans('a', u'A')
197 test('translate', u'abc', u'Abc', table)
198 test('translate', u'xyz', u'xyz', table)
199
200test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000201test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
203test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
204test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
205test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
206test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
207test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
208test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
209
Guido van Rossum77f6a652002-04-03 22:41:51 +0000210test('startswith', u'hello', True, u'he')
211test('startswith', u'hello', True, u'hello')
212test('startswith', u'hello', False, u'hello world')
213test('startswith', u'hello', True, u'')
214test('startswith', u'hello', False, u'ello')
215test('startswith', u'hello', True, u'ello', 1)
216test('startswith', u'hello', True, u'o', 4)
217test('startswith', u'hello', False, u'o', 5)
218test('startswith', u'hello', True, u'', 5)
219test('startswith', u'hello', False, u'lo', 6)
220test('startswith', u'helloworld', True, u'lowo', 3)
221test('startswith', u'helloworld', True, u'lowo', 3, 7)
222test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000223
Guido van Rossum77f6a652002-04-03 22:41:51 +0000224test('endswith', u'hello', True, u'lo')
225test('endswith', u'hello', False, u'he')
226test('endswith', u'hello', True, u'')
227test('endswith', u'hello', False, u'hello world')
228test('endswith', u'helloworld', False, u'worl')
229test('endswith', u'helloworld', True, u'worl', 3, 9)
230test('endswith', u'helloworld', True, u'world', 3, 12)
231test('endswith', u'helloworld', True, u'lowo', 1, 7)
232test('endswith', u'helloworld', True, u'lowo', 2, 7)
233test('endswith', u'helloworld', True, u'lowo', 3, 7)
234test('endswith', u'helloworld', False, u'lowo', 4, 7)
235test('endswith', u'helloworld', False, u'lowo', 3, 8)
236test('endswith', u'ab', False, u'ab', 0, 1)
237test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000238
239test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
240test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
241test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
242test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
Walter Dörwald2ee4be02002-04-17 21:34:05 +0000243test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000244
245if 0:
246 test('capwords', u'abc def ghi', u'Abc Def Ghi')
247 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
248 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
249
Walter Dörwald068325e2002-04-15 13:36:47 +0000250test('zfill', u'123', u'123', 2)
251test('zfill', u'123', u'123', 3)
252test('zfill', u'123', u'0123', 4)
253test('zfill', u'+123', u'+123', 3)
254test('zfill', u'+123', u'+123', 4)
255test('zfill', u'+123', u'+0123', 5)
256test('zfill', u'-123', u'-123', 3)
257test('zfill', u'-123', u'-123', 4)
258test('zfill', u'-123', u'-0123', 5)
259test('zfill', u'', u'000', 3)
260test('zfill', u'34', u'34', 1)
261test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000262
Guido van Rossuma831cac2000-03-10 23:23:21 +0000263# Comparisons:
264print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000265verify(u'abc' == 'abc')
266verify('abc' == u'abc')
267verify(u'abc' == u'abc')
268verify(u'abcd' > 'abc')
269verify('abcd' > u'abc')
270verify(u'abcd' > u'abc')
271verify(u'abc' < 'abcd')
272verify('abc' < u'abcd')
273verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000274print 'done.'
275
Marc-André Lemburge5034372000-08-08 08:04:29 +0000276if 0:
277 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000278
Marc-André Lemburge5034372000-08-08 08:04:29 +0000279 print 'Testing UTF-16 code point order comparisons...',
280 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000281 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000282 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000283 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000284
Marc-André Lemburge5034372000-08-08 08:04:29 +0000285 # Non surrogate above surrogate value, fixup required
286 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000287 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000288
Marc-André Lemburge5034372000-08-08 08:04:29 +0000289 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000290 s2 = u'\ud800\udc01'
291 test_lecmp(s, s2)
292 s2 = u'\ud900\udc01'
293 test_lecmp(s, s2)
294 s2 = u'\uda00\udc01'
295 test_lecmp(s, s2)
296 s2 = u'\udb00\udc01'
297 test_lecmp(s, s2)
298 s2 = u'\ud800\udd01'
299 test_lecmp(s, s2)
300 s2 = u'\ud900\udd01'
301 test_lecmp(s, s2)
302 s2 = u'\uda00\udd01'
303 test_lecmp(s, s2)
304 s2 = u'\udb00\udd01'
305 test_lecmp(s, s2)
306 s2 = u'\ud800\ude01'
307 test_lecmp(s, s2)
308 s2 = u'\ud900\ude01'
309 test_lecmp(s, s2)
310 s2 = u'\uda00\ude01'
311 test_lecmp(s, s2)
312 s2 = u'\udb00\ude01'
313 test_lecmp(s, s2)
314 s2 = u'\ud800\udfff'
315 test_lecmp(s, s2)
316 s2 = u'\ud900\udfff'
317 test_lecmp(s, s2)
318 s2 = u'\uda00\udfff'
319 test_lecmp(s, s2)
320 s2 = u'\udb00\udfff'
321 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000322
323 test_fixup(u'\ue000')
324 test_fixup(u'\uff61')
325
326 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000327 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000328 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000329
Guido van Rossuma831cac2000-03-10 23:23:21 +0000330test('ljust', u'abc', u'abc ', 10)
331test('rjust', u'abc', u' abc', 10)
332test('center', u'abc', u' abc ', 10)
333test('ljust', u'abc', u'abc ', 6)
334test('rjust', u'abc', u' abc', 6)
335test('center', u'abc', u' abc ', 6)
336test('ljust', u'abc', u'abc', 2)
337test('rjust', u'abc', u'abc', 2)
338test('center', u'abc', u'abc', 2)
339
Guido van Rossum77f6a652002-04-03 22:41:51 +0000340test('islower', u'a', True)
341test('islower', u'A', False)
342test('islower', u'\n', False)
343test('islower', u'\u1FFc', False)
344test('islower', u'abc', True)
345test('islower', u'aBc', False)
346test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000347
Guido van Rossum77f6a652002-04-03 22:41:51 +0000348test('isupper', u'a', False)
349test('isupper', u'A', True)
350test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000351if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000352 test('isupper', u'\u1FFc', False)
353test('isupper', u'ABC', True)
354test('isupper', u'AbC', False)
355test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000356
Guido van Rossum77f6a652002-04-03 22:41:51 +0000357test('istitle', u'a', False)
358test('istitle', u'A', True)
359test('istitle', u'\n', False)
360test('istitle', u'\u1FFc', True)
361test('istitle', u'A Titlecased Line', True)
362test('istitle', u'A\nTitlecased Line', True)
363test('istitle', u'A Titlecased, Line', True)
364test('istitle', u'Greek \u1FFcitlecases ...', True)
365test('istitle', u'Not a capitalized String', False)
366test('istitle', u'Not\ta Titlecase String', False)
367test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000368
Guido van Rossum77f6a652002-04-03 22:41:51 +0000369test('isalpha', u'a', True)
370test('isalpha', u'A', True)
371test('isalpha', u'\n', False)
372test('isalpha', u'\u1FFc', True)
373test('isalpha', u'abc', True)
374test('isalpha', u'aBc123', False)
375test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000376
Guido van Rossum77f6a652002-04-03 22:41:51 +0000377test('isalnum', u'a', True)
378test('isalnum', u'A', True)
379test('isalnum', u'\n', False)
380test('isalnum', u'123abc456', True)
381test('isalnum', u'a1b3c', True)
382test('isalnum', u'aBc000 ', False)
383test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000384
Guido van Rossuma831cac2000-03-10 23:23:21 +0000385test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
386test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
387test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
388test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
389test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
390test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000391test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000392
393test('translate', u"abababc", u'bbbc', {ord('a'):None})
394test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
395test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
396
Guido van Rossumd4d26842000-03-13 23:21:48 +0000397# Contains:
398print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000399vereq(('a' in u'abdb'), True)
400vereq(('a' in u'bdab'), True)
401vereq(('a' in u'bdaba'), True)
402vereq(('a' in u'bdba'), True)
403vereq(('a' in u'bdba'), True)
404vereq((u'a' in u'bdba'), True)
405vereq((u'a' in u'bdb'), False)
406vereq((u'a' in 'bdb'), False)
407vereq((u'a' in 'bdba'), True)
408vereq((u'a' in ('a',1,None)), True)
409vereq((u'a' in (1,None,'a')), True)
410vereq((u'a' in (1,None,u'a')), True)
411vereq(('a' in ('a',1,None)), True)
412vereq(('a' in (1,None,'a')), True)
413vereq(('a' in (1,None,u'a')), True)
414vereq(('a' in ('x',1,u'y')), False)
415vereq(('a' in ('x',1,None)), False)
Barry Warsawe0674172002-08-06 19:03:56 +0000416vereq(u'abcd' in u'abcxxxx', False)
Raymond Hettingerca84d652002-08-06 23:08:51 +0000417vereq((u'ab' in u'abcd'), True)
418vereq(('ab' in u'abc'), True)
419vereq((u'ab' in 'abc'), True)
420vereq((u'ab' in (1,None,u'ab')), True)
421vereq((u'' in u'abc'), True)
422vereq(('' in u'abc'), True)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000423print 'done.'
424
Guido van Rossuma831cac2000-03-10 23:23:21 +0000425# Formatting:
426print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000427verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
428verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
429verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
430verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
431verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
432verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
433verify(u"%c" % (u"a",) == u'a')
434verify(u"%c" % ("a",) == u'a')
435verify(u"%c" % (34,) == u'"')
436verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000437if sys.platform[:4] != 'java':
438 value = u"%r, %r" % (u"abc", "abc")
439 if value != u"u'abc', 'abc'":
440 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000441
Marc-André Lemburg36619082001-01-17 19:11:13 +0000442verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000443try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000444 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000445except KeyError:
446 print '*** formatting failed for "%s"' % "u'abc, def'"
447else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000448 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000449
Guido van Rossum97064862000-04-10 13:52:48 +0000450# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000451verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
452verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
453verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
454verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
455verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
456verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
457verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
458verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
459verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000460verify('%*s' % (5,u'abc',) == u' abc')
461verify('%*s' % (-5,u'abc',) == u'abc ')
462verify('%*.*s' % (5,2,u'abc',) == u' ab')
463verify('%*.*s' % (5,3,u'abc',) == u' abc')
464verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
465verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000466print 'done.'
467
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000468print 'Testing builtin unicode()...',
469
470# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
471
472verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
473
class UnicodeSubclass(unicode):
    # Adds nothing to unicode; it only provides a subclass instance to pass
    # to unicode() in the check that follows.
    pass
476
477verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
478 == u'unicode subclass becomes unicode')
479
480verify(unicode('strings are converted to unicode')
481 == u'strings are converted to unicode')
482
class UnicodeCompat:
    """Wrapper whose __unicode__ method returns the wrapped value."""
    def __init__(self, x):
        # x is stored as-is and handed back unchanged by __unicode__.
        self.x = x
    def __unicode__(self):
        return self.x
488
489verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
490 == u'__unicode__ compatible objects are recognized')
491
class StringCompat:
    """Wrapper whose __str__ method returns the wrapped value."""
    def __init__(self, x):
        # x is stored as-is and handed back unchanged by __str__.
        self.x = x
    def __str__(self):
        return self.x
497
498verify(unicode(StringCompat('__str__ compatible objects are recognized'))
499 == u'__str__ compatible objects are recognized')
500
501# unicode(obj) is compatible to str():
502
503o = StringCompat('unicode(obj) is compatible to str()')
504verify(unicode(o) == u'unicode(obj) is compatible to str()')
505verify(str(o) == 'unicode(obj) is compatible to str()')
506
507for obj in (123, 123.45, 123L):
508 verify(unicode(obj) == unicode(str(obj)))
509
510# unicode(obj, encoding, error) tests (this maps to
511# PyUnicode_FromEncodedObject() at C level)
512
Finn Bock2b29cb22001-12-10 20:57:34 +0000513if not sys.platform.startswith('java'):
514 try:
515 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
516 except TypeError:
517 pass
518 else:
519 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000520
521verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
522 == u'strings are decoded to unicode')
523
Finn Bock2b29cb22001-12-10 20:57:34 +0000524if not sys.platform.startswith('java'):
525 verify(unicode(buffer('character buffers are decoded to unicode'),
526 'utf-8', 'strict')
527 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000528
529print 'done.'
530
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000531# Test builtin codecs
532print 'Testing builtin codecs...',
533
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000534# UTF-7 specific encoding tests:
535utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
536 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
537 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
538 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
539 (u'+', '+-'),
540 (u'+-', '+--'),
541 (u'+?', '+-?'),
542 (u'\?', '+AFw?'),
543 (u'+?', '+-?'),
544 (ur'\\?', '+AFwAXA?'),
545 (ur'\\\?', '+AFwAXABc?'),
546 (ur'++--', '+-+---')]
547
548for x,y in utfTests:
549 verify( x.encode('utf-7') == y )
550
Tim Peters527e64f2001-10-04 05:36:56 +0000551try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000552 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
553except UnicodeError:
554 pass
555else:
556 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
557
558verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
559
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000560# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000561verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000562verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
563verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
564verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
565verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
566verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
567verify((u'\ud800\udc02'*1000).encode('utf-8') ==
568 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000569verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
570 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
571 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
572 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
573 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000574 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000575 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
576 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
577 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
578 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
579 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
580 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
581 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
582 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
583 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
584 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000585
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000586# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000587verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
588verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
589verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000590
591# Other possible utf-8 test cases:
592# * strict decoding testing for all of the
593# UTF8_ERROR cases in PyUnicode_DecodeUTF8
594
Marc-André Lemburg36619082001-01-17 19:11:13 +0000595verify(unicode('hello','ascii') == u'hello')
596verify(unicode('hello','utf-8') == u'hello')
597verify(unicode('hello','utf8') == u'hello')
598verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000599
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000600# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000601try:
602 u'Andr\202 x'.encode('ascii')
603 u'Andr\202 x'.encode('ascii','strict')
604except ValueError:
605 pass
606else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000607 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000608verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
609verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000610
611try:
612 unicode('Andr\202 x','ascii')
613 unicode('Andr\202 x','ascii','strict')
614except ValueError:
615 pass
616else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000617 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000618verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
619verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000620
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000621verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
622try:
623 "\\".decode("unicode-escape")
624except ValueError:
625 pass
626else:
627 raise TestFailed, '"\\".decode("unicode-escape") should fail'
628
Marc-André Lemburg36619082001-01-17 19:11:13 +0000629verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000630verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000631verify(u'hello'.encode('utf-8') == 'hello')
632verify(u'hello'.encode('utf8') == 'hello')
633verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
634verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
635verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000636
# Roundtrip safety for BMP (just the first 1024 chars): each codec below
# must decode its own output back to the original string.
bmp_codecs = ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
              'raw_unicode_escape', 'unicode_escape', 'unicode_internal')
u = u''.join(map(unichr, range(1024)))
for encoding in bmp_codecs:
    encoded = u.encode(encoding)
    verify(unicode(encoded, encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000642
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000643# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000644u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000645for encoding in (
646 'latin-1',
647 ):
648 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000649 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000650 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000651 print '*** codec "%s" failed round-trip' % encoding
652 except ValueError,why:
653 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000654
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000655# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000656u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000657for encoding in (
658 'ascii',
659 ):
660 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000661 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000662 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000663 print '*** codec "%s" failed round-trip' % encoding
664 except ValueError,why:
665 print '*** codec for "%s" failed: %s' % (encoding, why)
666
# Roundtrip safety for non-BMP (just a few chars, one from each of the
# first five supplementary planes).
# NOTE(review): 'raw_unicode_escape' was commented out of this list in the
# original; the reason is not recorded here.
non_bmp_codecs = ('utf-8',
                  'utf-16', 'utf-16-le', 'utf-16-be',
                  #'raw_unicode_escape',
                  'unicode_escape', 'unicode_internal')
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in non_bmp_codecs:
    encoded = u.encode(encoding)
    verify(unicode(encoded, encoding) == u)
674
675# UTF-8 must be roundtrip safe for all UCS-2 code points
676u = u''.join(map(unichr, range(0x10000)))
677for encoding in ('utf-8',):
678 verify(unicode(u.encode(encoding),encoding) == u)
679
Guido van Rossum9e896b32000-04-05 20:11:21 +0000680print 'done.'
681
682print 'Testing standard mapping codecs...',
683
684print '0-127...',
685s = ''.join(map(chr, range(128)))
686for encoding in (
687 'cp037', 'cp1026',
688 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
689 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000690 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000691 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
692 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
693 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
694 'mac_cyrillic', 'mac_latin2',
695
696 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
697 'cp1256', 'cp1257', 'cp1258',
698 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
699
700 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000701 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000702
Guido van Rossum9e896b32000-04-05 20:11:21 +0000703 ### These have undefined mappings:
704 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000705
Tim Peters2f228e72001-05-13 00:19:31 +0000706 ### These fail the round-trip:
707 #'cp875'
708
Guido van Rossum9e896b32000-04-05 20:11:21 +0000709 ):
710 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000711 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000712 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000713 print '*** codec "%s" failed round-trip' % encoding
714 except ValueError,why:
715 print '*** codec for "%s" failed: %s' % (encoding, why)
716
717print '128-255...',
718s = ''.join(map(chr, range(128,256)))
719for encoding in (
720 'cp037', 'cp1026',
721 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
722 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000723 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000724 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000725 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000726 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000727 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000728
Guido van Rossum9e896b32000-04-05 20:11:21 +0000729 ### These have undefined mappings:
730 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
731 #'cp1256', 'cp1257', 'cp1258',
732 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000733 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000734 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000735
Guido van Rossum9e896b32000-04-05 20:11:21 +0000736 ### These fail the round-trip:
737 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000738
Guido van Rossum9e896b32000-04-05 20:11:21 +0000739 ):
740 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000741 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000742 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000743 print '*** codec "%s" failed round-trip' % encoding
744 except ValueError,why:
745 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000746
747print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000748
# Adjacent string literals are joined into a single string; whenever at
# least one of the pieces is a unicode literal the result must compare
# equal to the corresponding unicode string.  The literals are left exactly
# as written because the juxtaposition itself is what is being tested.
print 'Testing Unicode string concatenation...',
verify((u"abc" u"def") == u"abcdef")       # unicode + unicode
verify(("abc" u"def") == u"abcdef")        # str + unicode
verify((u"abc" "def") == u"abcdef")        # unicode + str
verify((u"abc" u"def" "ghi") == u"abcdefghi")   # three pieces, str last
verify(("abc" "def" u"ghi") == u"abcdefghi")    # three pieces, unicode last
print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000756
# Print unicode objects alone, mixed with str objects, and with and without
# a trailing comma.
# NOTE(review): the repeated statements below look deliberate (they vary
# whether a newline-terminated string is followed by a trailing comma);
# presumably the emitted text is compared against a recorded expected
# output, so confirm before de-duplicating or reordering anything here.
print 'Testing Unicode printing...',
print u'abc'
print u'abc', u'def'
print u'abc', 'def'
print 'abc', u'def'
print u'abc\n'
print u'abc\n',
print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000768
769def test_exception(lhs, rhs, msg):
770 try:
771 lhs in rhs
772 except TypeError:
773 pass
774 else:
775 raise TestFailed, msg
776
def run_contains_tests():
    """Check substring containment for every pairing with a unicode operand.

    Each (needle, haystack, expected) case is checked three ways, in this
    order: unicode in str, str in unicode, unicode in unicode.
    """
    cases = (
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    )
    for needle, haystack, expected in cases:
        # All case strings are pure ASCII, so unicode() yields the same
        # values as the corresponding u'' literals.
        uneedle = unicode(needle)
        uhaystack = unicode(haystack)
        vereq(uneedle in haystack, expected)
        vereq(needle in uhaystack, expected)
        vereq(uneedle in uhaystack, expected)
805
# Execute the containment checks as soon as the script reaches this point.
run_contains_tests()