blob: f38467ad0d7305101513aa967f4fdf73f03bd243 [file] [log] [blame]
Martin v. Löwisa729daf2002-08-04 17:28:33 +00001# -*- coding: iso-8859-1 -*-
Guido van Rossuma831cac2000-03-10 23:23:21 +00002""" Test script for the Unicode implementation.
3
Guido van Rossuma831cac2000-03-10 23:23:21 +00004Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
Marc-André Lemburg36619082001-01-17 19:11:13 +00008"""#"
Barry Warsaw817918c2002-08-06 16:58:21 +00009from test.test_support import verify, vereq, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +000010import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000011
Finn Bock2b29cb22001-12-10 20:57:34 +000012if not sys.platform.startswith('java'):
13 # Test basic sanity of repr()
14 verify(repr(u'abc') == "u'abc'")
15 verify(repr(u'ab\\c') == "u'ab\\\\c'")
16 verify(repr(u'ab\\') == "u'ab\\\\'")
17 verify(repr(u'\\c') == "u'\\\\c'")
18 verify(repr(u'\\') == "u'\\\\'")
19 verify(repr(u'\n') == "u'\\n'")
20 verify(repr(u'\r') == "u'\\r'")
21 verify(repr(u'\t') == "u'\\t'")
22 verify(repr(u'\b') == "u'\\x08'")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'\"") == """u'\\'"'""")
25 verify(repr(u"'") == '''u"'"''')
26 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000027 latin1repr = (
28 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
29 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
30 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
31 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
32 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
33 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
34 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
35 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
36 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
37 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
38 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
39 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
40 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 "\\xfe\\xff'")
42 testrepr = repr(u''.join(map(unichr, range(256))))
43 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000044
Guido van Rossuma831cac2000-03-10 23:23:21 +000045def test(method, input, output, *args):
46 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000047 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000048 try:
49 f = getattr(input, method)
50 value = apply(f, args)
51 except:
52 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000053 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000054 else:
55 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000056 if value == output and type(value) is type(output):
57 # if the original is returned make sure that
58 # this doesn't happen with subclasses
59 if value is input:
60 class usub(unicode):
61 def __repr__(self):
62 return 'usub(%r)' % unicode.__repr__(self)
63 input = usub(input)
64 try:
65 f = getattr(input, method)
66 value = apply(f, args)
67 except:
68 value = sys.exc_type
69 exc = sys.exc_info()[:2]
70 if value is input:
71 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000072 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000073 print '*',f, `input`, `output`, `value`
74 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000075 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000076 if verbose:
77 print 'no'
78 print '*',f, `input`, `output`, `value`
79 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000080 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000081 else:
82 if verbose:
83 print 'yes'
84
85test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000086test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000087test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000088test('capitalize', u'aaaa', u'Aaaa')
89test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000090
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000091test('count', u'aaa', 3, u'a')
92test('count', u'aaa', 0, u'b')
93test('count', 'aaa', 3, u'a')
94test('count', 'aaa', 0, u'b')
95test('count', u'aaa', 3, 'a')
96test('count', u'aaa', 0, 'b')
97
Guido van Rossuma831cac2000-03-10 23:23:21 +000098test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000099test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000100test('title', u'hello ', u'Hello ')
101test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
102test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
103test('title', u"getInt", u'Getint')
104
105test('find', u'abcdefghiabc', 0, u'abc')
106test('find', u'abcdefghiabc', 9, u'abc', 1)
107test('find', u'abcdefghiabc', -1, u'def', 4)
108
109test('rfind', u'abcdefghiabc', 9, u'abc')
110
111test('lower', u'HeLLo', u'hello')
112test('lower', u'hello', u'hello')
113
114test('upper', u'HeLLo', u'HELLO')
115test('upper', u'HELLO', u'HELLO')
116
117if 0:
118 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
119
120 test('maketrans', u'abc', transtable, u'xyz')
121 test('maketrans', u'abc', ValueError, u'xyzq')
122
123test('split', u'this is the split function',
124 [u'this', u'is', u'the', u'split', u'function'])
125test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
126test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
127test('split', u'a b c d', [u'a', u'b c d'], None, 1)
128test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
129test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
130test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
131test('split', u'a b c d', [u'a b c d'], None, 0)
132test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
133test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000134test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
135test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
136test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
137test('split', u'endcase test', [u'endcase ', u''], u'test')
138test('split', u'endcase test', [u'endcase ', u''], 'test')
139test('split', 'endcase test', [u'endcase ', u''], u'test')
140
Guido van Rossuma831cac2000-03-10 23:23:21 +0000141
142# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper exposing only ``__len__`` and ``__getitem__``.

    The join tests in this file pass instances of it to check that ``join``
    accepts a sequence that is neither a list nor a tuple.
    """

    def __init__(self, seq):
        # seq -- the underlying indexable object to delegate to.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Plain delegation: any IndexError comes from the wrapped object.
        return self.seq[i]
147
148test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000149test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000150test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000151test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000152test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000153test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
154test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
155test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
156test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
157test('join', ' ', u'w x y z', Sequence(u'wxyz'))
158test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000159
160result = u''
161for i in range(10):
162 if i > 0:
163 result = result + u':'
164 result = result + u'x'*10
165test('join', u':', result, [u'x' * 10] * 10)
166test('join', u':', result, (u'x' * 10,) * 10)
167
168test('strip', u' hello ', u'hello')
169test('lstrip', u' hello ', u'hello ')
170test('rstrip', u' hello ', u' hello')
171test('strip', u'hello', u'hello')
172
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000173# strip/lstrip/rstrip with None arg
174test('strip', u' hello ', u'hello', None)
175test('lstrip', u' hello ', u'hello ', None)
176test('rstrip', u' hello ', u' hello', None)
177test('strip', u'hello', u'hello', None)
178
179# strip/lstrip/rstrip with unicode arg
180test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
181test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
182test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
183test('strip', u'hello', u'hello', u'xyz')
184
185# strip/lstrip/rstrip with str arg
186test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
187test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
188test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
189test('strip', u'hello', u'hello', 'xyz')
190
Guido van Rossuma831cac2000-03-10 23:23:21 +0000191test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
192
193if 0:
194 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
195
196 table = string.maketrans('a', u'A')
197 test('translate', u'abc', u'Abc', table)
198 test('translate', u'xyz', u'xyz', table)
199
200test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000201test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
203test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
204test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
205test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
206test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
207test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
208test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
209
Guido van Rossum77f6a652002-04-03 22:41:51 +0000210test('startswith', u'hello', True, u'he')
211test('startswith', u'hello', True, u'hello')
212test('startswith', u'hello', False, u'hello world')
213test('startswith', u'hello', True, u'')
214test('startswith', u'hello', False, u'ello')
215test('startswith', u'hello', True, u'ello', 1)
216test('startswith', u'hello', True, u'o', 4)
217test('startswith', u'hello', False, u'o', 5)
218test('startswith', u'hello', True, u'', 5)
219test('startswith', u'hello', False, u'lo', 6)
220test('startswith', u'helloworld', True, u'lowo', 3)
221test('startswith', u'helloworld', True, u'lowo', 3, 7)
222test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000223
Guido van Rossum77f6a652002-04-03 22:41:51 +0000224test('endswith', u'hello', True, u'lo')
225test('endswith', u'hello', False, u'he')
226test('endswith', u'hello', True, u'')
227test('endswith', u'hello', False, u'hello world')
228test('endswith', u'helloworld', False, u'worl')
229test('endswith', u'helloworld', True, u'worl', 3, 9)
230test('endswith', u'helloworld', True, u'world', 3, 12)
231test('endswith', u'helloworld', True, u'lowo', 1, 7)
232test('endswith', u'helloworld', True, u'lowo', 2, 7)
233test('endswith', u'helloworld', True, u'lowo', 3, 7)
234test('endswith', u'helloworld', False, u'lowo', 4, 7)
235test('endswith', u'helloworld', False, u'lowo', 3, 8)
236test('endswith', u'ab', False, u'ab', 0, 1)
237test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000238
# The column count restarts after '\r' and '\n', so each tab is expanded
# relative to the start of its own line.  Default tab size is 8.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
# No tabs at all: the string must come back unchanged.
test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000244
245if 0:
246 test('capwords', u'abc def ghi', u'Abc Def Ghi')
247 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
248 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
249
Walter Dörwald068325e2002-04-15 13:36:47 +0000250test('zfill', u'123', u'123', 2)
251test('zfill', u'123', u'123', 3)
252test('zfill', u'123', u'0123', 4)
253test('zfill', u'+123', u'+123', 3)
254test('zfill', u'+123', u'+123', 4)
255test('zfill', u'+123', u'+0123', 5)
256test('zfill', u'-123', u'-123', 3)
257test('zfill', u'-123', u'-123', 4)
258test('zfill', u'-123', u'-0123', 5)
259test('zfill', u'', u'000', 3)
260test('zfill', u'34', u'34', 1)
261test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000262
Guido van Rossuma831cac2000-03-10 23:23:21 +0000263# Comparisons:
264print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000265verify(u'abc' == 'abc')
266verify('abc' == u'abc')
267verify(u'abc' == u'abc')
268verify(u'abcd' > 'abc')
269verify('abcd' > u'abc')
270verify(u'abcd' > u'abc')
271verify(u'abc' < 'abcd')
272verify('abc' < u'abcd')
273verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000274print 'done.'
275
Marc-André Lemburge5034372000-08-08 08:04:29 +0000276if 0:
277 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000278
Marc-André Lemburge5034372000-08-08 08:04:29 +0000279 print 'Testing UTF-16 code point order comparisons...',
280 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000281 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000282 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000283 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000284
Marc-André Lemburge5034372000-08-08 08:04:29 +0000285 # Non surrogate above surrogate value, fixup required
286 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000287 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000288
Marc-André Lemburge5034372000-08-08 08:04:29 +0000289 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000290 s2 = u'\ud800\udc01'
291 test_lecmp(s, s2)
292 s2 = u'\ud900\udc01'
293 test_lecmp(s, s2)
294 s2 = u'\uda00\udc01'
295 test_lecmp(s, s2)
296 s2 = u'\udb00\udc01'
297 test_lecmp(s, s2)
298 s2 = u'\ud800\udd01'
299 test_lecmp(s, s2)
300 s2 = u'\ud900\udd01'
301 test_lecmp(s, s2)
302 s2 = u'\uda00\udd01'
303 test_lecmp(s, s2)
304 s2 = u'\udb00\udd01'
305 test_lecmp(s, s2)
306 s2 = u'\ud800\ude01'
307 test_lecmp(s, s2)
308 s2 = u'\ud900\ude01'
309 test_lecmp(s, s2)
310 s2 = u'\uda00\ude01'
311 test_lecmp(s, s2)
312 s2 = u'\udb00\ude01'
313 test_lecmp(s, s2)
314 s2 = u'\ud800\udfff'
315 test_lecmp(s, s2)
316 s2 = u'\ud900\udfff'
317 test_lecmp(s, s2)
318 s2 = u'\uda00\udfff'
319 test_lecmp(s, s2)
320 s2 = u'\udb00\udfff'
321 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000322
323 test_fixup(u'\ue000')
324 test_fixup(u'\uff61')
325
326 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000327 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000328 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000329
# Results are padded with spaces to exactly the requested width.
# Width 10: seven pad characters around the three-character string.
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
# Width 6: three pad characters; center puts the extra one on the right.
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
339
Guido van Rossum77f6a652002-04-03 22:41:51 +0000340test('islower', u'a', True)
341test('islower', u'A', False)
342test('islower', u'\n', False)
343test('islower', u'\u1FFc', False)
344test('islower', u'abc', True)
345test('islower', u'aBc', False)
346test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000347
Guido van Rossum77f6a652002-04-03 22:41:51 +0000348test('isupper', u'a', False)
349test('isupper', u'A', True)
350test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000351if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000352 test('isupper', u'\u1FFc', False)
353test('isupper', u'ABC', True)
354test('isupper', u'AbC', False)
355test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000356
Guido van Rossum77f6a652002-04-03 22:41:51 +0000357test('istitle', u'a', False)
358test('istitle', u'A', True)
359test('istitle', u'\n', False)
360test('istitle', u'\u1FFc', True)
361test('istitle', u'A Titlecased Line', True)
362test('istitle', u'A\nTitlecased Line', True)
363test('istitle', u'A Titlecased, Line', True)
364test('istitle', u'Greek \u1FFcitlecases ...', True)
365test('istitle', u'Not a capitalized String', False)
366test('istitle', u'Not\ta Titlecase String', False)
367test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000368
Guido van Rossum77f6a652002-04-03 22:41:51 +0000369test('isalpha', u'a', True)
370test('isalpha', u'A', True)
371test('isalpha', u'\n', False)
372test('isalpha', u'\u1FFc', True)
373test('isalpha', u'abc', True)
374test('isalpha', u'aBc123', False)
375test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000376
Guido van Rossum77f6a652002-04-03 22:41:51 +0000377test('isalnum', u'a', True)
378test('isalnum', u'A', True)
379test('isalnum', u'\n', False)
380test('isalnum', u'123abc456', True)
381test('isalnum', u'a1b3c', True)
382test('isalnum', u'aBc000 ', False)
383test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000384
Guido van Rossuma831cac2000-03-10 23:23:21 +0000385test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
386test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
387test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
388test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
389test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
390test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000391test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000392
393test('translate', u"abababc", u'bbbc', {ord('a'):None})
394test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
395test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
396
Guido van Rossumd4d26842000-03-13 23:21:48 +0000397# Contains:
398print 'Testing Unicode contains method...',
Barry Warsaw817918c2002-08-06 16:58:21 +0000399vereq(('a' in u'abdb'), True)
400vereq(('a' in u'bdab'), True)
401vereq(('a' in u'bdaba'), True)
402vereq(('a' in u'bdba'), True)
403vereq(('a' in u'bdba'), True)
404vereq((u'a' in u'bdba'), True)
405vereq((u'a' in u'bdb'), False)
406vereq((u'a' in 'bdb'), False)
407vereq((u'a' in 'bdba'), True)
408vereq((u'a' in ('a',1,None)), True)
409vereq((u'a' in (1,None,'a')), True)
410vereq((u'a' in (1,None,u'a')), True)
411vereq(('a' in ('a',1,None)), True)
412vereq(('a' in (1,None,'a')), True)
413vereq(('a' in (1,None,u'a')), True)
414vereq(('a' in ('x',1,u'y')), False)
415vereq(('a' in ('x',1,None)), False)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000416print 'done.'
417
Guido van Rossuma831cac2000-03-10 23:23:21 +0000418# Formatting:
419print 'Testing Unicode formatting strings...',
verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
# %5.2f pads to a minimum width of five, so 3.00/3.50/3.57 each gain one
# leading space in addition to the literal space after the comma.
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50')
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57')
# Already wider than five characters: no padding is added.
verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
# %c accepts a one-character string or an integer code point.
verify(u"%c" % (u"a",) == u'a')
verify(u"%c" % ("a",) == u'a')
verify(u"%c" % (34,) == u'"')
verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000430if sys.platform[:4] != 'java':
431 value = u"%r, %r" % (u"abc", "abc")
432 if value != u"u'abc', 'abc'":
433 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000434
Marc-André Lemburg36619082001-01-17 19:11:13 +0000435verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000436try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000437 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000438except KeyError:
439 print '*** formatting failed for "%s"' % "u'abc, def'"
440else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000441 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000442
Guido van Rossum97064862000-04-10 13:52:48 +0000443# formatting jobs delegated from the string implementation:
# A str format with a unicode argument yields unicode; with only str
# arguments the result stays a str, whatever the type of the dict keys.
verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
verify('...%s...' % u"abc" == u'...abc...')
# '*' takes the field width (and precision) from the argument tuple;
# a negative width left-justifies.  Every field below is five wide.
verify('%*s' % (5,u'abc',) == u'  abc')
verify('%*s' % (-5,u'abc',) == u'abc  ')
verify('%*.*s' % (5,2,u'abc',) == u'   ab')
verify('%*.*s' % (5,3,u'abc',) == u'  abc')
verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10   abc')
verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103   abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000459print 'done.'
460
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000461print 'Testing builtin unicode()...',
462
463# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
464
465verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
466
class UnicodeSubclass(unicode):
    """Trivial unicode subclass with no overrides.

    The check that follows passes an instance to unicode() and compares the
    result with a plain unicode literal.
    """
    pass
469
470verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
471 == u'unicode subclass becomes unicode')
472
473verify(unicode('strings are converted to unicode')
474 == u'strings are converted to unicode')
475
class UnicodeCompat:
    """Object whose only conversion hook is ``__unicode__``.

    The check that follows expects unicode(obj) to equal the value handed
    to the constructor.
    """

    def __init__(self, x):
        # x -- the value __unicode__ will hand back unchanged.
        self.x = x

    def __unicode__(self):
        return self.x
481
482verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
483 == u'__unicode__ compatible objects are recognized')
484
class StringCompat:
    """Object whose only conversion hook is ``__str__``.

    The checks that follow expect both str(obj) and unicode(obj) to equal
    the value handed to the constructor.
    """

    def __init__(self, x):
        # x -- the value __str__ will hand back unchanged.
        self.x = x

    def __str__(self):
        return self.x
490
491verify(unicode(StringCompat('__str__ compatible objects are recognized'))
492 == u'__str__ compatible objects are recognized')
493
494# unicode(obj) is compatible to str():
495
496o = StringCompat('unicode(obj) is compatible to str()')
497verify(unicode(o) == u'unicode(obj) is compatible to str()')
498verify(str(o) == 'unicode(obj) is compatible to str()')
499
500for obj in (123, 123.45, 123L):
501 verify(unicode(obj) == unicode(str(obj)))
502
503# unicode(obj, encoding, error) tests (this maps to
504# PyUnicode_FromEncodedObject() at C level)
505
Finn Bock2b29cb22001-12-10 20:57:34 +0000506if not sys.platform.startswith('java'):
507 try:
508 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
509 except TypeError:
510 pass
511 else:
512 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000513
514verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
515 == u'strings are decoded to unicode')
516
Finn Bock2b29cb22001-12-10 20:57:34 +0000517if not sys.platform.startswith('java'):
518 verify(unicode(buffer('character buffers are decoded to unicode'),
519 'utf-8', 'strict')
520 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000521
522print 'done.'
523
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000524# Test builtin codecs
525print 'Testing builtin codecs...',
526
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000527# UTF-7 specific encoding tests:
528utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
529 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
530 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
531 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
532 (u'+', '+-'),
533 (u'+-', '+--'),
534 (u'+?', '+-?'),
535 (u'\?', '+AFw?'),
536 (u'+?', '+-?'),
537 (ur'\\?', '+AFwAXA?'),
538 (ur'\\\?', '+AFwAXABc?'),
539 (ur'++--', '+-+---')]
540
541for x,y in utfTests:
542 verify( x.encode('utf-7') == y )
543
Tim Peters527e64f2001-10-04 05:36:56 +0000544try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000545 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
546except UnicodeError:
547 pass
548else:
549 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
550
551verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
552
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000553# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000554verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000555verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
556verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
557verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
558verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
559verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
560verify((u'\ud800\udc02'*1000).encode('utf-8') ==
561 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000562verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
563 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
564 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
565 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
566 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000567 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000568 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
569 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
570 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
571 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
572 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
573 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
574 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
575 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
576 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
577 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000578
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000579# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000580verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
581verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
582verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000583
584# Other possible utf-8 test cases:
585# * strict decoding testing for all of the
586# UTF8_ERROR cases in PyUnicode_DecodeUTF8
587
Marc-André Lemburg36619082001-01-17 19:11:13 +0000588verify(unicode('hello','ascii') == u'hello')
589verify(unicode('hello','utf-8') == u'hello')
590verify(unicode('hello','utf8') == u'hello')
591verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000592
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000593# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000594try:
595 u'Andr\202 x'.encode('ascii')
596 u'Andr\202 x'.encode('ascii','strict')
597except ValueError:
598 pass
599else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000600 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000601verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
602verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000603
604try:
605 unicode('Andr\202 x','ascii')
606 unicode('Andr\202 x','ascii','strict')
607except ValueError:
608 pass
609else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000610 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000611verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
612verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000613
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000614verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
615try:
616 "\\".decode("unicode-escape")
617except ValueError:
618 pass
619else:
620 raise TestFailed, '"\\".decode("unicode-escape") should fail'
621
Marc-André Lemburg36619082001-01-17 19:11:13 +0000622verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000623verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000624verify(u'hello'.encode('utf-8') == 'hello')
625verify(u'hello'.encode('utf8') == 'hello')
626verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
627verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
628verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000629
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000630# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000631u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000632for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000633 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000634 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000635
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000636# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000637u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000638for encoding in (
639 'latin-1',
640 ):
641 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000642 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000643 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000644 print '*** codec "%s" failed round-trip' % encoding
645 except ValueError,why:
646 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000647
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000648# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000649u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000650for encoding in (
651 'ascii',
652 ):
653 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000654 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000655 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000656 print '*** codec "%s" failed round-trip' % encoding
657 except ValueError,why:
658 print '*** codec for "%s" failed: %s' % (encoding, why)
659
# Round-trip check for a handful of characters outside the BMP.
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in (
    'utf-8',
    'utf-16', 'utf-16-le', 'utf-16-be',
    # Deliberately left out of this check:
    #'raw_unicode_escape',
    'unicode_escape', 'unicode_internal',
    ):
    verify(unicode(u.encode(encoding), encoding) == u)
667
668# UTF-8 must be roundtrip safe for all UCS-2 code points
669u = u''.join(map(unichr, range(0x10000)))
670for encoding in ('utf-8',):
671 verify(unicode(u.encode(encoding),encoding) == u)
672
Guido van Rossum9e896b32000-04-05 20:11:21 +0000673print 'done.'
674
675print 'Testing standard mapping codecs...',
676
677print '0-127...',
678s = ''.join(map(chr, range(128)))
679for encoding in (
680 'cp037', 'cp1026',
681 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
682 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000683 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000684 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
685 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
686 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
687 'mac_cyrillic', 'mac_latin2',
688
689 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
690 'cp1256', 'cp1257', 'cp1258',
691 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
692
693 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000694 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000695
Guido van Rossum9e896b32000-04-05 20:11:21 +0000696 ### These have undefined mappings:
697 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000698
Tim Peters2f228e72001-05-13 00:19:31 +0000699 ### These fail the round-trip:
700 #'cp875'
701
Guido van Rossum9e896b32000-04-05 20:11:21 +0000702 ):
703 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000704 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000705 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000706 print '*** codec "%s" failed round-trip' % encoding
707 except ValueError,why:
708 print '*** codec for "%s" failed: %s' % (encoding, why)
709
710print '128-255...',
711s = ''.join(map(chr, range(128,256)))
712for encoding in (
713 'cp037', 'cp1026',
714 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
715 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000716 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000717 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000718 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000719 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000720 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000721
Guido van Rossum9e896b32000-04-05 20:11:21 +0000722 ### These have undefined mappings:
723 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
724 #'cp1256', 'cp1257', 'cp1258',
725 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000726 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000727 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000728
Guido van Rossum9e896b32000-04-05 20:11:21 +0000729 ### These fail the round-trip:
730 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000731
Guido van Rossum9e896b32000-04-05 20:11:21 +0000732 ):
733 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000734 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000735 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000736 print '*** codec "%s" failed round-trip' % encoding
737 except ValueError,why:
738 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000739
740print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000741
742print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000743verify((u"abc" u"def") == u"abcdef")
744verify(("abc" u"def") == u"abcdef")
745verify((u"abc" "def") == u"abcdef")
746verify((u"abc" u"def" "ghi") == u"abcdefghi")
747verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000748print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000749
750print 'Testing Unicode printing...',
751print u'abc'
752print u'abc', u'def'
753print u'abc', 'def'
754print 'abc', u'def'
755print u'abc\n'
756print u'abc\n',
757print u'abc\n',
758print u'def\n'
759print u'def\n'
760print 'done.'
Barry Warsaw817918c2002-08-06 16:58:21 +0000761
762def test_exception(lhs, rhs, msg):
763 try:
764 lhs in rhs
765 except TypeError:
766 pass
767 else:
768 raise TestFailed, msg
769
def run_contains_tests():
    """Check the ``in`` operator across str and unicode operands.

    Every scenario appears three times in a row: unicode in str, str in
    unicode, and unicode in unicode.  Each row is evaluated as
    ``needle in haystack`` and compared with the expected boolean through
    vereq(), in the order listed.
    """
    cases = (
        # The empty string is contained in the empty string ...
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        # ... and in a non-empty one.
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        # NUL character that is not present.
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        # NUL character at the start.
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        # NUL character at the end.
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        # Ordinary character located after a NUL.
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        # Needle equal to the whole haystack.
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        # Needle longer than the haystack.
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        # Non-empty needle against an empty haystack.
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    )
    for needle, haystack, expected in cases:
        vereq(needle in haystack, expected)

run_contains_tests()