blob: 546505176f2b0d5cb1c0f15764c7487fc6a9ab4b [file] [log] [blame]
Guido van Rossuma831cac2000-03-10 23:23:21 +00001""" Test script for the Unicode implementation.
2
Guido van Rossuma831cac2000-03-10 23:23:21 +00003Written by Marc-Andre Lemburg (mal@lemburg.com).
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
Marc-André Lemburg36619082001-01-17 19:11:13 +00007"""#"
Tim Peters2f228e72001-05-13 00:19:31 +00008from test_support import verify, verbose, TestFailed
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +00009import sys, string
Guido van Rossuma831cac2000-03-10 23:23:21 +000010
Finn Bock2b29cb22001-12-10 20:57:34 +000011if not sys.platform.startswith('java'):
12 # Test basic sanity of repr()
13 verify(repr(u'abc') == "u'abc'")
14 verify(repr(u'ab\\c') == "u'ab\\\\c'")
15 verify(repr(u'ab\\') == "u'ab\\\\'")
16 verify(repr(u'\\c') == "u'\\\\c'")
17 verify(repr(u'\\') == "u'\\\\'")
18 verify(repr(u'\n') == "u'\\n'")
19 verify(repr(u'\r') == "u'\\r'")
20 verify(repr(u'\t') == "u'\\t'")
21 verify(repr(u'\b') == "u'\\x08'")
22 verify(repr(u"'\"") == """u'\\'"'""")
23 verify(repr(u"'\"") == """u'\\'"'""")
24 verify(repr(u"'") == '''u"'"''')
25 verify(repr(u'"') == """u'"'""")
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +000026 latin1repr = (
27 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
28 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
29 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
30 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
31 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
32 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
33 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
34 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
35 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
36 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
37 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
38 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
39 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
40 "\\xfe\\xff'")
41 testrepr = repr(u''.join(map(unichr, range(256))))
42 verify(testrepr == latin1repr)
Guido van Rossume4874ae2001-09-21 15:36:41 +000043
Guido van Rossuma831cac2000-03-10 23:23:21 +000044def test(method, input, output, *args):
45 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000046 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000047 try:
48 f = getattr(input, method)
49 value = apply(f, args)
50 except:
51 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000052 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000053 else:
54 exc = None
Walter Dörwald2ee4be02002-04-17 21:34:05 +000055 if value == output and type(value) is type(output):
56 # if the original is returned make sure that
57 # this doesn't happen with subclasses
58 if value is input:
59 class usub(unicode):
60 def __repr__(self):
61 return 'usub(%r)' % unicode.__repr__(self)
62 input = usub(input)
63 try:
64 f = getattr(input, method)
65 value = apply(f, args)
66 except:
67 value = sys.exc_type
68 exc = sys.exc_info()[:2]
69 if value is input:
70 if verbose:
Tim Peters8ac14952002-05-23 15:15:30 +000071 print 'no'
Walter Dörwald2ee4be02002-04-17 21:34:05 +000072 print '*',f, `input`, `output`, `value`
73 return
Guido van Rossum15ffc712000-11-29 12:13:59 +000074 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000075 if verbose:
76 print 'no'
77 print '*',f, `input`, `output`, `value`
78 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000079 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000080 else:
81 if verbose:
82 print 'yes'
83
84test('capitalize', u' hello ', u' hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000085test('capitalize', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000086test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000087test('capitalize', u'aaaa', u'Aaaa')
88test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000089
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000090test('count', u'aaa', 3, u'a')
91test('count', u'aaa', 0, u'b')
92test('count', 'aaa', 3, u'a')
93test('count', 'aaa', 0, u'b')
94test('count', u'aaa', 3, 'a')
95test('count', u'aaa', 0, 'b')
96
Guido van Rossuma831cac2000-03-10 23:23:21 +000097test('title', u' hello ', u' Hello ')
Walter Dörwald2ee4be02002-04-17 21:34:05 +000098test('title', u'Hello ', u'Hello ')
Guido van Rossuma831cac2000-03-10 23:23:21 +000099test('title', u'hello ', u'Hello ')
100test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
101test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
102test('title', u"getInt", u'Getint')
103
104test('find', u'abcdefghiabc', 0, u'abc')
105test('find', u'abcdefghiabc', 9, u'abc', 1)
106test('find', u'abcdefghiabc', -1, u'def', 4)
107
108test('rfind', u'abcdefghiabc', 9, u'abc')
109
110test('lower', u'HeLLo', u'hello')
111test('lower', u'hello', u'hello')
112
113test('upper', u'HeLLo', u'HELLO')
114test('upper', u'HELLO', u'HELLO')
115
116if 0:
117 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
118
119 test('maketrans', u'abc', transtable, u'xyz')
120 test('maketrans', u'abc', ValueError, u'xyzq')
121
122test('split', u'this is the split function',
123 [u'this', u'is', u'the', u'split', u'function'])
124test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
125test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
126test('split', u'a b c d', [u'a', u'b c d'], None, 1)
127test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
128test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
129test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
130test('split', u'a b c d', [u'a b c d'], None, 0)
131test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
132test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +0000133test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
134test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
135test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
136test('split', u'endcase test', [u'endcase ', u''], u'test')
137test('split', u'endcase test', [u'endcase ', u''], 'test')
138test('split', 'endcase test', [u'endcase ', u''], u'test')
139
Guido van Rossuma831cac2000-03-10 23:23:21 +0000140
141# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper used to feed join() a non-builtin type.

    Only the length and indexing protocols are provided; both delegate
    to the wrapped object.
    """

    def __init__(self, seq):
        # Keep a reference to the wrapped sequence; no copy is made.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
146
147test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000148test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000149test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000150test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000151test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000152test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
153test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
154test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
155test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
156test('join', ' ', u'w x y z', Sequence(u'wxyz'))
157test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000158
# Assemble the expected string by plain concatenation (ten runs of ten
# u'x' separated by colons) so that the expectation does not itself
# depend on join().
result = u'x'*10
for i in range(1, 10):
    result = result + u':' + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
166
167test('strip', u' hello ', u'hello')
168test('lstrip', u' hello ', u'hello ')
169test('rstrip', u' hello ', u' hello')
170test('strip', u'hello', u'hello')
171
Walter Dörwaldde02bcb2002-04-22 17:42:37 +0000172# strip/lstrip/rstrip with None arg
173test('strip', u' hello ', u'hello', None)
174test('lstrip', u' hello ', u'hello ', None)
175test('rstrip', u' hello ', u' hello', None)
176test('strip', u'hello', u'hello', None)
177
178# strip/lstrip/rstrip with unicode arg
179test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
180test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
181test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
182test('strip', u'hello', u'hello', u'xyz')
183
184# strip/lstrip/rstrip with str arg
185test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
186test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
187test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
188test('strip', u'hello', u'hello', 'xyz')
189
Guido van Rossuma831cac2000-03-10 23:23:21 +0000190test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
191
192if 0:
193 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
194
195 table = string.maketrans('a', u'A')
196 test('translate', u'abc', u'Abc', table)
197 test('translate', u'xyz', u'xyz', table)
198
199test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000200test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000201test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
202test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
203test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
204test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
205test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
206test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
207test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
208
Guido van Rossum77f6a652002-04-03 22:41:51 +0000209test('startswith', u'hello', True, u'he')
210test('startswith', u'hello', True, u'hello')
211test('startswith', u'hello', False, u'hello world')
212test('startswith', u'hello', True, u'')
213test('startswith', u'hello', False, u'ello')
214test('startswith', u'hello', True, u'ello', 1)
215test('startswith', u'hello', True, u'o', 4)
216test('startswith', u'hello', False, u'o', 5)
217test('startswith', u'hello', True, u'', 5)
218test('startswith', u'hello', False, u'lo', 6)
219test('startswith', u'helloworld', True, u'lowo', 3)
220test('startswith', u'helloworld', True, u'lowo', 3, 7)
221test('startswith', u'helloworld', False, u'lowo', 3, 6)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000222
Guido van Rossum77f6a652002-04-03 22:41:51 +0000223test('endswith', u'hello', True, u'lo')
224test('endswith', u'hello', False, u'he')
225test('endswith', u'hello', True, u'')
226test('endswith', u'hello', False, u'hello world')
227test('endswith', u'helloworld', False, u'worl')
228test('endswith', u'helloworld', True, u'worl', 3, 9)
229test('endswith', u'helloworld', True, u'world', 3, 12)
230test('endswith', u'helloworld', True, u'lowo', 1, 7)
231test('endswith', u'helloworld', True, u'lowo', 2, 7)
232test('endswith', u'helloworld', True, u'lowo', 3, 7)
233test('endswith', u'helloworld', False, u'lowo', 4, 7)
234test('endswith', u'helloworld', False, u'lowo', 3, 8)
235test('endswith', u'ab', False, u'ab', 0, 1)
236test('endswith', u'ab', False, u'ab', 0, 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000237
238test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
239test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
240test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
241test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
Walter Dörwald2ee4be02002-04-17 21:34:05 +0000242test('expandtabs', u'abc\r\nab\r\ndef\ng\r\nhi', u'abc\r\nab\r\ndef\ng\r\nhi', 4)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000243
244if 0:
245 test('capwords', u'abc def ghi', u'Abc Def Ghi')
246 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
247 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
248
Walter Dörwald068325e2002-04-15 13:36:47 +0000249test('zfill', u'123', u'123', 2)
250test('zfill', u'123', u'123', 3)
251test('zfill', u'123', u'0123', 4)
252test('zfill', u'+123', u'+123', 3)
253test('zfill', u'+123', u'+123', 4)
254test('zfill', u'+123', u'+0123', 5)
255test('zfill', u'-123', u'-123', 3)
256test('zfill', u'-123', u'-123', 4)
257test('zfill', u'-123', u'-0123', 5)
258test('zfill', u'', u'000', 3)
259test('zfill', u'34', u'34', 1)
260test('zfill', u'34', u'00034', 5)
Andrew M. Kuchlingeddd68d2002-03-29 16:21:44 +0000261
Guido van Rossuma831cac2000-03-10 23:23:21 +0000262# Comparisons:
263print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000264verify(u'abc' == 'abc')
265verify('abc' == u'abc')
266verify(u'abc' == u'abc')
267verify(u'abcd' > 'abc')
268verify('abcd' > u'abc')
269verify(u'abcd' > u'abc')
270verify(u'abc' < 'abcd')
271verify('abc' < u'abcd')
272verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000273print 'done.'
274
Marc-André Lemburge5034372000-08-08 08:04:29 +0000275if 0:
276 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000277
Marc-André Lemburge5034372000-08-08 08:04:29 +0000278 print 'Testing UTF-16 code point order comparisons...',
279 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000280 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000281 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000282 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000283
Marc-André Lemburge5034372000-08-08 08:04:29 +0000284 # Non surrogate above surrogate value, fixup required
285 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000286 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000287
Marc-André Lemburge5034372000-08-08 08:04:29 +0000288 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000289 s2 = u'\ud800\udc01'
290 test_lecmp(s, s2)
291 s2 = u'\ud900\udc01'
292 test_lecmp(s, s2)
293 s2 = u'\uda00\udc01'
294 test_lecmp(s, s2)
295 s2 = u'\udb00\udc01'
296 test_lecmp(s, s2)
297 s2 = u'\ud800\udd01'
298 test_lecmp(s, s2)
299 s2 = u'\ud900\udd01'
300 test_lecmp(s, s2)
301 s2 = u'\uda00\udd01'
302 test_lecmp(s, s2)
303 s2 = u'\udb00\udd01'
304 test_lecmp(s, s2)
305 s2 = u'\ud800\ude01'
306 test_lecmp(s, s2)
307 s2 = u'\ud900\ude01'
308 test_lecmp(s, s2)
309 s2 = u'\uda00\ude01'
310 test_lecmp(s, s2)
311 s2 = u'\udb00\ude01'
312 test_lecmp(s, s2)
313 s2 = u'\ud800\udfff'
314 test_lecmp(s, s2)
315 s2 = u'\ud900\udfff'
316 test_lecmp(s, s2)
317 s2 = u'\uda00\udfff'
318 test_lecmp(s, s2)
319 s2 = u'\udb00\udfff'
320 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000321
322 test_fixup(u'\ue000')
323 test_fixup(u'\uff61')
324
325 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000326 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000327 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000328
Guido van Rossuma831cac2000-03-10 23:23:21 +0000329test('ljust', u'abc', u'abc ', 10)
330test('rjust', u'abc', u' abc', 10)
331test('center', u'abc', u' abc ', 10)
332test('ljust', u'abc', u'abc ', 6)
333test('rjust', u'abc', u' abc', 6)
334test('center', u'abc', u' abc ', 6)
335test('ljust', u'abc', u'abc', 2)
336test('rjust', u'abc', u'abc', 2)
337test('center', u'abc', u'abc', 2)
338
Guido van Rossum77f6a652002-04-03 22:41:51 +0000339test('islower', u'a', True)
340test('islower', u'A', False)
341test('islower', u'\n', False)
342test('islower', u'\u1FFc', False)
343test('islower', u'abc', True)
344test('islower', u'aBc', False)
345test('islower', u'abc\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000346
Guido van Rossum77f6a652002-04-03 22:41:51 +0000347test('isupper', u'a', False)
348test('isupper', u'A', True)
349test('isupper', u'\n', False)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000350if sys.platform[:4] != 'java':
Guido van Rossum77f6a652002-04-03 22:41:51 +0000351 test('isupper', u'\u1FFc', False)
352test('isupper', u'ABC', True)
353test('isupper', u'AbC', False)
354test('isupper', u'ABC\n', True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000355
Guido van Rossum77f6a652002-04-03 22:41:51 +0000356test('istitle', u'a', False)
357test('istitle', u'A', True)
358test('istitle', u'\n', False)
359test('istitle', u'\u1FFc', True)
360test('istitle', u'A Titlecased Line', True)
361test('istitle', u'A\nTitlecased Line', True)
362test('istitle', u'A Titlecased, Line', True)
363test('istitle', u'Greek \u1FFcitlecases ...', True)
364test('istitle', u'Not a capitalized String', False)
365test('istitle', u'Not\ta Titlecase String', False)
366test('istitle', u'Not--a Titlecase String', False)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000367
Guido van Rossum77f6a652002-04-03 22:41:51 +0000368test('isalpha', u'a', True)
369test('isalpha', u'A', True)
370test('isalpha', u'\n', False)
371test('isalpha', u'\u1FFc', True)
372test('isalpha', u'abc', True)
373test('isalpha', u'aBc123', False)
374test('isalpha', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000375
Guido van Rossum77f6a652002-04-03 22:41:51 +0000376test('isalnum', u'a', True)
377test('isalnum', u'A', True)
378test('isalnum', u'\n', False)
379test('isalnum', u'123abc456', True)
380test('isalnum', u'a1b3c', True)
381test('isalnum', u'aBc000 ', False)
382test('isalnum', u'abc\n', False)
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000383
Guido van Rossuma831cac2000-03-10 23:23:21 +0000384test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
385test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
386test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
387test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
388test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
389test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum77f6a652002-04-03 22:41:51 +0000390test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], True)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000391
392test('translate', u"abababc", u'bbbc', {ord('a'):None})
393test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
394test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
395
Guido van Rossumd4d26842000-03-13 23:21:48 +0000396# Contains:
397print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000398verify(('a' in u'abdb') == 1)
399verify(('a' in u'bdab') == 1)
400verify(('a' in u'bdaba') == 1)
401verify(('a' in u'bdba') == 1)
402verify(('a' in u'bdba') == 1)
403verify((u'a' in u'bdba') == 1)
404verify((u'a' in u'bdb') == 0)
405verify((u'a' in 'bdb') == 0)
406verify((u'a' in 'bdba') == 1)
407verify((u'a' in ('a',1,None)) == 1)
408verify((u'a' in (1,None,'a')) == 1)
409verify((u'a' in (1,None,u'a')) == 1)
410verify(('a' in ('a',1,None)) == 1)
411verify(('a' in (1,None,'a')) == 1)
412verify(('a' in (1,None,u'a')) == 1)
413verify(('a' in ('x',1,u'y')) == 0)
414verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000415print 'done.'
416
Guido van Rossuma831cac2000-03-10 23:23:21 +0000417# Formatting:
418print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000419verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
420verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
421verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
422verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
423verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
424verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
425verify(u"%c" % (u"a",) == u'a')
426verify(u"%c" % ("a",) == u'a')
427verify(u"%c" % (34,) == u'"')
428verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000429if sys.platform[:4] != 'java':
430 value = u"%r, %r" % (u"abc", "abc")
431 if value != u"u'abc', 'abc'":
432 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000433
Marc-André Lemburg36619082001-01-17 19:11:13 +0000434verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000435try:
Marc-André Lemburg72f82132001-11-20 15:18:49 +0000436 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000437except KeyError:
438 print '*** formatting failed for "%s"' % "u'abc, def'"
439else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000440 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000441
Guido van Rossum97064862000-04-10 13:52:48 +0000442# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000443verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
444verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
445verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
446verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
447verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
448verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
449verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
450verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
451verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000452verify('%*s' % (5,u'abc',) == u' abc')
453verify('%*s' % (-5,u'abc',) == u'abc ')
454verify('%*.*s' % (5,2,u'abc',) == u' ab')
455verify('%*.*s' % (5,3,u'abc',) == u' abc')
456verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
457verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000458print 'done.'
459
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000460print 'Testing builtin unicode()...',
461
462# unicode(obj) tests (this maps to PyObject_Unicode() at C level)
463
464verify(unicode(u'unicode remains unicode') == u'unicode remains unicode')
465
class UnicodeSubclass(unicode):
    # Trivial unicode subclass with no overrides.
    pass
468
469verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
470 == u'unicode subclass becomes unicode')
471
472verify(unicode('strings are converted to unicode')
473 == u'strings are converted to unicode')
474
class UnicodeCompat:
    # Old-style class whose only conversion hook is __unicode__().
    def __init__(self, x):
        # x is stored as-is and handed back by __unicode__().
        self.x = x
    def __unicode__(self):
        # Return the stored value unchanged.
        return self.x
480
481verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
482 == u'__unicode__ compatible objects are recognized')
483
class StringCompat:
    # Old-style class whose only conversion hook is __str__(); it
    # defines no __unicode__().
    def __init__(self, x):
        # x is stored as-is and handed back by __str__().
        self.x = x
    def __str__(self):
        # Return the stored value unchanged.
        return self.x
489
490verify(unicode(StringCompat('__str__ compatible objects are recognized'))
491 == u'__str__ compatible objects are recognized')
492
493# unicode(obj) is compatible to str():
494
495o = StringCompat('unicode(obj) is compatible to str()')
496verify(unicode(o) == u'unicode(obj) is compatible to str()')
497verify(str(o) == 'unicode(obj) is compatible to str()')
498
499for obj in (123, 123.45, 123L):
500 verify(unicode(obj) == unicode(str(obj)))
501
502# unicode(obj, encoding, error) tests (this maps to
503# PyUnicode_FromEncodedObject() at C level)
504
Finn Bock2b29cb22001-12-10 20:57:34 +0000505if not sys.platform.startswith('java'):
506 try:
507 unicode(u'decoding unicode is not supported', 'utf-8', 'strict')
508 except TypeError:
509 pass
510 else:
511 raise TestFailed, "decoding unicode should NOT be supported"
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000512
513verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
514 == u'strings are decoded to unicode')
515
Finn Bock2b29cb22001-12-10 20:57:34 +0000516if not sys.platform.startswith('java'):
517 verify(unicode(buffer('character buffers are decoded to unicode'),
518 'utf-8', 'strict')
519 == u'character buffers are decoded to unicode')
Marc-André Lemburgb5507ec2001-10-19 12:02:29 +0000520
521print 'done.'
522
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000523# Test builtin codecs
524print 'Testing builtin codecs...',
525
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000526# UTF-7 specific encoding tests:
527utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
528 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
529 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
530 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
531 (u'+', '+-'),
532 (u'+-', '+--'),
533 (u'+?', '+-?'),
534 (u'\?', '+AFw?'),
535 (u'+?', '+-?'),
536 (ur'\\?', '+AFwAXA?'),
537 (ur'\\\?', '+AFwAXABc?'),
538 (ur'++--', '+-+---')]
539
540for x,y in utfTests:
541 verify( x.encode('utf-7') == y )
542
Tim Peters527e64f2001-10-04 05:36:56 +0000543try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000544 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
545except UnicodeError:
546 pass
547else:
548 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
549
550verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
551
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000552# UTF-8 specific encoding tests:
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000553verify(u''.encode('utf-8') == '')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000554verify(u'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
555verify(u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
556verify(u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
557verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
558verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
559verify((u'\ud800\udc02'*1000).encode('utf-8') ==
560 '\xf0\x90\x80\x82'*1000)
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000561verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
562 u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
563 u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
564 u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
565 u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
Tim Peters863ac442002-04-16 01:38:40 +0000566 u' Nunstuck git und'.encode('utf-8') ==
Marc-André Lemburgce0b6642002-04-10 17:18:02 +0000567 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
568 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
569 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
570 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
571 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
572 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
573 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
574 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
575 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
576 '\xe3\x80\x8cWenn ist das Nunstuck git und')
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000577
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000578# UTF-8 specific decoding tests
Marc-André Lemburg3688a882002-02-06 18:09:02 +0000579verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
580verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' )
581verify(unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000582
583# Other possible utf-8 test cases:
584# * strict decoding testing for all of the
585# UTF8_ERROR cases in PyUnicode_DecodeUTF8
586
Marc-André Lemburg36619082001-01-17 19:11:13 +0000587verify(unicode('hello','ascii') == u'hello')
588verify(unicode('hello','utf-8') == u'hello')
589verify(unicode('hello','utf8') == u'hello')
590verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000591
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000592# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000593try:
594 u'Andr\202 x'.encode('ascii')
595 u'Andr\202 x'.encode('ascii','strict')
596except ValueError:
597 pass
598else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000599 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000600verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
601verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000602
603try:
604 unicode('Andr\202 x','ascii')
605 unicode('Andr\202 x','ascii','strict')
606except ValueError:
607 pass
608else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000609 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000610verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
611verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000612
Martin v. Löwis047c05e2002-03-21 08:55:28 +0000613verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
614try:
615 "\\".decode("unicode-escape")
616except ValueError:
617 pass
618else:
619 raise TestFailed, '"\\".decode("unicode-escape") should fail'
620
Marc-André Lemburg36619082001-01-17 19:11:13 +0000621verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000622verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000623verify(u'hello'.encode('utf-8') == 'hello')
624verify(u'hello'.encode('utf8') == 'hello')
625verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
626verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
627verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000628
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000629# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000630u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000631for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000632 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000633 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000634
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000635# Roundtrip safety for BMP (just the first 256 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000636u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000637for encoding in (
638 'latin-1',
639 ):
640 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000641 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000642 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000643 print '*** codec "%s" failed round-trip' % encoding
644 except ValueError,why:
645 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000646
Marc-André Lemburgbd3be8f2002-02-07 11:33:49 +0000647# Roundtrip safety for BMP (just the first 128 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000648u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000649for encoding in (
650 'ascii',
651 ):
652 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000653 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000654 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000655 print '*** codec "%s" failed round-trip' % encoding
656 except ValueError,why:
657 print '*** codec for "%s" failed: %s' % (encoding, why)
658
# Roundtrip safety for non-BMP (just a few chars, one from each of
# planes 1 through 5).
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
non_bmp_codecs = (
    'utf-8',
    'utf-16', 'utf-16-le', 'utf-16-be',
    # 'raw_unicode_escape' is deliberately left out of this list; the
    # reason is not recorded here.
    'unicode_escape', 'unicode_internal',
)
for encoding in non_bmp_codecs:
    encoded = u.encode(encoding)
    verify(unicode(encoded, encoding) == u)
666
667# UTF-8 must be roundtrip safe for all UCS-2 code points
668u = u''.join(map(unichr, range(0x10000)))
669for encoding in ('utf-8',):
670 verify(unicode(u.encode(encoding),encoding) == u)
671
Guido van Rossum9e896b32000-04-05 20:11:21 +0000672print 'done.'
673
674print 'Testing standard mapping codecs...',
675
676print '0-127...',
677s = ''.join(map(chr, range(128)))
678for encoding in (
679 'cp037', 'cp1026',
680 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
681 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000682 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000683 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
684 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
685 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
686 'mac_cyrillic', 'mac_latin2',
687
688 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
689 'cp1256', 'cp1257', 'cp1258',
690 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
691
692 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000693 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000694
Guido van Rossum9e896b32000-04-05 20:11:21 +0000695 ### These have undefined mappings:
696 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000697
Tim Peters2f228e72001-05-13 00:19:31 +0000698 ### These fail the round-trip:
699 #'cp875'
700
Guido van Rossum9e896b32000-04-05 20:11:21 +0000701 ):
702 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000703 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000704 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000705 print '*** codec "%s" failed round-trip' % encoding
706 except ValueError,why:
707 print '*** codec for "%s" failed: %s' % (encoding, why)
708
709print '128-255...',
710s = ''.join(map(chr, range(128,256)))
711for encoding in (
712 'cp037', 'cp1026',
713 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
714 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000715 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000716 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000717 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000718 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000719 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000720
Guido van Rossum9e896b32000-04-05 20:11:21 +0000721 ### These have undefined mappings:
722 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
723 #'cp1256', 'cp1257', 'cp1258',
724 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000725 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000726 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000727
Guido van Rossum9e896b32000-04-05 20:11:21 +0000728 ### These fail the round-trip:
729 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000730
Guido van Rossum9e896b32000-04-05 20:11:21 +0000731 ):
732 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000733 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000734 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000735 print '*** codec "%s" failed round-trip' % encoding
736 except ValueError,why:
737 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000738
739print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000740
741print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000742verify((u"abc" u"def") == u"abcdef")
743verify(("abc" u"def") == u"abcdef")
744verify((u"abc" "def") == u"abcdef")
745verify((u"abc" u"def" "ghi") == u"abcdefghi")
746verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000747print 'done.'
Marc-André Lemburg0c4d8d02001-11-20 15:17:25 +0000748
749print 'Testing Unicode printing...',
750print u'abc'
751print u'abc', u'def'
752print u'abc', 'def'
753print 'abc', u'def'
754print u'abc\n'
755print u'abc\n',
756print u'abc\n',
757print u'def\n'
758print u'def\n'
759print 'done.'