""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
from test_support import verify, verbose, TestFailed
import sys

# Test basic sanity of repr()
verify(repr(u'abc') == "u'abc'")
# Backslashes are doubled in the repr.
verify(repr(u'ab\\c') == "u'ab\\\\c'")
verify(repr(u'ab\\') == "u'ab\\\\'")
verify(repr(u'\\c') == "u'\\\\c'")
verify(repr(u'\\') == "u'\\\\'")
# Control characters use their short escape when one exists, \xNN otherwise.
verify(repr(u'\n') == "u'\\n'")
verify(repr(u'\r') == "u'\\r'")
verify(repr(u'\t') == "u'\\t'")
verify(repr(u'\b') == "u'\\x08'")
# Quote selection: prefer single quotes, switch to double quotes only when
# the string contains a single quote and no double quote.
verify(repr(u"'\"") == """u'\\'"'""")
verify(repr(u"'") == '''u"'"''')
verify(repr(u'"') == """u'"'""")

Guido van Rossuma831cac2000-03-10 23:23:21 +000026def test(method, input, output, *args):
27 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000028 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000029 try:
30 f = getattr(input, method)
31 value = apply(f, args)
32 except:
33 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000034 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000035 else:
36 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000037 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000038 if verbose:
39 print 'no'
40 print '*',f, `input`, `output`, `value`
41 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000042 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000043 else:
44 if verbose:
45 print 'yes'
46
# capitalize(): only the very first character is upper-cased, the rest
# is lower-cased.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')
test('capitalize', u'aaaa', u'Aaaa')
test('capitalize', u'AaAa', u'Aaaa')

# count(): str and unicode may be mixed freely as receiver and argument.
test('count', u'aaa', 3, u'a')
test('count', u'aaa', 0, u'b')
test('count', 'aaa', 3, u'a')
test('count', 'aaa', 0, u'b')
test('count', u'aaa', 3, 'a')
test('count', u'aaa', 0, 'b')

test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')

# Disabled: kept for reference only, never executed.
if 0:
    # Identity translation table with 'abc' mapped to 'xyz'.
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')

# split(): whitespace splitting, explicit separators, maxsplit limits and
# mixed str/unicode receiver and separator.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
# A separator at the very end yields a trailing empty string.
test('split', u'endcase test', [u'endcase ', u''], u'test')
test('split', u'endcase test', [u'endcase ', u''], 'test')
test('split', 'endcase test', [u'endcase ', u''], u'test')


# join now works with any sequence type
class Sequence:
    """Thin wrapper exposing only __len__ and __getitem__ of ``seq``.

    Used by the join tests below as a sequence that is neither a list
    nor a tuple.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # IndexError from the wrapped sequence propagates unchanged.
        return self.seq[i]

108test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000109test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000110test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000111test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000112test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000113test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
114test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
115test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
116test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
117test('join', ' ', u'w x y z', Sequence(u'wxyz'))
118test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000119
120result = u''
121for i in range(10):
122 if i > 0:
123 result = result + u':'
124 result = result + u'x'*10
125test('join', u':', result, [u'x' * 10] * 10)
126test('join', u':', result, (u'x' * 10,) * 10)
127
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: depends on the (also disabled) transtable and on the string
# module, which this script does not import.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(): optional third argument caps the number of replacements.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)

# startswith(prefix[, start[, end]]); expected values are the ints 0/1.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(suffix[, start[, end]])
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)

# expandtabs(): the column counter restarts after '\r' and '\n'.  The
# padding is spelled out arithmetically so the expected values stay
# correct even if runs of spaces in this file get collapsed.
# Tab size 8: 'ab' + 6 spaces reaches column 8, 'g' + 7 spaces likewise.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
# Tab size 4: 'ab' + 2 spaces reaches column 4, 'g' + 3 spaces likewise.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)

# Disabled: kept for reference only, never executed.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')

191# Comparisons:
192print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000193verify(u'abc' == 'abc')
194verify('abc' == u'abc')
195verify(u'abc' == u'abc')
196verify(u'abcd' > 'abc')
197verify('abcd' > u'abc')
198verify(u'abcd' > u'abc')
199verify(u'abc' < 'abcd')
200verify('abc' < u'abcd')
201verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000202print 'done.'
203
Marc-André Lemburge5034372000-08-08 08:04:29 +0000204if 0:
205 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000206
Marc-André Lemburge5034372000-08-08 08:04:29 +0000207 print 'Testing UTF-16 code point order comparisons...',
208 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000209 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000210 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000211 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000212
Marc-André Lemburge5034372000-08-08 08:04:29 +0000213 # Non surrogate above surrogate value, fixup required
214 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000215 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000216
Marc-André Lemburge5034372000-08-08 08:04:29 +0000217 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000218 s2 = u'\ud800\udc01'
219 test_lecmp(s, s2)
220 s2 = u'\ud900\udc01'
221 test_lecmp(s, s2)
222 s2 = u'\uda00\udc01'
223 test_lecmp(s, s2)
224 s2 = u'\udb00\udc01'
225 test_lecmp(s, s2)
226 s2 = u'\ud800\udd01'
227 test_lecmp(s, s2)
228 s2 = u'\ud900\udd01'
229 test_lecmp(s, s2)
230 s2 = u'\uda00\udd01'
231 test_lecmp(s, s2)
232 s2 = u'\udb00\udd01'
233 test_lecmp(s, s2)
234 s2 = u'\ud800\ude01'
235 test_lecmp(s, s2)
236 s2 = u'\ud900\ude01'
237 test_lecmp(s, s2)
238 s2 = u'\uda00\ude01'
239 test_lecmp(s, s2)
240 s2 = u'\udb00\ude01'
241 test_lecmp(s, s2)
242 s2 = u'\ud800\udfff'
243 test_lecmp(s, s2)
244 s2 = u'\ud900\udfff'
245 test_lecmp(s, s2)
246 s2 = u'\uda00\udfff'
247 test_lecmp(s, s2)
248 s2 = u'\udb00\udfff'
249 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000250
251 test_fixup(u'\ue000')
252 test_fixup(u'\uff61')
253
254 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000255 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000256 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000257
# ljust/rjust/center pad with spaces up to the requested width.  The
# padding is written as a repeat count so the expected values cannot be
# damaged by whitespace collapsing.
# Width 10: 7 pad characters; center puts 3 on the left and 4 on the right.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
# Width 6: 3 pad characters; center puts 1 on the left and 2 on the right.
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it untouched.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)

# Character-class predicates; expected values are the ints 0/1.
# u'\u1FFc' is used throughout as a titlecase (neither upper nor lower)
# letter.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
# Skipped on Java platforms.
if sys.platform[:4] != 'java':
    test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)

# splitlines(): '\n', '\r' and '\r\n' each end a line; a final line
# terminator does not produce a trailing empty string.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
# keepends=1 retains the line terminators.
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate() with a mapping: None deletes, an int or a unicode string
# replaces.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})

Guido van Rossumd4d26842000-03-13 23:21:48 +0000325# Contains:
326print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000327verify(('a' in u'abdb') == 1)
328verify(('a' in u'bdab') == 1)
329verify(('a' in u'bdaba') == 1)
330verify(('a' in u'bdba') == 1)
331verify(('a' in u'bdba') == 1)
332verify((u'a' in u'bdba') == 1)
333verify((u'a' in u'bdb') == 0)
334verify((u'a' in 'bdb') == 0)
335verify((u'a' in 'bdba') == 1)
336verify((u'a' in ('a',1,None)) == 1)
337verify((u'a' in (1,None,'a')) == 1)
338verify((u'a' in (1,None,u'a')) == 1)
339verify(('a' in ('a',1,None)) == 1)
340verify(('a' in (1,None,'a')) == 1)
341verify(('a' in (1,None,u'a')) == 1)
342verify(('a' in ('x',1,u'y')) == 0)
343verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000344print 'done.'
345
Guido van Rossuma831cac2000-03-10 23:23:21 +0000346# Formatting:
347print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000348verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
349verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
350verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
351verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
352verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
353verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
354verify(u"%c" % (u"a",) == u'a')
355verify(u"%c" % ("a",) == u'a')
356verify(u"%c" % (34,) == u'"')
357verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000358if sys.platform[:4] != 'java':
359 value = u"%r, %r" % (u"abc", "abc")
360 if value != u"u'abc', 'abc'":
361 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000362
Marc-André Lemburg36619082001-01-17 19:11:13 +0000363verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000364try:
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000365 if sys.platform[:4] != 'java':
366 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
367 else:
368 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000369except KeyError:
370 print '*** formatting failed for "%s"' % "u'abc, def'"
371else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000372 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000373
Guido van Rossum97064862000-04-10 13:52:48 +0000374# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000375verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
376verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
377verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
378verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
379verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
380verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
381verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
382verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
383verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000384verify('%*s' % (5,u'abc',) == u' abc')
385verify('%*s' % (-5,u'abc',) == u'abc ')
386verify('%*.*s' % (5,2,u'abc',) == u' ab')
387verify('%*.*s' % (5,3,u'abc',) == u' abc')
388verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
389verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000390print 'done.'
391
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000392# Test builtin codecs
393print 'Testing builtin codecs...',
394
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000395# UTF-7 specific encoding tests:
396utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
397 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
398 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
399 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
400 (u'+', '+-'),
401 (u'+-', '+--'),
402 (u'+?', '+-?'),
403 (u'\?', '+AFw?'),
404 (u'+?', '+-?'),
405 (ur'\\?', '+AFwAXA?'),
406 (ur'\\\?', '+AFwAXABc?'),
407 (ur'++--', '+-+---')]
408
409for x,y in utfTests:
410 verify( x.encode('utf-7') == y )
411
Tim Peters527e64f2001-10-04 05:36:56 +0000412try:
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000413 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
414except UnicodeError:
415 pass
416else:
417 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
418
419verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
420
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000421# UTF-8 specific encoding tests:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000422verify(u'\u20ac'.encode('utf-8') == \
423 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
424verify(u'\ud800\udc02'.encode('utf-8') == \
425 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
426verify(u'\ud84d\udc56'.encode('utf-8') == \
427 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000428# UTF-8 specific decoding tests
Tim Petersd2bf3b72001-01-18 02:22:22 +0000429verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000430 'utf-8') == u'\U00023456' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000431verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000432 'utf-8') == u'\U00010002' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000433verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000434 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000435
436# Other possible utf-8 test cases:
437# * strict decoding testing for all of the
438# UTF8_ERROR cases in PyUnicode_DecodeUTF8
439
440
441
Marc-André Lemburg36619082001-01-17 19:11:13 +0000442verify(unicode('hello','ascii') == u'hello')
443verify(unicode('hello','utf-8') == u'hello')
444verify(unicode('hello','utf8') == u'hello')
445verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000446
# Compatibility to str():
class String:
    """Object whose __str__ returns whatever is stored in ``x``.

    ``x`` may be rebound to a str or a unicode value by the tests that
    follow.
    """

    # Class-level default; tests override it per instance.
    x = ''

    def __str__(self):
        return self.x

453o = String()
454
455o.x = 'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000456verify(unicode(o) == u'abc')
457verify(str(o) == 'abc')
Marc-André Lemburgb6d78fc2000-07-07 13:46:19 +0000458
459o.x = u'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000460verify(unicode(o) == u'abc')
461verify(str(o) == 'abc')
Marc-André Lemburgb6d78fc2000-07-07 13:46:19 +0000462
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000463for obj in (123, 123.45, 123L):
464 verify(unicode(obj) == unicode(str(obj)))
465
466# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000467try:
468 u'Andr\202 x'.encode('ascii')
469 u'Andr\202 x'.encode('ascii','strict')
470except ValueError:
471 pass
472else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000473 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000474verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
475verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000476
477try:
478 unicode('Andr\202 x','ascii')
479 unicode('Andr\202 x','ascii','strict')
480except ValueError:
481 pass
482else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000483 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000484verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
485verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000486
Marc-André Lemburg36619082001-01-17 19:11:13 +0000487verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000488verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000489verify(u'hello'.encode('utf-8') == 'hello')
490verify(u'hello'.encode('utf8') == 'hello')
491verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
492verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
493verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000494
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000495# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000496u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000497for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000498 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000499 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000500
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000501# Roundtrip safety for non-BMP (just a few chars)
502u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
503for encoding in ('utf-8',
504 'utf-16', 'utf-16-le', 'utf-16-be',
Marc-André Lemburg80d1dd52001-07-25 16:05:59 +0000505 #'raw_unicode_escape',
506 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000507 verify(unicode(u.encode(encoding),encoding) == u)
508
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000509u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000510for encoding in (
511 'latin-1',
512 ):
513 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000514 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000515 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000516 print '*** codec "%s" failed round-trip' % encoding
517 except ValueError,why:
518 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000519
520u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000521for encoding in (
522 'ascii',
523 ):
524 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000525 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000526 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000527 print '*** codec "%s" failed round-trip' % encoding
528 except ValueError,why:
529 print '*** codec for "%s" failed: %s' % (encoding, why)
530
531print 'done.'
532
533print 'Testing standard mapping codecs...',
534
535print '0-127...',
536s = ''.join(map(chr, range(128)))
537for encoding in (
538 'cp037', 'cp1026',
539 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
540 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000541 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000542 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
543 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
544 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
545 'mac_cyrillic', 'mac_latin2',
546
547 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
548 'cp1256', 'cp1257', 'cp1258',
549 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
550
551 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000552 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000553
Guido van Rossum9e896b32000-04-05 20:11:21 +0000554 ### These have undefined mappings:
555 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000556
Tim Peters2f228e72001-05-13 00:19:31 +0000557 ### These fail the round-trip:
558 #'cp875'
559
Guido van Rossum9e896b32000-04-05 20:11:21 +0000560 ):
561 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000562 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000563 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000564 print '*** codec "%s" failed round-trip' % encoding
565 except ValueError,why:
566 print '*** codec for "%s" failed: %s' % (encoding, why)
567
568print '128-255...',
569s = ''.join(map(chr, range(128,256)))
570for encoding in (
571 'cp037', 'cp1026',
572 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
573 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000574 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000575 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000576 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000577 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000578 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000579
Guido van Rossum9e896b32000-04-05 20:11:21 +0000580 ### These have undefined mappings:
581 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
582 #'cp1256', 'cp1257', 'cp1258',
583 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000584 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000585 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000586
Guido van Rossum9e896b32000-04-05 20:11:21 +0000587 ### These fail the round-trip:
588 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000589
Guido van Rossum9e896b32000-04-05 20:11:21 +0000590 ):
591 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000592 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000593 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000594 print '*** codec "%s" failed round-trip' % encoding
595 except ValueError,why:
596 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000597
598print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000599
600print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000601verify((u"abc" u"def") == u"abcdef")
602verify(("abc" u"def") == u"abcdef")
603verify((u"abc" "def") == u"abcdef")
604verify((u"abc" u"def" "ghi") == u"abcdefghi")
605verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000606print 'done.'