blob: 0748dc77c0dbf7aae79cb277b81cf1352a2cdee8 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Tim Peters2f228e72001-05-13 00:19:31 +00008from test_support import verify, verbose, TestFailed
Guido van Rossuma831cac2000-03-10 23:23:21 +00009import sys
10
Guido van Rossume4874ae2001-09-21 15:36:41 +000011# Test basic sanity of repr()
12verify(repr(u'abc') == "u'abc'")
13verify(repr(u'ab\\c') == "u'ab\\\\c'")
14verify(repr(u'ab\\') == "u'ab\\\\'")
15verify(repr(u'\\c') == "u'\\\\c'")
16verify(repr(u'\\') == "u'\\\\'")
17verify(repr(u'\n') == "u'\\n'")
18verify(repr(u'\r') == "u'\\r'")
19verify(repr(u'\t') == "u'\\t'")
20verify(repr(u'\b') == "u'\\x08'")
21
Guido van Rossuma831cac2000-03-10 23:23:21 +000022def test(method, input, output, *args):
23 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000024 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000025 try:
26 f = getattr(input, method)
27 value = apply(f, args)
28 except:
29 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000030 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000031 else:
32 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000033 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000034 if verbose:
35 print 'no'
36 print '*',f, `input`, `output`, `value`
37 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000038 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000039 else:
40 if verbose:
41 print 'yes'
42
43test('capitalize', u' hello ', u' hello ')
44test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000045test('capitalize', u'aaaa', u'Aaaa')
46test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000047
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000048test('count', u'aaa', 3, u'a')
49test('count', u'aaa', 0, u'b')
50test('count', 'aaa', 3, u'a')
51test('count', 'aaa', 0, u'b')
52test('count', u'aaa', 3, 'a')
53test('count', u'aaa', 0, 'b')
54
Guido van Rossuma831cac2000-03-10 23:23:21 +000055test('title', u' hello ', u' Hello ')
56test('title', u'hello ', u'Hello ')
57test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
58test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
59test('title', u"getInt", u'Getint')
60
61test('find', u'abcdefghiabc', 0, u'abc')
62test('find', u'abcdefghiabc', 9, u'abc', 1)
63test('find', u'abcdefghiabc', -1, u'def', 4)
64
65test('rfind', u'abcdefghiabc', 9, u'abc')
66
67test('lower', u'HeLLo', u'hello')
68test('lower', u'hello', u'hello')
69
70test('upper', u'HeLLo', u'HELLO')
71test('upper', u'HELLO', u'HELLO')
72
73if 0:
74 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
75
76 test('maketrans', u'abc', transtable, u'xyz')
77 test('maketrans', u'abc', ValueError, u'xyzq')
78
79test('split', u'this is the split function',
80 [u'this', u'is', u'the', u'split', u'function'])
81test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
82test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
83test('split', u'a b c d', [u'a', u'b c d'], None, 1)
84test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
85test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
86test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
87test('split', u'a b c d', [u'a b c d'], None, 0)
88test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
89test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +000090test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
91test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
92test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
93test('split', u'endcase test', [u'endcase ', u''], u'test')
94test('split', u'endcase test', [u'endcase ', u''], 'test')
95test('split', 'endcase test', [u'endcase ', u''], u'test')
96
Guido van Rossuma831cac2000-03-10 23:23:21 +000097
98# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper exposing only len() and indexing.

    Both operations are forwarded to the wrapped object stored in
    self.seq.
    """

    def __init__(self, seq):
        # Keep a reference to the wrapped sequence; nothing is copied.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
103
104test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +0000105test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +0000106test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +0000107test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +0000108test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +0000109test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
110test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
111test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
112test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
113test('join', ' ', u'w x y z', Sequence(u'wxyz'))
114test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000115
116result = u''
117for i in range(10):
118 if i > 0:
119 result = result + u':'
120 result = result + u'x'*10
121test('join', u':', result, [u'x' * 10] * 10)
122test('join', u':', result, (u'x' * 10,) * 10)
123
124test('strip', u' hello ', u'hello')
125test('lstrip', u' hello ', u'hello ')
126test('rstrip', u' hello ', u' hello')
127test('strip', u'hello', u'hello')
128
129test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
130
131if 0:
132 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
133
134 table = string.maketrans('a', u'A')
135 test('translate', u'abc', u'Abc', table)
136 test('translate', u'xyz', u'xyz', table)
137
138test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000139test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000140test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
141test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
142test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
143test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
144test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
145test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
146test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
147
148test('startswith', u'hello', 1, u'he')
149test('startswith', u'hello', 1, u'hello')
150test('startswith', u'hello', 0, u'hello world')
151test('startswith', u'hello', 1, u'')
152test('startswith', u'hello', 0, u'ello')
153test('startswith', u'hello', 1, u'ello', 1)
154test('startswith', u'hello', 1, u'o', 4)
155test('startswith', u'hello', 0, u'o', 5)
156test('startswith', u'hello', 1, u'', 5)
157test('startswith', u'hello', 0, u'lo', 6)
158test('startswith', u'helloworld', 1, u'lowo', 3)
159test('startswith', u'helloworld', 1, u'lowo', 3, 7)
160test('startswith', u'helloworld', 0, u'lowo', 3, 6)
161
162test('endswith', u'hello', 1, u'lo')
163test('endswith', u'hello', 0, u'he')
164test('endswith', u'hello', 1, u'')
165test('endswith', u'hello', 0, u'hello world')
166test('endswith', u'helloworld', 0, u'worl')
167test('endswith', u'helloworld', 1, u'worl', 3, 9)
168test('endswith', u'helloworld', 1, u'world', 3, 12)
169test('endswith', u'helloworld', 1, u'lowo', 1, 7)
170test('endswith', u'helloworld', 1, u'lowo', 2, 7)
171test('endswith', u'helloworld', 1, u'lowo', 3, 7)
172test('endswith', u'helloworld', 0, u'lowo', 4, 7)
173test('endswith', u'helloworld', 0, u'lowo', 3, 8)
174test('endswith', u'ab', 0, u'ab', 0, 1)
175test('endswith', u'ab', 0, u'ab', 0, 0)
176
177test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
178test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
179test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
180test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
181
182if 0:
183 test('capwords', u'abc def ghi', u'Abc Def Ghi')
184 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
185 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
186
187# Comparisons:
188print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000189verify(u'abc' == 'abc')
190verify('abc' == u'abc')
191verify(u'abc' == u'abc')
192verify(u'abcd' > 'abc')
193verify('abcd' > u'abc')
194verify(u'abcd' > u'abc')
195verify(u'abc' < 'abcd')
196verify('abc' < u'abcd')
197verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000198print 'done.'
199
Marc-André Lemburge5034372000-08-08 08:04:29 +0000200if 0:
201 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000202
Marc-André Lemburge5034372000-08-08 08:04:29 +0000203 print 'Testing UTF-16 code point order comparisons...',
204 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000205 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000206 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000207 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000208
Marc-André Lemburge5034372000-08-08 08:04:29 +0000209 # Non surrogate above surrogate value, fixup required
210 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000211 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000212
Marc-André Lemburge5034372000-08-08 08:04:29 +0000213 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000214 s2 = u'\ud800\udc01'
215 test_lecmp(s, s2)
216 s2 = u'\ud900\udc01'
217 test_lecmp(s, s2)
218 s2 = u'\uda00\udc01'
219 test_lecmp(s, s2)
220 s2 = u'\udb00\udc01'
221 test_lecmp(s, s2)
222 s2 = u'\ud800\udd01'
223 test_lecmp(s, s2)
224 s2 = u'\ud900\udd01'
225 test_lecmp(s, s2)
226 s2 = u'\uda00\udd01'
227 test_lecmp(s, s2)
228 s2 = u'\udb00\udd01'
229 test_lecmp(s, s2)
230 s2 = u'\ud800\ude01'
231 test_lecmp(s, s2)
232 s2 = u'\ud900\ude01'
233 test_lecmp(s, s2)
234 s2 = u'\uda00\ude01'
235 test_lecmp(s, s2)
236 s2 = u'\udb00\ude01'
237 test_lecmp(s, s2)
238 s2 = u'\ud800\udfff'
239 test_lecmp(s, s2)
240 s2 = u'\ud900\udfff'
241 test_lecmp(s, s2)
242 s2 = u'\uda00\udfff'
243 test_lecmp(s, s2)
244 s2 = u'\udb00\udfff'
245 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000246
247 test_fixup(u'\ue000')
248 test_fixup(u'\uff61')
249
250 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000251 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000252 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000253
Guido van Rossuma831cac2000-03-10 23:23:21 +0000254test('ljust', u'abc', u'abc ', 10)
255test('rjust', u'abc', u' abc', 10)
256test('center', u'abc', u' abc ', 10)
257test('ljust', u'abc', u'abc ', 6)
258test('rjust', u'abc', u' abc', 6)
259test('center', u'abc', u' abc ', 6)
260test('ljust', u'abc', u'abc', 2)
261test('rjust', u'abc', u'abc', 2)
262test('center', u'abc', u'abc', 2)
263
264test('islower', u'a', 1)
265test('islower', u'A', 0)
266test('islower', u'\n', 0)
267test('islower', u'\u1FFc', 0)
268test('islower', u'abc', 1)
269test('islower', u'aBc', 0)
270test('islower', u'abc\n', 1)
271
272test('isupper', u'a', 0)
273test('isupper', u'A', 1)
274test('isupper', u'\n', 0)
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000275if sys.platform[:4] != 'java':
276 test('isupper', u'\u1FFc', 0)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000277test('isupper', u'ABC', 1)
278test('isupper', u'AbC', 0)
279test('isupper', u'ABC\n', 1)
280
281test('istitle', u'a', 0)
282test('istitle', u'A', 1)
283test('istitle', u'\n', 0)
284test('istitle', u'\u1FFc', 1)
285test('istitle', u'A Titlecased Line', 1)
286test('istitle', u'A\nTitlecased Line', 1)
287test('istitle', u'A Titlecased, Line', 1)
288test('istitle', u'Greek \u1FFcitlecases ...', 1)
289test('istitle', u'Not a capitalized String', 0)
290test('istitle', u'Not\ta Titlecase String', 0)
291test('istitle', u'Not--a Titlecase String', 0)
292
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000293test('isalpha', u'a', 1)
294test('isalpha', u'A', 1)
295test('isalpha', u'\n', 0)
296test('isalpha', u'\u1FFc', 1)
297test('isalpha', u'abc', 1)
298test('isalpha', u'aBc123', 0)
299test('isalpha', u'abc\n', 0)
300
301test('isalnum', u'a', 1)
302test('isalnum', u'A', 1)
303test('isalnum', u'\n', 0)
304test('isalnum', u'123abc456', 1)
305test('isalnum', u'a1b3c', 1)
306test('isalnum', u'aBc000 ', 0)
307test('isalnum', u'abc\n', 0)
308
Guido van Rossuma831cac2000-03-10 23:23:21 +0000309test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
310test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
311test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
312test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
313test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
314test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum7ee801d2000-04-11 15:37:02 +0000315test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000316
317test('translate', u"abababc", u'bbbc', {ord('a'):None})
318test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
319test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
320
Guido van Rossumd4d26842000-03-13 23:21:48 +0000321# Contains:
322print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000323verify(('a' in u'abdb') == 1)
324verify(('a' in u'bdab') == 1)
325verify(('a' in u'bdaba') == 1)
326verify(('a' in u'bdba') == 1)
327verify(('a' in u'bdba') == 1)
328verify((u'a' in u'bdba') == 1)
329verify((u'a' in u'bdb') == 0)
330verify((u'a' in 'bdb') == 0)
331verify((u'a' in 'bdba') == 1)
332verify((u'a' in ('a',1,None)) == 1)
333verify((u'a' in (1,None,'a')) == 1)
334verify((u'a' in (1,None,u'a')) == 1)
335verify(('a' in ('a',1,None)) == 1)
336verify(('a' in (1,None,'a')) == 1)
337verify(('a' in (1,None,u'a')) == 1)
338verify(('a' in ('x',1,u'y')) == 0)
339verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000340print 'done.'
341
Guido van Rossuma831cac2000-03-10 23:23:21 +0000342# Formatting:
343print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000344verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
345verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
346verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
347verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
348verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
349verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
350verify(u"%c" % (u"a",) == u'a')
351verify(u"%c" % ("a",) == u'a')
352verify(u"%c" % (34,) == u'"')
353verify(u"%c" % (36,) == u'$')
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000354if sys.platform[:4] != 'java':
355 value = u"%r, %r" % (u"abc", "abc")
356 if value != u"u'abc', 'abc'":
357 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000358
Marc-André Lemburg36619082001-01-17 19:11:13 +0000359verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000360try:
Marc-André Lemburgef0a0322001-02-10 14:09:31 +0000361 if sys.platform[:4] != 'java':
362 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
363 else:
364 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä':"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000365except KeyError:
366 print '*** formatting failed for "%s"' % "u'abc, def'"
367else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000368 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000369
Guido van Rossum97064862000-04-10 13:52:48 +0000370# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000371verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
372verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
373verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
374verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
375verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
376verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
377verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
378verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
379verify('...%s...' % u"abc" == u'...abc...')
Marc-André Lemburg542fe562001-05-02 14:21:53 +0000380verify('%*s' % (5,u'abc',) == u' abc')
381verify('%*s' % (-5,u'abc',) == u'abc ')
382verify('%*.*s' % (5,2,u'abc',) == u' ab')
383verify('%*.*s' % (5,3,u'abc',) == u' abc')
384verify('%i %*.*s' % (10, 5,3,u'abc',) == u'10 abc')
385verify('%i%s %*.*s' % (10, 3, 5,3,u'abc',) == u'103 abc')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000386print 'done.'
387
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000388# Test builtin codecs
389print 'Testing builtin codecs...',
390
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000391# UTF-7 specific encoding tests:
392utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
393 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
394 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
395 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
396 (u'+', '+-'),
397 (u'+-', '+--'),
398 (u'+?', '+-?'),
399 (u'\?', '+AFw?'),
400 (u'+?', '+-?'),
401 (ur'\\?', '+AFwAXA?'),
402 (ur'\\\?', '+AFwAXABc?'),
403 (ur'++--', '+-+---')]
404
405for x,y in utfTests:
406 verify( x.encode('utf-7') == y )
407
408try:
409 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
410except UnicodeError:
411 pass
412else:
413 raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
414
415verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
416
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000417# UTF-8 specific encoding tests:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000418verify(u'\u20ac'.encode('utf-8') == \
419 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
420verify(u'\ud800\udc02'.encode('utf-8') == \
421 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
422verify(u'\ud84d\udc56'.encode('utf-8') == \
423 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000424# UTF-8 specific decoding tests
Tim Petersd2bf3b72001-01-18 02:22:22 +0000425verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000426 'utf-8') == u'\U00023456' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000427verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000428 'utf-8') == u'\U00010002' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000429verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000430 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000431
432# Other possible utf-8 test cases:
433# * strict decoding testing for all of the
434# UTF8_ERROR cases in PyUnicode_DecodeUTF8
435
436
437
Marc-André Lemburg36619082001-01-17 19:11:13 +0000438verify(unicode('hello','ascii') == u'hello')
439verify(unicode('hello','utf-8') == u'hello')
440verify(unicode('hello','utf8') == u'hello')
441verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000442
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000443# Compatibility to str():
class String:
    """Object whose string conversion is whatever is stored in x."""

    # Value handed back by __str__; an empty str unless an instance
    # (or the class) is given a different x.
    x = ''

    def __str__(self):
        return self.x
448
449o = String()
450
451o.x = 'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000452verify(unicode(o) == u'abc')
453verify(str(o) == 'abc')
Marc-André Lemburgb6d78fc2000-07-07 13:46:19 +0000454
455o.x = u'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000456verify(unicode(o) == u'abc')
457verify(str(o) == 'abc')
Marc-André Lemburgb6d78fc2000-07-07 13:46:19 +0000458
Marc-André Lemburg6871f6a2001-09-20 12:53:16 +0000459for obj in (123, 123.45, 123L):
460 verify(unicode(obj) == unicode(str(obj)))
461
462# Error handling
Guido van Rossum97064862000-04-10 13:52:48 +0000463try:
464 u'Andr\202 x'.encode('ascii')
465 u'Andr\202 x'.encode('ascii','strict')
466except ValueError:
467 pass
468else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000469 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000470verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
471verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000472
473try:
474 unicode('Andr\202 x','ascii')
475 unicode('Andr\202 x','ascii','strict')
476except ValueError:
477 pass
478else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000479 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000480verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
481verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000482
Marc-André Lemburg36619082001-01-17 19:11:13 +0000483verify(u'hello'.encode('ascii') == 'hello')
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000484verify(u'hello'.encode('utf-7') == 'hello')
Marc-André Lemburg36619082001-01-17 19:11:13 +0000485verify(u'hello'.encode('utf-8') == 'hello')
486verify(u'hello'.encode('utf8') == 'hello')
487verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
488verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
489verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000490
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000491# Roundtrip safety for BMP (just the first 1024 chars)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000492u = u''.join(map(unichr, range(1024)))
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000493for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000494 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000495 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000496
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000497# Roundtrip safety for non-BMP (just a few chars)
498u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
499for encoding in ('utf-8',
500 'utf-16', 'utf-16-le', 'utf-16-be',
Marc-André Lemburg80d1dd52001-07-25 16:05:59 +0000501 #'raw_unicode_escape',
502 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg6c6bfb72001-07-20 17:39:11 +0000503 verify(unicode(u.encode(encoding),encoding) == u)
504
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000505u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000506for encoding in (
507 'latin-1',
508 ):
509 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000510 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000511 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000512 print '*** codec "%s" failed round-trip' % encoding
513 except ValueError,why:
514 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000515
516u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000517for encoding in (
518 'ascii',
519 ):
520 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000521 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000522 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000523 print '*** codec "%s" failed round-trip' % encoding
524 except ValueError,why:
525 print '*** codec for "%s" failed: %s' % (encoding, why)
526
527print 'done.'
528
529print 'Testing standard mapping codecs...',
530
531print '0-127...',
532s = ''.join(map(chr, range(128)))
533for encoding in (
534 'cp037', 'cp1026',
535 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
536 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000537 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000538 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
539 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
540 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
541 'mac_cyrillic', 'mac_latin2',
542
543 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
544 'cp1256', 'cp1257', 'cp1258',
545 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
546
547 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Tim Peters2f228e72001-05-13 00:19:31 +0000548 'cp1006', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000549
Guido van Rossum9e896b32000-04-05 20:11:21 +0000550 ### These have undefined mappings:
551 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000552
Tim Peters2f228e72001-05-13 00:19:31 +0000553 ### These fail the round-trip:
554 #'cp875'
555
Guido van Rossum9e896b32000-04-05 20:11:21 +0000556 ):
557 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000558 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000559 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000560 print '*** codec "%s" failed round-trip' % encoding
561 except ValueError,why:
562 print '*** codec for "%s" failed: %s' % (encoding, why)
563
564print '128-255...',
565s = ''.join(map(chr, range(128,256)))
566for encoding in (
567 'cp037', 'cp1026',
568 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
569 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000570 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000571 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000572 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000573 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000574 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000575
Guido van Rossum9e896b32000-04-05 20:11:21 +0000576 ### These have undefined mappings:
577 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
578 #'cp1256', 'cp1257', 'cp1258',
579 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000580 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000581 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000582
Guido van Rossum9e896b32000-04-05 20:11:21 +0000583 ### These fail the round-trip:
584 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000585
Guido van Rossum9e896b32000-04-05 20:11:21 +0000586 ):
587 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000588 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000589 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000590 print '*** codec "%s" failed round-trip' % encoding
591 except ValueError,why:
592 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000593
594print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000595
596print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000597verify((u"abc" u"def") == u"abcdef")
598verify(("abc" u"def") == u"abcdef")
599verify((u"abc" "def") == u"abcdef")
600verify((u"abc" u"def" "ghi") == u"abcdefghi")
601verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000602print 'done.'