blob: c71f92747f3d8eb85790f3c5166b1ffc067dbdb2 [file] [log] [blame]
Guido van Rossuma831cac2000-03-10 23:23:21 +00001""" Test script for the Unicode implementation.
2
Guido van Rossuma831cac2000-03-10 23:23:21 +00003Written by Marc-Andre Lemburg (mal@lemburg.com).
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
7"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000013 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000014 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000022 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000023 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Basic string-method checks.  Each tuple is the argument list of one
# test() call: (method, input, expected, *method_args).  The checks run
# in the order listed.
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    # count() with every mix of Unicode and 8-bit operands
    ('count', u'aaa', 3, u'a'),
    ('count', u'aaa', 0, u'b'),
    ('count', 'aaa', 3, u'a'),
    ('count', 'aaa', 0, u'b'),
    ('count', u'aaa', 3, 'a'),
    ('count', u'aaa', 0, 'b'),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    test(*case)
59
# Disabled block ("if 0"): never executed.
if 0:
    # 256-entry identity table in which 'a', 'b', 'c' map to 'x', 'y', 'z'.
    byte_chars = map(chr, range(256))
    byte_chars[ord('a'):ord('c') + 1] = list('xyz')
    transtable = ''.join(byte_chars)

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
65
# split() checks: (input, expected_list, *split_args), run in order.
for case in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    # whitespace splitting with an explicit maxsplit
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
    # multi-character separators, mixing Unicode and 8-bit operands
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//'),
    ('a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    # separator at the very end of the input
    (u'endcase test', [u'endcase ', u''], u'test'),
    (u'endcase test', [u'endcase ', u''], 'test'),
    ('endcase test', [u'endcase ', u''], u'test'),
    ):
    test('split', *case)
83
Guido van Rossuma831cac2000-03-10 23:23:21 +000084
85# join now works with any sequence type
class Sequence:
    """Bare-bones wrapper exposing only len() and indexing of a sequence."""

    def __init__(self, seq):
        # Wrapped object; all operations delegate to it.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
90
91test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +000092test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +000093test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +000094test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +000095test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +000096test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
97test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
98test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
99test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
100test('join', ' ', u'w x y z', Sequence(u'wxyz'))
101test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000102
103result = u''
104for i in range(10):
105 if i > 0:
106 result = result + u':'
107 result = result + u'x'*10
108test('join', u':', result, [u'x' * 10] * 10)
109test('join', u':', result, (u'x' * 10,) * 10)
110
# Whitespace stripping and case swapping: (method, input, expected).
for case in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    test(*case)

# Disabled block ("if 0"): never executed.
# NOTE(review): it uses transtable, which is only assigned inside another
# disabled block, and the string module, for which no import is visible
# in this file -- confirm before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace() on a fixed input: (expected, old, new[, maxcount]).
for case in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
    ):
    test('replace', u'one!two!three!', *case)
134
# Prefix/suffix checks, grouped per method.  Each inner tuple is
# (input, expected, affix[, start[, end]]); order is preserved.
for method, checks in (
    ('startswith', (
        (u'hello', 1, u'he'),
        (u'hello', 1, u'hello'),
        (u'hello', 0, u'hello world'),
        (u'hello', 1, u''),
        (u'hello', 0, u'ello'),
        (u'hello', 1, u'ello', 1),
        (u'hello', 1, u'o', 4),
        (u'hello', 0, u'o', 5),
        (u'hello', 1, u'', 5),
        (u'hello', 0, u'lo', 6),
        (u'helloworld', 1, u'lowo', 3),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 3, 6),
        )),
    ('endswith', (
        (u'hello', 1, u'lo'),
        (u'hello', 0, u'he'),
        (u'hello', 1, u''),
        (u'hello', 0, u'hello world'),
        (u'helloworld', 0, u'worl'),
        (u'helloworld', 1, u'worl', 3, 9),
        (u'helloworld', 1, u'world', 3, 12),
        (u'helloworld', 1, u'lowo', 1, 7),
        (u'helloworld', 1, u'lowo', 2, 7),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 4, 7),
        (u'helloworld', 0, u'lowo', 3, 8),
        (u'ab', 0, u'ab', 0, 1),
        (u'ab', 0, u'ab', 0, 0),
        )),
    ):
    for case in checks:
        test(method, *case)
162test('endswith', u'ab', 0, u'ab', 0, 0)
163
# expandtabs() checks.  A tab advances to the next multiple of the tab
# size (default 8) and the column restarts after '\r' or '\n'.  So 'ab'
# is followed by 6 (or 2) blanks and 'g' by 7 (or 3).  The padding is
# spelled out with multiplication so that whitespace-mangling tools
# cannot silently change the expected values.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)

# Disabled block ("if 0"): never executed.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
173
174# Comparisons:
175print 'Testing Unicode comparisons...',
176assert u'abc' == 'abc'
177assert 'abc' == u'abc'
178assert u'abc' == u'abc'
179assert u'abcd' > 'abc'
180assert 'abcd' > u'abc'
181assert u'abcd' > u'abc'
182assert u'abc' < 'abcd'
183assert 'abc' < u'abcd'
184assert u'abc' < u'abcd'
185print 'done.'
186
Marc-André Lemburge5034372000-08-08 08:04:29 +0000187if 0:
188 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000189
Marc-André Lemburge5034372000-08-08 08:04:29 +0000190 print 'Testing UTF-16 code point order comparisons...',
191 #No surrogates, no fixup required.
192 assert u'\u0061' < u'\u20ac'
193 # Non surrogate below surrogate value, no fixup required
194 assert u'\u0061' < u'\ud800\udc02'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000195
Marc-André Lemburge5034372000-08-08 08:04:29 +0000196 # Non surrogate above surrogate value, fixup required
197 def test_lecmp(s, s2):
Fred Drake004d5e62000-10-23 17:22:08 +0000198 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000199
Marc-André Lemburge5034372000-08-08 08:04:29 +0000200 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000201 s2 = u'\ud800\udc01'
202 test_lecmp(s, s2)
203 s2 = u'\ud900\udc01'
204 test_lecmp(s, s2)
205 s2 = u'\uda00\udc01'
206 test_lecmp(s, s2)
207 s2 = u'\udb00\udc01'
208 test_lecmp(s, s2)
209 s2 = u'\ud800\udd01'
210 test_lecmp(s, s2)
211 s2 = u'\ud900\udd01'
212 test_lecmp(s, s2)
213 s2 = u'\uda00\udd01'
214 test_lecmp(s, s2)
215 s2 = u'\udb00\udd01'
216 test_lecmp(s, s2)
217 s2 = u'\ud800\ude01'
218 test_lecmp(s, s2)
219 s2 = u'\ud900\ude01'
220 test_lecmp(s, s2)
221 s2 = u'\uda00\ude01'
222 test_lecmp(s, s2)
223 s2 = u'\udb00\ude01'
224 test_lecmp(s, s2)
225 s2 = u'\ud800\udfff'
226 test_lecmp(s, s2)
227 s2 = u'\ud900\udfff'
228 test_lecmp(s, s2)
229 s2 = u'\uda00\udfff'
230 test_lecmp(s, s2)
231 s2 = u'\udb00\udfff'
232 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000233
234 test_fixup(u'\ue000')
235 test_fixup(u'\uff61')
236
237 # Surrogates on both sides, no fixup required
238 assert u'\ud800\udc02' < u'\ud84d\udc56'
239 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000240
# ljust/rjust/center checks.  The result is padded with blanks up to the
# requested width; center() puts the odd leftover blank on the right in
# these cases.  The padding is written as a multiplication so the
# expected widths are explicit and survive whitespace-mangling tools.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the input returns the input unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
250
# Character-class predicates, grouped per method.  Each inner tuple is
# (input, expected); the original order is preserved.
for method, checks in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
        )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
        )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
        )),
    ('isalpha', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
        )),
    ('isalnum', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
        )),
    ):
    for text, expected in checks:
        test(method, text, expected)
294
# splitlines() checks: (input, expected_lines[, keepends]).
for case in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    # with keepends set the line terminators stay attached
    (u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
    ):
    test('splitlines', *case)

# translate() with a mapping table: None deletes the character, an
# integer or a Unicode string replaces it.
for expected, table in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ):
    test('translate', u"abababc", expected, table)
306
Guido van Rossumd4d26842000-03-13 23:21:48 +0000307# Contains:
308print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000309assert ('a' in u'abdb') == 1
310assert ('a' in u'bdab') == 1
311assert ('a' in u'bdaba') == 1
312assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000313assert ('a' in u'bdba') == 1
314assert (u'a' in u'bdba') == 1
315assert (u'a' in u'bdb') == 0
316assert (u'a' in 'bdb') == 0
317assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000318assert (u'a' in ('a',1,None)) == 1
319assert (u'a' in (1,None,'a')) == 1
320assert (u'a' in (1,None,u'a')) == 1
321assert ('a' in ('a',1,None)) == 1
322assert ('a' in (1,None,'a')) == 1
323assert ('a' in (1,None,u'a')) == 1
324assert ('a' in ('x',1,u'y')) == 0
325assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000326print 'done.'
327
Guido van Rossuma831cac2000-03-10 23:23:21 +0000328# Formatting:
329print 'Testing Unicode formatting strings...',
330assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
331assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
332assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
333assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
334assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
335assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000336assert u"%c" % (u"a",) == u'a'
337assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000338assert u"%c" % (34,) == u'"'
339assert u"%c" % (36,) == u'$'
Fred Drake004d5e62000-10-23 17:22:08 +0000340value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000341if value != u"u'abc', 'abc'":
342 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
343
Guido van Rossuma831cac2000-03-10 23:23:21 +0000344assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000345try:
Fred Drake004d5e62000-10-23 17:22:08 +0000346 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000347except KeyError:
348 print '*** formatting failed for "%s"' % "u'abc, def'"
349else:
350 assert value == u'abc, def'
351
Guido van Rossum97064862000-04-10 13:52:48 +0000352# formatting jobs delegated from the string implementation:
353assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
354assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
355assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
356assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
357assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
358assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
359assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
Marc-André Lemburgb96d8022000-10-07 08:52:45 +0000360assert '...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...'
Guido van Rossum97064862000-04-10 13:52:48 +0000361assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000362print 'done.'
363
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000364# Test builtin codecs
365print 'Testing builtin codecs...',
366
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000367# UTF-8 specific encoding tests:
368assert u'\u20ac'.encode('utf-8') == \
369 ''.join((chr(0xe2), chr(0x82), chr(0xac)))
370assert u'\ud800\udc02'.encode('utf-8') == \
371 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
372assert u'\ud84d\udc56'.encode('utf-8') == \
373 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
374# UTF-8 specific decoding tests
375assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
376 'utf-8') == u'\ud84d\udc56'
377assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
378 'utf-8') == u'\ud800\udc02'
379assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
380 'utf-8') == u'\u20ac'
381
382# Other possible utf-8 test cases:
383# * strict decoding testing for all of the
384# UTF8_ERROR cases in PyUnicode_DecodeUTF8
385
386
387
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000388assert unicode('hello','ascii') == u'hello'
389assert unicode('hello','utf-8') == u'hello'
390assert unicode('hello','utf8') == u'hello'
391assert unicode('hello','latin-1') == u'hello'
392
class String:
    """Object whose str() conversion returns whatever is stored in x."""

    # Value handed back by __str__; the script reassigns it per instance.
    x = ''

    def __str__(self):
        return self.x
397
# Both unicode() and str() must accept an object whose __str__ returns
# an 8-bit string or a Unicode string.
o = String()
for text in ('abc', u'abc'):
    o.x = text
    assert unicode(o) == u'abc'
    assert str(o) == 'abc'
407
Guido van Rossum97064862000-04-10 13:52:48 +0000408try:
409 u'Andr\202 x'.encode('ascii')
410 u'Andr\202 x'.encode('ascii','strict')
411except ValueError:
412 pass
413else:
414 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
415assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
416assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
417
418try:
419 unicode('Andr\202 x','ascii')
420 unicode('Andr\202 x','ascii','strict')
421except ValueError:
422 pass
423else:
424 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
425assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
426assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
427
# Encoding a pure ASCII text: (codec name, expected bytes).
for codec_name, raw in (
    ('ascii', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
    ):
    assert u'hello'.encode(codec_name) == raw
434
435u = u''.join(map(unichr, range(1024)))
436for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
437 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
438 assert unicode(u.encode(encoding),encoding) == u
439
440u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000441for encoding in (
442 'latin-1',
443 ):
444 try:
445 assert unicode(u.encode(encoding),encoding) == u
446 except AssertionError:
447 print '*** codec "%s" failed round-trip' % encoding
448 except ValueError,why:
449 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000450
451u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000452for encoding in (
453 'ascii',
454 ):
455 try:
456 assert unicode(u.encode(encoding),encoding) == u
457 except AssertionError:
458 print '*** codec "%s" failed round-trip' % encoding
459 except ValueError,why:
460 print '*** codec for "%s" failed: %s' % (encoding, why)
461
462print 'done.'
463
464print 'Testing standard mapping codecs...',
465
466print '0-127...',
467s = ''.join(map(chr, range(128)))
468for encoding in (
469 'cp037', 'cp1026',
470 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
471 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000472 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000473 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
474 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
475 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
476 'mac_cyrillic', 'mac_latin2',
477
478 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
479 'cp1256', 'cp1257', 'cp1258',
480 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
481
482 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
483 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000484
Guido van Rossum9e896b32000-04-05 20:11:21 +0000485 ### These have undefined mappings:
486 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000487
Guido van Rossum9e896b32000-04-05 20:11:21 +0000488 ):
489 try:
490 assert unicode(s,encoding).encode(encoding) == s
491 except AssertionError:
492 print '*** codec "%s" failed round-trip' % encoding
493 except ValueError,why:
494 print '*** codec for "%s" failed: %s' % (encoding, why)
495
496print '128-255...',
497s = ''.join(map(chr, range(128,256)))
498for encoding in (
499 'cp037', 'cp1026',
500 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
501 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000502 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000503 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000504 'iso8859_2', 'iso8859_4', 'iso8859_5',
505 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000506 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000507
Guido van Rossum9e896b32000-04-05 20:11:21 +0000508 ### These have undefined mappings:
509 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
510 #'cp1256', 'cp1257', 'cp1258',
511 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000512 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000513 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000514
Guido van Rossum9e896b32000-04-05 20:11:21 +0000515 ### These fail the round-trip:
516 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000517
Guido van Rossum9e896b32000-04-05 20:11:21 +0000518 ):
519 try:
520 assert unicode(s,encoding).encode(encoding) == s
521 except AssertionError:
522 print '*** codec "%s" failed round-trip' % encoding
523 except ValueError,why:
524 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000525
526print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000527
528print 'Testing Unicode string concatenation...',
529assert (u"abc" u"def") == u"abcdef"
530assert ("abc" u"def") == u"abcdef"
531assert (u"abc" "def") == u"abcdef"
532assert (u"abc" u"def" "ghi") == u"abcdefghi"
533assert ("abc" "def" u"ghi") == u"abcdefghi"
534print 'done.'