blob: 579bab1fe2723950bb7a21eecd38c02d3d2f75ea [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000013 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000014 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000022 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000023 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search.  Each row is the argument list for
# test(): (method, input, expected[, method arguments...]).
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    test(*case)
52
# Disabled: these exercise a 'maketrans' method via test().  transtable is
# the identity byte table except that 'a', 'b', 'c' map to 'x', 'y', 'z'.
# It is also referenced by the (equally disabled) translate test further
# down.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): rows are (input, expected[, separator[, maxsplit]]).  The last
# six rows mix 8-bit and Unicode strings between receiver and separator.
for case in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//'),
    ('a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    (u'endcase test', [u'endcase ', u''], u'test'),
    (u'endcase test', [u'endcase ', u''], 'test'),
    ('endcase test', [u'endcase ', u''], u'test'),
    ):
    test('split', *case)
76
Guido van Rossuma831cac2000-03-10 23:23:21 +000077
# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper offering only len() and indexing.

    Both operations are forwarded to the wrapped object stored in ``seq``.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # An IndexError from the wrapped object propagates unchanged.
        return self.seq[i]
83
84test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +000085test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +000086test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +000087test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +000088test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +000089test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
90test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
91test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
92test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
93test('join', ' ', u'w x y z', Sequence(u'wxyz'))
94test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +000095
96result = u''
97for i in range(10):
98 if i > 0:
99 result = result + u':'
100 result = result + u'x'*10
101test('join', u':', result, [u'x' * 10] * 10)
102test('join', u':', result, (u'x' * 10,) * 10)
103
# Whitespace stripping and case swapping: (method, input, expected).
for method, text, expected in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    test(method, text, expected)

# Disabled: relies on the transtable from the disabled maketrans block and
# on a 'string' module that this file does not import.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace() on a fixed subject: rows are (expected, old, new[, maxcount]).
for case in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
    ):
    test('replace', u'one!two!three!', *case)
127
# startswith()/endswith(): rows are (input, expected, affix[, start[, end]]).
for method, cases in (
    ('startswith', (
        (u'hello', 1, u'he'),
        (u'hello', 1, u'hello'),
        (u'hello', 0, u'hello world'),
        (u'hello', 1, u''),
        (u'hello', 0, u'ello'),
        (u'hello', 1, u'ello', 1),
        (u'hello', 1, u'o', 4),
        (u'hello', 0, u'o', 5),
        (u'hello', 1, u'', 5),
        (u'hello', 0, u'lo', 6),
        (u'helloworld', 1, u'lowo', 3),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 3, 6),
        )),
    ('endswith', (
        (u'hello', 1, u'lo'),
        (u'hello', 0, u'he'),
        (u'hello', 1, u''),
        (u'hello', 0, u'hello world'),
        (u'helloworld', 0, u'worl'),
        (u'helloworld', 1, u'worl', 3, 9),
        (u'helloworld', 1, u'world', 3, 12),
        (u'helloworld', 1, u'lowo', 1, 7),
        (u'helloworld', 1, u'lowo', 2, 7),
        (u'helloworld', 1, u'lowo', 3, 7),
        (u'helloworld', 0, u'lowo', 4, 7),
        (u'helloworld', 0, u'lowo', 3, 8),
        (u'ab', 0, u'ab', 0, 1),
        (u'ab', 0, u'ab', 0, 0),
        )),
    ):
    for case in cases:
        test(method, *case)
156
# expandtabs(): a tab advances to the next multiple of the tab size, and
# the column counter restarts after '\r' or '\n'.  The padding is spelled
# as explicit repeat counts so the expected values cannot be silently
# damaged by whitespace-collapsing tools (the single-space expectations
# this replaces did not match what expandtabs() produces).
#
# Default / 8-column stops: 'ab' needs 6 spaces, 'g' needs 7.
expanded8 = u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi'
test('expandtabs', u'abc\rab\tdef\ng\thi', expanded8)
test('expandtabs', u'abc\rab\tdef\ng\thi', expanded8, 8)
# 4-column stops: 'ab' needs 2 spaces, 'g' needs 3.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)

# Disabled capwords checks, kept verbatim.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
166
167# Comparisons:
168print 'Testing Unicode comparisons...',
169assert u'abc' == 'abc'
170assert 'abc' == u'abc'
171assert u'abc' == u'abc'
172assert u'abcd' > 'abc'
173assert 'abcd' > u'abc'
174assert u'abcd' > u'abc'
175assert u'abc' < 'abcd'
176assert 'abc' < u'abcd'
177assert u'abc' < u'abcd'
178print 'done.'
179
Marc-André Lemburge5034372000-08-08 08:04:29 +0000180if 0:
181 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000182
Marc-André Lemburge5034372000-08-08 08:04:29 +0000183 print 'Testing UTF-16 code point order comparisons...',
184 #No surrogates, no fixup required.
185 assert u'\u0061' < u'\u20ac'
186 # Non surrogate below surrogate value, no fixup required
187 assert u'\u0061' < u'\ud800\udc02'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000188
Marc-André Lemburge5034372000-08-08 08:04:29 +0000189 # Non surrogate above surrogate value, fixup required
190 def test_lecmp(s, s2):
Fred Drake004d5e62000-10-23 17:22:08 +0000191 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000192
Marc-André Lemburge5034372000-08-08 08:04:29 +0000193 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000194 s2 = u'\ud800\udc01'
195 test_lecmp(s, s2)
196 s2 = u'\ud900\udc01'
197 test_lecmp(s, s2)
198 s2 = u'\uda00\udc01'
199 test_lecmp(s, s2)
200 s2 = u'\udb00\udc01'
201 test_lecmp(s, s2)
202 s2 = u'\ud800\udd01'
203 test_lecmp(s, s2)
204 s2 = u'\ud900\udd01'
205 test_lecmp(s, s2)
206 s2 = u'\uda00\udd01'
207 test_lecmp(s, s2)
208 s2 = u'\udb00\udd01'
209 test_lecmp(s, s2)
210 s2 = u'\ud800\ude01'
211 test_lecmp(s, s2)
212 s2 = u'\ud900\ude01'
213 test_lecmp(s, s2)
214 s2 = u'\uda00\ude01'
215 test_lecmp(s, s2)
216 s2 = u'\udb00\ude01'
217 test_lecmp(s, s2)
218 s2 = u'\ud800\udfff'
219 test_lecmp(s, s2)
220 s2 = u'\ud900\udfff'
221 test_lecmp(s, s2)
222 s2 = u'\uda00\udfff'
223 test_lecmp(s, s2)
224 s2 = u'\udb00\udfff'
225 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000226
227 test_fixup(u'\ue000')
228 test_fixup(u'\uff61')
229
230 # Surrogates on both sides, no fixup required
231 assert u'\ud800\udc02' < u'\ud84d\udc56'
232 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000233
# ljust()/rjust()/center(): the result is padded with spaces up to the
# requested width and returned unchanged when the width is not larger than
# the string.  Padding is written as explicit repeat counts; the collapsed
# single-space expectations this replaces could never match a width of 10
# or 6.  For center() the odd leftover space goes on the right here
# (3 + 4 for width 10, 1 + 2 for width 6).
for method, width, expected in (
    ('ljust', 10, u'abc' + u' ' * 7),
    ('rjust', 10, u' ' * 7 + u'abc'),
    ('center', 10, u' ' * 3 + u'abc' + u' ' * 4),
    ('ljust', 6, u'abc' + u' ' * 3),
    ('rjust', 6, u' ' * 3 + u'abc'),
    ('center', 6, u' ' + u'abc' + u' ' * 2),
    ('ljust', 2, u'abc'),
    ('rjust', 2, u'abc'),
    ('center', 2, u'abc'),
    ):
    test(method, u'abc', expected, width)
243
# Character-class predicates: each method gets (input, expected) rows,
# where expected is the integer truth value 1 or 0.
for method, cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
        )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
        )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
        )),
    ('isalpha', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
        )),
    ('isalnum', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
        )),
    ):
    for text, expected in cases:
        test(method, text, expected)
287
# splitlines(): rows are (input, expected[, keepends]).
for case in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
    ):
    test('splitlines', *case)

# translate() with a mapping keyed by ordinal: None deletes the character,
# an ordinal or a Unicode string replaces it.  Rows are (expected, mapping).
for expected, mapping in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ):
    test('translate', u"abababc", expected, mapping)
299
Guido van Rossumd4d26842000-03-13 23:21:48 +0000300# Contains:
301print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000302assert ('a' in u'abdb') == 1
303assert ('a' in u'bdab') == 1
304assert ('a' in u'bdaba') == 1
305assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000306assert ('a' in u'bdba') == 1
307assert (u'a' in u'bdba') == 1
308assert (u'a' in u'bdb') == 0
309assert (u'a' in 'bdb') == 0
310assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000311assert (u'a' in ('a',1,None)) == 1
312assert (u'a' in (1,None,'a')) == 1
313assert (u'a' in (1,None,u'a')) == 1
314assert ('a' in ('a',1,None)) == 1
315assert ('a' in (1,None,'a')) == 1
316assert ('a' in (1,None,u'a')) == 1
317assert ('a' in ('x',1,u'y')) == 0
318assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000319print 'done.'
320
Guido van Rossuma831cac2000-03-10 23:23:21 +0000321# Formatting:
322print 'Testing Unicode formatting strings...',
323assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
324assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
325assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
326assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
327assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
328assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000329assert u"%c" % (u"a",) == u'a'
330assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000331assert u"%c" % (34,) == u'"'
332assert u"%c" % (36,) == u'$'
Fred Drake004d5e62000-10-23 17:22:08 +0000333value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000334if value != u"u'abc', 'abc'":
335 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
336
Guido van Rossuma831cac2000-03-10 23:23:21 +0000337assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000338try:
Fred Drake004d5e62000-10-23 17:22:08 +0000339 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000340except KeyError:
341 print '*** formatting failed for "%s"' % "u'abc, def'"
342else:
343 assert value == u'abc, def'
344
Guido van Rossum97064862000-04-10 13:52:48 +0000345# formatting jobs delegated from the string implementation:
346assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
347assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
348assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
349assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
350assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
351assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
352assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
Marc-André Lemburgb96d8022000-10-07 08:52:45 +0000353assert '...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...'
Guido van Rossum97064862000-04-10 13:52:48 +0000354assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000355print 'done.'
356
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000357# Test builtin codecs
358print 'Testing builtin codecs...',
359
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000360# UTF-8 specific encoding tests:
361assert u'\u20ac'.encode('utf-8') == \
362 ''.join((chr(0xe2), chr(0x82), chr(0xac)))
363assert u'\ud800\udc02'.encode('utf-8') == \
364 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
365assert u'\ud84d\udc56'.encode('utf-8') == \
366 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
367# UTF-8 specific decoding tests
368assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
369 'utf-8') == u'\ud84d\udc56'
370assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
371 'utf-8') == u'\ud800\udc02'
372assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
373 'utf-8') == u'\u20ac'
374
375# Other possible utf-8 test cases:
376# * strict decoding testing for all of the
377# UTF8_ERROR cases in PyUnicode_DecodeUTF8
378
379
380
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000381assert unicode('hello','ascii') == u'hello'
382assert unicode('hello','utf-8') == u'hello'
383assert unicode('hello','utf8') == u'hello'
384assert unicode('hello','latin-1') == u'hello'
385
class String:
    """Object whose ``__str__`` hands back whatever is stored in ``x``."""

    # Class-level default; instances may rebind it to any value.
    x = ''

    def __str__(self):
        return self.x
390
391o = String()
392
393o.x = 'abc'
394assert unicode(o) == u'abc'
395assert str(o) == 'abc'
396
397o.x = u'abc'
398assert unicode(o) == u'abc'
399assert str(o) == 'abc'
400
Guido van Rossum97064862000-04-10 13:52:48 +0000401try:
402 u'Andr\202 x'.encode('ascii')
403 u'Andr\202 x'.encode('ascii','strict')
404except ValueError:
405 pass
406else:
407 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
408assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
409assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
410
411try:
412 unicode('Andr\202 x','ascii')
413 unicode('Andr\202 x','ascii','strict')
414except ValueError:
415 pass
416else:
417 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
418assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
419assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
420
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000421assert u'hello'.encode('ascii') == 'hello'
422assert u'hello'.encode('utf-8') == 'hello'
423assert u'hello'.encode('utf8') == 'hello'
424assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
425assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
426assert u'hello'.encode('latin-1') == 'hello'
427
428u = u''.join(map(unichr, range(1024)))
429for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
430 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
431 assert unicode(u.encode(encoding),encoding) == u
432
433u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000434for encoding in (
435 'latin-1',
436 ):
437 try:
438 assert unicode(u.encode(encoding),encoding) == u
439 except AssertionError:
440 print '*** codec "%s" failed round-trip' % encoding
441 except ValueError,why:
442 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000443
444u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000445for encoding in (
446 'ascii',
447 ):
448 try:
449 assert unicode(u.encode(encoding),encoding) == u
450 except AssertionError:
451 print '*** codec "%s" failed round-trip' % encoding
452 except ValueError,why:
453 print '*** codec for "%s" failed: %s' % (encoding, why)
454
455print 'done.'
456
457print 'Testing standard mapping codecs...',
458
459print '0-127...',
460s = ''.join(map(chr, range(128)))
461for encoding in (
462 'cp037', 'cp1026',
463 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
464 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000465 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000466 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
467 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
468 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
469 'mac_cyrillic', 'mac_latin2',
470
471 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
472 'cp1256', 'cp1257', 'cp1258',
473 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
474
475 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
476 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000477
Guido van Rossum9e896b32000-04-05 20:11:21 +0000478 ### These have undefined mappings:
479 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000480
Guido van Rossum9e896b32000-04-05 20:11:21 +0000481 ):
482 try:
483 assert unicode(s,encoding).encode(encoding) == s
484 except AssertionError:
485 print '*** codec "%s" failed round-trip' % encoding
486 except ValueError,why:
487 print '*** codec for "%s" failed: %s' % (encoding, why)
488
489print '128-255...',
490s = ''.join(map(chr, range(128,256)))
491for encoding in (
492 'cp037', 'cp1026',
493 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
494 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000495 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000496 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000497 'iso8859_2', 'iso8859_4', 'iso8859_5',
498 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000499 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000500
Guido van Rossum9e896b32000-04-05 20:11:21 +0000501 ### These have undefined mappings:
502 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
503 #'cp1256', 'cp1257', 'cp1258',
504 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000505 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000506 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000507
Guido van Rossum9e896b32000-04-05 20:11:21 +0000508 ### These fail the round-trip:
509 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000510
Guido van Rossum9e896b32000-04-05 20:11:21 +0000511 ):
512 try:
513 assert unicode(s,encoding).encode(encoding) == s
514 except AssertionError:
515 print '*** codec "%s" failed round-trip' % encoding
516 except ValueError,why:
517 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000518
519print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000520
521print 'Testing Unicode string concatenation...',
522assert (u"abc" u"def") == u"abcdef"
523assert ("abc" u"def") == u"abcdef"
524assert (u"abc" "def") == u"abcdef"
525assert (u"abc" u"def" "ghi") == u"abcdefghi"
526assert ("abc" "def" u"ghi") == u"abcdefghi"
527print 'done.'