blob: 5f79101651431083226266f7f1abbf4c76b1053f [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000013 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000014 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000022 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000023 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Argument order for every check: method name, subject string, expected
# result, then the arguments passed to the method.

# capitalize() only touches the first character, so a leading space
# leaves the string unchanged.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

# title() upper-cases the first letter of each run of letters and
# lower-cases the rest, whatever separates the runs.
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find() returns the lowest matching index at or after the optional
# start, or -1 when there is no match.
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

# rfind() returns the highest matching index.
test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
52
# Disabled: these checks are kept for reference only and never run.
if 0:
    # Identity table over all 256 byte values, except that 'abc' is
    # replaced by 'xyz'.
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    # Arguments of unequal length are expected to raise ValueError.
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split() without arguments splits on whitespace.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
# Explicit separator, with and without a maximum number of splits.
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
# None as separator means whitespace; the second argument caps the
# number of splits, and the remainder stays in the last item.
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
# Trailing whitespace does not produce an empty trailing item.
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper exposing only len() and indexing.

    It delegates both operations to the wrapped object, so it lets the
    join checks exercise a sequence that is neither a list nor a tuple.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
77test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +000078test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +000079test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +000080test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +000081test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +000082test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
83test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
84test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
85test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
86test('join', ' ', u'w x y z', Sequence(u'wxyz'))
87test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +000088
89result = u''
90for i in range(10):
91 if i > 0:
92 result = result + u':'
93 result = result + u'x'*10
94test('join', u':', result, [u'x' * 10] * 10)
95test('join', u':', result, (u'x' * 10,) * 10)
96
# strip() removes whitespace on both sides, lstrip()/rstrip() on one.
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

# swapcase() inverts the case of every letter.
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
103
# Disabled: these checks are kept for reference only and never run.
# NOTE(review): enabling them requires transtable (defined only inside
# the disabled maketrans block) and an "import string", which this file
# does not have among its visible imports.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
110
# replace(old, new[, count]): count caps the number of replacements;
# 0 replaces nothing and a count above the number of matches is fine.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
# 8-bit arguments are accepted; an empty replacement deletes matches.
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
# A pattern that never occurs leaves the string unchanged.
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
120
# startswith(prefix[, start[, end]]); expected results are the ints
# 1 and 0.  The empty prefix always matches.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
# With a start offset the comparison begins at that index.
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
# With an end offset the prefix must fit inside [start, end).
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(suffix[, start[, end]]) mirrors the checks above.
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
# An end offset beyond the string length is accepted.
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
# The suffix must lie entirely inside [start, end).
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
149
# expandtabs([tabsize]) pads each tab up to the next multiple of
# tabsize (default 8); the column count restarts after '\r' or '\n'.
# With tabsize 8, 'ab\t' gains six spaces and 'g\t' gains seven.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
# With tabsize 4, 'ab\t' gains two spaces and 'g\t' gains three.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
154
# Disabled: these checks are kept for reference only and never run.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
159
160# Comparisons:
161print 'Testing Unicode comparisons...',
162assert u'abc' == 'abc'
163assert 'abc' == u'abc'
164assert u'abc' == u'abc'
165assert u'abcd' > 'abc'
166assert 'abcd' > u'abc'
167assert u'abcd' > u'abc'
168assert u'abc' < 'abcd'
169assert 'abc' < u'abcd'
170assert u'abc' < u'abcd'
171print 'done.'
172
Marc-André Lemburge5034372000-08-08 08:04:29 +0000173if 0:
174 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000175
Marc-André Lemburge5034372000-08-08 08:04:29 +0000176 print 'Testing UTF-16 code point order comparisons...',
177 #No surrogates, no fixup required.
178 assert u'\u0061' < u'\u20ac'
179 # Non surrogate below surrogate value, no fixup required
180 assert u'\u0061' < u'\ud800\udc02'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000181
Marc-André Lemburge5034372000-08-08 08:04:29 +0000182 # Non surrogate above surrogate value, fixup required
183 def test_lecmp(s, s2):
Fred Drake004d5e62000-10-23 17:22:08 +0000184 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000185
Marc-André Lemburge5034372000-08-08 08:04:29 +0000186 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000187 s2 = u'\ud800\udc01'
188 test_lecmp(s, s2)
189 s2 = u'\ud900\udc01'
190 test_lecmp(s, s2)
191 s2 = u'\uda00\udc01'
192 test_lecmp(s, s2)
193 s2 = u'\udb00\udc01'
194 test_lecmp(s, s2)
195 s2 = u'\ud800\udd01'
196 test_lecmp(s, s2)
197 s2 = u'\ud900\udd01'
198 test_lecmp(s, s2)
199 s2 = u'\uda00\udd01'
200 test_lecmp(s, s2)
201 s2 = u'\udb00\udd01'
202 test_lecmp(s, s2)
203 s2 = u'\ud800\ude01'
204 test_lecmp(s, s2)
205 s2 = u'\ud900\ude01'
206 test_lecmp(s, s2)
207 s2 = u'\uda00\ude01'
208 test_lecmp(s, s2)
209 s2 = u'\udb00\ude01'
210 test_lecmp(s, s2)
211 s2 = u'\ud800\udfff'
212 test_lecmp(s, s2)
213 s2 = u'\ud900\udfff'
214 test_lecmp(s, s2)
215 s2 = u'\uda00\udfff'
216 test_lecmp(s, s2)
217 s2 = u'\udb00\udfff'
218 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000219
220 test_fixup(u'\ue000')
221 test_fixup(u'\uff61')
222
223 # Surrogates on both sides, no fixup required
224 assert u'\ud800\udc02' < u'\ud84d\udc56'
225 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000226
# Width 10: the three-character string is padded with seven spaces.
# center() puts three on the left and four on the right.
test('ljust', u'abc', u'abc       ', 10)
test('rjust', u'abc', u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
# Width 6: three padding spaces; center() puts one on the left and two
# on the right.
test('ljust', u'abc', u'abc   ', 6)
test('rjust', u'abc', u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
# A width smaller than the string returns the string unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
236
# Character-class predicates; expected results are the ints 1 and 0.
# Per the expectations below, u'\u1FFc' is a title-case letter: it is
# alphabetic, but counts as neither lower case nor upper case.

# islower(): true when all cased characters are lower case and there is
# at least one; uncased characters such as '\n' are ignored.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

# isupper(): the upper-case counterpart of islower().
test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

# istitle(): each word must start with an upper- or title-case letter
# followed only by lower-case letters; any non-letter separates words.
test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

# isalpha(): every character must be a letter.
test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

# isalnum(): every character must be a letter or a digit.
test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)
280
# splitlines(): '\n', '\r' and '\r\n' each end one line, so "\n\r" is
# two line breaks while "\r\n" is one.  A final line break does not add
# an empty last item.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
# A true argument keeps the line breaks in the returned items.
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate(mapping): keys are ordinals; a None value deletes the
# character, an ordinal or a Unicode string replaces it.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
292
Guido van Rossumd4d26842000-03-13 23:21:48 +0000293# Contains:
294print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000295assert ('a' in u'abdb') == 1
296assert ('a' in u'bdab') == 1
297assert ('a' in u'bdaba') == 1
298assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000299assert ('a' in u'bdba') == 1
300assert (u'a' in u'bdba') == 1
301assert (u'a' in u'bdb') == 0
302assert (u'a' in 'bdb') == 0
303assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000304assert (u'a' in ('a',1,None)) == 1
305assert (u'a' in (1,None,'a')) == 1
306assert (u'a' in (1,None,u'a')) == 1
307assert ('a' in ('a',1,None)) == 1
308assert ('a' in (1,None,'a')) == 1
309assert ('a' in (1,None,u'a')) == 1
310assert ('a' in ('x',1,u'y')) == 0
311assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000312print 'done.'
313
Guido van Rossuma831cac2000-03-10 23:23:21 +0000314# Formatting:
315print 'Testing Unicode formatting strings...',
316assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
317assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
318assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
319assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
320assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
321assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000322assert u"%c" % (u"a",) == u'a'
323assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000324assert u"%c" % (34,) == u'"'
325assert u"%c" % (36,) == u'$'
Fred Drake004d5e62000-10-23 17:22:08 +0000326value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000327if value != u"u'abc', 'abc'":
328 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
329
Guido van Rossuma831cac2000-03-10 23:23:21 +0000330assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000331try:
Fred Drake004d5e62000-10-23 17:22:08 +0000332 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000333except KeyError:
334 print '*** formatting failed for "%s"' % "u'abc, def'"
335else:
336 assert value == u'abc, def'
337
Guido van Rossum97064862000-04-10 13:52:48 +0000338# formatting jobs delegated from the string implementation:
339assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
340assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
341assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
342assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
343assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
344assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
345assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
Marc-André Lemburgb96d8022000-10-07 08:52:45 +0000346assert '...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...'
Guido van Rossum97064862000-04-10 13:52:48 +0000347assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000348print 'done.'
349
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000350# Test builtin codecs
351print 'Testing builtin codecs...',
352
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000353# UTF-8 specific encoding tests:
354assert u'\u20ac'.encode('utf-8') == \
355 ''.join((chr(0xe2), chr(0x82), chr(0xac)))
356assert u'\ud800\udc02'.encode('utf-8') == \
357 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
358assert u'\ud84d\udc56'.encode('utf-8') == \
359 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
360# UTF-8 specific decoding tests
361assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
362 'utf-8') == u'\ud84d\udc56'
363assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
364 'utf-8') == u'\ud800\udc02'
365assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
366 'utf-8') == u'\u20ac'
367
368# Other possible utf-8 test cases:
369# * strict decoding testing for all of the
370# UTF8_ERROR cases in PyUnicode_DecodeUTF8
371
372
373
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000374assert unicode('hello','ascii') == u'hello'
375assert unicode('hello','utf-8') == u'hello'
376assert unicode('hello','utf8') == u'hello'
377assert unicode('hello','latin-1') == u'hello'
378
class String:
    """Object whose __str__ returns whatever is stored in attribute x."""

    # Class-level default; individual instances override it by assignment.
    x = ''

    def __str__(self):
        return self.x
383
# unicode() and str() applied to an object defining only __str__: the
# result type follows the conversion function, whichever string type
# __str__ returns.
o = String()

o.x = 'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'

o.x = u'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'
393
Guido van Rossum97064862000-04-10 13:52:48 +0000394try:
395 u'Andr\202 x'.encode('ascii')
396 u'Andr\202 x'.encode('ascii','strict')
397except ValueError:
398 pass
399else:
400 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
401assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
402assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
403
404try:
405 unicode('Andr\202 x','ascii')
406 unicode('Andr\202 x','ascii','strict')
407except ValueError:
408 pass
409else:
410 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
411assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
412assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
413
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000414assert u'hello'.encode('ascii') == 'hello'
415assert u'hello'.encode('utf-8') == 'hello'
416assert u'hello'.encode('utf8') == 'hello'
417assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
418assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
419assert u'hello'.encode('latin-1') == 'hello'
420
421u = u''.join(map(unichr, range(1024)))
422for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
423 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
424 assert unicode(u.encode(encoding),encoding) == u
425
426u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000427for encoding in (
428 'latin-1',
429 ):
430 try:
431 assert unicode(u.encode(encoding),encoding) == u
432 except AssertionError:
433 print '*** codec "%s" failed round-trip' % encoding
434 except ValueError,why:
435 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000436
437u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000438for encoding in (
439 'ascii',
440 ):
441 try:
442 assert unicode(u.encode(encoding),encoding) == u
443 except AssertionError:
444 print '*** codec "%s" failed round-trip' % encoding
445 except ValueError,why:
446 print '*** codec for "%s" failed: %s' % (encoding, why)
447
448print 'done.'
449
450print 'Testing standard mapping codecs...',
451
452print '0-127...',
453s = ''.join(map(chr, range(128)))
454for encoding in (
455 'cp037', 'cp1026',
456 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
457 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000458 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000459 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
460 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
461 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
462 'mac_cyrillic', 'mac_latin2',
463
464 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
465 'cp1256', 'cp1257', 'cp1258',
466 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
467
468 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
469 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000470
Guido van Rossum9e896b32000-04-05 20:11:21 +0000471 ### These have undefined mappings:
472 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000473
Guido van Rossum9e896b32000-04-05 20:11:21 +0000474 ):
475 try:
476 assert unicode(s,encoding).encode(encoding) == s
477 except AssertionError:
478 print '*** codec "%s" failed round-trip' % encoding
479 except ValueError,why:
480 print '*** codec for "%s" failed: %s' % (encoding, why)
481
482print '128-255...',
483s = ''.join(map(chr, range(128,256)))
484for encoding in (
485 'cp037', 'cp1026',
486 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
487 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000488 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000489 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
490 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
491 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
492 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000493
Guido van Rossum9e896b32000-04-05 20:11:21 +0000494 ### These have undefined mappings:
495 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
496 #'cp1256', 'cp1257', 'cp1258',
497 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
498 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000499
Guido van Rossum9e896b32000-04-05 20:11:21 +0000500 ### These fail the round-trip:
501 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000502
Guido van Rossum9e896b32000-04-05 20:11:21 +0000503 ):
504 try:
505 assert unicode(s,encoding).encode(encoding) == s
506 except AssertionError:
507 print '*** codec "%s" failed round-trip' % encoding
508 except ValueError,why:
509 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000510
511print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000512
513print 'Testing Unicode string concatenation...',
514assert (u"abc" u"def") == u"abcdef"
515assert ("abc" u"def") == u"abcdef"
516assert (u"abc" "def") == u"abcdef"
517assert (u"abc" u"def" "ghi") == u"abcdefghi"
518assert ("abc" "def" u"ghi") == u"abcdefghi"
519print 'done.'