blob: 74e0cf3641e1bc0f44562409650adb4069a0fb52 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion and substring search. Each entry is the argument list
# for test(): method name, input, expected result, then method arguments.
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
):
    test(*case)
52
# Disabled with "if 0". transtable maps every 8-bit character to itself
# except 'a', 'b' and 'c', which map to 'x', 'y' and 'z'.
if 0:
    transtable = (
        '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017'
        '\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037'
        ' !"#$%&\'()*+,-./0123456789:;<=>?'
        '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_'
        '`xyzdefghijklmnopqrstuvwxyz{|}~\177'
        '\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217'
        '\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237'
        '\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257'
        '\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277'
        '\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317'
        '\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337'
        '\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357'
        '\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
    )

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): default whitespace splitting, explicit separator, and the
# optional maximum number of splits.
for case in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
):
    test('split', *case)
70
# join now works with any sequence type
class Sequence:
    """Minimal user-defined sequence: a length plus integer indexing.

    The items live in self.seq, which __init__ sets to the string 'wxyz'.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join() on a list, a tuple and a user-defined sequence; a non-sequence
# argument is expected to produce TypeError.
for case in (
    (u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    (u'', u'abcd', (u'a', u'b', u'c', u'd')),
    (u' ', u'w x y z', Sequence()),
    (u' ', TypeError, 7),
):
    test('join', *case)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
85test('join', u' ', TypeError, BadSeq())
86
# Expected value assembled with plain concatenation: ten runs of ten
# u'x' characters separated by u':'.
piece = u'x' * 10
result = piece
for i in range(1, 10):
    result = result + u':' + piece
test('join', u':', result, [piece] * 10)
test('join', u':', result, (piece,) * 10)
94
# Whitespace stripping on both ends, left only, right only; then swapcase.
for case in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
):
    test(*case)
101
# Disabled with "if 0". `transtable` is only assigned inside the other
# disabled block earlier in this file, so both must be enabled together.
if 0:
    # string.maketrans() is called below, but the imports visible at the
    # top of this file do not include the string module.
    import string

    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
108
# replace(): each entry is (expected, old, new[, maxcount]) applied to
# the same subject string.
subject = u'one!two!three!'
for case in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
):
    test('replace', subject, *case)
118
# startswith(): each entry is (input, expected, prefix[, start[, end]]).
for case in (
    (u'hello', 1, u'he'),
    (u'hello', 1, u'hello'),
    (u'hello', 0, u'hello world'),
    (u'hello', 1, u''),
    (u'hello', 0, u'ello'),
    (u'hello', 1, u'ello', 1),
    (u'hello', 1, u'o', 4),
    (u'hello', 0, u'o', 5),
    (u'hello', 1, u'', 5),
    (u'hello', 0, u'lo', 6),
    (u'helloworld', 1, u'lowo', 3),
    (u'helloworld', 1, u'lowo', 3, 7),
    (u'helloworld', 0, u'lowo', 3, 6),
):
    test('startswith', *case)
132
# endswith(): each entry is (input, expected, suffix[, start[, end]]).
for case in (
    (u'hello', 1, u'lo'),
    (u'hello', 0, u'he'),
    (u'hello', 1, u''),
    (u'hello', 0, u'hello world'),
    (u'helloworld', 0, u'worl'),
    (u'helloworld', 1, u'worl', 3, 9),
    (u'helloworld', 1, u'world', 3, 12),
    (u'helloworld', 1, u'lowo', 1, 7),
    (u'helloworld', 1, u'lowo', 2, 7),
    (u'helloworld', 1, u'lowo', 3, 7),
    (u'helloworld', 0, u'lowo', 4, 7),
    (u'helloworld', 0, u'lowo', 3, 8),
    (u'ab', 0, u'ab', 0, 1),
    (u'ab', 0, u'ab', 0, 0),
):
    test('endswith', *case)
147
# expandtabs(): a tab pads to the next multiple of the tab size (default
# 8), and the column count restarts after '\r' and '\n'. The padding is
# spelled out as a repeat count so the number of blanks is explicit:
# 'ab' is followed by tabsize - 2 blanks, 'g' by tabsize - 1.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
# Disabled with "if 0": all three inputs are expected to give the same
# capitalized, single-blank-separated result.
if 0:
    for text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', text, u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
Marc-André Lemburge5034372000-08-08 08:04:29 +0000171if 0:
172 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000173
Marc-André Lemburge5034372000-08-08 08:04:29 +0000174 print 'Testing UTF-16 code point order comparisons...',
175 #No surrogates, no fixup required.
176 assert u'\u0061' < u'\u20ac'
177 # Non surrogate below surrogate value, no fixup required
178 assert u'\u0061' < u'\ud800\udc02'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000179
Marc-André Lemburge5034372000-08-08 08:04:29 +0000180 # Non surrogate above surrogate value, fixup required
181 def test_lecmp(s, s2):
Fred Drake004d5e62000-10-23 17:22:08 +0000182 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000183
Marc-André Lemburge5034372000-08-08 08:04:29 +0000184 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000185 s2 = u'\ud800\udc01'
186 test_lecmp(s, s2)
187 s2 = u'\ud900\udc01'
188 test_lecmp(s, s2)
189 s2 = u'\uda00\udc01'
190 test_lecmp(s, s2)
191 s2 = u'\udb00\udc01'
192 test_lecmp(s, s2)
193 s2 = u'\ud800\udd01'
194 test_lecmp(s, s2)
195 s2 = u'\ud900\udd01'
196 test_lecmp(s, s2)
197 s2 = u'\uda00\udd01'
198 test_lecmp(s, s2)
199 s2 = u'\udb00\udd01'
200 test_lecmp(s, s2)
201 s2 = u'\ud800\ude01'
202 test_lecmp(s, s2)
203 s2 = u'\ud900\ude01'
204 test_lecmp(s, s2)
205 s2 = u'\uda00\ude01'
206 test_lecmp(s, s2)
207 s2 = u'\udb00\ude01'
208 test_lecmp(s, s2)
209 s2 = u'\ud800\udfff'
210 test_lecmp(s, s2)
211 s2 = u'\ud900\udfff'
212 test_lecmp(s, s2)
213 s2 = u'\uda00\udfff'
214 test_lecmp(s, s2)
215 s2 = u'\udb00\udfff'
216 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000217
218 test_fixup(u'\ue000')
219 test_fixup(u'\uff61')
220
221 # Surrogates on both sides, no fixup required
222 assert u'\ud800\udc02' < u'\ud84d\udc56'
223 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000224
# ljust/rjust/center pad with blanks up to the requested width and return
# the string unchanged when it is already at least that wide. Padding is
# written as a repeat count so the number of blanks is explicit. For
# center() the odd blank goes to the right: 3 + 4 for width 10, 1 + 2
# for width 6.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
234
# Case predicates: for each method, a table of (input, expected) pairs.
for method, cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
    )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
    )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
    )),
):
    for text, expected in cases:
        test(method, text, expected)
262
# Character-class predicates: for each method, (input, expected) pairs.
for method, cases in (
    ('isalpha', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
    )),
    ('isalnum', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
    )),
):
    for text, expected in cases:
        test(method, text, expected)
278
# splitlines(): each entry is (input, expected[, keepends]).
for case in (
    (u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    (u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    (u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']),
    (u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
):
    test('splitlines', *case)

# translate() with a mapping keyed by ordinal: None deletes the character,
# an ordinal or a Unicode string replaces it.
for expected, table in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
):
    test('translate', u"abababc", expected, table)
290
Guido van Rossumd4d26842000-03-13 23:21:48 +0000291# Contains:
292print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000293assert ('a' in u'abdb') == 1
294assert ('a' in u'bdab') == 1
295assert ('a' in u'bdaba') == 1
296assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000297assert ('a' in u'bdba') == 1
298assert (u'a' in u'bdba') == 1
299assert (u'a' in u'bdb') == 0
300assert (u'a' in 'bdb') == 0
301assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000302assert (u'a' in ('a',1,None)) == 1
303assert (u'a' in (1,None,'a')) == 1
304assert (u'a' in (1,None,u'a')) == 1
305assert ('a' in ('a',1,None)) == 1
306assert ('a' in (1,None,'a')) == 1
307assert ('a' in (1,None,u'a')) == 1
308assert ('a' in ('x',1,u'y')) == 0
309assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000310print 'done.'
311
Guido van Rossuma831cac2000-03-10 23:23:21 +0000312# Formatting:
313print 'Testing Unicode formatting strings...',
# Unicode format strings accept both Unicode and 8-bit string arguments.
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
# %5.2f pads to a minimum field width of five, so 3.00, 3.50 and 3.57
# each gain one leading blank in addition to the separator blank of the
# format string. The padded field is written as its own literal to keep
# that blank visible; 1003.57 is wider than the field and gets none.
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, ' + u' 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, ' + u' 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, ' + u' 3.57'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
# %c takes a one-character string of either kind, or an ordinal.
assert u"%c" % (u"a",) == u'a'
assert u"%c" % ("a",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
Fred Drake004d5e62000-10-23 17:22:08 +0000324value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000325if value != u"u'abc', 'abc'":
326 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
327
Guido van Rossuma831cac2000-03-10 23:23:21 +0000328assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000329try:
Fred Drake004d5e62000-10-23 17:22:08 +0000330 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000331except KeyError:
332 print '*** formatting failed for "%s"' % "u'abc, def'"
333else:
334 assert value == u'abc, def'
335
Guido van Rossum97064862000-04-10 13:52:48 +0000336# formatting jobs delegated from the string implementation:
337assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
338assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
339assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
340assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
341assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
342assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
343assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
Marc-André Lemburgb96d8022000-10-07 08:52:45 +0000344assert '...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...'
Guido van Rossum97064862000-04-10 13:52:48 +0000345assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000346print 'done.'
347
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000348# Test builtin codecs
349print 'Testing builtin codecs...',
350
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000351# UTF-8 specific encoding tests:
352assert u'\u20ac'.encode('utf-8') == \
353 ''.join((chr(0xe2), chr(0x82), chr(0xac)))
354assert u'\ud800\udc02'.encode('utf-8') == \
355 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
356assert u'\ud84d\udc56'.encode('utf-8') == \
357 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
358# UTF-8 specific decoding tests
359assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
360 'utf-8') == u'\ud84d\udc56'
361assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
362 'utf-8') == u'\ud800\udc02'
363assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
364 'utf-8') == u'\u20ac'
365
366# Other possible utf-8 test cases:
367# * strict decoding testing for all of the
368# UTF8_ERROR cases in PyUnicode_DecodeUTF8
369
370
371
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000372assert unicode('hello','ascii') == u'hello'
373assert unicode('hello','utf-8') == u'hello'
374assert unicode('hello','utf8') == u'hello'
375assert unicode('hello','latin-1') == u'hello'
376
class String:
    """Object whose string conversion returns whatever is stored in x."""

    # Value handed back by __str__; empty unless an instance overrides it.
    x = ''

    def __str__(self):
        return self.x
381
# unicode() and str() of an object whose __str__ returns first an 8-bit
# string and then a Unicode string.
o = String()
for text in ('abc', u'abc'):
    o.x = text
    assert unicode(o) == u'abc'
    assert str(o) == 'abc'
391
Guido van Rossum97064862000-04-10 13:52:48 +0000392try:
393 u'Andr\202 x'.encode('ascii')
394 u'Andr\202 x'.encode('ascii','strict')
395except ValueError:
396 pass
397else:
398 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
399assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
400assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
401
402try:
403 unicode('Andr\202 x','ascii')
404 unicode('Andr\202 x','ascii','strict')
405except ValueError:
406 pass
407else:
408 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
409assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
410assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
411
# Encoding pure ASCII text: (codec name, expected 8-bit string).
for encoding, expected in (
    ('ascii', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
):
    assert u'hello'.encode(encoding) == expected

# Encode/decode round trip of the first 1024 code points.
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    assert unicode(u.encode(encoding),encoding) == u
423
424u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000425for encoding in (
426 'latin-1',
427 ):
428 try:
429 assert unicode(u.encode(encoding),encoding) == u
430 except AssertionError:
431 print '*** codec "%s" failed round-trip' % encoding
432 except ValueError,why:
433 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000434
435u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000436for encoding in (
437 'ascii',
438 ):
439 try:
440 assert unicode(u.encode(encoding),encoding) == u
441 except AssertionError:
442 print '*** codec "%s" failed round-trip' % encoding
443 except ValueError,why:
444 print '*** codec for "%s" failed: %s' % (encoding, why)
445
446print 'done.'
447
448print 'Testing standard mapping codecs...',
449
450print '0-127...',
451s = ''.join(map(chr, range(128)))
452for encoding in (
453 'cp037', 'cp1026',
454 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
455 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000456 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000457 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
458 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
459 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
460 'mac_cyrillic', 'mac_latin2',
461
462 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
463 'cp1256', 'cp1257', 'cp1258',
464 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
465
466 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
467 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000468
Guido van Rossum9e896b32000-04-05 20:11:21 +0000469 ### These have undefined mappings:
470 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000471
Guido van Rossum9e896b32000-04-05 20:11:21 +0000472 ):
473 try:
474 assert unicode(s,encoding).encode(encoding) == s
475 except AssertionError:
476 print '*** codec "%s" failed round-trip' % encoding
477 except ValueError,why:
478 print '*** codec for "%s" failed: %s' % (encoding, why)
479
480print '128-255...',
481s = ''.join(map(chr, range(128,256)))
482for encoding in (
483 'cp037', 'cp1026',
484 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
485 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000486 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000487 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
488 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
489 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
490 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000491
Guido van Rossum9e896b32000-04-05 20:11:21 +0000492 ### These have undefined mappings:
493 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
494 #'cp1256', 'cp1257', 'cp1258',
495 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
496 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000497
Guido van Rossum9e896b32000-04-05 20:11:21 +0000498 ### These fail the round-trip:
499 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000500
Guido van Rossum9e896b32000-04-05 20:11:21 +0000501 ):
502 try:
503 assert unicode(s,encoding).encode(encoding) == s
504 except AssertionError:
505 print '*** codec "%s" failed round-trip' % encoding
506 except ValueError,why:
507 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000508
509print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000510
511print 'Testing Unicode string concatenation...',
512assert (u"abc" u"def") == u"abcdef"
513assert ("abc" u"def") == u"abcdef"
514assert (u"abc" "def") == u"abcdef"
515assert (u"abc" u"def" "ghi") == u"abcdefghi"
516assert ("abc" "def" u"ghi") == u"abcdefghi"
517print 'done.'