blob: 8479c2094754faf989ad44907b682ba3ec7f193b [file] [log] [blame]
Guido van Rossuma831cac2000-03-10 23:23:21 +00001""" Test script for the Unicode implementation.
2
Guido van Rossuma831cac2000-03-10 23:23:21 +00003Written by Marc-Andre Lemburg (mal@lemburg.com).
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
7"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Every call below has the form test(method, input, expected, *method_args).

# capitalize(): a string starting with a space comes back unchanged.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

# title(): each run of letters gets an initial capital and the rest is
# lower-cased; punctuation separates words just like whitespace does.
test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

# find(): lowest matching index at or after the optional start offset,
# -1 when there is no match.
test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

# rfind(): highest matching index.
test('rfind', u'abcdefghiabc', 9, u'abc')

# lower(): input that is already lower case is returned unchanged.
test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

# upper(): input that is already upper case is returned unchanged.
test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
52
# Disabled: these checks look up a 'maketrans' attribute on Unicode objects.
# NOTE(review): presumably switched off because Unicode strings provide no
# such method -- confirm before re-enabling.
if 0:
    # 256-character table: identity mapping except that 'abc' maps to 'xyz'.
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    # Arguments of unequal length are expected to raise ValueError.
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): with no separator, or with None, the string is split on
# whitespace; an explicit separator splits on that substring.  The optional
# last argument caps the number of splits, leaving the remainder of the
# string as the final item.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
# A limit larger than the number of separators behaves like no limit.
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
# A limit of 0 performs no split at all.
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
# Trailing whitespace does not produce an empty last item.
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
71# join now works with any sequence type
class Sequence:
    """Minimal sequence wrapper used to feed join() something that is
    neither a list nor a tuple.

    Only __len__ and __getitem__ are provided; both delegate to the
    wrapped value stored in self.seq.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join(): the separator is the object under test and the sequence is the
# argument.  Lists, tuples and user-defined sequences are all accepted.
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
# A non-sequence argument is expected to raise TypeError.
test('join', u' ', TypeError, 7)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
# A sequence holding non-string items is expected to raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected value by plain concatenation (not with join itself):
# ten runs of ten u'x' characters separated by u':'.
result = u'x'*10
for i in range(1, 10):
    result = result + u':' + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
94
# strip() trims both ends, lstrip() only the left, rstrip() only the right.
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

# swapcase(): the case of every letter is inverted.
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')

# Disabled: string-table based translate() checks.
# NOTE(review): relies on 'transtable' from another disabled section and on
# the 'string' module, which the imports at the top of this file do not
# provide -- both would be needed before re-enabling.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(old, new[, count]): count limits the number of replacements;
# 0 replaces nothing, and a count above the number of matches replaces all.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
# 8-bit string arguments are accepted as well.
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
# No occurrence of the search string: the input is returned unchanged.
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
118
# startswith(prefix[, start[, end]]): the expected value is 1 (true) or
# 0 (false).  start/end restrict the comparison to that slice of the string.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
# The empty prefix always matches, even at the very end of the string.
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
# A start offset beyond the end of the string never matches.
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
# The prefix does not fit completely inside [3:6].
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(suffix[, start[, end]]): same conventions as startswith().
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
# An end offset past the end of the string is accepted.
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
# The suffix does not fit completely inside [4:7].
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
147
# expandtabs([tabsize]): each tab is replaced by enough spaces to reach the
# next multiple of tabsize (default 8); the column count restarts after
# '\r' and '\n'.  The padding is spelled out as explicit space counts so the
# expected values cannot be silently damaged by whitespace normalisation.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
152
# Disabled: these checks look up a 'capwords' attribute on Unicode objects.
# NOTE(review): presumably switched off because no such method exists --
# confirm before re-enabling.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
# Comparisons:
# Each relation is checked in three flavours: Unicode against 8-bit string,
# 8-bit string against Unicode, and Unicode against Unicode.
print 'Testing Unicode comparisons...',
# Equality.
assert u'abc' == 'abc'
assert 'abc' == u'abc'
assert u'abc' == u'abc'
# A string sorts after its own proper prefix ...
assert u'abcd' > 'abc'
assert 'abcd' > u'abc'
assert u'abcd' > u'abc'
# ... and the prefix sorts before the longer string.
assert u'abc' < 'abcd'
assert 'abc' < u'abcd'
assert u'abc' < u'abcd'
print 'done.'
170
Marc-André Lemburge5034372000-08-08 08:04:29 +0000171if 0:
172 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000173
Marc-André Lemburge5034372000-08-08 08:04:29 +0000174 print 'Testing UTF-16 code point order comparisons...',
175 #No surrogates, no fixup required.
176 assert u'\u0061' < u'\u20ac'
177 # Non surrogate below surrogate value, no fixup required
178 assert u'\u0061' < u'\ud800\udc02'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000179
Marc-André Lemburge5034372000-08-08 08:04:29 +0000180 # Non surrogate above surrogate value, fixup required
181 def test_lecmp(s, s2):
182 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000183
Marc-André Lemburge5034372000-08-08 08:04:29 +0000184 def test_fixup(s):
185 s2 = u'\ud800\udc01'
186 test_lecmp(s, s2)
187 s2 = u'\ud900\udc01'
188 test_lecmp(s, s2)
189 s2 = u'\uda00\udc01'
190 test_lecmp(s, s2)
191 s2 = u'\udb00\udc01'
192 test_lecmp(s, s2)
193 s2 = u'\ud800\udd01'
194 test_lecmp(s, s2)
195 s2 = u'\ud900\udd01'
196 test_lecmp(s, s2)
197 s2 = u'\uda00\udd01'
198 test_lecmp(s, s2)
199 s2 = u'\udb00\udd01'
200 test_lecmp(s, s2)
201 s2 = u'\ud800\ude01'
202 test_lecmp(s, s2)
203 s2 = u'\ud900\ude01'
204 test_lecmp(s, s2)
205 s2 = u'\uda00\ude01'
206 test_lecmp(s, s2)
207 s2 = u'\udb00\ude01'
208 test_lecmp(s, s2)
209 s2 = u'\ud800\udfff'
210 test_lecmp(s, s2)
211 s2 = u'\ud900\udfff'
212 test_lecmp(s, s2)
213 s2 = u'\uda00\udfff'
214 test_lecmp(s, s2)
215 s2 = u'\udb00\udfff'
216 test_lecmp(s, s2)
217
218 test_fixup(u'\ue000')
219 test_fixup(u'\uff61')
220
221 # Surrogates on both sides, no fixup required
222 assert u'\ud800\udc02' < u'\ud84d\udc56'
223 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000224
# ljust/rjust/center(width): pad with spaces up to width.  The padding is
# written as explicit space counts so the expected values cannot be silently
# damaged by whitespace normalisation.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
# Odd amount of padding: the extra space goes to the right here.
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it untouched (no truncation).
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
234
# Character class predicates; the expected value is 1 (true) or 0 (false).
# u'\u1FFc' is expected to count as titlecase and alphabetic, but as
# neither lower nor upper case.

# islower(): true when there are cased characters and none of them is upper
# case; uncased characters such as '\n' are ignored if a cased one exists.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

# isupper(): mirror image of islower().
test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

# istitle(): every word must start with an upper or titlecase character
# followed only by lower case ones.
test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

# isalpha(): all characters must be letters.
test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

# isalnum(): all characters must be letters or digits.
test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)
278
# splitlines([keepends]): '\n', '\r' and '\r\n' each end a line, so '\n\r'
# is two line ends and '\r\n' is one.  A line end at the very end of the
# string does not produce a trailing empty item.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
# A true keepends argument leaves the line ends attached to each item.
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate(mapping): keys are character ordinals.  A value of None deletes
# the character, an ordinal or a Unicode string replaces it, and characters
# without an entry are copied unchanged.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
290
# Contains:
print 'Testing Unicode contains method...',
# 8-bit character looked up in a Unicode string, at various positions.
assert ('a' in u'abdb') == 1
assert ('a' in u'bdab') == 1
assert ('a' in u'bdaba') == 1
assert ('a' in u'bdba') == 1
# (repeats the previous assertion)
assert ('a' in u'bdba') == 1
# Unicode character looked up in Unicode and in 8-bit strings.
assert (u'a' in u'bdba') == 1
assert (u'a' in u'bdb') == 0
assert (u'a' in 'bdb') == 0
assert (u'a' in 'bdba') == 1
# Membership in tuples mixing 8-bit strings, Unicode strings and other
# objects: Unicode and 8-bit strings with the same content compare equal.
assert (u'a' in ('a',1,None)) == 1
assert (u'a' in (1,None,'a')) == 1
assert (u'a' in (1,None,u'a')) == 1
assert ('a' in ('a',1,None)) == 1
assert ('a' in (1,None,'a')) == 1
assert ('a' in (1,None,u'a')) == 1
assert ('a' in ('x',1,u'y')) == 0
assert ('a' in ('x',1,None)) == 0
print 'done.'
311
Guido van Rossuma831cac2000-03-10 23:23:21 +0000312# Formatting:
313print 'Testing Unicode formatting strings...',
314assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
315assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
316assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
317assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
318assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
319assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
# %c accepts a one-character Unicode or 8-bit string, or an ordinal.
assert u"%c" % (u"a",) == u'a'
assert u"%c" % ("a",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
# %r inserts the repr() of each argument; a mismatch is reported rather
# than raised.
value = u"%r, %r" % (u"abc", "abc")
if value != u"u'abc', 'abc'":
    print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'

# Mapping keys in a Unicode format string.
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
# A non-ASCII mapping key in the format is looked up under its UTF-8
# encoded form; a failed lookup is reported instead of aborting the run.
try:
    value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
except KeyError:
    print '*** formatting failed for "%s"' % "u'abc, def'"
else:
    assert value == u'abc, def'

# formatting jobs delegated from the string implementation:
# an 8-bit format string yields a Unicode result as soon as one of the
# substituted values is Unicode; Unicode dictionary keys alone do not.
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
assert '...%s...' % u"abc" == u'...abc...'
print 'done.'
346
# Test builtin codecs
print 'Testing builtin codecs...',

# UTF-8 specific encoding tests:
# a three-byte sequence for U+20AC, and four-byte sequences for characters
# written as surrogate pairs.
assert u'\u20ac'.encode('utf-8') == \
       ''.join((chr(0xe2), chr(0x82), chr(0xac)))
assert u'\ud800\udc02'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
assert u'\ud84d\udc56'.encode('utf-8') == \
       ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
# UTF-8 specific decoding tests
# (the inverse of the three encodings above)
assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
               'utf-8') == u'\ud84d\udc56'
assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
               'utf-8') == u'\ud800\udc02'
assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
               'utf-8') == u'\u20ac'

# Other possible utf-8 test cases:
# * strict decoding testing for all of the
#   UTF8_ERROR cases in PyUnicode_DecodeUTF8



# Decoding pure ASCII text gives the same result with every codec listed;
# 'utf8' is accepted as another spelling of 'utf-8'.
assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'
375
class String:
    """Object whose string conversion is whatever is stored in x.

    x defaults to the empty string at class level and is meant to be
    overridden per instance.
    """

    x = ''

    def __str__(self):
        return self.x
380
# unicode() and str() applied to an object that defines __str__.
o = String()

# __str__ returning an 8-bit string.
o.x = 'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'

# __str__ returning a Unicode string: str() is still expected to compare
# equal to the 8-bit 'abc'.
o.x = u'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'
390
Guido van Rossum97064862000-04-10 13:52:48 +0000391try:
392 u'Andr\202 x'.encode('ascii')
393 u'Andr\202 x'.encode('ascii','strict')
394except ValueError:
395 pass
396else:
397 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
398assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
399assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
400
401try:
402 unicode('Andr\202 x','ascii')
403 unicode('Andr\202 x','ascii','strict')
404except ValueError:
405 pass
406else:
407 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
408assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
409assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
410
# Encoding pure ASCII text: the 8-bit codecs all give the same bytes, while
# the UTF-16 variants emit two bytes per character in the stated byte order.
assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'

# Round trip over the first 1024 code points: encoding and then decoding
# must give back the original string.  A failure here aborts the run.
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    assert unicode(u.encode(encoding),encoding) == u

# Same round trip restricted to the first 256 code points.  From here on
# failures are reported on stdout instead of aborting the run.
u = u''.join(map(unichr, range(256)))
for encoding in (
    'latin-1',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

# Same round trip restricted to the first 128 code points.
u = u''.join(map(unichr, range(128)))
for encoding in (
    'ascii',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

print 'done.'
446
# Round trips in the other direction (decode first, then encode) for the
# table-driven codecs.  Failures are reported on stdout, not raised.
print 'Testing standard mapping codecs...',

# Bytes 0..127.
print '0-127...',
s = ''.join(map(chr, range(128)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',

    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

    'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    'cp1006', 'cp875', 'iso8859_8',

    ### These have undefined mappings:
    #'cp424',

    ):
    try:
        assert unicode(s,encoding).encode(encoding) == s
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

# Bytes 128..255.  Fewer codecs take part here: the ones commented out
# below are excluded for the reasons given next to them.
print '128-255...',
s = ''.join(map(chr, range(128,256)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',

    ### These have undefined mappings:
    #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #'cp1256', 'cp1257', 'cp1258',
    #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

    ### These fail the round-trip:
    #'cp1006', 'cp875', 'iso8859_8',

    ):
    try:
        assert unicode(s,encoding).encode(encoding) == s
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)

print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000509
# Adjacent string literals are concatenated by the compiler; the result is
# expected to be Unicode as soon as one of the pieces is a Unicode literal,
# whatever its position.
print 'Testing Unicode string concatenation...',
assert (u"abc" u"def") == u"abcdef"
assert ("abc" u"def") == u"abcdef"
assert (u"abc" "def") == u"abcdef"
assert (u"abc" u"def" "ghi") == u"abcdefghi"
assert ("abc" "def" u"ghi") == u"abcdefghi"
print 'done.'
516print 'done.'
Marc-André Lemburga6f73d62000-06-28 16:41:23 +0000517