blob: 76a2591920e501682858b1ca41ae7f133eedd89f [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8from test_support import verbose
9import sys
10
11def test(method, input, output, *args):
12 if verbose:
13 print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
14 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
22 if value != output:
23 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Case conversion (capitalize/title/lower/upper) and substring search.
test('capitalize', u' hello ', u' hello ')
test('capitalize', u'hello ', u'Hello ')

test('title', u' hello ', u' Hello ')
test('title', u'hello ', u'Hello ')
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
test('title', u"getInt", u'Getint')

test('find', u'abcdefghiabc', 0, u'abc')
test('find', u'abcdefghiabc', 9, u'abc', 1)
test('find', u'abcdefghiabc', -1, u'def', 4)

test('rfind', u'abcdefghiabc', 9, u'abc')

test('lower', u'HeLLo', u'hello')
test('lower', u'hello', u'hello')

test('upper', u'HeLLo', u'HELLO')
test('upper', u'HELLO', u'HELLO')
52
if 0:
    # Disabled tests.  transtable is the 256-character identity table with
    # 'a', 'b', 'c' replaced by 'x', 'y', 'z'; it is also referenced by the
    # disabled translate tests further down.
    identity = ''.join(map(chr, range(256)))
    transtable = identity[:ord('a')] + 'xyz' + identity[ord('d'):]

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
58
# split(): default whitespace splitting, explicit separator, maxsplit.
test('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function'])
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
70
# join now works with any sequence type
class Sequence:
    """Minimal sequence: only __len__ and __getitem__ over self.seq."""

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
76
# join() over a list, a tuple, a user-defined sequence and a non-sequence.
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
test('join', u' ', TypeError, 7)
81
82class BadSeq(Sequence):
83 def __init__(self): self.seq = [7, u'hello', 123L]
84
# Joining a sequence with non-string items must raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected value by plain concatenation so that it does not
# depend on join(), the method under test.
result = u''
for i in range(10):
    if i > 0:
        result = result + u':'
    result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
94
# Whitespace stripping and case swapping.
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')

test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
101
if 0:
    # Disabled tests; they rely on transtable from the disabled maketrans
    # section above.
    import string   # only needed here; imported locally so that enabling
                    # the block does not fail with a NameError

    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)
108
# replace(): with and without a maximum replacement count.
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)

# startswith(): optional start and end positions.
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)

# endswith(): optional start and end positions.
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
147
# expandtabs(): the column counter restarts after '\r' and '\n'.
# With the default tab size of 8, 'ab\t' pads with 6 spaces and 'g\t' with 7;
# with a tab size of 4 the padding is 2 and 3 spaces respectively.
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4)
152
if 0:
    # Disabled tests.
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
157
158# Comparisons:
159print 'Testing Unicode comparisons...',
160assert u'abc' == 'abc'
161assert 'abc' == u'abc'
162assert u'abc' == u'abc'
163assert u'abcd' > 'abc'
164assert 'abcd' > u'abc'
165assert u'abcd' > u'abc'
166assert u'abc' < 'abcd'
167assert 'abc' < u'abcd'
168assert u'abc' < u'abcd'
169print 'done.'
170
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000171print 'Testing UTF-16 code point order comparisons...',
172#No surrogates, no fixup required.
173assert u'\u0061' < u'\u20ac'
174# Non surrogate below surrogate value, no fixup required
175assert u'\u0061' < u'\ud800\udc02'
176
177# Non surrogate above surrogate value, fixup required
178def test_lecmp(s, s2):
179 assert s < s2 , "comparison failed on %s < %s" % (s, s2)
180
def test_fixup(s):
    """Check that ``s`` compares less than sixteen surrogate pairs.

    The pairs combine the lead surrogates D800, D900, DA00, DB00 with the
    trail surrogates DC01, DD01, DE01, DFFF.  Each comparison is delegated
    to test_lecmp(), which raises AssertionError on failure.
    """
    # Trail surrogate varies slowest, matching the order of the original
    # unrolled list of cases.
    for trail in (u'\udc01', u'\udd01', u'\ude01', u'\udfff'):
        for lead in (u'\ud800', u'\ud900', u'\uda00', u'\udb00'):
            test_lecmp(s, lead + trail)
214
215test_fixup(u'\ue000')
216test_fixup(u'\uff61')
217
218# Surrogates on both sides, no fixup required
219assert u'\ud800\udc02' < u'\ud84d\udc56'
220print 'done.'
221
# ljust/rjust/center pad with spaces up to the requested width; a width
# smaller than the string leaves it unchanged.  When the padding cannot be
# split evenly, center() puts the extra space on the right for these cases.
test('ljust', u'abc',  u'abc       ', 10)
test('rjust', u'abc',  u'       abc', 10)
test('center', u'abc', u'   abc    ', 10)
test('ljust', u'abc',  u'abc   ', 6)
test('rjust', u'abc',  u'   abc', 6)
test('center', u'abc', u' abc  ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
231
# Character class predicates.  u'\u1FFc' is used as a titlecase,
# alphabetic character that is neither upper nor lower case.
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)

test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)

test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)

test('isalpha', u'a', 1)
test('isalpha', u'A', 1)
test('isalpha', u'\n', 0)
test('isalpha', u'\u1FFc', 1)
test('isalpha', u'abc', 1)
test('isalpha', u'aBc123', 0)
test('isalpha', u'abc\n', 0)

test('isalnum', u'a', 1)
test('isalnum', u'A', 1)
test('isalnum', u'\n', 0)
test('isalnum', u'123abc456', 1)
test('isalnum', u'a1b3c', 1)
test('isalnum', u'aBc000 ', 0)
test('isalnum', u'abc\n', 0)
275
# splitlines(): '\n', '\r' and '\r\n' all end a line; a true second
# argument keeps the line endings.
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)

# translate() with a dict: None deletes, an ordinal or a Unicode string
# replaces.
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
287
Guido van Rossumd4d26842000-03-13 23:21:48 +0000288# Contains:
289print 'Testing Unicode contains method...',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000290assert ('a' in u'abdb') == 1
291assert ('a' in u'bdab') == 1
292assert ('a' in u'bdaba') == 1
293assert ('a' in u'bdba') == 1
Guido van Rossumd4d26842000-03-13 23:21:48 +0000294assert ('a' in u'bdba') == 1
295assert (u'a' in u'bdba') == 1
296assert (u'a' in u'bdb') == 0
297assert (u'a' in 'bdb') == 0
298assert (u'a' in 'bdba') == 1
Guido van Rossum9e896b32000-04-05 20:11:21 +0000299assert (u'a' in ('a',1,None)) == 1
300assert (u'a' in (1,None,'a')) == 1
301assert (u'a' in (1,None,u'a')) == 1
302assert ('a' in ('a',1,None)) == 1
303assert ('a' in (1,None,'a')) == 1
304assert ('a' in (1,None,u'a')) == 1
305assert ('a' in ('x',1,u'y')) == 0
306assert ('a' in ('x',1,None)) == 0
Guido van Rossumd4d26842000-03-13 23:21:48 +0000307print 'done.'
308
Guido van Rossuma831cac2000-03-10 23:23:21 +0000309# Formatting:
310print 'Testing Unicode formatting strings...',
311assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
312assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
313assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
314assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
315assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
316assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
Marc-André Lemburg59a044b2000-06-08 17:50:55 +0000317assert u"%c" % (u"a",) == u'a'
318assert u"%c" % ("a",) == u'a'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000319assert u"%c" % (34,) == u'"'
320assert u"%c" % (36,) == u'$'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000321value = u"%r, %r" % (u"abc", "abc")
322if value != u"u'abc', 'abc'":
323 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
324
Guido van Rossuma831cac2000-03-10 23:23:21 +0000325assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
Marc-André Lemburg84625732000-06-13 12:05:36 +0000326try:
327 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
328except KeyError:
329 print '*** formatting failed for "%s"' % "u'abc, def'"
330else:
331 assert value == u'abc, def'
332
Guido van Rossum97064862000-04-10 13:52:48 +0000333# formatting jobs delegated from the string implementation:
334assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
335assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
336assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
337assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
338assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
339assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
340assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
341assert '...%s...' % u"abc" == u'...abc...'
Guido van Rossuma831cac2000-03-10 23:23:21 +0000342print 'done.'
343
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000344# Test builtin codecs
345print 'Testing builtin codecs...',
346
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000347# UTF-8 specific encoding tests:
348assert u'\u20ac'.encode('utf-8') == \
349 ''.join((chr(0xe2), chr(0x82), chr(0xac)))
350assert u'\ud800\udc02'.encode('utf-8') == \
351 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82)))
352assert u'\ud84d\udc56'.encode('utf-8') == \
353 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96)))
354# UTF-8 specific decoding tests
355assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
356 'utf-8') == u'\ud84d\udc56'
357assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
358 'utf-8') == u'\ud800\udc02'
359assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
360 'utf-8') == u'\u20ac'
361
362# Other possible utf-8 test cases:
363# * strict decoding testing for all of the
364# UTF8_ERROR cases in PyUnicode_DecodeUTF8
365
366
367
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000368assert unicode('hello','ascii') == u'hello'
369assert unicode('hello','utf-8') == u'hello'
370assert unicode('hello','utf8') == u'hello'
371assert unicode('hello','latin-1') == u'hello'
372
class String:
    """Object whose __str__ returns whatever is stored in attribute x."""

    # Class-level default; the code below overrides it per instance.
    x = ''

    def __str__(self):
        return self.x
377
# unicode() and str() on an object whose __str__ returns first an 8-bit
# string, then a Unicode string.
o = String()

o.x = 'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'

o.x = u'abc'
assert unicode(o) == u'abc'
assert str(o) == 'abc'
387
Guido van Rossum97064862000-04-10 13:52:48 +0000388try:
389 u'Andr\202 x'.encode('ascii')
390 u'Andr\202 x'.encode('ascii','strict')
391except ValueError:
392 pass
393else:
394 raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
395assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
396assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
397
398try:
399 unicode('Andr\202 x','ascii')
400 unicode('Andr\202 x','ascii','strict')
401except ValueError:
402 pass
403else:
404 raise AssertionError, "unicode('Andr\202') failed to raise an exception"
405assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
406assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
407
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000408assert u'hello'.encode('ascii') == 'hello'
409assert u'hello'.encode('utf-8') == 'hello'
410assert u'hello'.encode('utf8') == 'hello'
411assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
412assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
413assert u'hello'.encode('latin-1') == 'hello'
414
415u = u''.join(map(unichr, range(1024)))
416for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
417 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
418 assert unicode(u.encode(encoding),encoding) == u
419
420u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000421for encoding in (
422 'latin-1',
423 ):
424 try:
425 assert unicode(u.encode(encoding),encoding) == u
426 except AssertionError:
427 print '*** codec "%s" failed round-trip' % encoding
428 except ValueError,why:
429 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000430
431u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000432for encoding in (
433 'ascii',
434 ):
435 try:
436 assert unicode(u.encode(encoding),encoding) == u
437 except AssertionError:
438 print '*** codec "%s" failed round-trip' % encoding
439 except ValueError,why:
440 print '*** codec for "%s" failed: %s' % (encoding, why)
441
442print 'done.'
443
444print 'Testing standard mapping codecs...',
445
446print '0-127...',
447s = ''.join(map(chr, range(128)))
448for encoding in (
449 'cp037', 'cp1026',
450 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
451 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
452 'cp863', 'cp865', 'cp866',
453 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
454 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
455 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
456 'mac_cyrillic', 'mac_latin2',
457
458 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
459 'cp1256', 'cp1257', 'cp1258',
460 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
461
462 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
463 'cp1006', 'cp875', 'iso8859_8',
464
465 ### These have undefined mappings:
466 #'cp424',
467
468 ):
469 try:
470 assert unicode(s,encoding).encode(encoding) == s
471 except AssertionError:
472 print '*** codec "%s" failed round-trip' % encoding
473 except ValueError,why:
474 print '*** codec for "%s" failed: %s' % (encoding, why)
475
476print '128-255...',
477s = ''.join(map(chr, range(128,256)))
478for encoding in (
479 'cp037', 'cp1026',
480 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
481 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
482 'cp863', 'cp865', 'cp866',
483 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
484 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
485 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
486 'mac_cyrillic', 'mac_latin2',
487
488 ### These have undefined mappings:
489 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
490 #'cp1256', 'cp1257', 'cp1258',
491 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
492 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
493
494 ### These fail the round-trip:
495 #'cp1006', 'cp875', 'iso8859_8',
496
497 ):
498 try:
499 assert unicode(s,encoding).encode(encoding) == s
500 except AssertionError:
501 print '*** codec "%s" failed round-trip' % encoding
502 except ValueError,why:
503 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000504
505print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000506
507print 'Testing Unicode string concatenation...',
508assert (u"abc" u"def") == u"abcdef"
509assert ("abc" u"def") == u"abcdef"
510assert (u"abc" "def") == u"abcdef"
511assert (u"abc" u"def" "ghi") == u"abcdefghi"
512assert ("abc" "def" u"ghi") == u"abcdefghi"
513print 'done.'
Marc-André Lemburga6f73d62000-06-28 16:41:23 +0000514