blob: 2a24255b0491bccc3a834a2b8909a2d73afb2718 [file] [log] [blame]
Guido van Rossuma831cac2000-03-10 23:23:21 +00001""" Test script for the Unicode implementation.
2
Guido van Rossuma831cac2000-03-10 23:23:21 +00003Written by Marc-Andre Lemburg (mal@lemburg.com).
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
Marc-André Lemburg36619082001-01-17 19:11:13 +00007"""#"
import sys

from test_support import verify, verbose, TestFailed
10
11def test(method, input, output, *args):
12 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000013 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000014 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000022 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000023 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
32test('capitalize', u' hello ', u' hello ')
33test('capitalize', u'hello ', u'Hello ')
Marc-André Lemburgfde66e12001-01-29 11:14:16 +000034test('capitalize', u'aaaa', u'Aaaa')
35test('capitalize', u'AaAa', u'Aaaa')
Guido van Rossuma831cac2000-03-10 23:23:21 +000036
Marc-André Lemburg3a645e42001-01-16 11:54:12 +000037test('count', u'aaa', 3, u'a')
38test('count', u'aaa', 0, u'b')
39test('count', 'aaa', 3, u'a')
40test('count', 'aaa', 0, u'b')
41test('count', u'aaa', 3, 'a')
42test('count', u'aaa', 0, 'b')
43
Guido van Rossuma831cac2000-03-10 23:23:21 +000044test('title', u' hello ', u' Hello ')
45test('title', u'hello ', u'Hello ')
46test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
47test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
48test('title', u"getInt", u'Getint')
49
50test('find', u'abcdefghiabc', 0, u'abc')
51test('find', u'abcdefghiabc', 9, u'abc', 1)
52test('find', u'abcdefghiabc', -1, u'def', 4)
53
54test('rfind', u'abcdefghiabc', 9, u'abc')
55
56test('lower', u'HeLLo', u'hello')
57test('lower', u'hello', u'hello')
58
59test('upper', u'HeLLo', u'HELLO')
60test('upper', u'HELLO', u'HELLO')
61
62if 0:
63 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
64
65 test('maketrans', u'abc', transtable, u'xyz')
66 test('maketrans', u'abc', ValueError, u'xyzq')
67
68test('split', u'this is the split function',
69 [u'this', u'is', u'the', u'split', u'function'])
70test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
71test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
72test('split', u'a b c d', [u'a', u'b c d'], None, 1)
73test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
74test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
75test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
76test('split', u'a b c d', [u'a b c d'], None, 0)
77test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
78test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
Guido van Rossum8b264542000-12-19 02:22:31 +000079test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
80test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')
81test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')
82test('split', u'endcase test', [u'endcase ', u''], u'test')
83test('split', u'endcase test', [u'endcase ', u''], 'test')
84test('split', 'endcase test', [u'endcase ', u''], u'test')
85
Guido van Rossuma831cac2000-03-10 23:23:21 +000086
87# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper exposing only len() and indexing.

    Every operation is forwarded to the object passed to the constructor,
    which is kept (not copied) in the ``seq`` attribute.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        wrapped = self.seq
        return len(wrapped)

    def __getitem__(self, i):
        wrapped = self.seq
        return wrapped[i]
92
93test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +000094test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +000095test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +000096test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +000097test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +000098test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
99test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
100test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
101test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
102test('join', ' ', u'w x y z', Sequence(u'wxyz'))
103test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000104
105result = u''
106for i in range(10):
107 if i > 0:
108 result = result + u':'
109 result = result + u'x'*10
110test('join', u':', result, [u'x' * 10] * 10)
111test('join', u':', result, (u'x' * 10,) * 10)
112
113test('strip', u' hello ', u'hello')
114test('lstrip', u' hello ', u'hello ')
115test('rstrip', u' hello ', u' hello')
116test('strip', u'hello', u'hello')
117
118test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
119
120if 0:
121 test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
122
123 table = string.maketrans('a', u'A')
124 test('translate', u'abc', u'Abc', table)
125 test('translate', u'xyz', u'xyz', table)
126
127test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
Barry Warsaw51ac5802000-03-20 16:36:48 +0000128test('replace', u'one!two!three!', u'onetwothree', '!', '')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000129test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
130test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
131test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
132test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
133test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
134test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
135test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
136
137test('startswith', u'hello', 1, u'he')
138test('startswith', u'hello', 1, u'hello')
139test('startswith', u'hello', 0, u'hello world')
140test('startswith', u'hello', 1, u'')
141test('startswith', u'hello', 0, u'ello')
142test('startswith', u'hello', 1, u'ello', 1)
143test('startswith', u'hello', 1, u'o', 4)
144test('startswith', u'hello', 0, u'o', 5)
145test('startswith', u'hello', 1, u'', 5)
146test('startswith', u'hello', 0, u'lo', 6)
147test('startswith', u'helloworld', 1, u'lowo', 3)
148test('startswith', u'helloworld', 1, u'lowo', 3, 7)
149test('startswith', u'helloworld', 0, u'lowo', 3, 6)
150
151test('endswith', u'hello', 1, u'lo')
152test('endswith', u'hello', 0, u'he')
153test('endswith', u'hello', 1, u'')
154test('endswith', u'hello', 0, u'hello world')
155test('endswith', u'helloworld', 0, u'worl')
156test('endswith', u'helloworld', 1, u'worl', 3, 9)
157test('endswith', u'helloworld', 1, u'world', 3, 12)
158test('endswith', u'helloworld', 1, u'lowo', 1, 7)
159test('endswith', u'helloworld', 1, u'lowo', 2, 7)
160test('endswith', u'helloworld', 1, u'lowo', 3, 7)
161test('endswith', u'helloworld', 0, u'lowo', 4, 7)
162test('endswith', u'helloworld', 0, u'lowo', 3, 8)
163test('endswith', u'ab', 0, u'ab', 0, 1)
164test('endswith', u'ab', 0, u'ab', 0, 0)
165
# expandtabs() pads each tab up to the next tab stop, and the column count
# restarts after '\r' or '\n'.  The padding is written as an explicit
# repetition count so the expected width cannot be lost to whitespace
# normalisation: "ab" + tab needs 6 spaces (tab size 8) or 2 (tab size 4),
# "g" + tab needs 7 or 3.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
170
171if 0:
172 test('capwords', u'abc def ghi', u'Abc Def Ghi')
173 test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
174 test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
175
176# Comparisons:
177print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000178verify(u'abc' == 'abc')
179verify('abc' == u'abc')
180verify(u'abc' == u'abc')
181verify(u'abcd' > 'abc')
182verify('abcd' > u'abc')
183verify(u'abcd' > u'abc')
184verify(u'abc' < 'abcd')
185verify('abc' < u'abcd')
186verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000187print 'done.'
188
Marc-André Lemburge5034372000-08-08 08:04:29 +0000189if 0:
190 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000191
Marc-André Lemburge5034372000-08-08 08:04:29 +0000192 print 'Testing UTF-16 code point order comparisons...',
193 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000194 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000195 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000196 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000197
Marc-André Lemburge5034372000-08-08 08:04:29 +0000198 # Non surrogate above surrogate value, fixup required
199 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000200 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000201
Marc-André Lemburge5034372000-08-08 08:04:29 +0000202 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000203 s2 = u'\ud800\udc01'
204 test_lecmp(s, s2)
205 s2 = u'\ud900\udc01'
206 test_lecmp(s, s2)
207 s2 = u'\uda00\udc01'
208 test_lecmp(s, s2)
209 s2 = u'\udb00\udc01'
210 test_lecmp(s, s2)
211 s2 = u'\ud800\udd01'
212 test_lecmp(s, s2)
213 s2 = u'\ud900\udd01'
214 test_lecmp(s, s2)
215 s2 = u'\uda00\udd01'
216 test_lecmp(s, s2)
217 s2 = u'\udb00\udd01'
218 test_lecmp(s, s2)
219 s2 = u'\ud800\ude01'
220 test_lecmp(s, s2)
221 s2 = u'\ud900\ude01'
222 test_lecmp(s, s2)
223 s2 = u'\uda00\ude01'
224 test_lecmp(s, s2)
225 s2 = u'\udb00\ude01'
226 test_lecmp(s, s2)
227 s2 = u'\ud800\udfff'
228 test_lecmp(s, s2)
229 s2 = u'\ud900\udfff'
230 test_lecmp(s, s2)
231 s2 = u'\uda00\udfff'
232 test_lecmp(s, s2)
233 s2 = u'\udb00\udfff'
234 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000235
236 test_fixup(u'\ue000')
237 test_fixup(u'\uff61')
238
239 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000240 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000241 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000242
# Padding is spelled out as a repetition count so the expected field width
# is unambiguous.  center() puts the odd leftover space on the right for
# these widths: 7 pad chars split 3 + 4, and 3 pad chars split 1 + 2.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
# A width smaller than the string leaves it unchanged.
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
252
253test('islower', u'a', 1)
254test('islower', u'A', 0)
255test('islower', u'\n', 0)
256test('islower', u'\u1FFc', 0)
257test('islower', u'abc', 1)
258test('islower', u'aBc', 0)
259test('islower', u'abc\n', 1)
260
261test('isupper', u'a', 0)
262test('isupper', u'A', 1)
263test('isupper', u'\n', 0)
264test('isupper', u'\u1FFc', 0)
265test('isupper', u'ABC', 1)
266test('isupper', u'AbC', 0)
267test('isupper', u'ABC\n', 1)
268
269test('istitle', u'a', 0)
270test('istitle', u'A', 1)
271test('istitle', u'\n', 0)
272test('istitle', u'\u1FFc', 1)
273test('istitle', u'A Titlecased Line', 1)
274test('istitle', u'A\nTitlecased Line', 1)
275test('istitle', u'A Titlecased, Line', 1)
276test('istitle', u'Greek \u1FFcitlecases ...', 1)
277test('istitle', u'Not a capitalized String', 0)
278test('istitle', u'Not\ta Titlecase String', 0)
279test('istitle', u'Not--a Titlecase String', 0)
280
Marc-André Lemburg9d467412000-07-05 09:46:40 +0000281test('isalpha', u'a', 1)
282test('isalpha', u'A', 1)
283test('isalpha', u'\n', 0)
284test('isalpha', u'\u1FFc', 1)
285test('isalpha', u'abc', 1)
286test('isalpha', u'aBc123', 0)
287test('isalpha', u'abc\n', 0)
288
289test('isalnum', u'a', 1)
290test('isalnum', u'A', 1)
291test('isalnum', u'\n', 0)
292test('isalnum', u'123abc456', 1)
293test('isalnum', u'a1b3c', 1)
294test('isalnum', u'aBc000 ', 0)
295test('isalnum', u'abc\n', 0)
296
Guido van Rossuma831cac2000-03-10 23:23:21 +0000297test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
298test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
299test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
300test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
301test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
302test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
Guido van Rossum7ee801d2000-04-11 15:37:02 +0000303test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000304
305test('translate', u"abababc", u'bbbc', {ord('a'):None})
306test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
307test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
308
Guido van Rossumd4d26842000-03-13 23:21:48 +0000309# Contains:
310print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000311verify(('a' in u'abdb') == 1)
312verify(('a' in u'bdab') == 1)
313verify(('a' in u'bdaba') == 1)
314verify(('a' in u'bdba') == 1)
315verify(('a' in u'bdba') == 1)
316verify((u'a' in u'bdba') == 1)
317verify((u'a' in u'bdb') == 0)
318verify((u'a' in 'bdb') == 0)
319verify((u'a' in 'bdba') == 1)
320verify((u'a' in ('a',1,None)) == 1)
321verify((u'a' in (1,None,'a')) == 1)
322verify((u'a' in (1,None,u'a')) == 1)
323verify(('a' in ('a',1,None)) == 1)
324verify(('a' in (1,None,'a')) == 1)
325verify(('a' in (1,None,u'a')) == 1)
326verify(('a' in ('x',1,u'y')) == 0)
327verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000328print 'done.'
329
Guido van Rossuma831cac2000-03-10 23:23:21 +0000330# Formatting:
331print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000332verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
333verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
334verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
335verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
336verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
337verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
338verify(u"%c" % (u"a",) == u'a')
339verify(u"%c" % ("a",) == u'a')
340verify(u"%c" % (34,) == u'"')
341verify(u"%c" % (36,) == u'$')
Fred Drake004d5e62000-10-23 17:22:08 +0000342value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000343if value != u"u'abc', 'abc'":
344 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
345
Marc-André Lemburg36619082001-01-17 19:11:13 +0000346verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000347try:
Fred Drake004d5e62000-10-23 17:22:08 +0000348 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000349except KeyError:
350 print '*** formatting failed for "%s"' % "u'abc, def'"
351else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000352 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000353
Guido van Rossum97064862000-04-10 13:52:48 +0000354# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000355verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
356verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
357verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
358verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
359verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
360verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
361verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
362verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
363verify('...%s...' % u"abc" == u'...abc...')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000364print 'done.'
365
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000366# Test builtin codecs
367print 'Testing builtin codecs...',
368
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000369# UTF-8 specific encoding tests:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000370verify(u'\u20ac'.encode('utf-8') == \
371 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
372verify(u'\ud800\udc02'.encode('utf-8') == \
373 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
374verify(u'\ud84d\udc56'.encode('utf-8') == \
375 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000376# UTF-8 specific decoding tests
Tim Petersd2bf3b72001-01-18 02:22:22 +0000377verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000378 'utf-8') == u'\ud84d\udc56' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000379verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000380 'utf-8') == u'\ud800\udc02' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000381verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000382 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000383
384# Other possible utf-8 test cases:
385# * strict decoding testing for all of the
386# UTF8_ERROR cases in PyUnicode_DecodeUTF8
387
388
389
Marc-André Lemburg36619082001-01-17 19:11:13 +0000390verify(unicode('hello','ascii') == u'hello')
391verify(unicode('hello','utf-8') == u'hello')
392verify(unicode('hello','utf8') == u'hello')
393verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000394
class String:
    """Object whose str() conversion is whatever is stored in ``x``."""

    # Class-level default; instances override it by assigning to .x.
    x = ''

    def __str__(self):
        text = self.x
        return text
399
400o = String()
401
402o.x = 'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000403verify(unicode(o) == u'abc')
404verify(str(o) == 'abc')
Marc-André Lemburgb6d78fcd2000-07-07 13:46:19 +0000405
406o.x = u'abc'
Marc-André Lemburg36619082001-01-17 19:11:13 +0000407verify(unicode(o) == u'abc')
408verify(str(o) == 'abc')
Marc-André Lemburgb6d78fcd2000-07-07 13:46:19 +0000409
Guido van Rossum97064862000-04-10 13:52:48 +0000410try:
411 u'Andr\202 x'.encode('ascii')
412 u'Andr\202 x'.encode('ascii','strict')
413except ValueError:
414 pass
415else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000416 raise TestFailed, "u'Andr\202'.encode('ascii') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000417verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
418verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")
Guido van Rossum97064862000-04-10 13:52:48 +0000419
420try:
421 unicode('Andr\202 x','ascii')
422 unicode('Andr\202 x','ascii','strict')
423except ValueError:
424 pass
425else:
Guido van Rossuma1374e42001-01-19 19:01:56 +0000426 raise TestFailed, "unicode('Andr\202') failed to raise an exception"
Marc-André Lemburg36619082001-01-17 19:11:13 +0000427verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
428verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
Guido van Rossum97064862000-04-10 13:52:48 +0000429
Marc-André Lemburg36619082001-01-17 19:11:13 +0000430verify(u'hello'.encode('ascii') == 'hello')
431verify(u'hello'.encode('utf-8') == 'hello')
432verify(u'hello'.encode('utf8') == 'hello')
433verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
434verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
435verify(u'hello'.encode('latin-1') == 'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000436
437u = u''.join(map(unichr, range(1024)))
438for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
439 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000440 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000441
442u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000443for encoding in (
444 'latin-1',
445 ):
446 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000447 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000448 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000449 print '*** codec "%s" failed round-trip' % encoding
450 except ValueError,why:
451 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000452
453u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000454for encoding in (
455 'ascii',
456 ):
457 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000458 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000459 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000460 print '*** codec "%s" failed round-trip' % encoding
461 except ValueError,why:
462 print '*** codec for "%s" failed: %s' % (encoding, why)
463
464print 'done.'
465
466print 'Testing standard mapping codecs...',
467
468print '0-127...',
469s = ''.join(map(chr, range(128)))
470for encoding in (
471 'cp037', 'cp1026',
472 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
473 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000474 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000475 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
476 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
477 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
478 'mac_cyrillic', 'mac_latin2',
479
480 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
481 'cp1256', 'cp1257', 'cp1258',
482 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
483
484 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
485 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000486
Guido van Rossum9e896b32000-04-05 20:11:21 +0000487 ### These have undefined mappings:
488 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000489
Guido van Rossum9e896b32000-04-05 20:11:21 +0000490 ):
491 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000492 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000493 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000494 print '*** codec "%s" failed round-trip' % encoding
495 except ValueError,why:
496 print '*** codec for "%s" failed: %s' % (encoding, why)
497
498print '128-255...',
499s = ''.join(map(chr, range(128,256)))
500for encoding in (
501 'cp037', 'cp1026',
502 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
503 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000504 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000505 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000506 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000507 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000508 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000509
Guido van Rossum9e896b32000-04-05 20:11:21 +0000510 ### These have undefined mappings:
511 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
512 #'cp1256', 'cp1257', 'cp1258',
513 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000514 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000515 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000516
Guido van Rossum9e896b32000-04-05 20:11:21 +0000517 ### These fail the round-trip:
518 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000519
Guido van Rossum9e896b32000-04-05 20:11:21 +0000520 ):
521 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000522 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossuma1374e42001-01-19 19:01:56 +0000523 except TestFailed:
Guido van Rossum9e896b32000-04-05 20:11:21 +0000524 print '*** codec "%s" failed round-trip' % encoding
525 except ValueError,why:
526 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000527
528print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000529
530print 'Testing Unicode string concatenation...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000531verify((u"abc" u"def") == u"abcdef")
532verify(("abc" u"def") == u"abcdef")
533verify((u"abc" "def") == u"abcdef")
534verify((u"abc" u"def" "ghi") == u"abcdefghi")
535verify(("abc" "def" u"ghi") == u"abcdefghi")
Fred Drakee0243e22000-04-13 14:11:56 +0000536print 'done.'