blob: 513b5ad40ce6beefbecb17a6c2132eea14e7c337 [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
from test_support import verify, verbose, TestFailed
import sys
10
11def test(method, input, output, *args):
12 if verbose:
Guido van Rossum15ffc712000-11-29 12:13:59 +000013 print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),
Guido van Rossuma831cac2000-03-10 23:23:21 +000014 try:
15 f = getattr(input, method)
16 value = apply(f, args)
17 except:
18 value = sys.exc_type
Guido van Rossum66503202000-04-28 20:39:58 +000019 exc = sys.exc_info()[:2]
Guido van Rossuma831cac2000-03-10 23:23:21 +000020 else:
21 exc = None
Guido van Rossum15ffc712000-11-29 12:13:59 +000022 if value != output or type(value) is not type(output):
Guido van Rossuma831cac2000-03-10 23:23:21 +000023 if verbose:
24 print 'no'
25 print '*',f, `input`, `output`, `value`
26 if exc:
Guido van Rossum66503202000-04-28 20:39:58 +000027 print ' value == %s: %s' % (exc)
Guido van Rossuma831cac2000-03-10 23:23:21 +000028 else:
29 if verbose:
30 print 'yes'
31
# Basic string-method checks.  Every row is
# (method, subject, expected result[, method arguments...])
# and is handed to test() in the order listed.
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('count', u'aaa', 3, u'a'),
    ('count', u'aaa', 0, u'b'),
    ('count', 'aaa', 3, u'a'),
    ('count', 'aaa', 0, u'b'),
    ('count', u'aaa', 3, 'a'),
    ('count', u'aaa', 0, 'b'),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    test(*case)
del case
59
# Disabled via `if 0:` -- none of this runs.  transtable is the identity
# byte table with 'abc' mapped to 'xyz'; in the visible part of the file it
# is only referenced by checks that are themselves under `if 0:`.
# NOTE(review): presumably kept for reference until a unicode maketrans
# exists -- confirm before deleting.
if 0:
    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

    test('maketrans', u'abc', transtable, u'xyz')
    test('maketrans', u'abc', ValueError, u'xyzq')
65
# split(): whitespace splitting, explicit separators, maxsplit limits and
# mixed str/unicode subject/separator combinations.  Rows are
# (subject, expected list[, separator[, maxsplit]]).
for case in (
    (u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    (u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    (u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    (u'a b c d', [u'a', u'b c d'], None, 1),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    (u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    (u'a b c d', [u'a b c d'], None, 0),
    (u'a b c d', [u'a', u'b', u'c d'], None, 2),
    (u'a b c d ', [u'a', u'b', u'c', u'd']),
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    (u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//'),
    ('a//b//c//d', [u'a', u'b', u'c', u'd'], u'//'),
    (u'endcase test', [u'endcase ', u''], u'test'),
    (u'endcase test', [u'endcase ', u''], 'test'),
    ('endcase test', [u'endcase ', u''], u'test'),
    ):
    test('split', *case)
del case
83
Guido van Rossuma831cac2000-03-10 23:23:21 +000084
85# join now works with any sequence type
class Sequence:
    """Bare-bones sequence wrapper exposing only __len__ and __getitem__.

    Used to check that join() accepts arbitrary sequence objects and not
    just lists and tuples.
    """

    def __init__(self, seq):
        # The wrapped object; length and item access are delegated to it.
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
90
91test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
Guido van Rossum15ffc712000-11-29 12:13:59 +000092test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])
Guido van Rossuma831cac2000-03-10 23:23:21 +000093test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
Guido van Rossum15ffc712000-11-29 12:13:59 +000094test('join', u' ', u'w x y z', Sequence('wxyz'))
Guido van Rossuma831cac2000-03-10 23:23:21 +000095test('join', u' ', TypeError, 7)
Guido van Rossum15ffc712000-11-29 12:13:59 +000096test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))
97test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])
98test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])
99test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))
100test('join', ' ', u'w x y z', Sequence(u'wxyz'))
101test('join', ' ', TypeError, 7)
Guido van Rossuma831cac2000-03-10 23:23:21 +0000102
103result = u''
104for i in range(10):
105 if i > 0:
106 result = result + u':'
107 result = result + u'x'*10
108test('join', u':', result, [u'x' * 10] * 10)
109test('join', u':', result, (u'x' * 10,) * 10)
110
# Whitespace stripping and case swapping; rows are (method, subject, expected).
for case in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    test(*case)

# Disabled via `if 0:`.
# NOTE(review): relies on `transtable` from another disabled block and on
# the `string` module, which the visible import lines do not import --
# only safe while this stays disabled.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    test('translate', u'abc', u'Abc', table)
    test('translate', u'xyz', u'xyz', table)

# replace(): rows are (expected, old, new[, maxcount]); the subject is
# always u'one!two!three!'.
for case in (
    (u'one@two!three!', u'!', u'@', 1),
    (u'onetwothree', '!', ''),
    (u'one@two@three!', u'!', u'@', 2),
    (u'one@two@three@', u'!', u'@', 3),
    (u'one@two@three@', u'!', u'@', 4),
    (u'one!two!three!', u'!', u'@', 0),
    (u'one@two@three@', u'!', u'@'),
    (u'one!two!three!', u'x', u'@'),
    (u'one!two!three!', u'x', u'@', 2),
    ):
    test('replace', u'one!two!three!', *case)
del case
134
# startswith()/endswith(): rows are
# (subject, expected 0/1, prefix-or-suffix[, start[, end]]).
for case in (
    (u'hello', 1, u'he'),
    (u'hello', 1, u'hello'),
    (u'hello', 0, u'hello world'),
    (u'hello', 1, u''),
    (u'hello', 0, u'ello'),
    (u'hello', 1, u'ello', 1),
    (u'hello', 1, u'o', 4),
    (u'hello', 0, u'o', 5),
    (u'hello', 1, u'', 5),
    (u'hello', 0, u'lo', 6),
    (u'helloworld', 1, u'lowo', 3),
    (u'helloworld', 1, u'lowo', 3, 7),
    (u'helloworld', 0, u'lowo', 3, 6),
    ):
    test('startswith', *case)

for case in (
    (u'hello', 1, u'lo'),
    (u'hello', 0, u'he'),
    (u'hello', 1, u''),
    (u'hello', 0, u'hello world'),
    (u'helloworld', 0, u'worl'),
    (u'helloworld', 1, u'worl', 3, 9),
    (u'helloworld', 1, u'world', 3, 12),
    (u'helloworld', 1, u'lowo', 1, 7),
    (u'helloworld', 1, u'lowo', 2, 7),
    (u'helloworld', 1, u'lowo', 3, 7),
    (u'helloworld', 0, u'lowo', 4, 7),
    (u'helloworld', 0, u'lowo', 3, 8),
    (u'ab', 0, u'ab', 0, 1),
    (u'ab', 0, u'ab', 0, 0),
    ):
    test('endswith', *case)
del case
163
# expandtabs(): the column counter restarts after '\r' and '\n', so with
# the default tab size of 8 the tab after 'ab' becomes 6 spaces and the
# tab after 'g' becomes 7; with a tab size of 4 they become 2 and 3.
# The padding is spelled out as u' ' * n so that the expected values
# survive tools that collapse runs of whitespace.
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)

# Disabled via `if 0:` -- these capwords checks never run.
if 0:
    test('capwords', u'abc def ghi', u'Abc Def Ghi')
    test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
    test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
173
174# Comparisons:
175print 'Testing Unicode comparisons...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000176verify(u'abc' == 'abc')
177verify('abc' == u'abc')
178verify(u'abc' == u'abc')
179verify(u'abcd' > 'abc')
180verify('abcd' > u'abc')
181verify(u'abcd' > u'abc')
182verify(u'abc' < 'abcd')
183verify('abc' < u'abcd')
184verify(u'abc' < u'abcd')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000185print 'done.'
186
Marc-André Lemburge5034372000-08-08 08:04:29 +0000187if 0:
188 # Move these tests to a Unicode collation module test...
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000189
Marc-André Lemburge5034372000-08-08 08:04:29 +0000190 print 'Testing UTF-16 code point order comparisons...',
191 #No surrogates, no fixup required.
Marc-André Lemburg36619082001-01-17 19:11:13 +0000192 verify(u'\u0061' < u'\u20ac')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000193 # Non surrogate below surrogate value, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000194 verify(u'\u0061' < u'\ud800\udc02')
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000195
Marc-André Lemburge5034372000-08-08 08:04:29 +0000196 # Non surrogate above surrogate value, fixup required
197 def test_lecmp(s, s2):
Tim Petersd2bf3b72001-01-18 02:22:22 +0000198 verify(s < s2 , "comparison failed on %s < %s" % (s, s2))
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000199
Marc-André Lemburge5034372000-08-08 08:04:29 +0000200 def test_fixup(s):
Fred Drake004d5e62000-10-23 17:22:08 +0000201 s2 = u'\ud800\udc01'
202 test_lecmp(s, s2)
203 s2 = u'\ud900\udc01'
204 test_lecmp(s, s2)
205 s2 = u'\uda00\udc01'
206 test_lecmp(s, s2)
207 s2 = u'\udb00\udc01'
208 test_lecmp(s, s2)
209 s2 = u'\ud800\udd01'
210 test_lecmp(s, s2)
211 s2 = u'\ud900\udd01'
212 test_lecmp(s, s2)
213 s2 = u'\uda00\udd01'
214 test_lecmp(s, s2)
215 s2 = u'\udb00\udd01'
216 test_lecmp(s, s2)
217 s2 = u'\ud800\ude01'
218 test_lecmp(s, s2)
219 s2 = u'\ud900\ude01'
220 test_lecmp(s, s2)
221 s2 = u'\uda00\ude01'
222 test_lecmp(s, s2)
223 s2 = u'\udb00\ude01'
224 test_lecmp(s, s2)
225 s2 = u'\ud800\udfff'
226 test_lecmp(s, s2)
227 s2 = u'\ud900\udfff'
228 test_lecmp(s, s2)
229 s2 = u'\uda00\udfff'
230 test_lecmp(s, s2)
231 s2 = u'\udb00\udfff'
232 test_lecmp(s, s2)
Marc-André Lemburge5034372000-08-08 08:04:29 +0000233
234 test_fixup(u'\ue000')
235 test_fixup(u'\uff61')
236
237 # Surrogates on both sides, no fixup required
Marc-André Lemburg36619082001-01-17 19:11:13 +0000238 verify(u'\ud800\udc02' < u'\ud84d\udc56')
Marc-André Lemburge5034372000-08-08 08:04:29 +0000239 print 'done.'
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000240
# ljust()/rjust()/center(): the result is padded with blanks up to the
# requested width and returned unchanged when the width is too small.
# Padding is written as u' ' * n so the exact blank counts are explicit
# and cannot be lost to whitespace-collapsing tools.
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
# For an odd amount of padding the extra blank goes on the right.
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
250
# Character-class predicates.  Each entry is a method name followed by its
# (subject, expected 0/1) pairs; they are run in the order listed.
for method, cases in (
    ('islower', (
        (u'a', 1),
        (u'A', 0),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'abc', 1),
        (u'aBc', 0),
        (u'abc\n', 1),
        )),
    ('isupper', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 0),
        (u'ABC', 1),
        (u'AbC', 0),
        (u'ABC\n', 1),
        )),
    ('istitle', (
        (u'a', 0),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'A Titlecased Line', 1),
        (u'A\nTitlecased Line', 1),
        (u'A Titlecased, Line', 1),
        (u'Greek \u1FFcitlecases ...', 1),
        (u'Not a capitalized String', 0),
        (u'Not\ta Titlecase String', 0),
        (u'Not--a Titlecase String', 0),
        )),
    ('isalpha', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'\u1FFc', 1),
        (u'abc', 1),
        (u'aBc123', 0),
        (u'abc\n', 0),
        )),
    ('isalnum', (
        (u'a', 1),
        (u'A', 1),
        (u'\n', 0),
        (u'123abc456', 1),
        (u'a1b3c', 1),
        (u'aBc000 ', 0),
        (u'abc\n', 0),
        )),
    ):
    for subject, expected in cases:
        test(method, subject, expected)
del method, cases, subject, expected
294
# splitlines() with and without keepends, then translate() with a dict
# table whose values are None, an ordinal, or a unicode string.
# Rows are (method, subject, expected[, method arguments...]).
for case in (
    ('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'', u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),

    ('translate', u"abababc", u'bbbc', {ord('a'):None}),
    ('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    ('translate', u"abababc", u'iiix',
     {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ):
    test(*case)
del case
306
Guido van Rossumd4d26842000-03-13 23:21:48 +0000307# Contains:
308print 'Testing Unicode contains method...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000309verify(('a' in u'abdb') == 1)
310verify(('a' in u'bdab') == 1)
311verify(('a' in u'bdaba') == 1)
312verify(('a' in u'bdba') == 1)
313verify(('a' in u'bdba') == 1)
314verify((u'a' in u'bdba') == 1)
315verify((u'a' in u'bdb') == 0)
316verify((u'a' in 'bdb') == 0)
317verify((u'a' in 'bdba') == 1)
318verify((u'a' in ('a',1,None)) == 1)
319verify((u'a' in (1,None,'a')) == 1)
320verify((u'a' in (1,None,u'a')) == 1)
321verify(('a' in ('a',1,None)) == 1)
322verify(('a' in (1,None,'a')) == 1)
323verify(('a' in (1,None,u'a')) == 1)
324verify(('a' in ('x',1,u'y')) == 0)
325verify(('a' in ('x',1,None)) == 0)
Guido van Rossumd4d26842000-03-13 23:21:48 +0000326print 'done.'
327
Guido van Rossuma831cac2000-03-10 23:23:21 +0000328# Formatting:
329print 'Testing Unicode formatting strings...',
Marc-André Lemburg36619082001-01-17 19:11:13 +0000330verify(u"%s, %s" % (u"abc", "abc") == u'abc, abc')
331verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00')
332verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00')
333verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50')
334verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57')
335verify(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57')
336verify(u"%c" % (u"a",) == u'a')
337verify(u"%c" % ("a",) == u'a')
338verify(u"%c" % (34,) == u'"')
339verify(u"%c" % (36,) == u'$')
Fred Drake004d5e62000-10-23 17:22:08 +0000340value = u"%r, %r" % (u"abc", "abc")
Marc-André Lemburg84625732000-06-13 12:05:36 +0000341if value != u"u'abc', 'abc'":
342 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
343
Marc-André Lemburg36619082001-01-17 19:11:13 +0000344verify(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000345try:
Fred Drake004d5e62000-10-23 17:22:08 +0000346 value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}
Marc-André Lemburg84625732000-06-13 12:05:36 +0000347except KeyError:
348 print '*** formatting failed for "%s"' % "u'abc, def'"
349else:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000350 verify(value == u'abc, def')
Marc-André Lemburg84625732000-06-13 12:05:36 +0000351
Guido van Rossum97064862000-04-10 13:52:48 +0000352# formatting jobs delegated from the string implementation:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000353verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
354verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
355verify('...%(foo)s...' % {u'foo':"abc"} == '...abc...')
356verify('...%(foo)s...' % {u'foo':u"abc"} == u'...abc...')
357verify('...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...')
358verify('...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...')
359verify('...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...')
360verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...%...%s...1...2...3...abc...')
361verify('...%s...' % u"abc" == u'...abc...')
Guido van Rossuma831cac2000-03-10 23:23:21 +0000362print 'done.'
363
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000364# Test builtin codecs
365print 'Testing builtin codecs...',
366
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000367# UTF-8 specific encoding tests:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000368verify(u'\u20ac'.encode('utf-8') == \
369 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
370verify(u'\ud800\udc02'.encode('utf-8') == \
371 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
372verify(u'\ud84d\udc56'.encode('utf-8') == \
373 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000374# UTF-8 specific decoding tests
Tim Petersd2bf3b72001-01-18 02:22:22 +0000375verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000376 'utf-8') == u'\ud84d\udc56' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000377verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000378 'utf-8') == u'\ud800\udc02' )
Tim Petersd2bf3b72001-01-18 02:22:22 +0000379verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
Marc-André Lemburg36619082001-01-17 19:11:13 +0000380 'utf-8') == u'\u20ac' )
Marc-André Lemburgd6d06ad2000-07-07 17:48:52 +0000381
382# Other possible utf-8 test cases:
383# * strict decoding testing for all of the
384# UTF8_ERROR cases in PyUnicode_DecodeUTF8
385
386
387
Marc-André Lemburg36619082001-01-17 19:11:13 +0000388verify(unicode('hello','ascii') == u'hello')
389verify(unicode('hello','utf-8') == u'hello')
390verify(unicode('hello','utf8') == u'hello')
391verify(unicode('hello','latin-1') == u'hello')
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000392
class String:
    """Object whose __str__ returns whatever is stored in attribute x.

    Used to check how str() and unicode() coerce an instance whose
    __str__ hands back either a str or a unicode object.
    """

    # Value returned by __str__; instances override it per test.
    x = ''

    def __str__(self):
        return self.x
397
# str()/unicode() on an instance whose __str__ returns first a plain and
# then a unicode string.
o = String()
for payload in ('abc', u'abc'):
    o.x = payload
    verify(unicode(o) == u'abc')
    verify(str(o) == 'abc')
del payload

# Encoding a non-ASCII character to ASCII: the default/'strict' mode must
# raise ValueError, 'ignore' drops it and 'replace' substitutes '?'.
try:
    u'Andr\202 x'.encode('ascii')
    u'Andr\202 x'.encode('ascii','strict')
except ValueError:
    pass
else:
    raise AssertionError("u'Andr\202'.encode('ascii') failed to raise an exception")
verify(u'Andr\202 x'.encode('ascii','ignore') == "Andr x")
verify(u'Andr\202 x'.encode('ascii','replace') == "Andr? x")

# Decoding a non-ASCII byte as ASCII: same three error modes; 'replace'
# substitutes U+FFFD.
try:
    unicode('Andr\202 x','ascii')
    unicode('Andr\202 x','ascii','strict')
except ValueError:
    pass
else:
    raise AssertionError("unicode('Andr\202') failed to raise an exception")
verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')

# Encoding plain ASCII text: (codec name, expected byte string).
for codec, expected in (
    ('ascii', 'hello'),
    ('utf-8', 'hello'),
    ('utf8', 'hello'),
    ('utf-16-le', 'h\000e\000l\000l\000o\000'),
    ('utf-16-be', '\000h\000e\000l\000l\000o'),
    ('latin-1', 'hello'),
    ):
    verify(u'hello'.encode(codec) == expected)
del codec, expected
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000434
435u = u''.join(map(unichr, range(1024)))
436for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
437 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
Marc-André Lemburg36619082001-01-17 19:11:13 +0000438 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000439
440u = u''.join(map(unichr, range(256)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000441for encoding in (
442 'latin-1',
443 ):
444 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000445 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000446 except AssertionError:
447 print '*** codec "%s" failed round-trip' % encoding
448 except ValueError,why:
449 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000450
451u = u''.join(map(unichr, range(128)))
Guido van Rossum9e896b32000-04-05 20:11:21 +0000452for encoding in (
453 'ascii',
454 ):
455 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000456 verify(unicode(u.encode(encoding),encoding) == u)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000457 except AssertionError:
458 print '*** codec "%s" failed round-trip' % encoding
459 except ValueError,why:
460 print '*** codec for "%s" failed: %s' % (encoding, why)
461
462print 'done.'
463
464print 'Testing standard mapping codecs...',
465
466print '0-127...',
467s = ''.join(map(chr, range(128)))
468for encoding in (
469 'cp037', 'cp1026',
470 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
471 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000472 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000473 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
474 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
475 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
476 'mac_cyrillic', 'mac_latin2',
477
478 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
479 'cp1256', 'cp1257', 'cp1258',
480 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
481
482 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
483 'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000484
Guido van Rossum9e896b32000-04-05 20:11:21 +0000485 ### These have undefined mappings:
486 #'cp424',
Fred Drake004d5e62000-10-23 17:22:08 +0000487
Guido van Rossum9e896b32000-04-05 20:11:21 +0000488 ):
489 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000490 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000491 except AssertionError:
492 print '*** codec "%s" failed round-trip' % encoding
493 except ValueError,why:
494 print '*** codec for "%s" failed: %s' % (encoding, why)
495
496print '128-255...',
497s = ''.join(map(chr, range(128,256)))
498for encoding in (
499 'cp037', 'cp1026',
500 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
501 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
Fred Drake004d5e62000-10-23 17:22:08 +0000502 'cp863', 'cp865', 'cp866',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000503 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000504 'iso8859_2', 'iso8859_4', 'iso8859_5',
Marc-André Lemburga866df82001-01-03 21:29:14 +0000505 'iso8859_9', 'koi8_r', 'latin_1',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000506 'mac_cyrillic', 'mac_latin2',
Fred Drake004d5e62000-10-23 17:22:08 +0000507
Guido van Rossum9e896b32000-04-05 20:11:21 +0000508 ### These have undefined mappings:
509 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
510 #'cp1256', 'cp1257', 'cp1258',
511 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
Tim Petersd2bf3b72001-01-18 02:22:22 +0000512 #'iso8859_3', 'iso8859_6', 'iso8859_7',
Guido van Rossum9e896b32000-04-05 20:11:21 +0000513 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
Fred Drake004d5e62000-10-23 17:22:08 +0000514
Guido van Rossum9e896b32000-04-05 20:11:21 +0000515 ### These fail the round-trip:
516 #'cp1006', 'cp875', 'iso8859_8',
Fred Drake004d5e62000-10-23 17:22:08 +0000517
Guido van Rossum9e896b32000-04-05 20:11:21 +0000518 ):
519 try:
Marc-André Lemburg36619082001-01-17 19:11:13 +0000520 verify(unicode(s,encoding).encode(encoding) == s)
Guido van Rossum9e896b32000-04-05 20:11:21 +0000521 except AssertionError:
522 print '*** codec "%s" failed round-trip' % encoding
523 except ValueError,why:
524 print '*** codec for "%s" failed: %s' % (encoding, why)
Guido van Rossumd8855fd2000-03-24 22:14:19 +0000525
526print 'done.'
Fred Drakee0243e22000-04-13 14:11:56 +0000527
# Adjacent string literals: every mix of plain and unicode literals must
# compare equal to the corresponding single unicode literal.  The operands
# have to stay written as adjacent literals -- that juxtaposition is the
# thing being tested.
print 'Testing Unicode string concatenation...',
verify((u"abc" u"def") == u"abcdef")
verify(("abc" u"def") == u"abcdef")
verify((u"abc" "def") == u"abcdef")
verify((u"abc" u"def" "ghi") == u"abcdefghi")
verify(("abc" "def" u"ghi") == u"abcdefghi")
print 'done.'