blob: f8563f61ae65d4a99573384a35ee07a9f24b8ed4 [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
2import unittest
Marc-André Lemburga37171d2001-06-19 20:09:28 +00003import codecs
Walter Dörwald9ae019b2006-03-18 14:22:26 +00004import sys, StringIO, _testcapi
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    Minimal FIFO pipe: data written at one end is read back, in the same
    order, from the other end.
    """
    def __init__(self):
        # Everything that has been written but not yet read.
        self._buffer = ""

    def write(self, chars):
        """Append chars to the end of the pending data."""
        self._buffer = self._buffer + chars

    def read(self, size=-1):
        """Remove and return up to size leading characters.

        A negative size (the default) drains the whole buffer.
        """
        pending = self._buffer
        if size < 0:
            self._buffer = ""
            return pending
        self._buffer = pending[size:]
        return pending[:size]
25
Walter Dörwalde57d7b12004-12-21 22:24:00 +000026class ReadTest(unittest.TestCase):
27 def check_partial(self, input, partialresults):
Walter Dörwald69652032004-09-07 20:24:22 +000028 # get a StreamReader for the encoding and feed the bytestring version
Walter Dörwaldfc7e72d2007-11-19 12:14:05 +000029 # of input to the reader byte by byte. Read everything available from
Walter Dörwald69652032004-09-07 20:24:22 +000030 # the StreamReader and check that the results equal the appropriate
31 # entries from partialresults.
32 q = Queue()
Walter Dörwalde57d7b12004-12-21 22:24:00 +000033 r = codecs.getreader(self.encoding)(q)
Walter Dörwald69652032004-09-07 20:24:22 +000034 result = u""
Walter Dörwalde57d7b12004-12-21 22:24:00 +000035 for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
Walter Dörwald69652032004-09-07 20:24:22 +000036 q.write(c)
37 result += r.read()
38 self.assertEqual(result, partialresult)
39 # check that there's nothing left in the buffers
40 self.assertEqual(r.read(), u"")
41 self.assertEqual(r.bytebuffer, "")
42 self.assertEqual(r.charbuffer, u"")
43
Walter Dörwaldabb02e52006-03-15 11:35:15 +000044 # do the check again, this time using a incremental decoder
45 d = codecs.getincrementaldecoder(self.encoding)()
46 result = u""
47 for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
48 result += d.decode(c)
49 self.assertEqual(result, partialresult)
50 # check that there's nothing left in the buffers
51 self.assertEqual(d.decode("", True), u"")
52 self.assertEqual(d.buffer, "")
53
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +000054 # Check whether the reset method works properly
Walter Dörwaldabb02e52006-03-15 11:35:15 +000055 d.reset()
56 result = u""
57 for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
58 result += d.decode(c)
59 self.assertEqual(result, partialresult)
60 # check that there's nothing left in the buffers
61 self.assertEqual(d.decode("", True), u"")
62 self.assertEqual(d.buffer, "")
63
64 # check iterdecode()
65 encoded = input.encode(self.encoding)
66 self.assertEqual(
67 input,
68 u"".join(codecs.iterdecode(encoded, self.encoding))
69 )
70
Walter Dörwalde57d7b12004-12-21 22:24:00 +000071 def test_readline(self):
72 def getreader(input):
73 stream = StringIO.StringIO(input.encode(self.encoding))
74 return codecs.getreader(self.encoding)(stream)
75
Walter Dörwaldca199432006-03-06 22:39:12 +000076 def readalllines(input, keepends=True, size=None):
Walter Dörwalde57d7b12004-12-21 22:24:00 +000077 reader = getreader(input)
78 lines = []
79 while True:
Walter Dörwaldca199432006-03-06 22:39:12 +000080 line = reader.readline(size=size, keepends=keepends)
Walter Dörwalde57d7b12004-12-21 22:24:00 +000081 if not line:
82 break
83 lines.append(line)
Walter Dörwaldca199432006-03-06 22:39:12 +000084 return "|".join(lines)
Walter Dörwalde57d7b12004-12-21 22:24:00 +000085
86 s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
Walter Dörwaldca199432006-03-06 22:39:12 +000087 sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
88 sexpectednoends = u"foo|bar|baz|spam|eggs"
89 self.assertEqual(readalllines(s, True), sexpected)
90 self.assertEqual(readalllines(s, False), sexpectednoends)
91 self.assertEqual(readalllines(s, True, 10), sexpected)
92 self.assertEqual(readalllines(s, False, 10), sexpectednoends)
Walter Dörwalde57d7b12004-12-21 22:24:00 +000093
94 # Test long lines (multiple calls to read() in readline())
95 vw = []
96 vwo = []
97 for (i, lineend) in enumerate(u"\n \r\n \r \u2028".split()):
98 vw.append((i*200)*u"\3042" + lineend)
99 vwo.append((i*200)*u"\3042")
100 self.assertEqual(readalllines("".join(vw), True), "".join(vw))
101 self.assertEqual(readalllines("".join(vw), False),"".join(vwo))
102
103 # Test lines where the first read might end with \r, so the
104 # reader has to look ahead whether this is a lone \r or a \r\n
105 for size in xrange(80):
106 for lineend in u"\n \r\n \r \u2028".split():
Walter Dörwald7a6dc132005-04-04 21:38:47 +0000107 s = 10*(size*u"a" + lineend + u"xxx\n")
108 reader = getreader(s)
109 for i in xrange(10):
110 self.assertEqual(
111 reader.readline(keepends=True),
112 size*u"a" + lineend,
113 )
114 reader = getreader(s)
115 for i in xrange(10):
116 self.assertEqual(
117 reader.readline(keepends=False),
118 size*u"a",
119 )
120
121 def test_bug1175396(self):
122 s = [
123 '<%!--===================================================\r\n',
124 ' BLOG index page: show recent articles,\r\n',
125 ' today\'s articles, or articles of a specific date.\r\n',
126 '========================================================--%>\r\n',
127 '<%@inputencoding="ISO-8859-1"%>\r\n',
128 '<%@pagetemplate=TEMPLATE.y%>\r\n',
129 '<%@import=import frog.util, frog%>\r\n',
130 '<%@import=import frog.objects%>\r\n',
131 '<%@import=from frog.storageerrors import StorageError%>\r\n',
132 '<%\r\n',
133 '\r\n',
134 'import logging\r\n',
135 'log=logging.getLogger("Snakelets.logger")\r\n',
136 '\r\n',
137 '\r\n',
138 'user=self.SessionCtx.user\r\n',
139 'storageEngine=self.SessionCtx.storageEngine\r\n',
140 '\r\n',
141 '\r\n',
142 'def readArticlesFromDate(date, count=None):\r\n',
143 ' entryids=storageEngine.listBlogEntries(date)\r\n',
144 ' entryids.reverse() # descending\r\n',
145 ' if count:\r\n',
146 ' entryids=entryids[:count]\r\n',
147 ' try:\r\n',
148 ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
149 ' except StorageError,x:\r\n',
150 ' log.error("Error loading articles: "+str(x))\r\n',
151 ' self.abort("cannot load articles")\r\n',
152 '\r\n',
153 'showdate=None\r\n',
154 '\r\n',
155 'arg=self.Request.getArg()\r\n',
156 'if arg=="today":\r\n',
157 ' #-------------------- TODAY\'S ARTICLES\r\n',
158 ' self.write("<h2>Today\'s articles</h2>")\r\n',
159 ' showdate = frog.util.isodatestr() \r\n',
160 ' entries = readArticlesFromDate(showdate)\r\n',
161 'elif arg=="active":\r\n',
162 ' #-------------------- ACTIVE ARTICLES redirect\r\n',
163 ' self.Yredirect("active.y")\r\n',
164 'elif arg=="login":\r\n',
165 ' #-------------------- LOGIN PAGE redirect\r\n',
166 ' self.Yredirect("login.y")\r\n',
167 'elif arg=="date":\r\n',
168 ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
169 ' showdate = self.Request.getParameter("date")\r\n',
170 ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
171 ' entries = readArticlesFromDate(showdate)\r\n',
172 'else:\r\n',
173 ' #-------------------- RECENT ARTICLES\r\n',
174 ' self.write("<h2>Recent articles</h2>")\r\n',
175 ' dates=storageEngine.listBlogEntryDates()\r\n',
176 ' if dates:\r\n',
177 ' entries=[]\r\n',
178 ' SHOWAMOUNT=10\r\n',
179 ' for showdate in dates:\r\n',
180 ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
181 ' if len(entries)>=SHOWAMOUNT:\r\n',
182 ' break\r\n',
183 ' \r\n',
184 ]
185 stream = StringIO.StringIO("".join(s).encode(self.encoding))
186 reader = codecs.getreader(self.encoding)(stream)
187 for (i, line) in enumerate(reader):
188 self.assertEqual(line, s[i])
Walter Dörwalde57d7b12004-12-21 22:24:00 +0000189
190 def test_readlinequeue(self):
191 q = Queue()
192 writer = codecs.getwriter(self.encoding)(q)
193 reader = codecs.getreader(self.encoding)(q)
194
195 # No lineends
196 writer.write(u"foo\r")
197 self.assertEqual(reader.readline(keepends=False), u"foo")
198 writer.write(u"\nbar\r")
Walter Dörwald43148c82005-04-21 21:45:36 +0000199 self.assertEqual(reader.readline(keepends=False), u"")
Walter Dörwalde57d7b12004-12-21 22:24:00 +0000200 self.assertEqual(reader.readline(keepends=False), u"bar")
201 writer.write(u"baz")
202 self.assertEqual(reader.readline(keepends=False), u"baz")
203 self.assertEqual(reader.readline(keepends=False), u"")
204
205 # Lineends
206 writer.write(u"foo\r")
207 self.assertEqual(reader.readline(keepends=True), u"foo\r")
208 writer.write(u"\nbar\r")
Walter Dörwald43148c82005-04-21 21:45:36 +0000209 self.assertEqual(reader.readline(keepends=True), u"\n")
Walter Dörwalde57d7b12004-12-21 22:24:00 +0000210 self.assertEqual(reader.readline(keepends=True), u"bar\r")
211 writer.write(u"baz")
212 self.assertEqual(reader.readline(keepends=True), u"baz")
213 self.assertEqual(reader.readline(keepends=True), u"")
214 writer.write(u"foo\r\n")
215 self.assertEqual(reader.readline(keepends=True), u"foo\r\n")
216
Walter Dörwald9fa09462005-01-10 12:01:39 +0000217 def test_bug1098990_a(self):
218 s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
219 s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
220 s3 = u"next line.\r\n"
221
222 s = (s1+s2+s3).encode(self.encoding)
223 stream = StringIO.StringIO(s)
224 reader = codecs.getreader(self.encoding)(stream)
225 self.assertEqual(reader.readline(), s1)
226 self.assertEqual(reader.readline(), s2)
227 self.assertEqual(reader.readline(), s3)
228 self.assertEqual(reader.readline(), u"")
229
230 def test_bug1098990_b(self):
231 s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
232 s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
233 s3 = u"stillokay:bbbbxx\r\n"
234 s4 = u"broken!!!!badbad\r\n"
235 s5 = u"againokay.\r\n"
236
237 s = (s1+s2+s3+s4+s5).encode(self.encoding)
238 stream = StringIO.StringIO(s)
239 reader = codecs.getreader(self.encoding)(stream)
240 self.assertEqual(reader.readline(), s1)
241 self.assertEqual(reader.readline(), s2)
242 self.assertEqual(reader.readline(), s3)
243 self.assertEqual(reader.readline(), s4)
244 self.assertEqual(reader.readline(), s5)
245 self.assertEqual(reader.readline(), u"")
246
class UTF32Test(ReadTest):
    """Tests for the "utf-32" codec, which writes and expects a BOM."""
    encoding = "utf-32"

    # u"spamspam" with exactly one leading BOM, little and big endian.
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must emit a single BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Input that starts with 0xff bytes only is rejected on read()."""
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Byte-wise decoding: four BOM bytes, then four bytes per char."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """'replace' and 'ignore' both consume a stray single byte."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_32_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_32_decode('\x01', 'ignore', True))

    def test_errors(self):
        """A single byte with final=True is an error in strict mode."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)
317
class UTF32LETest(ReadTest):
    """ReadTest run against the "utf-32-le" codec."""
    encoding = "utf-32-le"

    def test_partial(self):
        # Four bytes per character: the decoded text grows by one
        # character on every fourth byte fed in.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            3 * [u""] +
            4 * [u"\x00"] +
            4 * [u"\x00\xff"] +
            4 * [u"\x00\xff\u0100"] +
            [text]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        # Least significant byte comes first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x03\x02\x01\x00")

    def test_errors(self):
        # A single byte with final=True must be rejected in strict mode.
        decoder_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
                          *decoder_args)
350
class UTF32BETest(ReadTest):
    """ReadTest run against the "utf-32-be" codec."""
    encoding = "utf-32-be"

    def test_partial(self):
        # Four bytes per character: the decoded text grows by one
        # character on every fourth byte fed in.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            3 * [u""] +
            4 * [u"\x00"] +
            4 * [u"\x00\xff"] +
            4 * [u"\x00\xff\u0100"] +
            [text]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        # Most significant byte comes first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x00\x01\x02\x03")

    def test_errors(self):
        # A single byte with final=True must be rejected in strict mode.
        decoder_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
                          *decoder_args)
383
class UTF16Test(ReadTest):
    """Tests for the "utf-16" codec, which writes and expects a BOM."""
    encoding = "utf-16"

    # u"spamspam" with exactly one leading BOM, little and big endian.
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must emit a single BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Input that starts with 0xff bytes only is rejected on read()."""
        s = StringIO.StringIO("\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO("\xff\xff\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Byte-wise decoding: two BOM bytes, then two bytes per char."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read => byteorder known
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """'replace' and 'ignore' both consume a stray single byte."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_16_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_16_decode('\x01', 'ignore', True))

    def test_errors(self):
        """A single byte with final=True is an error in strict mode."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, "\xff", "strict", True)

    def test_bug691291(self):
        # Files are always opened in binary mode, even if no binary mode was
        # specified. This means that no automatic conversion of '\n' is done
        # on reading and writing.
        s1 = u'Hello\r\nworld\r\n'

        s = s1.encode(self.encoding)
        try:
            with open(test_support.TESTFN, 'wb') as fp:
                fp.write(s)
            with codecs.open(test_support.TESTFN, 'U', encoding=self.encoding) as reader:
                self.assertEqual(reader.read(), s1)
        finally:
            # remove the scratch file whether or not the assertions passed
            test_support.unlink(test_support.TESTFN)
454
class UTF16LETest(ReadTest):
    """ReadTest run against the "utf-16-le" codec."""
    encoding = "utf-16-le"

    def test_partial(self):
        # Two bytes per character: the decoded text grows by one
        # character on every second byte fed in.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            [u""] +
            2 * [u"\x00"] +
            2 * [u"\x00\xff"] +
            2 * [u"\x00\xff\u0100"] +
            [text]
        )
        self.check_partial(text, expected)

    def test_errors(self):
        # A single byte with final=True must be rejected in strict mode.
        decoder_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
                          *decoder_args)
475
class UTF16BETest(ReadTest):
    """ReadTest run against the "utf-16-be" codec."""
    encoding = "utf-16-be"

    def test_partial(self):
        # Two bytes per character: the decoded text grows by one
        # character on every second byte fed in.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            [u""] +
            2 * [u"\x00"] +
            2 * [u"\x00\xff"] +
            2 * [u"\x00\xff\u0100"] +
            [text]
        )
        self.check_partial(text, expected)

    def test_errors(self):
        # A single byte with final=True must be rejected in strict mode.
        decoder_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
                          *decoder_args)
496
class UTF8Test(ReadTest):
    """ReadTest run against the "utf-8" codec."""
    encoding = "utf-8"

    def test_partial(self):
        # One expected value per fed byte; the repeat counts below are
        # the number of entries that show the same decoded prefix.
        text = u"\x00\xff\u07ff\u0800\uffff"
        expected = (
            2 * [u"\x00"] +
            2 * [u"\x00\xff"] +
            3 * [u"\x00\xff\u07ff"] +
            3 * [u"\x00\xff\u07ff\u0800"] +
            [text]
        )
        self.check_partial(text, expected)
517
class UTF7Test(ReadTest):
    """ReadTest run against the "utf-7" codec."""
    encoding = "utf-7"

    def test_partial(self):
        # One expected value per byte of the encoded form of the text.
        text = u"a+-b"
        expected = [u"a", u"a", u"a+", u"a+-", text]
        self.check_partial(text, expected)
Walter Dörwalde22d3392005-11-17 08:52:34 +0000532
533class UTF16ExTest(unittest.TestCase):
534
535 def test_errors(self):
536 self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, "\xff", "strict", 0, True)
537
538 def test_bad_args(self):
539 self.assertRaises(TypeError, codecs.utf_16_ex_decode)
540
541class ReadBufferTest(unittest.TestCase):
542
543 def test_array(self):
544 import array
545 self.assertEqual(
546 codecs.readbuffer_encode(array.array("c", "spam")),
547 ("spam", 4)
548 )
549
550 def test_empty(self):
551 self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
552
553 def test_bad_args(self):
554 self.assertRaises(TypeError, codecs.readbuffer_encode)
555 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
556
557class CharBufferTest(unittest.TestCase):
558
559 def test_string(self):
560 self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
561
562 def test_empty(self):
563 self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
564
565 def test_bad_args(self):
566 self.assertRaises(TypeError, codecs.charbuffer_encode)
567 self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
568
class UTF8SigTest(ReadTest):
    """Tests for the "utf-8-sig" codec (UTF-8 with an optional signature)."""
    encoding = "utf-8-sig"

    def test_partial(self):
        """Byte-wise decoding: only the first BOM is swallowed."""
        self.check_partial(
            u"\ufeff\x00\xff\u07ff\u0800\uffff",
            [
                u"",
                u"",
                u"", # First BOM has been read and skipped
                u"",
                u"",
                u"\ufeff", # Second BOM has been read and emitted
                u"\ufeff\x00", # "\x00" read and emitted
                u"\ufeff\x00", # First byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_bug1601501(self):
        # SF bug #1601501: check that the codec works with a buffer
        unicode("\xef\xbb\xbf", "utf-8-sig")

    def test_bom(self):
        """The incremental decoder strips the signature the encoder adds."""
        d = codecs.getincrementaldecoder("utf-8-sig")()
        s = u"spam"
        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)

    def _check_stream(self, bytestring, unistring):
        """Read bytestring through a utf-8-sig StreamReader in chunks.

        The stream is read once with read() and no argument, and once for
        each of several sizes passed to read(size); the concatenated
        chunks must equal unistring every time.
        """
        reader = codecs.getreader("utf-8-sig")
        for sizehint in [None] + range(1, 11) + \
                        [64, 128, 256, 512, 1024]:
            istream = reader(StringIO.StringIO(bytestring))
            ostream = StringIO.StringIO()
            while True:
                if sizehint is not None:
                    data = istream.read(sizehint)
                else:
                    data = istream.read()

                if not data:
                    break
                ostream.write(data)

            got = ostream.getvalue()
            self.assertEqual(got, unistring)

    def test_stream_bom(self):
        """Input that starts with a BOM: the BOM must not be returned."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = codecs.BOM_UTF8 + "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream(bytestring, unistring)

    def test_stream_bare(self):
        """Input without a BOM decodes to the same text."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream(bytestring, unistring)
648
Walter Dörwald8709a422002-09-03 13:53:40 +0000649class EscapeDecodeTest(unittest.TestCase):
Walter Dörwalde22d3392005-11-17 08:52:34 +0000650 def test_empty(self):
Walter Dörwald8709a422002-09-03 13:53:40 +0000651 self.assertEquals(codecs.escape_decode(""), ("", 0))
652
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000653class RecodingTest(unittest.TestCase):
654 def test_recoding(self):
655 f = StringIO.StringIO()
656 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
657 f2.write(u"a")
658 f2.close()
659 # Python used to crash on this at exit because of a refcount
660 # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000661
# From RFC 3492
# Each entry is a pair (unicode text, punycode form of that text).
punycode_testcases = [
    # A Arabic (Egyptian):
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),
    # B Chinese (simplified):
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),
    # C Chinese (traditional):
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),
    # D Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),
    # E Hebrew:
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),
    # F Hindi (Devanagari):
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
     u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
     u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
     u"\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    #(G) Japanese (kanji and hiragana):
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
     u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables):
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic):
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    # T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
    # <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    #(L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]
765
766for i in punycode_testcases:
767 if len(i)!=2:
768 print repr(i)
769
class PunycodeTest(unittest.TestCase):
    """Round-trip the punycode_testcases vectors through the codec."""

    def test_encode(self):
        for uni, puny in punycode_testcases:
            # Need to convert both strings to lower case, since
            # some of the extended encodings use upper case, but our
            # code produces only lower case. Converting just puny to
            # lower is also insufficient, since some of the input characters
            # are upper case.
            self.assertEqual(uni.encode("punycode").lower(), puny.lower())

    def test_decode(self):
        for uni, puny in punycode_testcases:
            self.assertEqual(uni, puny.decode("punycode"))
783
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000784class UnicodeInternalTest(unittest.TestCase):
785 def test_bug1251300(self):
786 # Decoding with unicode_internal used to not correctly handle "code
787 # points" above 0x10ffff on UCS-4 builds.
788 if sys.maxunicode > 0xffff:
789 ok = [
790 ("\x00\x10\xff\xff", u"\U0010ffff"),
791 ("\x00\x00\x01\x01", u"\U00000101"),
792 ("", u""),
793 ]
794 not_ok = [
795 "\x7f\xff\xff\xff",
796 "\x80\x00\x00\x00",
797 "\x81\x00\x00\x00",
798 "\x00",
799 "\x00\x00\x00\x00\x00",
800 ]
801 for internal, uni in ok:
802 if sys.byteorder == "little":
803 internal = "".join(reversed(internal))
804 self.assertEquals(uni, internal.decode("unicode_internal"))
805 for internal in not_ok:
806 if sys.byteorder == "little":
807 internal = "".join(reversed(internal))
808 self.assertRaises(UnicodeDecodeError, internal.decode,
809 "unicode_internal")
810
811 def test_decode_error_attributes(self):
812 if sys.maxunicode > 0xffff:
813 try:
814 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
815 except UnicodeDecodeError, ex:
816 self.assertEquals("unicode_internal", ex.encoding)
817 self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
818 self.assertEquals(4, ex.start)
819 self.assertEquals(8, ex.end)
820 else:
821 self.fail()
822
823 def test_decode_callback(self):
824 if sys.maxunicode > 0xffff:
825 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
826 decoder = codecs.getdecoder("unicode_internal")
827 ab = u"ab".encode("unicode_internal")
828 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
829 "UnicodeInternalTest")
830 self.assertEquals((u"ab", 12), ignored)
831
Walter Dörwalda7fb4082009-05-06 14:28:24 +0000832 def test_encode_length(self):
833 # Issue 3739
834 encoder = codecs.getencoder("unicode_internal")
835 self.assertEquals(encoder(u"a")[1], 1)
836 self.assertEquals(encoder(u"\xe9\u0142")[1], 2)
837
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000838 encoder = codecs.getencoder("string-escape")
839 self.assertEquals(encoder(r'\x00')[1], 4)
840
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
#
# Each entry is an (input, expected) pair of UTF-8 encoded byte strings and
# is consumed by NameprepTest.test_nameprep:
#   * expected is None  -> nameprep(input) must raise UnicodeError
#   * (None, None)      -> placeholder for a vector that is skipped
# The position in the list determines the "3.N" number used in failure
# messages, so skipped vectors keep a placeholder instead of being removed.
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE',
     'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f',
     'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0',
     'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba',
     '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
     '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7',
     '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    ('\xc7\xb0',
     '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90',
     '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0',
     '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96',
     '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96',
     '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ',
     ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0',
     ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80',
     None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80',
     ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b',
     ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80',
     ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f',
     '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85',
     None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e',
     None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf',
     ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5',
     None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3',
     None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4',
     None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4',
     None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe',
     None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf',
     None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82',
     None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd',
     None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5',
     None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81',
     '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e',
     None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa',
     None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81',
     None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82',
     None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar',
     None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar',
     None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    # NOTE(review): the input bytes \xef\xb9\xb6 decode to U+FE76, not the
    # U+FB38 named in the heading above -- confirm against the draft.
    ('foo\xef\xb9\xb6bar',
     'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71',
     None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8',
     '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
993
994
995class NameprepTest(unittest.TestCase):
996 def test_nameprep(self):
997 from encodings.idna import nameprep
998 for pos, (orig, prepped) in enumerate(nameprep_tests):
999 if orig is None:
1000 # Skipped
1001 continue
1002 # The Unicode strings are given in UTF-8
1003 orig = unicode(orig, "utf-8")
1004 if prepped is None:
1005 # Input contains prohibited characters
1006 self.assertRaises(UnicodeError, nameprep, orig)
1007 else:
1008 prepped = unicode(prepped, "utf-8")
1009 try:
1010 self.assertEquals(nameprep(orig), prepped)
1011 except Exception,e:
1012 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
1013
Walter Dörwald78a0be62006-04-14 18:25:39 +00001014class IDNACodecTest(unittest.TestCase):
1015 def test_builtin_decode(self):
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001016 self.assertEquals(unicode("python.org", "idna"), u"python.org")
Walter Dörwald78a0be62006-04-14 18:25:39 +00001017 self.assertEquals(unicode("python.org.", "idna"), u"python.org.")
1018 self.assertEquals(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
1019 self.assertEquals(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
1020
1021 def test_builtin_encode(self):
1022 self.assertEquals(u"python.org".encode("idna"), "python.org")
1023 self.assertEquals("python.org.".encode("idna"), "python.org.")
1024 self.assertEquals(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
1025 self.assertEquals(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001026
Martin v. Löwis8b595142005-08-25 11:03:38 +00001027 def test_stream(self):
1028 import StringIO
1029 r = codecs.getreader("idna")(StringIO.StringIO("abc"))
1030 r.read(3)
1031 self.assertEquals(r.read(), u"")
1032
Walter Dörwald78a0be62006-04-14 18:25:39 +00001033 def test_incremental_decode(self):
1034 self.assertEquals(
1035 "".join(codecs.iterdecode("python.org", "idna")),
1036 u"python.org"
1037 )
1038 self.assertEquals(
1039 "".join(codecs.iterdecode("python.org.", "idna")),
1040 u"python.org."
1041 )
1042 self.assertEquals(
1043 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1044 u"pyth\xf6n.org."
1045 )
1046 self.assertEquals(
1047 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1048 u"pyth\xf6n.org."
1049 )
1050
1051 decoder = codecs.getincrementaldecoder("idna")()
1052 self.assertEquals(decoder.decode("xn--xam", ), u"")
1053 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1054 self.assertEquals(decoder.decode(u"rg"), u"")
1055 self.assertEquals(decoder.decode(u"", True), u"org")
1056
1057 decoder.reset()
1058 self.assertEquals(decoder.decode("xn--xam", ), u"")
1059 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1060 self.assertEquals(decoder.decode("rg."), u"org.")
1061 self.assertEquals(decoder.decode("", True), u"")
1062
1063 def test_incremental_encode(self):
1064 self.assertEquals(
1065 "".join(codecs.iterencode(u"python.org", "idna")),
1066 "python.org"
1067 )
1068 self.assertEquals(
1069 "".join(codecs.iterencode(u"python.org.", "idna")),
1070 "python.org."
1071 )
1072 self.assertEquals(
1073 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1074 "xn--pythn-mua.org."
1075 )
1076 self.assertEquals(
1077 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1078 "xn--pythn-mua.org."
1079 )
1080
1081 encoder = codecs.getincrementalencoder("idna")()
1082 self.assertEquals(encoder.encode(u"\xe4x"), "")
1083 self.assertEquals(encoder.encode(u"ample.org"), "xn--xample-9ta.")
1084 self.assertEquals(encoder.encode(u"", True), "org")
1085
1086 encoder.reset()
1087 self.assertEquals(encoder.encode(u"\xe4x"), "")
1088 self.assertEquals(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
1089 self.assertEquals(encoder.encode(u"", True), "")
1090
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001091class CodecsModuleTest(unittest.TestCase):
1092
1093 def test_decode(self):
1094 self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
1095 u'\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001096 self.assertRaises(TypeError, codecs.decode)
1097 self.assertEquals(codecs.decode('abc'), u'abc')
1098 self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
1099
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001100 def test_encode(self):
1101 self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
1102 '\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001103 self.assertRaises(TypeError, codecs.encode)
Walter Dörwald690402f2005-11-17 18:51:34 +00001104 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
Walter Dörwald063e1e82004-10-28 13:04:26 +00001105 self.assertEquals(codecs.encode(u'abc'), 'abc')
1106 self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
1107
1108 def test_register(self):
1109 self.assertRaises(TypeError, codecs.register)
Walter Dörwald690402f2005-11-17 18:51:34 +00001110 self.assertRaises(TypeError, codecs.register, 42)
Walter Dörwald063e1e82004-10-28 13:04:26 +00001111
1112 def test_lookup(self):
1113 self.assertRaises(TypeError, codecs.lookup)
1114 self.assertRaises(LookupError, codecs.lookup, "__spam__")
Walter Dörwald690402f2005-11-17 18:51:34 +00001115 self.assertRaises(LookupError, codecs.lookup, " ")
1116
1117 def test_getencoder(self):
1118 self.assertRaises(TypeError, codecs.getencoder)
1119 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
1120
1121 def test_getdecoder(self):
1122 self.assertRaises(TypeError, codecs.getdecoder)
1123 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
1124
1125 def test_getreader(self):
1126 self.assertRaises(TypeError, codecs.getreader)
1127 self.assertRaises(LookupError, codecs.getreader, "__spam__")
1128
1129 def test_getwriter(self):
1130 self.assertRaises(TypeError, codecs.getwriter)
1131 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001132
Hye-Shik Changaf5c7cf2004-10-17 23:51:21 +00001133class StreamReaderTest(unittest.TestCase):
1134
1135 def setUp(self):
1136 self.reader = codecs.getreader('utf-8')
1137 self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
1138
1139 def test_readlines(self):
1140 f = self.reader(self.stream)
1141 self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
1142
Georg Brandl8f99f812006-10-29 08:39:22 +00001143class EncodedFileTest(unittest.TestCase):
Tim Petersabd8a332006-11-03 02:32:46 +00001144
Georg Brandl8f99f812006-10-29 08:39:22 +00001145 def test_basic(self):
1146 f = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
Georg Brandl5b4e1c22006-10-29 09:32:16 +00001147 ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
1148 self.assertEquals(ef.read(), '\\\xd5\n\x00\x00\xae')
Georg Brandl8f99f812006-10-29 08:39:22 +00001149
1150 f = StringIO.StringIO()
1151 ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
1152 ef.write('\xc3\xbc')
1153 self.assertEquals(f.getvalue(), '\xfc')
1154
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001155class Str2StrTest(unittest.TestCase):
1156
1157 def test_read(self):
1158 sin = "\x80".encode("base64_codec")
1159 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1160 sout = reader.read()
1161 self.assertEqual(sout, "\x80")
Ezio Melottib0f5adc2010-01-24 16:58:36 +00001162 self.assertIsInstance(sout, str)
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001163
1164 def test_readline(self):
1165 sin = "\x80".encode("base64_codec")
1166 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1167 sout = reader.readline()
1168 self.assertEqual(sout, "\x80")
Ezio Melottib0f5adc2010-01-24 16:58:36 +00001169 self.assertIsInstance(sout, str)
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001170
# Codec names that BasicUnicodeTest iterates over.  The names are kept in
# sorted order and grouped by family; the literal is split on whitespace so
# the result is an ordinary list of str.
all_unicode_encodings = """
    ascii base64_codec big5 big5hkscs charmap
    cp037 cp1006 cp1026 cp1140
    cp1250 cp1251 cp1252 cp1253 cp1254 cp1255 cp1256 cp1257 cp1258
    cp424 cp437 cp500 cp737 cp775
    cp850 cp852 cp855 cp856 cp857
    cp860 cp861 cp862 cp863 cp864 cp865 cp866 cp869
    cp874 cp875 cp932 cp949 cp950
    euc_jis_2004 euc_jisx0213 euc_jp euc_kr
    gb18030 gb2312 gbk
    hex_codec hp_roman8 hz idna
    iso2022_jp iso2022_jp_1 iso2022_jp_2 iso2022_jp_2004 iso2022_jp_3
    iso2022_jp_ext iso2022_kr
    iso8859_1 iso8859_10 iso8859_11 iso8859_13 iso8859_14 iso8859_15
    iso8859_16 iso8859_2 iso8859_3 iso8859_4 iso8859_5 iso8859_6
    iso8859_7 iso8859_8 iso8859_9
    johab koi8_r koi8_u latin_1
    mac_cyrillic mac_greek mac_iceland mac_latin2 mac_roman mac_turkish
    palmos ptcp154 punycode
    raw_unicode_escape rot_13
    shift_jis shift_jis_2004 shift_jisx0213
    tis_620
    unicode_escape unicode_internal
    utf_16 utf_16_be utf_16_le utf_7 utf_8
""".split()

# "mbcs" is only added when the codecs module provides mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings += ["mbcs"]
1276
# Codecs that only operate on str, never on unicode.
all_string_encodings = ["quopri_codec", "string_escape", "uu_codec"]

# Deliberately absent from every list, because it is not supposed to
# work:
#    "undefined"

# Codecs that do not work in stateful (stream) mode.
broken_unicode_with_streams = [
    "base64_codec", "hex_codec", "punycode", "unicode_internal",
]
# Independent copy: later additions to the stream list must not leak into
# the list of codecs whose incremental coders are broken.
broken_incremental_coders = list(broken_unicode_with_streams)

# Codecs that accept no error handler other than "strict".
only_strict_mode = ["idna", "zlib_codec", "bz2_codec"]
1303
# The compression codecs are registered for testing only when the module
# they depend on can be imported.  Each one is added to the list of codecs
# that do not work in stream mode; broken_incremental_coders was copied
# before this point and is therefore left untouched.
try:
    import bz2
except ImportError:
    # bz2 is unavailable: leave "bz2_codec" out of the tested codecs.
    pass
else:
    all_unicode_encodings.append("bz2_codec")
    broken_unicode_with_streams.append("bz2_codec")

try:
    import zlib
except ImportError:
    # zlib is unavailable: leave "zlib_codec" out of the tested codecs.
    pass
else:
    all_unicode_encodings.append("zlib_codec")
    broken_unicode_with_streams.append("zlib_codec")
1319
1320class BasicUnicodeTest(unittest.TestCase):
1321 def test_basics(self):
1322 s = u"abc123" # all codecs should be able to encode these
1323 for encoding in all_unicode_encodings:
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001324 name = codecs.lookup(encoding).name
1325 if encoding.endswith("_codec"):
1326 name += "_codec"
1327 elif encoding == "latin_1":
1328 name = "latin_1"
1329 self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001330 (bytes, size) = codecs.getencoder(encoding)(s)
Walter Dörwalda7fb4082009-05-06 14:28:24 +00001331 self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001332 (chars, size) = codecs.getdecoder(encoding)(bytes)
1333 self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1334
1335 if encoding not in broken_unicode_with_streams:
1336 # check stream reader/writer
1337 q = Queue()
1338 writer = codecs.getwriter(encoding)(q)
1339 encodedresult = ""
1340 for c in s:
1341 writer.write(c)
1342 encodedresult += q.read()
1343 q = Queue()
1344 reader = codecs.getreader(encoding)(q)
1345 decodedresult = u""
1346 for c in encodedresult:
1347 q.write(c)
1348 decodedresult += reader.read()
1349 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1350
Georg Brandl2c9838e2006-10-29 14:39:09 +00001351 if encoding not in broken_incremental_coders:
Walter Dörwald9ae019b2006-03-18 14:22:26 +00001352 # check incremental decoder/encoder (fetched via the Python
1353 # and C API) and iterencode()/iterdecode()
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001354 try:
1355 encoder = codecs.getincrementalencoder(encoding)()
Walter Dörwald9ae019b2006-03-18 14:22:26 +00001356 cencoder = _testcapi.codec_incrementalencoder(encoding)
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001357 except LookupError: # no IncrementalEncoder
1358 pass
1359 else:
1360 # check incremental decoder/encoder
1361 encodedresult = ""
1362 for c in s:
1363 encodedresult += encoder.encode(c)
Walter Dörwald15be5ec2006-04-14 14:03:55 +00001364 encodedresult += encoder.encode(u"", True)
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001365 decoder = codecs.getincrementaldecoder(encoding)()
1366 decodedresult = u""
1367 for c in encodedresult:
1368 decodedresult += decoder.decode(c)
Walter Dörwald15be5ec2006-04-14 14:03:55 +00001369 decodedresult += decoder.decode("", True)
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001370 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1371
Walter Dörwald9ae019b2006-03-18 14:22:26 +00001372 # check C API
1373 encodedresult = ""
1374 for c in s:
1375 encodedresult += cencoder.encode(c)
Walter Dörwald15be5ec2006-04-14 14:03:55 +00001376 encodedresult += cencoder.encode(u"", True)
Walter Dörwald9ae019b2006-03-18 14:22:26 +00001377 cdecoder = _testcapi.codec_incrementaldecoder(encoding)
1378 decodedresult = u""
1379 for c in encodedresult:
1380 decodedresult += cdecoder.decode(c)
Walter Dörwald15be5ec2006-04-14 14:03:55 +00001381 decodedresult += cdecoder.decode("", True)
Walter Dörwald9ae019b2006-03-18 14:22:26 +00001382 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1383
Walter Dörwaldabb02e52006-03-15 11:35:15 +00001384 # check iterencode()/iterdecode()
1385 result = u"".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
1386 self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))
1387
1388 # check iterencode()/iterdecode() with empty string
1389 result = u"".join(codecs.iterdecode(codecs.iterencode(u"", encoding), encoding))
1390 self.assertEqual(result, u"")
1391
Walter Dörwald98c70ac2006-10-29 23:02:27 +00001392 if encoding not in only_strict_mode:
1393 # check incremental decoder/encoder with errors argument
1394 try:
1395 encoder = codecs.getincrementalencoder(encoding)("ignore")
1396 cencoder = _testcapi.codec_incrementalencoder(encoding, "ignore")
1397 except LookupError: # no IncrementalEncoder
1398 pass
1399 else:
1400 encodedresult = "".join(encoder.encode(c) for c in s)
1401 decoder = codecs.getincrementaldecoder(encoding)("ignore")
1402 decodedresult = u"".join(decoder.decode(c) for c in encodedresult)
1403 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
Tim Petersabd8a332006-11-03 02:32:46 +00001404
Walter Dörwald98c70ac2006-10-29 23:02:27 +00001405 encodedresult = "".join(cencoder.encode(c) for c in s)
1406 cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
1407 decodedresult = u"".join(cdecoder.decode(c) for c in encodedresult)
1408 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1409
Walter Dörwald729c31f2005-03-14 19:06:30 +00001410 def test_seek(self):
1411 # all codecs should be able to encode these
1412 s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
1413 for encoding in all_unicode_encodings:
1414 if encoding == "idna": # FIXME: See SF bug #1163178
1415 continue
1416 if encoding in broken_unicode_with_streams:
1417 continue
1418 reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
1419 for t in xrange(5):
1420 # Test that calling seek resets the internal codec state and buffers
1421 reader.seek(0, 0)
1422 line = reader.readline()
1423 self.assertEqual(s[:len(line)], line)
1424
Walter Dörwalde22d3392005-11-17 08:52:34 +00001425 def test_bad_decode_args(self):
1426 for encoding in all_unicode_encodings:
1427 decoder = codecs.getdecoder(encoding)
1428 self.assertRaises(TypeError, decoder)
1429 if encoding not in ("idna", "punycode"):
1430 self.assertRaises(TypeError, decoder, 42)
1431
1432 def test_bad_encode_args(self):
1433 for encoding in all_unicode_encodings:
1434 encoder = codecs.getencoder(encoding)
1435 self.assertRaises(TypeError, encoder)
1436
Neal Norwitz6d3d3392006-06-13 08:41:06 +00001437 def test_encoding_map_type_initialized(self):
1438 from encodings import cp1140
1439 # This used to crash, we are only verifying there's no crash.
1440 table_type = type(cp1140.encoding_table)
1441 self.assertEqual(table_type, table_type)
1442
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001443class BasicStrTest(unittest.TestCase):
1444 def test_basics(self):
1445 s = "abc123"
1446 for encoding in all_string_encodings:
1447 (bytes, size) = codecs.getencoder(encoding)(s)
1448 self.assertEqual(size, len(s))
1449 (chars, size) = codecs.getdecoder(encoding)(bytes)
1450 self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1451
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001452class CharmapTest(unittest.TestCase):
1453 def test_decode_with_string_map(self):
1454 self.assertEquals(
1455 codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
1456 (u"abc", 3)
1457 )
1458
1459 self.assertEquals(
1460 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
1461 (u"ab\ufffd", 3)
1462 )
1463
1464 self.assertEquals(
1465 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
1466 (u"ab\ufffd", 3)
1467 )
1468
1469 self.assertEquals(
1470 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
1471 (u"ab", 3)
1472 )
1473
1474 self.assertEquals(
1475 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
1476 (u"ab", 3)
1477 )
1478
1479 allbytes = "".join(chr(i) for i in xrange(256))
1480 self.assertEquals(
1481 codecs.charmap_decode(allbytes, "ignore", u""),
1482 (u"", len(allbytes))
1483 )
1484
Georg Brandl8f99f812006-10-29 08:39:22 +00001485class WithStmtTest(unittest.TestCase):
1486 def test_encodedfile(self):
1487 f = StringIO.StringIO("\xc3\xbc")
1488 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
1489 self.assertEquals(ef.read(), "\xfc")
1490
1491 def test_streamreaderwriter(self):
1492 f = StringIO.StringIO("\xc3\xbc")
1493 info = codecs.lookup("utf-8")
1494 with codecs.StreamReaderWriter(f, info.streamreader,
1495 info.streamwriter, 'strict') as srw:
1496 self.assertEquals(srw.read(), u"\xfc")
1497
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001498
def test_main():
    # Run every TestCase class of this module, in this fixed order.
    test_classes = (
        UTF32Test,
        UTF32LETest,
        UTF32BETest,
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        EscapeDecodeTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        IDNACodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        EncodedFileTest,
        Str2StrTest,
        BasicUnicodeTest,
        BasicStrTest,
        CharmapTest,
        WithStmtTest,
    )
    test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()