blob: a828edfa6ef07dd5cff022ddb9efa61f29b8251a [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
2import unittest
Marc-André Lemburga37171d2001-06-19 20:09:28 +00003import codecs
Walter Dörwaldc3ab0a72007-05-10 15:02:49 +00004import sys, _testcapi, io
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    queue: write bytes at one end, read bytes from the other end
    """
    def __init__(self, buffer):
        # The initial buffer is also what everything written later is
        # appended to with ``+=`` (the tests in this file pass b"").
        self._buffer = buffer

    def write(self, chars):
        """Append *chars* to the end of the queue."""
        self._buffer += chars

    def read(self, size=-1):
        """Remove and return data from the front of the queue.

        A negative *size* (the default) drains the whole queue; otherwise at
        most *size* items are removed and returned.
        """
        if size < 0:
            s = self._buffer
            self._buffer = self._buffer[:0]  # make empty
            return s
        s = self._buffer[:size]
        self._buffer = self._buffer[size:]
        return s
25
class MixInCheckStateHandling:
    """Helpers that check getstate()/setstate() of incremental codecs.

    Meant to be mixed into a ``unittest.TestCase`` subclass: the helpers use
    ``self.assertTrue`` and ``self.assertEqual``.
    """

    def check_state_handling_decode(self, encoding, u, s):
        """Check that decoding *s* can be interrupted and resumed anywhere.

        For every split position ``i`` the first ``i`` bytes of *s* are
        decoded, the decoder state is extracted and installed in a brand new
        decoder, and the remaining bytes are decoded with it. Both parts
        together must equal *u*.
        """
        for i in range(len(s)+1):
            d = codecs.getincrementaldecoder(encoding)()
            part1 = d.decode(s[:i])
            state = d.getstate()
            self.assertTrue(isinstance(state[1], int))
            # Check that the condition stated in the documentation for
            # IncrementalDecoder.getstate() holds
            if not state[1]:
                # reset decoder to the default state without anything buffered
                d.setstate((state[0][:0], 0))
                # Feeding the previous input may not produce any output
                self.assertTrue(not d.decode(state[0]))
                # The decoder must return to the same state
                self.assertEqual(state, d.getstate())
            # Create a new decoder and set it to the state
            # we extracted from the old one
            d = codecs.getincrementaldecoder(encoding)()
            d.setstate(state)
            part2 = d.decode(s[i:], True)
            self.assertEqual(u, part1+part2)

    def check_state_handling_encode(self, encoding, u, s):
        """Check that encoding *u* can be interrupted and resumed anywhere.

        For every split position ``i`` the first ``i`` characters of *u* are
        encoded, the encoder state is transferred to a new encoder, and the
        rest is encoded with it. Both parts together must equal *s*.
        """
        for i in range(len(u)+1):
            d = codecs.getincrementalencoder(encoding)()
            part1 = d.encode(u[:i])
            state = d.getstate()
            d = codecs.getincrementalencoder(encoding)()
            d.setstate(state)
            part2 = d.encode(u[i:], True)
            self.assertEqual(s, part1+part2)
58
class ReadTest(unittest.TestCase, MixInCheckStateHandling):
    """Reader tests shared by the Unicode codecs.

    Subclasses set the class attribute ``encoding``; every test here reads
    it through ``self.encoding``.
    """

    def _check_partial_incremental(self, decoder, encoded, partialresults):
        # Feed `encoded` to `decoder` one byte at a time and compare the
        # accumulated output with the matching entry of `partialresults`.
        result = ""
        for (c, partialresult) in zip(encoded, partialresults):
            result += decoder.decode(bytes([c]))
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(decoder.decode(b"", True), "")
        self.assertEqual(decoder.buffer, b"")

    def check_partial(self, input, partialresults):
        """Decode *input* byte by byte and compare against *partialresults*.

        *input* is encoded with ``self.encoding``; after the n-th byte has
        been fed, everything decoded so far must equal ``partialresults[n]``.
        The check is done with a StreamReader, with an incremental decoder
        (before and after reset()) and finally with codecs.iterdecode().
        """
        encoded = input.encode(self.encoding)

        # get a StreamReader for the encoding and feed the bytestring version
        # of input to the reader byte by byte. Read everything available from
        # the StreamReader and check that the results equal the appropriate
        # entries from partialresults.
        q = Queue(b"")
        r = codecs.getreader(self.encoding)(q)
        result = ""
        for (c, partialresult) in zip(encoded, partialresults):
            q.write(bytes([c]))
            result += r.read()
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(r.read(), "")
        self.assertEqual(r.bytebuffer, b"")
        self.assertEqual(r.charbuffer, "")

        # do the check again, this time using an incremental decoder
        d = codecs.getincrementaldecoder(self.encoding)()
        self._check_partial_incremental(d, encoded, partialresults)

        # Check whether the reset method works properly
        d.reset()
        self._check_partial_incremental(d, encoded, partialresults)

        # check iterdecode()
        self.assertEqual(
            input,
            "".join(codecs.iterdecode([bytes([c]) for c in encoded],
                                      self.encoding))
        )

    def test_readline(self):
        """Check StreamReader.readline() for all supported line ends."""
        def getreader(input):
            stream = io.BytesIO(input.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        def readalllines(input, keepends=True, size=None):
            # Read until readline() returns an empty string and return the
            # lines joined with "|" so that line boundaries stay visible.
            reader = getreader(input)
            lines = []
            while True:
                line = reader.readline(size=size, keepends=keepends)
                if not line:
                    break
                lines.append(line)
            return "|".join(lines)

        s = "foo\nbar\r\nbaz\rspam\u2028eggs"
        sexpected = "foo\n|bar\r\n|baz\r|spam\u2028|eggs"
        sexpectednoends = "foo|bar|baz|spam|eggs"
        self.assertEqual(readalllines(s, True), sexpected)
        self.assertEqual(readalllines(s, False), sexpectednoends)
        self.assertEqual(readalllines(s, True, 10), sexpected)
        self.assertEqual(readalllines(s, False, 10), sexpectednoends)

        # The line ends are spelled out as a tuple: splitting a string of
        # them with str.split() yields an empty list, because every one of
        # these characters counts as whitespace, and the loops below would
        # silently test nothing.
        lineends = ("\n", "\r\n", "\r", "\u2028")

        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            # "+200" keeps the first line non-empty; an empty line would
            # make readalllines() stop early when keepends is false.
            vw.append((i*200+200)*"\u3042" + lineend)
            vwo.append((i*200+200)*"\u3042")
        self.assertEqual(readalllines("".join(vw), True), "|".join(vw))
        self.assertEqual(readalllines("".join(vw), False), "|".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in range(80):
            for lineend in lineends:
                s = 10*(size*"a" + lineend + "xxx\n")
                reader = getreader(s)
                for i in range(10):
                    self.assertEqual(
                        reader.readline(keepends=True),
                        size*"a" + lineend,
                    )
                    # consume the "xxx" line that follows each tested line
                    self.assertEqual(
                        reader.readline(keepends=True),
                        "xxx\n",
                    )
                reader = getreader(s)
                for i in range(10):
                    self.assertEqual(
                        reader.readline(keepends=False),
                        size*"a",
                    )
                    self.assertEqual(
                        reader.readline(keepends=False),
                        "xxx",
                    )

    def test_bug1175396(self):
        """Iterating a StreamReader must return the lines unchanged."""
        s = [
            '<%!--===================================================\r\n',
            ' BLOG index page: show recent articles,\r\n',
            ' today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            ' entryids=storageEngine.listBlogEntries(date)\r\n',
            ' entryids.reverse() # descending\r\n',
            ' if count:\r\n',
            ' entryids=entryids[:count]\r\n',
            ' try:\r\n',
            ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            ' except StorageError,x:\r\n',
            ' log.error("Error loading articles: "+str(x))\r\n',
            ' self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            ' #-------------------- TODAY\'S ARTICLES\r\n',
            ' self.write("<h2>Today\'s articles</h2>")\r\n',
            ' showdate = frog.util.isodatestr() \r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            ' #-------------------- ACTIVE ARTICLES redirect\r\n',
            ' self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            ' #-------------------- LOGIN PAGE redirect\r\n',
            ' self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            ' showdate = self.Request.getParameter("date")\r\n',
            ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            ' #-------------------- RECENT ARTICLES\r\n',
            ' self.write("<h2>Recent articles</h2>")\r\n',
            ' dates=storageEngine.listBlogEntryDates()\r\n',
            ' if dates:\r\n',
            ' entries=[]\r\n',
            ' SHOWAMOUNT=10\r\n',
            ' for showdate in dates:\r\n',
            ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            ' if len(entries)>=SHOWAMOUNT:\r\n',
            ' break\r\n',
            ' \r\n',
        ]
        stream = io.BytesIO("".join(s).encode(self.encoding))
        reader = codecs.getreader(self.encoding)(stream)
        for (i, line) in enumerate(reader):
            self.assertEqual(line, s[i])

    def test_readlinequeue(self):
        """Check readline() when data arrives in pieces through a Queue."""
        q = Queue(b"")
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        # No lineends
        writer.write("foo\r")
        self.assertEqual(reader.readline(keepends=False), "foo")
        writer.write("\nbar\r")
        self.assertEqual(reader.readline(keepends=False), "")
        self.assertEqual(reader.readline(keepends=False), "bar")
        writer.write("baz")
        self.assertEqual(reader.readline(keepends=False), "baz")
        self.assertEqual(reader.readline(keepends=False), "")

        # Lineends
        writer.write("foo\r")
        self.assertEqual(reader.readline(keepends=True), "foo\r")
        writer.write("\nbar\r")
        self.assertEqual(reader.readline(keepends=True), "\n")
        self.assertEqual(reader.readline(keepends=True), "bar\r")
        writer.write("baz")
        self.assertEqual(reader.readline(keepends=True), "baz")
        self.assertEqual(reader.readline(keepends=True), "")
        writer.write("foo\r\n")
        self.assertEqual(reader.readline(keepends=True), "foo\r\n")

    def test_bug1098990_a(self):
        """readline() must return three \\r\\n terminated lines intact."""
        s1 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
        s2 = "offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
        s3 = "next line.\r\n"

        s = (s1+s2+s3).encode(self.encoding)
        stream = io.BytesIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), "")

    def test_bug1098990_b(self):
        """readline() must return five \\r\\n terminated lines intact."""
        s1 = "aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
        s2 = "bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
        s3 = "stillokay:bbbbxx\r\n"
        s4 = "broken!!!!badbad\r\n"
        s5 = "againokay.\r\n"

        s = (s1+s2+s3+s4+s5).encode(self.encoding)
        stream = io.BytesIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), s4)
        self.assertEqual(reader.readline(), s5)
        self.assertEqual(reader.readline(), "")
Walter Dörwald9fa09462005-01-10 12:01:39 +0000279
class UTF32Test(ReadTest):
    """Tests for the "utf-32" codec (byte order selected by a BOM)."""

    encoding = "utf-32"

    # "spamspam" encoded with a single leading BOM, little and big endian
    spamle = (b'\xff\xfe\x00\x00'
              b's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              b's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = (b'\x00\x00\xfe\xff'
              b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must emit a single BOM."""
        _, _, reader, writer = codecs.lookup(self.encoding)
        # encode some stream
        s = io.BytesIO()
        f = writer(s)
        f.write("spam")
        f.write("spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = io.BytesIO(d)
        f = reader(s)
        self.assertEqual(f.read(), "spamspam")

    def test_badbom(self):
        """Input that starts with 0xff bytes must raise UnicodeError."""
        s = io.BytesIO(4*b"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = io.BytesIO(8*b"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",  # first byte of BOM read
                "",  # second byte of BOM read
                "",  # third byte of BOM read
                "",  # fourth byte of BOM read => byteorder known
                "",
                "",
                "",
                "\x00",
                "\x00",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          b"\xff", "strict", True)

    def test_decoder_state(self):
        self.check_state_handling_decode(self.encoding,
                                         "spamspam", self.spamle)
        self.check_state_handling_decode(self.encoding,
                                         "spamspam", self.spambe)
350
class UTF32LETest(ReadTest):
    """Tests for the "utf-32-le" codec."""

    encoding = "utf-32-le"

    def test_partial(self):
        # One entry per encoded byte; a character only shows up once its
        # fourth byte has been fed.
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",
                "",
                "",
                "\x00",
                "\x00",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_simple(self):
        self.assertEqual("\U00010203".encode(self.encoding),
                         b"\x03\x02\x01\x00")

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
                          b"\xff", "strict", True)
383
class UTF32BETest(ReadTest):
    """Tests for the "utf-32-be" codec."""

    encoding = "utf-32-be"

    def test_partial(self):
        # One entry per encoded byte; a character only shows up once its
        # fourth byte has been fed.
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",
                "",
                "",
                "\x00",
                "\x00",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_simple(self):
        self.assertEqual("\U00010203".encode(self.encoding),
                         b"\x00\x01\x02\x03")

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
                          b"\xff", "strict", True)
416
class UTF16Test(ReadTest):
    """Tests for the "utf-16" codec (byte order selected by a BOM)."""

    encoding = "utf-16"

    # "spamspam" encoded with a single leading BOM, little and big endian
    spamle = b'\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = b'\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must emit a single BOM."""
        _, _, reader, writer = codecs.lookup(self.encoding)
        # encode some stream
        s = io.BytesIO()
        f = writer(s)
        f.write("spam")
        f.write("spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = io.BytesIO(d)
        f = reader(s)
        self.assertEqual(f.read(), "spamspam")

    def test_badbom(self):
        """Input that starts with 0xff bytes must raise UnicodeError."""
        s = io.BytesIO(b"\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = io.BytesIO(b"\xff\xff\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",  # first byte of BOM read
                "",  # second byte of BOM read => byteorder known
                "",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode,
                          b"\xff", "strict", True)

    def test_decoder_state(self):
        self.check_state_handling_decode(self.encoding,
                                         "spamspam", self.spamle)
        self.check_state_handling_decode(self.encoding,
                                         "spamspam", self.spambe)
Walter Dörwalde22d3392005-11-17 08:52:34 +0000473
class UTF16LETest(ReadTest):
    """Tests for the "utf-16-le" codec."""

    encoding = "utf-16-le"

    def test_partial(self):
        # One entry per encoded byte; a character only shows up once its
        # second byte has been fed.
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
                          b"\xff", "strict", True)
Walter Dörwalde22d3392005-11-17 08:52:34 +0000495
class UTF16BETest(ReadTest):
    """Tests for the "utf-16-be" codec."""

    encoding = "utf-16-be"

    def test_partial(self):
        # One entry per encoded byte; a character only shows up once its
        # second byte has been fed.
        self.check_partial(
            "\x00\xff\u0100\uffff",
            [
                "",
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u0100",
                "\x00\xff\u0100",
                "\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A truncated final chunk must raise UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
                          b"\xff", "strict", True)
Walter Dörwalde22d3392005-11-17 08:52:34 +0000517
class UTF8Test(ReadTest):
    """Tests for the "utf-8" codec."""

    encoding = "utf-8"

    def test_partial(self):
        # One entry per encoded byte; a character only shows up once the
        # last byte of its sequence has been fed.
        self.check_partial(
            "\x00\xff\u07ff\u0800\uffff",
            [
                "\x00",
                "\x00",
                "\x00\xff",
                "\x00\xff",
                "\x00\xff\u07ff",
                "\x00\xff\u07ff",
                "\x00\xff\u07ff",
                "\x00\xff\u07ff\u0800",
                "\x00\xff\u07ff\u0800",
                "\x00\xff\u07ff\u0800",
                "\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_decoder_state(self):
        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
        self.check_state_handling_decode(self.encoding,
                                         u, u.encode(self.encoding))
543
class UTF7Test(ReadTest):
    """Tests for the "utf-7" codec."""

    encoding = "utf-7"

    def test_partial(self):
        # One entry per encoded byte of "a+-b".
        self.check_partial(
            "a+-b",
            [
                "a",
                "a",
                "a+",
                "a+-",
                "a+-b",
            ]
        )
Walter Dörwalde22d3392005-11-17 08:52:34 +0000558
class UTF16ExTest(unittest.TestCase):
    """Argument and error handling of codecs.utf_16_ex_decode()."""

    def test_errors(self):
        # a single byte with final=True is a truncated code unit
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode,
                          b"\xff", "strict", 0, True)

    def test_bad_args(self):
        self.assertRaises(TypeError, codecs.utf_16_ex_decode)
566
class ReadBufferTest(unittest.TestCase):
    """Tests for codecs.readbuffer_encode()."""

    def test_array(self):
        import array
        self.assertEqual(
            codecs.readbuffer_encode(array.array("b", b"spam")),
            (b"spam", 4)
        )

    def test_empty(self):
        self.assertEqual(codecs.readbuffer_encode(""), (b"", 0))

    def test_bad_args(self):
        # a missing argument and an int argument are both rejected
        self.assertRaises(TypeError, codecs.readbuffer_encode)
        self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
582
# NOTE(review): codecs.charbuffer_encode is not available in every
# interpreter version; skip the whole class instead of failing with
# AttributeError where it is missing.
@unittest.skipUnless(hasattr(codecs, "charbuffer_encode"),
                     "codecs.charbuffer_encode is not available")
class CharBufferTest(unittest.TestCase):
    """Tests for codecs.charbuffer_encode()."""

    def test_string(self):
        self.assertEqual(codecs.charbuffer_encode(b"spam"), (b"spam", 4))

    def test_empty(self):
        self.assertEqual(codecs.charbuffer_encode(b""), (b"", 0))

    def test_bad_args(self):
        # a missing argument and an int argument are both rejected
        self.assertRaises(TypeError, codecs.charbuffer_encode)
        self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
594
class UTF8SigTest(ReadTest):
    """Tests for the "utf-8-sig" codec (UTF-8 with an optional BOM)."""

    encoding = "utf-8-sig"

    def test_partial(self):
        self.check_partial(
            "\ufeff\x00\xff\u07ff\u0800\uffff",
            [
                "",
                "",
                "",  # First BOM has been read and skipped
                "",
                "",
                "\ufeff",  # Second BOM has been read and emitted
                "\ufeff\x00",  # "\x00" read and emitted
                "\ufeff\x00",  # First byte of encoded "\xff" read
                "\ufeff\x00\xff",  # Second byte of encoded "\xff" read
                "\ufeff\x00\xff",  # First byte of encoded "\u07ff" read
                "\ufeff\x00\xff\u07ff",  # Second byte of encoded "\u07ff" read
                "\ufeff\x00\xff\u07ff",
                "\ufeff\x00\xff\u07ff",
                "\ufeff\x00\xff\u07ff\u0800",
                "\ufeff\x00\xff\u07ff\u0800",
                "\ufeff\x00\xff\u07ff\u0800",
                "\ufeff\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_bug1601501(self):
        # SF bug #1601501: check that the codec works with a buffer
        str(b"\xef\xbb\xbf", "utf-8-sig")

    def test_bom(self):
        d = codecs.getincrementaldecoder("utf-8-sig")()
        s = "spam"
        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)

    def _check_stream_read(self, bytestring, unistring):
        # Read `bytestring` through a utf-8-sig StreamReader with a range of
        # size hints (and without one) and check that the concatenated
        # chunks equal `unistring` every time.
        reader = codecs.getreader("utf-8-sig")
        for sizehint in [None] + list(range(1, 11)) + \
                        [64, 128, 256, 512, 1024]:
            istream = reader(io.BytesIO(bytestring))
            ostream = io.StringIO()
            while True:
                if sizehint is not None:
                    data = istream.read(sizehint)
                else:
                    data = istream.read()

                if not data:
                    break
                ostream.write(data)

            got = ostream.getvalue()
            self.assertEqual(got, unistring)

    def test_stream_bom(self):
        """Chunked reads of input that starts with a BOM drop the BOM."""
        unistring = "ABC\u00A1\u2200XYZ"
        bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream_read(bytestring, unistring)

    def test_stream_bare(self):
        """Chunked reads of input without a BOM decode it unchanged."""
        unistring = "ABC\u00A1\u2200XYZ"
        bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream_read(bytestring, unistring)
674
class EscapeDecodeTest(unittest.TestCase):
    """Tests for codecs.escape_decode()."""

    def test_empty(self):
        # escape_decode() produces bytes, so both the input and the expected
        # result are spelled as bytes literals.
        self.assertEqual(codecs.escape_decode(b""), (b"", 0))
Walter Dörwald3abcb012007-04-16 22:10:50 +0000678
class RecodingTest(unittest.TestCase):
    """Regression test for writing through codecs.EncodedFile()."""

    def test_recoding(self):
        # There is nothing to assert: the test passes when the write and
        # the close complete.
        f = io.BytesIO()
        f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
        f2.write("a")
        f2.close()
        # Python used to crash on this at exit because of a refcount
        # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000687
Martin v. Löwis2548c732003-04-18 10:39:54 +0000688# From RFC 3492
689punycode_testcases = [
690 # A Arabic (Egyptian):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000691 ("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
692 "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000693 b"egbpdaj6bu4bxfgehfvwxn"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000694 # B Chinese (simplified):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000695 ("\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000696 b"ihqwcrb4cv8a8dqg056pqjye"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000697 # C Chinese (traditional):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000698 ("\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000699 b"ihqwctvzc91f659drss3x8bo0yb"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000700 # D Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000701 ("\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
702 "\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
703 "\u0065\u0073\u006B\u0079",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000704 b"Proprostnemluvesky-uyb24dma41a"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000705 # E Hebrew:
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000706 ("\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
707 "\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
708 "\u05D1\u05E8\u05D9\u05EA",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000709 b"4dbcagdahymbxekheh6e0a7fei0b"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000710 # F Hindi (Devanagari):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000711 ("\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
Walter Dörwalda4c61282007-05-10 12:36:25 +0000712 "\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
713 "\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
714 "\u0939\u0948\u0902",
715 b"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000716
717 #(G) Japanese (kanji and hiragana):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000718 ("\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
Walter Dörwalda4c61282007-05-10 12:36:25 +0000719 "\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
720 b"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000721
722 # (H) Korean (Hangul syllables):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000723 ("\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
724 "\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
725 "\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000726 b"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
727 b"psd879ccm6fea98c"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000728
729 # (I) Russian (Cyrillic):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000730 ("\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
731 "\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
732 "\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
733 "\u0438",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000734 b"b1abfaaepdrnnbgefbaDotcwatmq2g4l"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000735
736 # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000737 ("\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
738 "\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
739 "\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
740 "\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
741 "\u0061\u00F1\u006F\u006C",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000742 b"PorqunopuedensimplementehablarenEspaol-fmd56a"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000743
744 # (K) Vietnamese:
745 # T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
746 # <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000747 ("\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
748 "\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
749 "\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
750 "\u0056\u0069\u1EC7\u0074",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000751 b"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000752
Martin v. Löwis2548c732003-04-18 10:39:54 +0000753 #(L) 3<nen>B<gumi><kinpachi><sensei>
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000754 ("\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000755 b"3B-ww4c5e180e575a65lsy2b"),
Tim Peters0eadaac2003-04-24 16:02:54 +0000756
Martin v. Löwis2548c732003-04-18 10:39:54 +0000757 # (M) <amuro><namie>-with-SUPER-MONKEYS
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000758 ("\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
759 "\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
760 "\u004F\u004E\u004B\u0045\u0059\u0053",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000761 b"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000762
763 # (N) Hello-Another-Way-<sorezore><no><basho>
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000764 ("\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
765 "\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
766 "\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000767 b"Hello-Another-Way--fc4qua05auwb3674vfr0b"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000768
769 # (O) <hitotsu><yane><no><shita>2
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000770 ("\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000771 b"2-u9tlzr9756bt3uc0v"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000772
773 # (P) Maji<de>Koi<suru>5<byou><mae>
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000774 ("\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
775 "\u308B\u0035\u79D2\u524D",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000776 b"MajiKoi5-783gue6qz075azm5e"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000777
778 # (Q) <pafii>de<runba>
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000779 ("\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000780 b"de-jg4avhby1noc0d"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000781
782 # (R) <sono><supiido><de>
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000783 ("\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000784 b"d9juau41awczczp"),
Martin v. Löwis2548c732003-04-18 10:39:54 +0000785
786 # (S) -> $1.00 <-
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000787 ("\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
788 "\u003C\u002D",
Walter Dörwalda4c61282007-05-10 12:36:25 +0000789 b"-> $1.00 <--")
Martin v. Löwis2548c732003-04-18 10:39:54 +0000790 ]
791
# Sanity check of the punycode table: every entry is expected to hold
# exactly two items; print the repr of any entry that does not.
for i in punycode_testcases:
    if len(i) == 2:
        continue
    print(repr(i))
Martin v. Löwis2548c732003-04-18 10:39:54 +0000795
796class PunycodeTest(unittest.TestCase):
797 def test_encode(self):
798 for uni, puny in punycode_testcases:
799 # Need to convert both strings to lower case, since
800 # some of the extended encodings use upper case, but our
801 # code produces only lower case. Converting just puny to
802 # lower is also insufficient, since some of the input characters
803 # are upper case.
Walter Dörwalda4c61282007-05-10 12:36:25 +0000804 self.assertEquals(
805 str(uni.encode("punycode"), "ascii").lower(),
806 str(puny, "ascii").lower()
807 )
Martin v. Löwis2548c732003-04-18 10:39:54 +0000808
809 def test_decode(self):
810 for uni, puny in punycode_testcases:
811 self.assertEquals(uni, puny.decode("punycode"))
Guido van Rossum04c70ad2007-08-29 14:04:40 +0000812 puny = puny.decode("ascii").encode("ascii")
813 self.assertEquals(uni, puny.decode("punycode"))
Martin v. Löwis2548c732003-04-18 10:39:54 +0000814
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000815class UnicodeInternalTest(unittest.TestCase):
816 def test_bug1251300(self):
817 # Decoding with unicode_internal used to not correctly handle "code
818 # points" above 0x10ffff on UCS-4 builds.
819 if sys.maxunicode > 0xffff:
820 ok = [
Walter Dörwald092a2252007-06-07 11:26:16 +0000821 (b"\x00\x10\xff\xff", "\U0010ffff"),
822 (b"\x00\x00\x01\x01", "\U00000101"),
823 (b"", ""),
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000824 ]
825 not_ok = [
Walter Dörwald092a2252007-06-07 11:26:16 +0000826 b"\x7f\xff\xff\xff",
827 b"\x80\x00\x00\x00",
828 b"\x81\x00\x00\x00",
829 b"\x00",
830 b"\x00\x00\x00\x00\x00",
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000831 ]
832 for internal, uni in ok:
833 if sys.byteorder == "little":
Walter Dörwald092a2252007-06-07 11:26:16 +0000834 internal = bytes(reversed(internal))
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000835 self.assertEquals(uni, internal.decode("unicode_internal"))
836 for internal in not_ok:
837 if sys.byteorder == "little":
Walter Dörwald092a2252007-06-07 11:26:16 +0000838 internal = bytes(reversed(internal))
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000839 self.assertRaises(UnicodeDecodeError, internal.decode,
840 "unicode_internal")
841
842 def test_decode_error_attributes(self):
843 if sys.maxunicode > 0xffff:
844 try:
Walter Dörwald092a2252007-06-07 11:26:16 +0000845 b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
Guido van Rossumb940e112007-01-10 16:19:56 +0000846 except UnicodeDecodeError as ex:
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000847 self.assertEquals("unicode_internal", ex.encoding)
Walter Dörwald092a2252007-06-07 11:26:16 +0000848 self.assertEquals(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000849 self.assertEquals(4, ex.start)
850 self.assertEquals(8, ex.end)
851 else:
852 self.fail()
853
854 def test_decode_callback(self):
855 if sys.maxunicode > 0xffff:
856 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
857 decoder = codecs.getdecoder("unicode_internal")
Guido van Rossum98297ee2007-11-06 21:34:58 +0000858 ab = "ab".encode("unicode_internal").decode()
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000859 ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
860 "ascii"),
861 "UnicodeInternalTest")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000862 self.assertEquals(("ab", 12), ignored)
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000863
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
# Each entry is an (input, expected output) pair of UTF-8 encoded bytes.
# The expected output is None when the input has to be rejected, and
# (None, None) stands in for a vector that is skipped.  The position in the
# list matters: entry N (counting from 1) is vector 3.N.
nameprep_tests = [
    # 3.1 Map to nothing.
    (b'foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     b'\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     b'\xb8\x8f\xef\xbb\xbf',
     b'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    (b'CAFE', b'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    (b'\xc3\x9f', b'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    (b'\xc4\xb0', b'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    (b'\xc5\x83\xcd\xba', b'\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    (b'j\xcc\x8c\xc2\xa0\xc2\xaa', b'\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    (b'\xe1\xbe\xb7', b'\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    (b'\xc7\xb0', b'\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    (b'\xce\x90', b'\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    (b'\xce\xb0', b'\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    (b'\xe1\xba\x96', b'\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    (b'\xe1\xbd\x96', b'\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (b' ', b' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    (b'\xc2\xa0', b' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    (b'\xe1\x9a\x80', None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    (b'\xe2\x80\x80', b' '),
    # 3.18 Zero Width Space U+200b.
    (b'\xe2\x80\x8b', b''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    (b'\xe3\x80\x80', b' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    (b'\x10\x7f', b'\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    (b'\xc2\x85', None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    (b'\xe1\xa0\x8e', None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    (b'\xef\xbb\xbf', b''),
    # 3.24 Non-ASCII control character U+1D175.
    (b'\xf0\x9d\x85\xb5', None),
    # 3.25 Plane 0 private use character U+F123.
    (b'\xef\x84\xa3', None),
    # 3.26 Plane 15 private use character U+F1234.
    (b'\xf3\xb1\x88\xb4', None),
    # 3.27 Plane 16 private use character U+10F234.
    (b'\xf4\x8f\x88\xb4', None),
    # 3.28 Non-character code point U+8FFFE.
    (b'\xf2\x8f\xbf\xbe', None),
    # 3.29 Non-character code point U+10FFFF.
    (b'\xf4\x8f\xbf\xbf', None),
    # 3.30 Surrogate code U+DF42.
    (b'\xed\xbd\x82', None),
    # 3.31 Non-plain text character U+FFFD.
    (b'\xef\xbf\xbd', None),
    # 3.32 Ideographic description character U+2FF5.
    (b'\xe2\xbf\xb5', None),
    # 3.33 Display property character U+0341.
    (b'\xcd\x81', b'\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    (b'\xe2\x80\x8e', None),
    # 3.35 Deprecated U+202A.
    (b'\xe2\x80\xaa', None),
    # 3.36 Language tagging character U+E0001.
    (b'\xf3\xa0\x80\x81', None),
    # 3.37 Language tagging character U+E0042.
    (b'\xf3\xa0\x81\x82', None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    (b'foo\xd6\xbebar', None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    (b'foo\xef\xb5\x90bar', None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    (b'foo\xef\xb9\xb6bar', b'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    (b'\xd8\xa71', None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    (b'\xd8\xa71\xd8\xa8', b'\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #(b'\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    (b'X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     b'\xaa\xce\xb0\xe2\x80\x80',
     b'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    (b'X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     b'\x80',
     b'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     b'\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     b'\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88'),
    ]
1016
1017
1018class NameprepTest(unittest.TestCase):
1019 def test_nameprep(self):
1020 from encodings.idna import nameprep
1021 for pos, (orig, prepped) in enumerate(nameprep_tests):
1022 if orig is None:
1023 # Skipped
1024 continue
1025 # The Unicode strings are given in UTF-8
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001026 orig = str(orig, "utf-8")
Martin v. Löwis2548c732003-04-18 10:39:54 +00001027 if prepped is None:
1028 # Input contains prohibited characters
1029 self.assertRaises(UnicodeError, nameprep, orig)
1030 else:
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001031 prepped = str(prepped, "utf-8")
Martin v. Löwis2548c732003-04-18 10:39:54 +00001032 try:
1033 self.assertEquals(nameprep(orig), prepped)
Guido van Rossumb940e112007-01-10 16:19:56 +00001034 except Exception as e:
Martin v. Löwis2548c732003-04-18 10:39:54 +00001035 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
1036
class IDNACodecTest(unittest.TestCase):
    """
    Tests for the "idna" codec: one-shot conversion, stream reading and
    the incremental encoder/decoder.
    """

    def test_builtin_decode(self):
        self.assertEqual(str(b"python.org", "idna"), "python.org")
        self.assertEqual(str(b"python.org.", "idna"), "python.org.")
        self.assertEqual(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
        self.assertEqual(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")

    def test_builtin_encode(self):
        self.assertEqual("python.org".encode("idna"), b"python.org")
        self.assertEqual("python.org.".encode("idna"), b"python.org.")
        self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
        self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")

    def test_stream(self):
        # Once the whole input has been consumed, a further read() must
        # return an empty string.
        r = codecs.getreader("idna")(io.BytesIO(b"abc"))
        r.read(3)
        self.assertEqual(r.read(), "")

    def test_incremental_decode(self):
        # Feed the input one byte at a time.  The non-ASCII name is checked
        # both without and with a trailing dot.
        self.assertEqual(
            "".join(codecs.iterdecode((bytes([c]) for c in b"python.org"), "idna")),
            "python.org"
        )
        self.assertEqual(
            "".join(codecs.iterdecode((bytes([c]) for c in b"python.org."), "idna")),
            "python.org."
        )
        self.assertEqual(
            "".join(codecs.iterdecode((bytes([c]) for c in b"xn--pythn-mua.org"), "idna")),
            "pyth\xf6n.org"
        )
        self.assertEqual(
            "".join(codecs.iterdecode((bytes([c]) for c in b"xn--pythn-mua.org."), "idna")),
            "pyth\xf6n.org."
        )

        # A label is only decoded once the dot that ends it (or the final
        # call) has been seen.
        decoder = codecs.getincrementaldecoder("idna")()
        self.assertEqual(decoder.decode(b"xn--xam"), "")
        self.assertEqual(decoder.decode(b"ple-9ta.o"), "\xe4xample.")
        self.assertEqual(decoder.decode(b"rg"), "")
        self.assertEqual(decoder.decode(b"", True), "org")

        decoder.reset()
        self.assertEqual(decoder.decode(b"xn--xam"), "")
        self.assertEqual(decoder.decode(b"ple-9ta.o"), "\xe4xample.")
        self.assertEqual(decoder.decode(b"rg."), "org.")
        self.assertEqual(decoder.decode(b"", True), "")

    def test_incremental_encode(self):
        # iterencode() over a str feeds the encoder one character at a
        # time.  The non-ASCII name is checked both without and with a
        # trailing dot.
        self.assertEqual(
            b"".join(codecs.iterencode("python.org", "idna")),
            b"python.org"
        )
        self.assertEqual(
            b"".join(codecs.iterencode("python.org.", "idna")),
            b"python.org."
        )
        self.assertEqual(
            b"".join(codecs.iterencode("pyth\xf6n.org", "idna")),
            b"xn--pythn-mua.org"
        )
        self.assertEqual(
            b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
            b"xn--pythn-mua.org."
        )

        # A label is only encoded once the dot that ends it (or the final
        # call) has been seen.
        encoder = codecs.getincrementalencoder("idna")()
        self.assertEqual(encoder.encode("\xe4x"), b"")
        self.assertEqual(encoder.encode("ample.org"), b"xn--xample-9ta.")
        self.assertEqual(encoder.encode("", True), b"org")

        encoder.reset()
        self.assertEqual(encoder.encode("\xe4x"), b"")
        self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
        self.assertEqual(encoder.encode("", True), b"")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001112
class CodecsModuleTest(unittest.TestCase):
    """
    Tests for the module-level functions of codecs: decode(), encode(),
    register(), lookup() and the get*() accessors, including their
    behaviour for missing arguments and unknown encoding names.
    """

    def test_decode(self):
        self.assertEqual(codecs.decode(b'\xe4\xf6\xfc', 'latin-1'),
                         '\xe4\xf6\xfc')
        self.assertRaises(TypeError, codecs.decode)
        # The encoding argument may be left out.
        self.assertEqual(codecs.decode(b'abc'), 'abc')
        self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')

    def test_encode(self):
        self.assertEqual(codecs.encode('\xe4\xf6\xfc', 'latin-1'),
                         b'\xe4\xf6\xfc')
        self.assertRaises(TypeError, codecs.encode)
        self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
        # The encoding argument may be left out.
        self.assertEqual(codecs.encode('abc'), b'abc')
        self.assertRaises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')

    def test_register(self):
        self.assertRaises(TypeError, codecs.register)
        self.assertRaises(TypeError, codecs.register, 42)

    def test_lookup(self):
        self.assertRaises(TypeError, codecs.lookup)
        self.assertRaises(LookupError, codecs.lookup, "__spam__")
        self.assertRaises(LookupError, codecs.lookup, " ")

    def test_getencoder(self):
        self.assertRaises(TypeError, codecs.getencoder)
        self.assertRaises(LookupError, codecs.getencoder, "__spam__")

    def test_getdecoder(self):
        self.assertRaises(TypeError, codecs.getdecoder)
        self.assertRaises(LookupError, codecs.getdecoder, "__spam__")

    def test_getreader(self):
        self.assertRaises(TypeError, codecs.getreader)
        self.assertRaises(LookupError, codecs.getreader, "__spam__")

    def test_getwriter(self):
        self.assertRaises(TypeError, codecs.getwriter)
        self.assertRaises(LookupError, codecs.getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001154
class StreamReaderTest(unittest.TestCase):
    """
    Test readlines() of the UTF-8 stream reader on a two-line byte stream.
    """

    def setUp(self):
        self.reader = codecs.getreader('utf-8')
        # Two multi-byte characters separated by a newline.
        self.stream = io.BytesIO(b'\xed\x95\x9c\n\xea\xb8\x80')

    def test_readlines(self):
        f = self.reader(self.stream)
        self.assertEqual(f.readlines(), ['\ud55c\n', '\uae00'])
Hye-Shik Changaf5c7cf2004-10-17 23:51:21 +00001164
class EncodedFileTest(unittest.TestCase):
    """
    Test the transcoding wrapper returned by codecs.EncodedFile().
    """

    def test_basic(self):
        # Reading: the UTF-8 content of the file comes back as UTF-16-LE.
        f = io.BytesIO(b'\xed\x95\x9c\n\xea\xb8\x80')
        ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
        self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')

        # Writing: UTF-8 data handed to the wrapper is stored as Latin-1.
        f = io.BytesIO()
        ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
        ef.write(b'\xc3\xbc')
        self.assertEqual(f.getvalue(), b'\xfc')
Thomas Wouters89f507f2006-12-13 04:49:30 +00001176
# Names of the encodings exercised by the tests that iterate over this
# list.  The entries are kept in sorted order, several related names per
# line.
all_unicode_encodings = [
    "ascii", "big5", "big5hkscs", "charmap",
    "cp037", "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "cp424", "cp437", "cp500", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857",
    "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869",
    "cp874", "cp875", "cp932", "cp949", "cp950",
    "euc_jis_2004", "euc_jisx0213", "euc_jp", "euc_kr",
    "gb18030", "gb2312", "gbk",
    "hp_roman8", "hz", "idna",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "iso8859_1", "iso8859_10", "iso8859_11", "iso8859_13", "iso8859_14",
    "iso8859_15", "iso8859_16",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
    "iso8859_7", "iso8859_8", "iso8859_9",
    "johab", "koi8_r", "koi8_u", "latin_1",
    "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman",
    "mac_turkish",
    "palmos", "ptcp154", "punycode", "raw_unicode_escape",
    "shift_jis", "shift_jis_2004", "shift_jisx0213",
    "tis_620", "unicode_escape", "unicode_internal",
    "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8",
]

# "mbcs" is added only when the codecs module provides mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings += ["mbcs"]
1279
# The "undefined" encoding is deliberately not tested, because it's not
# supposed to work.

# Encodings that don't work in stateful mode.
broken_unicode_with_streams = ["punycode", "unicode_internal"]

# Encodings left out of the incremental coder checks: the ones above
# plus "idna".
broken_incremental_coders = broken_unicode_with_streams + ["idna"]

# Encodings that only support "strict" mode.
only_strict_mode = ["idna"]
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001297
Walter Dörwald3abcb012007-04-16 22:10:50 +00001298class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001299 def test_basics(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001300 s = "abc123" # all codecs should be able to encode these
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001301 for encoding in all_unicode_encodings:
Thomas Woutersa9773292006-04-21 09:43:23 +00001302 name = codecs.lookup(encoding).name
1303 if encoding.endswith("_codec"):
1304 name += "_codec"
1305 elif encoding == "latin_1":
1306 name = "latin_1"
1307 self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001308 (b, size) = codecs.getencoder(encoding)(s)
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001309 if encoding != "unicode_internal":
1310 self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001311 (chars, size) = codecs.getdecoder(encoding)(b)
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001312 self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1313
1314 if encoding not in broken_unicode_with_streams:
1315 # check stream reader/writer
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001316 q = Queue(b"")
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001317 writer = codecs.getwriter(encoding)(q)
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001318 encodedresult = b""
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001319 for c in s:
1320 writer.write(c)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001321 chunk = q.read()
1322 self.assert_(type(chunk) is bytes, type(chunk))
1323 encodedresult += chunk
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001324 q = Queue(b"")
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001325 reader = codecs.getreader(encoding)(q)
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001326 decodedresult = ""
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001327 for c in encodedresult:
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001328 q.write(bytes([c]))
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001329 decodedresult += reader.read()
1330 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1331
Thomas Wouters89f507f2006-12-13 04:49:30 +00001332 if encoding not in broken_incremental_coders:
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001333 # check incremental decoder/encoder (fetched via the Python
1334 # and C API) and iterencode()/iterdecode()
Thomas Woutersa9773292006-04-21 09:43:23 +00001335 try:
1336 encoder = codecs.getincrementalencoder(encoding)()
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001337 cencoder = _testcapi.codec_incrementalencoder(encoding)
Thomas Woutersa9773292006-04-21 09:43:23 +00001338 except LookupError: # no IncrementalEncoder
1339 pass
1340 else:
1341 # check incremental decoder/encoder
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001342 encodedresult = b""
Thomas Woutersa9773292006-04-21 09:43:23 +00001343 for c in s:
1344 encodedresult += encoder.encode(c)
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001345 encodedresult += encoder.encode("", True)
Thomas Woutersa9773292006-04-21 09:43:23 +00001346 decoder = codecs.getincrementaldecoder(encoding)()
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001347 decodedresult = ""
Thomas Woutersa9773292006-04-21 09:43:23 +00001348 for c in encodedresult:
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001349 decodedresult += decoder.decode(bytes([c]))
Guido van Rossumf4cfc8f2007-05-17 21:52:23 +00001350 decodedresult += decoder.decode(b"", True)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001351 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1352
1353 # check C API
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001354 encodedresult = b""
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001355 for c in s:
1356 encodedresult += cencoder.encode(c)
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001357 encodedresult += cencoder.encode("", True)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001358 cdecoder = _testcapi.codec_incrementaldecoder(encoding)
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001359 decodedresult = ""
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001360 for c in encodedresult:
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001361 decodedresult += cdecoder.decode(bytes([c]))
Guido van Rossumf4cfc8f2007-05-17 21:52:23 +00001362 decodedresult += cdecoder.decode(b"", True)
Thomas Woutersa9773292006-04-21 09:43:23 +00001363 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1364
1365 # check iterencode()/iterdecode()
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001366 result = "".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
Thomas Woutersa9773292006-04-21 09:43:23 +00001367 self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))
1368
1369 # check iterencode()/iterdecode() with empty string
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001370 result = "".join(codecs.iterdecode(codecs.iterencode("", encoding), encoding))
1371 self.assertEqual(result, "")
Thomas Woutersa9773292006-04-21 09:43:23 +00001372
Thomas Wouters89f507f2006-12-13 04:49:30 +00001373 if encoding not in only_strict_mode:
1374 # check incremental decoder/encoder with errors argument
1375 try:
1376 encoder = codecs.getincrementalencoder(encoding)("ignore")
1377 cencoder = _testcapi.codec_incrementalencoder(encoding, "ignore")
1378 except LookupError: # no IncrementalEncoder
1379 pass
1380 else:
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001381 encodedresult = b"".join(encoder.encode(c) for c in s)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001382 decoder = codecs.getincrementaldecoder(encoding)("ignore")
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001383 decodedresult = "".join(decoder.decode(bytes([c])) for c in encodedresult)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001384 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1385
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001386 encodedresult = b"".join(cencoder.encode(c) for c in s)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001387 cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
Walter Dörwaldca8a8d02007-05-04 13:05:09 +00001388 decodedresult = "".join(cdecoder.decode(bytes([c])) for c in encodedresult)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001389 self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
1390
def test_seek(self):
    # Rewinding a stream reader with seek() has to reset the internal codec
    # state and buffers, so every full read after a rewind must return the
    # complete text again.
    # all codecs should be able to encode these
    text = "%s\n%s\n" % (100*"abc123", 100*"def456")
    for encoding in all_unicode_encodings:
        # FIXME: "idna" is skipped, see SF bug #1163178
        if encoding == "idna" or encoding in broken_unicode_with_streams:
            continue
        raw = io.BytesIO(text.encode(encoding))
        reader = codecs.getreader(encoding)(raw)
        for _ in range(5):
            # Go back to the start, then read everything once more
            reader.seek(0, 0)
            self.assertEqual(text, reader.read())
Walter Dörwald729c31f2005-03-14 19:06:30 +00001405
def test_bad_decode_args(self):
    # Every decoder must raise TypeError when called without arguments.
    # All decoders except the exempted ones must also raise TypeError
    # when handed an int instead of encoded data.
    exempt = ("idna", "punycode")
    for name in all_unicode_encodings:
        decode = codecs.getdecoder(name)
        self.assertRaises(TypeError, decode)
        if name in exempt:
            continue
        self.assertRaises(TypeError, decode, 42)
1412
def test_bad_encode_args(self):
    # Calling any encoder without arguments must raise TypeError.
    for name in all_unicode_encodings:
        self.assertRaises(TypeError, codecs.getencoder(name))
1417
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001418 def test_encoding_map_type_initialized(self):
1419 from encodings import cp1140
1420 # This used to crash, we are only verifying there's no crash.
1421 table_type = type(cp1140.encoding_table)
1422 self.assertEqual(table_type, table_type)
1423
def test_decoder_state(self):
    # Check that getstate() and setstate() handle the state properly
    text = "abc123"
    for name in all_unicode_encodings:
        if name in broken_incremental_coders:
            continue
        encoded = text.encode(name)
        self.check_state_handling_decode(name, text, encoded)
        self.check_state_handling_encode(name, text, encoded)
1431
class CharmapTest(unittest.TestCase):
    # Tests for codecs.charmap_decode() with the mapping given as a str.

    def test_decode_with_string_map(self):
        # The input bytes 0, 1 and 2 index into the mapping string.
        # Each case is (errors, mapping, expected text): a byte that lies
        # beyond the end of the mapping, or that maps to "\ufffe", becomes
        # "\ufffd" under "replace" and is dropped under "ignore".
        data = b"\x00\x01\x02"
        cases = (
            ("strict", "abc", "abc"),
            ("replace", "ab", "ab\ufffd"),
            ("replace", "ab\ufffe", "ab\ufffd"),
            ("ignore", "ab", "ab"),
            ("ignore", "ab\ufffe", "ab"),
        )
        for errors, mapping, expected in cases:
            # All input bytes count as consumed, whatever the error handling.
            self.assertEqual(
                codecs.charmap_decode(data, errors, mapping),
                (expected, len(data)),
                "errors=%r, mapping=%r" % (errors, mapping)
            )

        # With an empty mapping every possible byte value is undecodable;
        # "ignore" must drop them all and still consume the whole input.
        allbytes = bytes(range(256))
        self.assertEqual(
            codecs.charmap_decode(allbytes, "ignore", ""),
            ("", len(allbytes))
        )
1464
class WithStmtTest(unittest.TestCase):
    # The codecs stream wrappers must be usable in a "with" statement.

    def test_encodedfile(self):
        # The underlying file holds UTF-8 for U+00FC; reading through the
        # wrapper must hand the same character back as Latin-1 bytes.
        f = io.BytesIO(b"\xc3\xbc")
        with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
            self.assertEqual(ef.read(), b"\xfc")

    def test_streamreaderwriter(self):
        # Reading through the combined reader/writer decodes the UTF-8
        # bytes to the single character U+00FC.
        f = io.BytesIO(b"\xc3\xbc")
        info = codecs.lookup("utf-8")
        with codecs.StreamReaderWriter(f, info.streamreader,
                                       info.streamwriter, 'strict') as srw:
            self.assertEqual(srw.read(), "\xfc")
Thomas Wouters89f507f2006-12-13 04:49:30 +00001477
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001478
def test_main():
    # Hand every test case class of this module to the regression-test
    # runner, in the order listed.
    test_classes = (
        UTF32Test,
        UTF32LETest,
        UTF32BETest,
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        IDNACodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        EncodedFileTest,
        BasicUnicodeTest,
        CharmapTest,
        WithStmtTest,
    )
    test_support.run_unittest(*test_classes)
Fred Drake2e2be372001-09-20 21:33:42 +00001505
1506
# Run the complete suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()