blob: ded5d1917a62db1c7f29edd7c995a4961ee141db [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
2import unittest
Marc-André Lemburga37171d2001-06-19 20:09:28 +00003import codecs
Walter Dörwalda47d1c02005-08-30 10:23:14 +00004import sys, StringIO
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    In-memory FIFO buffer: write() appends data at the tail, read()
    removes data from the head.
    """
    def __init__(self):
        self._buffer = ""

    def write(self, chars):
        # append to the tail of the pending data
        self._buffer = self._buffer + chars

    def read(self, size=-1):
        # A negative size drains everything that is pending; otherwise at
        # most size items are taken from the head of the buffer.
        pending = self._buffer
        if size < 0:
            self._buffer = ""
            return pending
        self._buffer = pending[size:]
        return pending[:size]
25
class ReadTest(unittest.TestCase):
    # Shared StreamReader checks.  Every method reads self.encoding, which
    # is not set here; the subclasses in this file assign it.

    def check_partial(self, input, partialresults):
        # Encode input with self.encoding and push the encoded data into a
        # StreamReader one item at a time.  After every item, everything
        # decoded so far must equal the matching entry of partialresults.
        queue = Queue()
        reader = codecs.getreader(self.encoding)(queue)
        decoded = u""
        encoded = input.encode(self.encoding)
        for (piece, expected) in zip(encoded, partialresults):
            queue.write(piece)
            decoded += reader.read()
            self.assertEqual(decoded, expected)
        # nothing may be left over in the reader or its buffers
        self.assertEqual(reader.read(), u"")
        self.assertEqual(reader.bytebuffer, "")
        self.assertEqual(reader.charbuffer, u"")

    def test_readline(self):
        def make_reader(text):
            raw = StringIO.StringIO(text.encode(self.encoding))
            return codecs.getreader(self.encoding)(raw)

        def read_all(text, keepends=True):
            # join every line readline() returns before its first empty
            # result
            reader = make_reader(text)
            collected = []
            line = reader.readline(keepends=keepends)
            while line:
                collected.append(line)
                line = reader.readline(keepends=keepends)
            return "".join(collected)

        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
        self.assertEqual(read_all(s, True), s)
        self.assertEqual(read_all(s, False), u"foobarbazspameggs")

        # NOTE(review): split() without a separator splits on whitespace,
        # and each intended line end here looks like whitespace to it, so
        # this presumably evaluates to [] and the two loops over lineends
        # below never run their bodies -- confirm.  Also, u"\3042" is an
        # octal escape followed by "2"; possibly u"\u3042" was meant.
        lineends = u"\n \r\n \r \u2028".split()

        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            filler = (i*200)*u"\3042"
            vw.append(filler + lineend)
            vwo.append(filler)
        self.assertEqual(read_all("".join(vw), True), "".join(vw))
        self.assertEqual(read_all("".join(vw), False), "".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in xrange(80):
            for lineend in lineends:
                s = 10*(size*u"a" + lineend + u"xxx\n")
                checks = (
                    (True, size*u"a" + lineend),
                    (False, size*u"a"),
                )
                for (keepends, expected) in checks:
                    reader = make_reader(s)
                    for i in xrange(10):
                        self.assertEqual(
                            reader.readline(keepends=keepends),
                            expected,
                        )

    def test_bug1175396(self):
        # Iterating over a StreamReader must give back exactly the lines
        # that were joined and encoded.
        # NOTE(review): runs of spaces inside these literals may have been
        # collapsed when this listing was extracted -- confirm against the
        # original fixture for bug #1175396.
        s = [
            '<%!--===================================================\r\n',
            ' BLOG index page: show recent articles,\r\n',
            ' today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            ' entryids=storageEngine.listBlogEntries(date)\r\n',
            ' entryids.reverse() # descending\r\n',
            ' if count:\r\n',
            ' entryids=entryids[:count]\r\n',
            ' try:\r\n',
            ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            ' except StorageError,x:\r\n',
            ' log.error("Error loading articles: "+str(x))\r\n',
            ' self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            ' #-------------------- TODAY\'S ARTICLES\r\n',
            ' self.write("<h2>Today\'s articles</h2>")\r\n',
            ' showdate = frog.util.isodatestr() \r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            ' #-------------------- ACTIVE ARTICLES redirect\r\n',
            ' self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            ' #-------------------- LOGIN PAGE redirect\r\n',
            ' self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            ' showdate = self.Request.getParameter("date")\r\n',
            ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            ' #-------------------- RECENT ARTICLES\r\n',
            ' self.write("<h2>Recent articles</h2>")\r\n',
            ' dates=storageEngine.listBlogEntryDates()\r\n',
            ' if dates:\r\n',
            ' entries=[]\r\n',
            ' SHOWAMOUNT=10\r\n',
            ' for showdate in dates:\r\n',
            ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            ' if len(entries)>=SHOWAMOUNT:\r\n',
            ' break\r\n',
            ' \r\n',
        ]
        encoded = "".join(s).encode(self.encoding)
        reader = codecs.getreader(self.encoding)(StringIO.StringIO(encoded))
        for (lineno, line) in enumerate(reader):
            self.assertEqual(line, s[lineno])

    def test_readlinequeue(self):
        # Writer and reader share one Queue, so data arrives at the reader
        # in the same pieces in which it is written.
        q = Queue()
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        def feed(keepends, steps):
            # steps: (text to write, lines the following readline() calls
            # must return, in order)
            for (text, expected_lines) in steps:
                writer.write(text)
                for expected in expected_lines:
                    self.assertEqual(
                        reader.readline(keepends=keepends),
                        expected,
                    )

        # No lineends
        feed(False, [
            (u"foo\r", [u"foo"]),
            (u"\nbar\r", [u"", u"bar"]),
            (u"baz", [u"baz", u""]),
        ])

        # Lineends
        feed(True, [
            (u"foo\r", [u"foo\r"]),
            (u"\nbar\r", [u"\n", u"bar\r"]),
            (u"baz", [u"baz", u""]),
            (u"foo\r\n", [u"foo\r\n"]),
        ])

    def test_bug1098990_a(self):
        lines = [
            u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n",
            u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n",
            u"next line.\r\n",
        ]

        encoded = u"".join(lines).encode(self.encoding)
        reader = codecs.getreader(self.encoding)(StringIO.StringIO(encoded))
        # every line comes back unchanged, followed by an empty read
        for expected in lines + [u""]:
            self.assertEqual(reader.readline(), expected)

    def test_bug1098990_b(self):
        lines = [
            u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n",
            u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n",
            u"stillokay:bbbbxx\r\n",
            u"broken!!!!badbad\r\n",
            u"againokay.\r\n",
        ]

        encoded = u"".join(lines).encode(self.encoding)
        reader = codecs.getreader(self.encoding)(StringIO.StringIO(encoded))
        # every line comes back unchanged, followed by an empty read
        for expected in lines + [u""]:
            self.assertEqual(reader.readline(), expected)
215
class UTF16Test(ReadTest):
    # The shared ReadTest checks for "utf-16", plus BOM handling.
    encoding = "utf-16"

    # the two accepted encodings of u"spamspam": one BOM, then the text
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        (reader, writer) = codecs.lookup(self.encoding)[2:]
        # encode some stream, writing the text in two pieces
        sink = StringIO.StringIO()
        f = writer(sink)
        for chunk in (u"spam", u"spam"):
            f.write(chunk)
        d = sink.getvalue()
        # check whether there is exactly one BOM in it
        self.assert_(d in (self.spamle, self.spambe))
        # try to read it back
        f = reader(StringIO.StringIO(d))
        self.assertEquals(f.read(), u"spamspam")

    def test_badbom(self):
        # neither input starts with a valid BOM, so reading must fail
        for data in ("\xff\xff", "\xff\xff\xff\xff"):
            f = codecs.getreader(self.encoding)(StringIO.StringIO(data))
            self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        # one expected prefix per encoded byte
        expected = (
            # first byte of BOM read, second byte of BOM read (byteorder
            # known), first byte of u"\x00" read
            3*[u""] +
            2*[u"\x00"] +
            2*[u"\x00\xff"] +
            2*[u"\x00\xff\u0100"] +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(u"\x00\xff\u0100\uffff", expected)

    def test_errors(self):
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_decode, "\xff", "strict", True
        )
265
class UTF16LETest(ReadTest):
    # The shared ReadTest checks for "utf-16-le".
    encoding = "utf-16-le"

    def test_partial(self):
        # one expected prefix per encoded byte
        expected = (
            [u""] +
            2*[u"\x00"] +
            2*[u"\x00\xff"] +
            2*[u"\x00\xff\u0100"] +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(u"\x00\xff\u0100\uffff", expected)

    def test_errors(self):
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_le_decode, "\xff", "strict", True
        )
286
class UTF16BETest(ReadTest):
    # The shared ReadTest checks for "utf-16-be".
    encoding = "utf-16-be"

    def test_partial(self):
        # one expected prefix per encoded byte
        expected = (
            [u""] +
            2*[u"\x00"] +
            2*[u"\x00\xff"] +
            2*[u"\x00\xff\u0100"] +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(u"\x00\xff\u0100\uffff", expected)

    def test_errors(self):
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_be_decode, "\xff", "strict", True
        )
307
class UTF8Test(ReadTest):
    # The shared ReadTest checks for "utf-8".
    encoding = "utf-8"

    def test_partial(self):
        # one expected prefix per encoded byte
        expected = (
            2*[u"\x00"] +
            2*[u"\x00\xff"] +
            3*[u"\x00\xff\u07ff"] +
            3*[u"\x00\xff\u07ff\u0800"] +
            [u"\x00\xff\u07ff\u0800\uffff"]
        )
        self.check_partial(u"\x00\xff\u07ff\u0800\uffff", expected)
328
class UTF7Test(ReadTest):
    # The shared ReadTest checks for "utf-7".
    encoding = "utf-7"

    # There is no test_partial() here yet, because UTF-7 doesn't
    # support it.
333
class UTF16ExTest(unittest.TestCase):
    # Error handling of codecs.utf_16_ex_decode.

    def test_errors(self):
        decode = codecs.utf_16_ex_decode
        self.assertRaises(UnicodeDecodeError, decode, "\xff", "strict", 0, True)

    def test_bad_args(self):
        # calling without any argument is a TypeError
        decode = codecs.utf_16_ex_decode
        self.assertRaises(TypeError, decode)
341
class ReadBufferTest(unittest.TestCase):
    # Checks for codecs.readbuffer_encode.

    def test_array(self):
        import array
        chars = array.array("c", "spam")
        self.assertEqual(codecs.readbuffer_encode(chars), ("spam", 4))

    def test_empty(self):
        encoded = codecs.readbuffer_encode("")
        self.assertEqual(encoded, ("", 0))

    def test_bad_args(self):
        # a missing argument and an int argument are both TypeErrors
        encode = codecs.readbuffer_encode
        self.assertRaises(TypeError, encode)
        self.assertRaises(TypeError, encode, 42)
357
class CharBufferTest(unittest.TestCase):
    # Checks for codecs.charbuffer_encode.

    def test_string(self):
        encoded = codecs.charbuffer_encode("spam")
        self.assertEqual(encoded, ("spam", 4))

    def test_empty(self):
        encoded = codecs.charbuffer_encode("")
        self.assertEqual(encoded, ("", 0))

    def test_bad_args(self):
        # a missing argument and an int argument are both TypeErrors
        encode = codecs.charbuffer_encode
        self.assertRaises(TypeError, encode)
        self.assertRaises(TypeError, encode, 42)
369
class UTF8SigTest(ReadTest):
    # The shared ReadTest checks for "utf-8-sig".
    encoding = "utf-8-sig"

    def test_partial(self):
        # one expected prefix per encoded byte
        expected = (
            # Nothing is emitted while the first BOM is read and skipped,
            # nor for the first two bytes of the second BOM.
            5*[u""] +
            # Second BOM has been read and emitted
            [u"\ufeff"] +
            # "\x00" read and emitted, then first byte of encoded u"\xff"
            2*[u"\ufeff\x00"] +
            # u"\xff" complete, then first byte of encoded u"\u07ff"
            2*[u"\ufeff\x00\xff"] +
            # u"\u07ff" complete, then two bytes of the next character
            3*[u"\ufeff\x00\xff\u07ff"] +
            3*[u"\ufeff\x00\xff\u07ff\u0800"] +
            [u"\ufeff\x00\xff\u07ff\u0800\uffff"]
        )
        self.check_partial(u"\ufeff\x00\xff\u07ff\u0800\uffff", expected)
396
class EscapeDecodeTest(unittest.TestCase):
    # Checks for codecs.escape_decode.

    def test_empty(self):
        decoded = codecs.escape_decode("")
        self.assertEquals(decoded, ("", 0))
400
class RecodingTest(unittest.TestCase):
    def test_recoding(self):
        # There is nothing to assert here: the test passes if writing to
        # and closing the EncodedFile does not bring the interpreter down.
        raw = StringIO.StringIO()
        recoder = codecs.EncodedFile(raw, "unicode_internal", "utf-8")
        recoder.write(u"a")
        recoder.close()
        # Python used to crash on this at exit because of a refcount
        # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000409
# From RFC 3492
# Each entry is a (unicode text, expected punycode string) pair.
punycode_testcases = [
    # (A) Arabic (Egyptian):
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),

    # (B) Chinese (simplified):
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),

    # (C) Chinese (traditional):
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),

    # (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),

    # (E) Hebrew:
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),

    # (F) Hindi (Devanagari):
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
     u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
     u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
     u"\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    # (G) Japanese (kanji and hiragana):
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
     u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables):
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic):
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    #  T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
    #  <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    # (L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]
513
# Import-time sanity check: report every table entry that is not a pair.
for i in punycode_testcases:
    if len(i) != 2:
        print(repr(i))
517
class PunycodeTest(unittest.TestCase):
    # Runs the punycode codec over every pair in punycode_testcases.

    def test_encode(self):
        for (uni, puny) in punycode_testcases:
            # Need to convert both strings to lower case, since
            # some of the extended encodings use upper case, but our
            # code produces only lower case. Converting just puny to
            # lower is also insufficient, since some of the input characters
            # are upper case.
            encoded = uni.encode("punycode")
            self.assertEquals(encoded.lower(), puny.lower())

    def test_decode(self):
        for (uni, puny) in punycode_testcases:
            decoded = puny.decode("punycode")
            self.assertEquals(uni, decoded)
531
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000532class UnicodeInternalTest(unittest.TestCase):
533 def test_bug1251300(self):
534 # Decoding with unicode_internal used to not correctly handle "code
535 # points" above 0x10ffff on UCS-4 builds.
536 if sys.maxunicode > 0xffff:
537 ok = [
538 ("\x00\x10\xff\xff", u"\U0010ffff"),
539 ("\x00\x00\x01\x01", u"\U00000101"),
540 ("", u""),
541 ]
542 not_ok = [
543 "\x7f\xff\xff\xff",
544 "\x80\x00\x00\x00",
545 "\x81\x00\x00\x00",
546 "\x00",
547 "\x00\x00\x00\x00\x00",
548 ]
549 for internal, uni in ok:
550 if sys.byteorder == "little":
551 internal = "".join(reversed(internal))
552 self.assertEquals(uni, internal.decode("unicode_internal"))
553 for internal in not_ok:
554 if sys.byteorder == "little":
555 internal = "".join(reversed(internal))
556 self.assertRaises(UnicodeDecodeError, internal.decode,
557 "unicode_internal")
558
559 def test_decode_error_attributes(self):
560 if sys.maxunicode > 0xffff:
561 try:
562 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
563 except UnicodeDecodeError, ex:
564 self.assertEquals("unicode_internal", ex.encoding)
565 self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
566 self.assertEquals(4, ex.start)
567 self.assertEquals(8, ex.end)
568 else:
569 self.fail()
570
571 def test_decode_callback(self):
572 if sys.maxunicode > 0xffff:
573 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
574 decoder = codecs.getdecoder("unicode_internal")
575 ab = u"ab".encode("unicode_internal")
576 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
577 "UnicodeInternalTest")
578 self.assertEquals((u"ab", 12), ignored)
579
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
# Each entry is an (input, expected output) pair of UTF-8 byte strings.
# An expected output of None marks input that nameprep has to reject;
# (None, None) is a placeholder for a skipped test that keeps the
# numbering of the following entries intact.
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE', 'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f', 'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0', 'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba', '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa', '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7', '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    ('\xc7\xb0', '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90', '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0', '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96', '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96', '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ', ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0', ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80', None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80', ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b', ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80', ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f', '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85', None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e', None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf', ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5', None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3', None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4', None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4', None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe', None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf', None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82', None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd', None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5', None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81', '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e', None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa', None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81', None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82', None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar', None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar', None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    ('foo\xef\xb9\xb6bar', 'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71', None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8', '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
732
733
734class NameprepTest(unittest.TestCase):
735 def test_nameprep(self):
736 from encodings.idna import nameprep
737 for pos, (orig, prepped) in enumerate(nameprep_tests):
738 if orig is None:
739 # Skipped
740 continue
741 # The Unicode strings are given in UTF-8
742 orig = unicode(orig, "utf-8")
743 if prepped is None:
744 # Input contains prohibited characters
745 self.assertRaises(UnicodeError, nameprep, orig)
746 else:
747 prepped = unicode(prepped, "utf-8")
748 try:
749 self.assertEquals(nameprep(orig), prepped)
750 except Exception,e:
751 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
752
class CodecTest(unittest.TestCase):
    # Checks for the "idna" codec.

    def test_builtin(self):
        decoded = unicode("python.org", "idna")
        self.assertEquals(decoded, u"python.org")

    def test_stream(self):
        import StringIO
        reader = codecs.getreader("idna")(StringIO.StringIO("abc"))
        reader.read(3)
        # a further read() must come back empty
        self.assertEquals(reader.read(), u"")
762
class CodecsModuleTest(unittest.TestCase):
    # Module-level functions of codecs: results for simple input and the
    # exceptions raised for missing arguments or unknown codec names.

    def test_decode(self):
        decoded = codecs.decode('\xe4\xf6\xfc', 'latin-1')
        self.assertEquals(decoded, u'\xe4\xf6\xfc')
        self.assertRaises(TypeError, codecs.decode)
        self.assertEquals(codecs.decode('abc'), u'abc')
        self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')

    def test_encode(self):
        encoded = codecs.encode(u'\xe4\xf6\xfc', 'latin-1')
        self.assertEquals(encoded, '\xe4\xf6\xfc')
        self.assertRaises(TypeError, codecs.encode)
        self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
        self.assertEquals(codecs.encode(u'abc'), 'abc')
        self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')

    def test_register(self):
        register = codecs.register
        self.assertRaises(TypeError, register)
        self.assertRaises(TypeError, register, 42)

    def test_lookup(self):
        lookup = codecs.lookup
        self.assertRaises(TypeError, lookup)
        self.assertRaises(LookupError, lookup, "__spam__")
        self.assertRaises(LookupError, lookup, " ")

    def test_getencoder(self):
        getencoder = codecs.getencoder
        self.assertRaises(TypeError, getencoder)
        self.assertRaises(LookupError, getencoder, "__spam__")

    def test_getdecoder(self):
        getdecoder = codecs.getdecoder
        self.assertRaises(TypeError, getdecoder)
        self.assertRaises(LookupError, getdecoder, "__spam__")

    def test_getreader(self):
        getreader = codecs.getreader
        self.assertRaises(TypeError, getreader)
        self.assertRaises(LookupError, getreader, "__spam__")

    def test_getwriter(self):
        getwriter = codecs.getwriter
        self.assertRaises(TypeError, getwriter)
        self.assertRaises(LookupError, getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000804
class StreamReaderTest(unittest.TestCase):
    # readlines() on a UTF-8 StreamReader.

    def setUp(self):
        # two lines of encoded text, the second without a line end
        self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
        self.reader = codecs.getreader('utf-8')

    def test_readlines(self):
        reader = self.reader(self.stream)
        lines = reader.readlines()
        self.assertEquals(lines, [u'\ud55c\n', u'\uae00'])
814
class Str2StrTest(unittest.TestCase):
    # A StreamReader for "base64_codec" must hand back str objects.

    def test_read(self):
        encoded = "\x80".encode("base64_codec")
        stream = StringIO.StringIO(encoded)
        reader = codecs.getreader("base64_codec")(stream)
        sout = reader.read()
        self.assertEqual(sout, "\x80")
        self.assert_(isinstance(sout, str))

    def test_readline(self):
        encoded = "\x80".encode("base64_codec")
        stream = StringIO.StringIO(encoded)
        reader = codecs.getreader("base64_codec")(stream)
        sout = reader.readline()
        self.assertEqual(sout, "\x80")
        self.assert_(isinstance(sout, str))
830
# Names of the encodings exercised with unicode input.
all_unicode_encodings = [
    "ascii", "base64_codec", "big5", "big5hkscs", "charmap",
    "cp037", "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "cp424", "cp437", "cp500", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857",
    "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866",
    "cp869", "cp874", "cp875", "cp932", "cp949", "cp950",
    "euc_jis_2004", "euc_jisx0213", "euc_jp", "euc_kr",
    "gb18030", "gb2312", "gbk",
    "hex_codec", "hp_roman8", "hz", "idna",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "iso8859_1", "iso8859_10", "iso8859_11", "iso8859_13",
    "iso8859_14", "iso8859_15", "iso8859_16",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5",
    "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9",
    "johab", "koi8_r", "koi8_u", "latin_1",
    "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2",
    "mac_roman", "mac_turkish",
    "palmos", "ptcp154", "punycode", "raw_unicode_escape", "rot_13",
    "shift_jis", "shift_jis_2004", "shift_jisx0213", "tis_620",
    "unicode_escape", "unicode_internal",
    "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8",
]

# "mbcs" is only listed when the codecs module provides mbcs_encode
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings.append("mbcs")

# The following encodings work only with str, not unicode
all_string_encodings = ["quopri_codec", "string_escape", "uu_codec"]

# The following encoding is not tested, because it's not supposed
# to work:
#    "undefined"

# The following encodings don't work in stateful mode
broken_unicode_with_streams = [
    "base64_codec", "hex_codec", "punycode", "unicode_internal"
]

# The compression codecs are only listed when their module can be
# imported; both are also marked as not working in stateful mode.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings.append("bz2_codec")
    broken_unicode_with_streams.append("bz2_codec")

try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings.append("zlib_codec")
    broken_unicode_with_streams.append("zlib_codec")
971
class BasicUnicodeTest(unittest.TestCase):
    """Generic checks run against every codec in all_unicode_encodings."""

    def test_basics(self):
        s = u"abc123" # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            # Stateless round trip through the codec's encoder and decoder.
            (encoded, size) = codecs.getencoder(encoding)(s)
            # unicode_internal is exempt from the consumed-length check.
            if encoding != "unicode_internal":
                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))

            if encoding not in broken_unicode_with_streams:
                # check stream reader/writer
                # Feed the writer one character at a time and collect
                # whatever bytes it emits after each write.
                q = Queue()
                writer = codecs.getwriter(encoding)(q)
                encodedresult = ""
                for c in s:
                    writer.write(c)
                    encodedresult += q.read()
                # Feed those bytes back one at a time; the reader may return
                # nothing until a complete character has arrived.
                q = Queue()
                reader = codecs.getreader(encoding)(q)
                decodedresult = u""
                for c in encodedresult:
                    q.write(c)
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

    def test_seek(self):
        # all codecs should be able to encode these
        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
        for encoding in all_unicode_encodings:
            if encoding == "idna": # FIXME: See SF bug #1163178
                continue
            if encoding in broken_unicode_with_streams:
                continue
            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
            for _ in xrange(5):
                # Test that calling seek resets the internal codec state and buffers
                reader.seek(0, 0)
                line = reader.readline()
                # After rewinding, whatever readline() returns must be a
                # prefix of the original text.
                expected = s[:len(line)]
                self.assertEqual(expected, line, "%r != %r (encoding=%r)" % (expected, line, encoding))

    def test_bad_decode_args(self):
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            # Calling the decoder without any argument must be rejected.
            self.assertRaises(TypeError, decoder)
            # idna and punycode are exempt from the non-string argument check.
            if encoding not in ("idna", "punycode"):
                self.assertRaises(TypeError, decoder, 42)

    def test_bad_encode_args(self):
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
            # Calling the encoder without any argument must be rejected.
            self.assertRaises(TypeError, encoder)
1024
class BasicStrTest(unittest.TestCase):
    """Round-trip check for the str-only codecs in all_string_encodings."""

    def test_basics(self):
        s = "abc123"
        for encoding in all_string_encodings:
            (encoded, size) = codecs.getencoder(encoding)(s)
            # The encoder must report that it consumed the whole input.
            self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(encoded)
            # Decoding the encoded form must give back the original string.
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1033
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001034class CharmapTest(unittest.TestCase):
1035 def test_decode_with_string_map(self):
1036 self.assertEquals(
1037 codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
1038 (u"abc", 3)
1039 )
1040
1041 self.assertEquals(
1042 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
1043 (u"ab\ufffd", 3)
1044 )
1045
1046 self.assertEquals(
1047 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
1048 (u"ab\ufffd", 3)
1049 )
1050
1051 self.assertEquals(
1052 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
1053 (u"ab", 3)
1054 )
1055
1056 self.assertEquals(
1057 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
1058 (u"ab", 3)
1059 )
1060
1061 allbytes = "".join(chr(i) for i in xrange(256))
1062 self.assertEquals(
1063 codecs.charmap_decode(allbytes, "ignore", u""),
1064 (u"", len(allbytes))
1065 )
1066
1067
def test_main():
    """Run this module's codec test classes via test_support.run_unittest."""
    test_classes = (
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        EscapeDecodeTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        CodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        Str2StrTest,
        BasicUnicodeTest,
        BasicStrTest,
        CharmapTest,
    )
    # The classes are handed over in the order listed above.
    test_support.run_unittest(*test_classes)
Fred Drake2e2be372001-09-20 21:33:42 +00001092
1093
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()