blob: 3944d654525db6d1cdff848e566b31049bfbc39f [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
2import unittest
Marc-André Lemburga37171d2001-06-19 20:09:28 +00003import codecs
Walter Dörwalda47d1c02005-08-30 10:23:14 +00004import sys, StringIO
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    A minimal in-memory FIFO: write() appends data at the tail and
    read() consumes it from the head.
    """
    def __init__(self):
        self._buffer = ""

    def write(self, chars):
        # Append behind whatever has not been read yet.
        self._buffer = self._buffer + chars

    def read(self, size=-1):
        # A negative size means "hand out everything that is buffered".
        if size < 0:
            head, tail = self._buffer, ""
        else:
            head, tail = self._buffer[:size], self._buffer[size:]
        self._buffer = tail
        return head
25
class ReadTest(unittest.TestCase):
    """
    StreamReader tests shared by several codecs.

    Every test takes the codec name from ``self.encoding``; this class
    does not define that attribute itself, the concrete subclasses do.
    """

    def check_partial(self, input, partialresults):
        # get a StreamReader for the encoding and feed the bytestring version
        # of input to the reader byte by byte. Read everything available from
        # the StreamReader and check that the results equal the appropriate
        # entries from partialresults.
        q = Queue()
        r = codecs.getreader(self.encoding)(q)
        result = u""
        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
            q.write(c)
            result += r.read()
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(r.read(), u"")
        self.assertEqual(r.bytebuffer, "")
        self.assertEqual(r.charbuffer, u"")

    def test_readline(self):
        def getreader(input):
            # StreamReader over the encoded form of input
            stream = StringIO.StringIO(input.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        def readalllines(input, keepends=True, size=None):
            # Read input line by line until readline() returns an empty
            # string and return the lines joined with "|", so that the
            # position of every line break is visible in the result.
            reader = getreader(input)
            lines = []
            while True:
                line = reader.readline(size=size, keepends=keepends)
                if not line:
                    break
                lines.append(line)
            return "|".join(lines)

        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
        sexpectednoends = u"foo|bar|baz|spam|eggs"
        self.assertEqual(readalllines(s, True), sexpected)
        self.assertEqual(readalllines(s, False), sexpectednoends)
        self.assertEqual(readalllines(s, True, 10), sexpected)
        self.assertEqual(readalllines(s, False, 10), sexpectednoends)

        # The line ends are listed explicitly.  They must not be produced
        # with u"\n \r\n \r \u2028".split(): every one of them is whitespace,
        # so split() returns an empty list and the loops below would never
        # execute.
        lineends = (u"\n", u"\r\n", u"\r", u"\u2028")

        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            # +200 keeps the first line non-empty: an empty line read with
            # keepends=False is indistinguishable from EOF in readalllines()
            vw.append((i*200+200)*u"\u3042" + lineend)
            vwo.append((i*200+200)*u"\u3042")
        # readalllines() joins with "|", so the expected values do as well
        self.assertEqual(readalllines(u"".join(vw), True), u"|".join(vw))
        self.assertEqual(readalllines(u"".join(vw), False), u"|".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in xrange(80):
            for lineend in lineends:
                s = 10*(size*u"a" + lineend + u"xxx\n")
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=True),
                        size*u"a" + lineend,
                    )
                    # every "a" line is followed by an "xxx" line
                    self.assertEqual(
                        reader.readline(keepends=True),
                        u"xxx\n",
                    )
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=False),
                        size*u"a",
                    )
                    self.assertEqual(
                        reader.readline(keepends=False),
                        u"xxx",
                    )

    def test_bug1175396(self):
        # Iterating over a StreamReader must return every CRLF terminated
        # line of this text exactly as it was written.
        s = [
            '<%!--===================================================\r\n',
            ' BLOG index page: show recent articles,\r\n',
            ' today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            ' entryids=storageEngine.listBlogEntries(date)\r\n',
            ' entryids.reverse() # descending\r\n',
            ' if count:\r\n',
            ' entryids=entryids[:count]\r\n',
            ' try:\r\n',
            ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            ' except StorageError,x:\r\n',
            ' log.error("Error loading articles: "+str(x))\r\n',
            ' self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            ' #-------------------- TODAY\'S ARTICLES\r\n',
            ' self.write("<h2>Today\'s articles</h2>")\r\n',
            ' showdate = frog.util.isodatestr() \r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            ' #-------------------- ACTIVE ARTICLES redirect\r\n',
            ' self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            ' #-------------------- LOGIN PAGE redirect\r\n',
            ' self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            ' showdate = self.Request.getParameter("date")\r\n',
            ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            ' #-------------------- RECENT ARTICLES\r\n',
            ' self.write("<h2>Recent articles</h2>")\r\n',
            ' dates=storageEngine.listBlogEntryDates()\r\n',
            ' if dates:\r\n',
            ' entries=[]\r\n',
            ' SHOWAMOUNT=10\r\n',
            ' for showdate in dates:\r\n',
            ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            ' if len(entries)>=SHOWAMOUNT:\r\n',
            ' break\r\n',
            ' \r\n',
        ]
        stream = StringIO.StringIO("".join(s).encode(self.encoding))
        reader = codecs.getreader(self.encoding)(stream)
        for (i, line) in enumerate(reader):
            self.assertEqual(line, s[i])

    def test_readlinequeue(self):
        # Writer and reader share one Queue, so data can be appended
        # between two readline() calls.
        q = Queue()
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        # No lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=False), u"foo")
        writer.write(u"\nbar\r")
        # the "\n" that arrives later is returned as a line of its own
        self.assertEqual(reader.readline(keepends=False), u"")
        self.assertEqual(reader.readline(keepends=False), u"bar")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=False), u"baz")
        self.assertEqual(reader.readline(keepends=False), u"")

        # Lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=True), u"foo\r")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=True), u"\n")
        self.assertEqual(reader.readline(keepends=True), u"bar\r")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=True), u"baz")
        self.assertEqual(reader.readline(keepends=True), u"")
        writer.write(u"foo\r\n")
        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")

    def test_bug1098990_a(self):
        # Three CRLF terminated lines have to be returned unchanged by
        # three readline() calls, followed by u"" at the end of the data.
        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
        s3 = u"next line.\r\n"

        s = (s1+s2+s3).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), u"")

    def test_bug1098990_b(self):
        # Same check as test_bug1098990_a with five shorter lines.
        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
        s3 = u"stillokay:bbbbxx\r\n"
        s4 = u"broken!!!!badbad\r\n"
        s5 = u"againokay.\r\n"

        s = (s1+s2+s3+s4+s5).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), s4)
        self.assertEqual(reader.readline(), s5)
        self.assertEqual(reader.readline(), u"")
219
class UTF16Test(ReadTest):
    """Runs the shared reader tests plus BOM handling checks for "utf-16"."""
    encoding = "utf-16"

    # u"spamspam" preceded by a single BOM, little and big endian
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        _encode, _decode, make_reader, make_writer = codecs.lookup(self.encoding)
        # write the text in two pieces through one StreamWriter
        raw = StringIO.StringIO()
        out = make_writer(raw)
        out.write(u"spam")
        out.write(u"spam")
        encoded = raw.getvalue()
        # either byte order is fine, but there must be exactly one BOM
        self.assert_(encoded in (self.spamle, self.spambe))
        # and the data has to decode back to the original text
        back = make_reader(StringIO.StringIO(encoded))
        self.assertEquals(back.read(), u"spamspam")

    def test_badbom(self):
        # Neither of these inputs starts with a valid BOM.
        for garbage in ("\xff\xff", "\xff\xff\xff\xff"):
            source = StringIO.StringIO(garbage)
            broken = codecs.getreader(self.encoding)(source)
            self.assertRaises(UnicodeError, broken.read)

    def test_partial(self):
        text = u"\x00\xff\u0100\uffff"
        expected = [
            u"", # first byte of BOM read
            u"", # second byte of BOM read => byteorder known
            u"",
            u"\x00",
            u"\x00",
            u"\x00\xff",
            u"\x00\xff",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100\uffff",
        ]
        self.check_partial(text, expected)

    def test_errors(self):
        # a lone byte with final=True has to be rejected in strict mode
        truncated = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, *truncated)
269
class UTF16LETest(ReadTest):
    """Runs the shared reader tests for the BOM-less "utf-16-le" codec."""
    encoding = "utf-16-le"

    def test_partial(self):
        # a character only shows up once both of its bytes have been fed
        text = u"\x00\xff\u0100\uffff"
        expected = [
            u"",
            u"\x00",
            u"\x00",
            u"\x00\xff",
            u"\x00\xff",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100\uffff",
        ]
        self.check_partial(text, expected)

    def test_errors(self):
        # a lone byte with final=True has to be rejected in strict mode
        truncated = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, *truncated)
290
class UTF16BETest(ReadTest):
    """Runs the shared reader tests for the BOM-less "utf-16-be" codec."""
    encoding = "utf-16-be"

    def test_partial(self):
        # a character only shows up once both of its bytes have been fed
        text = u"\x00\xff\u0100\uffff"
        expected = [
            u"",
            u"\x00",
            u"\x00",
            u"\x00\xff",
            u"\x00\xff",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100",
            u"\x00\xff\u0100\uffff",
        ]
        self.check_partial(text, expected)

    def test_errors(self):
        # a lone byte with final=True has to be rejected in strict mode
        truncated = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, *truncated)
311
class UTF8Test(ReadTest):
    """Runs the shared reader tests for the "utf-8" codec."""
    encoding = "utf-8"

    def test_partial(self):
        # The input mixes one, two and three byte sequences; a character
        # is only emitted once its last byte has been fed.
        text = u"\x00\xff\u07ff\u0800\uffff"
        expected = [
            u"\x00",
            u"\x00",
            u"\x00\xff",
            u"\x00\xff",
            u"\x00\xff\u07ff",
            u"\x00\xff\u07ff",
            u"\x00\xff\u07ff",
            u"\x00\xff\u07ff\u0800",
            u"\x00\xff\u07ff\u0800",
            u"\x00\xff\u07ff\u0800",
            u"\x00\xff\u07ff\u0800\uffff",
        ]
        self.check_partial(text, expected)
332
class UTF7Test(ReadTest):
    """Runs the shared reader tests for the "utf-7" codec."""
    encoding = "utf-7"

    # There is no test_partial() here yet, because UTF-7 doesn't support it.
337
338class UTF16ExTest(unittest.TestCase):
339
340 def test_errors(self):
341 self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, "\xff", "strict", 0, True)
342
343 def test_bad_args(self):
344 self.assertRaises(TypeError, codecs.utf_16_ex_decode)
345
346class ReadBufferTest(unittest.TestCase):
347
348 def test_array(self):
349 import array
350 self.assertEqual(
351 codecs.readbuffer_encode(array.array("c", "spam")),
352 ("spam", 4)
353 )
354
355 def test_empty(self):
356 self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
357
358 def test_bad_args(self):
359 self.assertRaises(TypeError, codecs.readbuffer_encode)
360 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
361
362class CharBufferTest(unittest.TestCase):
363
364 def test_string(self):
365 self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
366
367 def test_empty(self):
368 self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
369
370 def test_bad_args(self):
371 self.assertRaises(TypeError, codecs.charbuffer_encode)
372 self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
373
class UTF8SigTest(ReadTest):
    """Runs the shared reader tests for the "utf-8-sig" codec."""
    encoding = "utf-8-sig"

    def test_partial(self):
        # The text itself starts with u"\ufeff", so the encoded data holds
        # two BOMs: the signature added by the codec and the encoded one.
        text = u"\ufeff\x00\xff\u07ff\u0800\uffff"
        expected = [
            u"",
            u"",
            u"", # First BOM has been read and skipped
            u"",
            u"",
            u"\ufeff", # Second BOM has been read and emitted
            u"\ufeff\x00", # "\x00" read and emitted
            u"\ufeff\x00", # First byte of encoded u"\xff" read
            u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
            u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
            u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
            u"\ufeff\x00\xff\u07ff",
            u"\ufeff\x00\xff\u07ff",
            u"\ufeff\x00\xff\u07ff\u0800",
            u"\ufeff\x00\xff\u07ff\u0800",
            u"\ufeff\x00\xff\u07ff\u0800",
            u"\ufeff\x00\xff\u07ff\u0800\uffff",
        ]
        self.check_partial(text, expected)
400
Walter Dörwald8709a422002-09-03 13:53:40 +0000401class EscapeDecodeTest(unittest.TestCase):
Walter Dörwalde22d3392005-11-17 08:52:34 +0000402 def test_empty(self):
Walter Dörwald8709a422002-09-03 13:53:40 +0000403 self.assertEquals(codecs.escape_decode(""), ("", 0))
404
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000405class RecodingTest(unittest.TestCase):
406 def test_recoding(self):
407 f = StringIO.StringIO()
408 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
409 f2.write(u"a")
410 f2.close()
411 # Python used to crash on this at exit because of a refcount
412 # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000413
# Punycode sample strings, taken from RFC 3492.  Each entry is a pair of
# (unicode text, punycode encoded form).
punycode_testcases = [
    # (A) Arabic (Egyptian)
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),

    # (B) Chinese (simplified)
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),

    # (C) Chinese (traditional)
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),

    # (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),

    # (E) Hebrew
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),

    # (F) Hindi (Devanagari)
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
     u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
     u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
     u"\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    # (G) Japanese (kanji and hiragana)
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
     u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables)
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic)
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    # T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch
    # <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    # (L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]
517
# Sanity check that runs when the module is imported: every entry of
# punycode_testcases has to be a 2-tuple; entries of any other length
# are printed.
for i in punycode_testcases:
    if len(i)!=2:
        print repr(i)
521
522class PunycodeTest(unittest.TestCase):
523 def test_encode(self):
524 for uni, puny in punycode_testcases:
525 # Need to convert both strings to lower case, since
526 # some of the extended encodings use upper case, but our
527 # code produces only lower case. Converting just puny to
528 # lower is also insufficient, since some of the input characters
529 # are upper case.
530 self.assertEquals(uni.encode("punycode").lower(), puny.lower())
531
532 def test_decode(self):
533 for uni, puny in punycode_testcases:
534 self.assertEquals(uni, puny.decode("punycode"))
535
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000536class UnicodeInternalTest(unittest.TestCase):
537 def test_bug1251300(self):
538 # Decoding with unicode_internal used to not correctly handle "code
539 # points" above 0x10ffff on UCS-4 builds.
540 if sys.maxunicode > 0xffff:
541 ok = [
542 ("\x00\x10\xff\xff", u"\U0010ffff"),
543 ("\x00\x00\x01\x01", u"\U00000101"),
544 ("", u""),
545 ]
546 not_ok = [
547 "\x7f\xff\xff\xff",
548 "\x80\x00\x00\x00",
549 "\x81\x00\x00\x00",
550 "\x00",
551 "\x00\x00\x00\x00\x00",
552 ]
553 for internal, uni in ok:
554 if sys.byteorder == "little":
555 internal = "".join(reversed(internal))
556 self.assertEquals(uni, internal.decode("unicode_internal"))
557 for internal in not_ok:
558 if sys.byteorder == "little":
559 internal = "".join(reversed(internal))
560 self.assertRaises(UnicodeDecodeError, internal.decode,
561 "unicode_internal")
562
563 def test_decode_error_attributes(self):
564 if sys.maxunicode > 0xffff:
565 try:
566 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
567 except UnicodeDecodeError, ex:
568 self.assertEquals("unicode_internal", ex.encoding)
569 self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
570 self.assertEquals(4, ex.start)
571 self.assertEquals(8, ex.end)
572 else:
573 self.fail()
574
575 def test_decode_callback(self):
576 if sys.maxunicode > 0xffff:
577 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
578 decoder = codecs.getdecoder("unicode_internal")
579 ab = u"ab".encode("unicode_internal")
580 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
581 "UnicodeInternalTest")
582 self.assertEquals((u"ab", 12), ignored)
583
# Nameprep test vectors, taken from
# http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
# Each entry is (input, expected output); both are UTF-8 encoded byte
# strings.  An expected output of None marks input that nameprep has to
# reject, and (None, None) marks a vector that is skipped.
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE',
     'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f',
     'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0',
     'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba',
     '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
     '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7',
     '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    ('\xc7\xb0',
     '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90',
     '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0',
     '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96',
     '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96',
     '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ',
     ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0',
     ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80',
     None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80',
     ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b',
     ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80',
     ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f',
     '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85',
     None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e',
     None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf',
     ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5',
     None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3',
     None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4',
     None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4',
     None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe',
     None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf',
     None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82',
     None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd',
     None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5',
     None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81',
     '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e',
     None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa',
     None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81',
     None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82',
     None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar',
     None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar',
     None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    ('foo\xef\xb9\xb6bar',
     'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71',
     None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8',
     '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
736
737
738class NameprepTest(unittest.TestCase):
739 def test_nameprep(self):
740 from encodings.idna import nameprep
741 for pos, (orig, prepped) in enumerate(nameprep_tests):
742 if orig is None:
743 # Skipped
744 continue
745 # The Unicode strings are given in UTF-8
746 orig = unicode(orig, "utf-8")
747 if prepped is None:
748 # Input contains prohibited characters
749 self.assertRaises(UnicodeError, nameprep, orig)
750 else:
751 prepped = unicode(prepped, "utf-8")
752 try:
753 self.assertEquals(nameprep(orig), prepped)
754 except Exception,e:
755 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
756
Martin v. Löwisa1dde132004-03-24 16:48:24 +0000757class CodecTest(unittest.TestCase):
758 def test_builtin(self):
759 self.assertEquals(unicode("python.org", "idna"), u"python.org")
760
Martin v. Löwis8b595142005-08-25 11:03:38 +0000761 def test_stream(self):
762 import StringIO
763 r = codecs.getreader("idna")(StringIO.StringIO("abc"))
764 r.read(3)
765 self.assertEquals(r.read(), u"")
766
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000767class CodecsModuleTest(unittest.TestCase):
768
769 def test_decode(self):
770 self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
771 u'\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +0000772 self.assertRaises(TypeError, codecs.decode)
773 self.assertEquals(codecs.decode('abc'), u'abc')
774 self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
775
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000776 def test_encode(self):
777 self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
778 '\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +0000779 self.assertRaises(TypeError, codecs.encode)
Walter Dörwald690402f2005-11-17 18:51:34 +0000780 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
Walter Dörwald063e1e82004-10-28 13:04:26 +0000781 self.assertEquals(codecs.encode(u'abc'), 'abc')
782 self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
783
784 def test_register(self):
785 self.assertRaises(TypeError, codecs.register)
Walter Dörwald690402f2005-11-17 18:51:34 +0000786 self.assertRaises(TypeError, codecs.register, 42)
Walter Dörwald063e1e82004-10-28 13:04:26 +0000787
788 def test_lookup(self):
789 self.assertRaises(TypeError, codecs.lookup)
790 self.assertRaises(LookupError, codecs.lookup, "__spam__")
Walter Dörwald690402f2005-11-17 18:51:34 +0000791 self.assertRaises(LookupError, codecs.lookup, " ")
792
793 def test_getencoder(self):
794 self.assertRaises(TypeError, codecs.getencoder)
795 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
796
797 def test_getdecoder(self):
798 self.assertRaises(TypeError, codecs.getdecoder)
799 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
800
801 def test_getreader(self):
802 self.assertRaises(TypeError, codecs.getreader)
803 self.assertRaises(LookupError, codecs.getreader, "__spam__")
804
805 def test_getwriter(self):
806 self.assertRaises(TypeError, codecs.getwriter)
807 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000808
Hye-Shik Changaf5c7cf2004-10-17 23:51:21 +0000809class StreamReaderTest(unittest.TestCase):
810
811 def setUp(self):
812 self.reader = codecs.getreader('utf-8')
813 self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
814
815 def test_readlines(self):
816 f = self.reader(self.stream)
817 self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
818
Walter Dörwaldc9878e12005-07-20 22:15:39 +0000819class Str2StrTest(unittest.TestCase):
820
821 def test_read(self):
822 sin = "\x80".encode("base64_codec")
823 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
824 sout = reader.read()
825 self.assertEqual(sout, "\x80")
826 self.assert_(isinstance(sout, str))
827
828 def test_readline(self):
829 sin = "\x80".encode("base64_codec")
830 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
831 sout = reader.readline()
832 self.assertEqual(sout, "\x80")
833 self.assert_(isinstance(sout, str))
834
# Codec names exercised by BasicUnicodeTest.  The entries are kept in
# plain string sort order so additions are easy to place.
all_unicode_encodings = [
    "ascii", "base64_codec", "big5", "big5hkscs", "charmap",
    "cp037", "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "cp424", "cp437", "cp500", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857",
    "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869",
    "cp874", "cp875", "cp932", "cp949", "cp950",
    "euc_jis_2004", "euc_jisx0213", "euc_jp", "euc_kr",
    "gb18030", "gb2312", "gbk",
    "hex_codec", "hp_roman8", "hz", "idna",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "iso8859_1", "iso8859_10", "iso8859_11", "iso8859_13", "iso8859_14",
    "iso8859_15", "iso8859_16",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5",
    "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9",
    "johab", "koi8_r", "koi8_u", "latin_1",
    "mac_cyrillic", "mac_greek", "mac_iceland",
    "mac_latin2", "mac_roman", "mac_turkish",
    "palmos", "ptcp154", "punycode",
    "raw_unicode_escape", "rot_13",
    "shift_jis", "shift_jis_2004", "shift_jisx0213",
    "tis_620", "unicode_escape", "unicode_internal",
    "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8",
]

# "mbcs" is only added when this build's codecs module exposes mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings += ["mbcs"]
940
# Codecs that only operate on str (byte strings), never on unicode.
all_string_encodings = ["quopri_codec", "string_escape", "uu_codec"]
947
948# The following encoding is not tested, because it's not supposed
949# to work:
950# "undefined"
951
# Codecs that do not work in stateful mode; the stream reader/writer
# checks skip every name listed here.
broken_unicode_with_streams = [
    "base64_codec", "hex_codec",
    "punycode", "unicode_internal",
]
959
# bz2_codec can only be tested when the optional bz2 module imports.  Like
# the other entries in broken_unicode_with_streams it is excluded from the
# stateful (stream) checks.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings += ["bz2_codec"]
    broken_unicode_with_streams += ["bz2_codec"]
967
# zlib_codec can only be tested when the optional zlib module imports.  Like
# the other entries in broken_unicode_with_streams it is excluded from the
# stateful (stream) checks.
try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings += ["zlib_codec"]
    broken_unicode_with_streams += ["zlib_codec"]
975
class BasicUnicodeTest(unittest.TestCase):
    """Smoke tests run against every codec in all_unicode_encodings."""

    def test_basics(self):
        # Round-trip a short ASCII-only text through each codec: first with
        # the stateless encoder/decoder, then (unless the codec is listed in
        # broken_unicode_with_streams) through a StreamWriter/StreamReader
        # pair fed one item at a time.
        s = u"abc123"  # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            (encoded, size) = codecs.getencoder(encoding)(s)
            # unicode_internal is exempt from the consumed-length check.
            if encoding != "unicode_internal":
                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))

            if encoding not in broken_unicode_with_streams:
                # check stream reader/writer
                q = Queue()
                writer = codecs.getwriter(encoding)(q)
                encodedresult = ""
                for c in s:
                    # Write one character, then drain whatever the writer
                    # pushed into the queue so far.
                    writer.write(c)
                    encodedresult += q.read()
                q = Queue()
                reader = codecs.getreader(encoding)(q)
                decodedresult = u""
                for c in encodedresult:
                    # Feed one byte at a time; the reader may return nothing
                    # until it has seen a complete character.
                    q.write(c)
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

    def test_seek(self):
        # all codecs should be able to encode these
        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
        for encoding in all_unicode_encodings:
            if encoding == "idna":  # FIXME: See SF bug #1163178
                continue
            if encoding in broken_unicode_with_streams:
                continue
            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
            for _ in xrange(5):
                # Test that calling seek resets the internal codec state and buffers
                reader.seek(0, 0)
                line = reader.readline()
                # Name the codec in the message: this assertion runs once
                # per encoding, and a bare failure would not say which one.
                self.assertEqual(s[:len(line)], line, "%r != %r (encoding=%r)" % (s[:len(line)], line, encoding))

    def test_bad_decode_args(self):
        # Every decoder must reject a call without arguments; all but idna
        # and punycode must also reject a non-string argument.
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            self.assertRaises(TypeError, decoder)
            if encoding not in ("idna", "punycode"):
                self.assertRaises(TypeError, decoder, 42)

    def test_bad_encode_args(self):
        # Every encoder must reject a call without arguments.
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
            self.assertRaises(TypeError, encoder)
1028
class BasicStrTest(unittest.TestCase):
    """Smoke test for the str-only codecs listed in all_string_encodings."""

    def test_basics(self):
        # Encode and decode a short ASCII string with the stateless codec
        # functions and check that it survives the round trip.
        s = "abc123"
        for encoding in all_string_encodings:
            (encoded, size) = codecs.getencoder(encoding)(s)
            # Include the codec name so a failure identifies the encoding.
            self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(encoded)
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1037
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001038class CharmapTest(unittest.TestCase):
1039 def test_decode_with_string_map(self):
1040 self.assertEquals(
1041 codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
1042 (u"abc", 3)
1043 )
1044
1045 self.assertEquals(
1046 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
1047 (u"ab\ufffd", 3)
1048 )
1049
1050 self.assertEquals(
1051 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
1052 (u"ab\ufffd", 3)
1053 )
1054
1055 self.assertEquals(
1056 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
1057 (u"ab", 3)
1058 )
1059
1060 self.assertEquals(
1061 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
1062 (u"ab", 3)
1063 )
1064
1065 allbytes = "".join(chr(i) for i in xrange(256))
1066 self.assertEquals(
1067 codecs.charmap_decode(allbytes, "ignore", u""),
1068 (u"", len(allbytes))
1069 )
1070
1071
def test_main():
    """Run all test case classes of this module via test_support."""
    test_classes = (
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        EscapeDecodeTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        CodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        Str2StrTest,
        BasicUnicodeTest,
        BasicStrTest,
        CharmapTest,
    )
    # Classes are passed positionally, in the order listed above.
    test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()