blob: 254bc4dc7947ad5c35751c71f2919ec7db0f0e07 [file] [log] [blame]
from test import test_support
import unittest
import codecs
import sys
import StringIO
import _testcapi
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    queue: write bytes at one end, read bytes from the other end
    """
    def __init__(self):
        # Everything written but not yet read.
        self._buffer = ""

    def write(self, chars):
        """Append chars to the end of the queue."""
        self._buffer += chars

    def read(self, size=-1):
        """Remove and return up to size bytes from the front of the queue.

        A negative size drains everything that is currently buffered.
        """
        if size < 0:
            size = len(self._buffer)
        data, self._buffer = self._buffer[:size], self._buffer[size:]
        return data
25
class ReadTest(unittest.TestCase):
    """Reader tests shared by the codec test classes.

    Every method reads self.encoding, so a concrete subclass has to
    define an ``encoding`` class attribute.
    """

    def check_partial(self, input, partialresults):
        """Decode input byte by byte and compare with partialresults.

        input is the unicode string to encode with self.encoding;
        partialresults[i] is the text expected to have been decoded once
        byte i of the encoded form has been fed in.
        """
        encoded = input.encode(self.encoding)

        # get a StreamReader for the encoding and feed the bytestring version
        # of input to the reader byte by byte. Read everything available from
        # the StreamReader and check that the results equal the appropriate
        # entries from partialresults.
        q = Queue()
        r = codecs.getreader(self.encoding)(q)
        result = u""
        for (c, partialresult) in zip(encoded, partialresults):
            q.write(c)
            result += r.read()
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(r.read(), u"")
        self.assertEqual(r.bytebuffer, "")
        self.assertEqual(r.charbuffer, u"")

        # do the check again, this time using an incremental decoder
        d = codecs.getincrementaldecoder(self.encoding)()
        self._check_incremental(d, encoded, partialresults)

        # Check whether the reset method works properly
        d.reset()
        self._check_incremental(d, encoded, partialresults)

        # check iterdecode()
        self.assertEqual(
            input,
            u"".join(codecs.iterdecode(encoded, self.encoding))
        )

    def _check_incremental(self, decoder, encoded, partialresults):
        """Feed encoded to decoder one byte at a time, checking each step."""
        result = u""
        for (c, partialresult) in zip(encoded, partialresults):
            result += decoder.decode(c)
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(decoder.decode("", True), u"")
        self.assertEqual(decoder.buffer, "")

    def test_readline(self):
        """Check StreamReader.readline() for all supported line endings."""
        def getreader(input):
            stream = StringIO.StringIO(input.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        def readalllines(input, keepends=True, size=None):
            # Read until readline() returns an empty line and join the
            # lines with "|" so the split points show up in the result.
            reader = getreader(input)
            lines = []
            while True:
                line = reader.readline(size=size, keepends=keepends)
                if not line:
                    break
                lines.append(line)
            return "|".join(lines)

        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
        sexpectednoends = u"foo|bar|baz|spam|eggs"
        self.assertEqual(readalllines(s, True), sexpected)
        self.assertEqual(readalllines(s, False), sexpectednoends)
        self.assertEqual(readalllines(s, True, 10), sexpected)
        self.assertEqual(readalllines(s, False, 10), sexpectednoends)

        # The line ends have to be spelled out: all of them are whitespace,
        # so u"\n \r\n \r \u2028".split() returns an empty list and the
        # loops below would never execute.
        lineends = ("\n", "\r\n", "\r", u"\u2028")

        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            # +200 keeps the first line non-empty; an empty line would end
            # readalllines() early when keepends is false.
            vw.append((i*200+200)*u"\u3042" + lineend)
            vwo.append((i*200+200)*u"\u3042")
        self.assertEqual(readalllines("".join(vw), True), "|".join(vw))
        self.assertEqual(readalllines("".join(vw), False), "|".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in xrange(80):
            for lineend in lineends:
                s = 10*(size*u"a" + lineend + u"xxx\n")
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=True),
                        size*u"a" + lineend,
                    )
                    # every "a" line is followed by an "xxx" line
                    self.assertEqual(
                        reader.readline(keepends=True),
                        u"xxx\n",
                    )
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=False),
                        size*u"a",
                    )
                    self.assertEqual(
                        reader.readline(keepends=False),
                        u"xxx",
                    )

    def test_bug1175396(self):
        """Iterating a StreamReader must yield every CRLF line unchanged."""
        s = [
            '<%!--===================================================\r\n',
            ' BLOG index page: show recent articles,\r\n',
            ' today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            ' entryids=storageEngine.listBlogEntries(date)\r\n',
            ' entryids.reverse() # descending\r\n',
            ' if count:\r\n',
            ' entryids=entryids[:count]\r\n',
            ' try:\r\n',
            ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            ' except StorageError,x:\r\n',
            ' log.error("Error loading articles: "+str(x))\r\n',
            ' self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            ' #-------------------- TODAY\'S ARTICLES\r\n',
            ' self.write("<h2>Today\'s articles</h2>")\r\n',
            ' showdate = frog.util.isodatestr() \r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            ' #-------------------- ACTIVE ARTICLES redirect\r\n',
            ' self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            ' #-------------------- LOGIN PAGE redirect\r\n',
            ' self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            ' showdate = self.Request.getParameter("date")\r\n',
            ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            ' #-------------------- RECENT ARTICLES\r\n',
            ' self.write("<h2>Recent articles</h2>")\r\n',
            ' dates=storageEngine.listBlogEntryDates()\r\n',
            ' if dates:\r\n',
            ' entries=[]\r\n',
            ' SHOWAMOUNT=10\r\n',
            ' for showdate in dates:\r\n',
            ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            ' if len(entries)>=SHOWAMOUNT:\r\n',
            ' break\r\n',
            ' \r\n',
        ]
        stream = StringIO.StringIO("".join(s).encode(self.encoding))
        reader = codecs.getreader(self.encoding)(stream)
        for (i, line) in enumerate(reader):
            self.assertEqual(line, s[i])

    def test_readlinequeue(self):
        """Check readline() when the data arrives in pieces through a Queue."""
        q = Queue()
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        # No lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=False), u"foo")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=False), u"")
        self.assertEqual(reader.readline(keepends=False), u"bar")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=False), u"baz")
        self.assertEqual(reader.readline(keepends=False), u"")

        # Lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=True), u"foo\r")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=True), u"\n")
        self.assertEqual(reader.readline(keepends=True), u"bar\r")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=True), u"baz")
        self.assertEqual(reader.readline(keepends=True), u"")
        writer.write(u"foo\r\n")
        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")

    def test_bug1098990_a(self):
        """readline() must return three CRLF lines intact, then u""."""
        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
        s3 = u"next line.\r\n"

        s = (s1+s2+s3).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), u"")

    def test_bug1098990_b(self):
        """readline() must return five short CRLF lines intact, then u""."""
        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
        s3 = u"stillokay:bbbbxx\r\n"
        s4 = u"broken!!!!badbad\r\n"
        s5 = u"againokay.\r\n"

        s = (s1+s2+s3+s4+s5).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), s4)
        self.assertEqual(reader.readline(), s5)
        self.assertEqual(reader.readline(), u"")
246
class UTF32Test(ReadTest):
    """Tests for the "utf-32" codec."""
    encoding = "utf-32"

    # u"spamspam" with a single leading BOM, in little and big endian order
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        """Two writes to one StreamWriter must produce exactly one BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Reading a stream that starts with 0xff bytes raises UnicodeError."""
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Check byte-by-byte decoding, including the four BOM bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """Check the 'replace' and 'ignore' handlers on a truncated input."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_32_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_32_decode('\x01', 'ignore', True))

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)
317
class UTF32LETest(ReadTest):
    """Tests for the "utf-32-le" codec."""
    encoding = "utf-32-le"

    def test_partial(self):
        """Check byte-by-byte decoding: one character per four bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_simple(self):
        """U+10203 encodes to its four bytes, least significant first."""
        self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
                          "\xff", "strict", True)
350
class UTF32BETest(ReadTest):
    """Tests for the "utf-32-be" codec."""
    encoding = "utf-32-be"

    def test_partial(self):
        """Check byte-by-byte decoding: one character per four bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_simple(self):
        """U+10203 encodes to its four bytes, most significant first."""
        self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
                          "\xff", "strict", True)
383
class UTF16Test(ReadTest):
    """Tests for the "utf-16" codec."""
    encoding = "utf-16"

    # u"spamspam" with a single leading BOM, in little and big endian order
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        """Two writes to one StreamWriter must produce exactly one BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Reading a stream that starts with 0xff bytes raises UnicodeError."""
        s = StringIO.StringIO("\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO("\xff\xff\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Check byte-by-byte decoding, including the two BOM bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read => byteorder known
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """Check the 'replace' and 'ignore' handlers on a truncated input."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_16_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_16_decode('\x01', 'ignore', True))

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, "\xff", "strict", True)

    def test_bug691291(self):
        """codecs.open() in 'U' mode must not translate '\\r\\n'."""
        # Files are always opened in binary mode, even if no binary mode was
        # specified. This means that no automatic conversion of '\n' is done
        # on reading and writing.
        s1 = u'Hello\r\nworld\r\n'

        s = s1.encode(self.encoding)
        try:
            with open(test_support.TESTFN, 'wb') as fp:
                fp.write(s)
            with codecs.open(test_support.TESTFN, 'U', encoding=self.encoding) as reader:
                self.assertEqual(reader.read(), s1)
        finally:
            # remove the scratch file whether or not the check passed
            test_support.unlink(test_support.TESTFN)
454
class UTF16LETest(ReadTest):
    """Tests for the "utf-16-le" codec."""
    encoding = "utf-16-le"

    def test_partial(self):
        """Check byte-by-byte decoding: one character per two bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, "\xff", "strict", True)
475
class UTF16BETest(ReadTest):
    """Tests for the "utf-16-be" codec."""
    encoding = "utf-16-be"

    def test_partial(self):
        """Check byte-by-byte decoding: one character per two bytes."""
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        """A lone byte decoded as final input raises UnicodeDecodeError."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, "\xff", "strict", True)
496
class UTF8Test(ReadTest):
    """Tests for the "utf-8" codec."""
    encoding = "utf-8"

    def test_partial(self):
        """Check byte-by-byte decoding of 1-, 2- and 3-byte sequences."""
        self.check_partial(
            u"\x00\xff\u07ff\u0800\uffff",
            [
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800\uffff",
            ]
        )
517
class UTF7Test(ReadTest):
    """Tests for the "utf-7" codec."""
    encoding = "utf-7"

    def test_partial(self):
        """Check byte-by-byte decoding of u"a+-b"."""
        self.check_partial(
            u"a+-b",
            [
                u"a",
                u"a",
                u"a+",
                u"a+-",
                u"a+-b",
            ]
        )
Walter Dörwalde22d3392005-11-17 08:52:34 +0000532
533class UTF16ExTest(unittest.TestCase):
534
535 def test_errors(self):
536 self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, "\xff", "strict", 0, True)
537
538 def test_bad_args(self):
539 self.assertRaises(TypeError, codecs.utf_16_ex_decode)
540
541class ReadBufferTest(unittest.TestCase):
542
543 def test_array(self):
544 import array
545 self.assertEqual(
546 codecs.readbuffer_encode(array.array("c", "spam")),
547 ("spam", 4)
548 )
549
550 def test_empty(self):
551 self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
552
553 def test_bad_args(self):
554 self.assertRaises(TypeError, codecs.readbuffer_encode)
555 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
556
557class CharBufferTest(unittest.TestCase):
558
559 def test_string(self):
560 self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
561
562 def test_empty(self):
563 self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
564
565 def test_bad_args(self):
566 self.assertRaises(TypeError, codecs.charbuffer_encode)
567 self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
568
class UTF8SigTest(ReadTest):
    """Tests for the "utf-8-sig" codec."""
    encoding = "utf-8-sig"

    def test_partial(self):
        """Check byte-by-byte decoding; only the first BOM is dropped."""
        self.check_partial(
            u"\ufeff\x00\xff\u07ff\u0800\uffff",
            [
                u"",
                u"",
                u"", # First BOM has been read and skipped
                u"",
                u"",
                u"\ufeff", # Second BOM has been read and emitted
                u"\ufeff\x00", # "\x00" read and emitted
                u"\ufeff\x00", # First byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_bug1601501(self):
        """Decoding an input that consists only of the BOM must not fail."""
        # SF bug #1601501: check that the codec works with a buffer
        unicode("\xef\xbb\xbf", "utf-8-sig")

    def test_bom(self):
        """The incremental decoder strips the BOM written by the encoder."""
        d = codecs.getincrementaldecoder("utf-8-sig")()
        s = u"spam"
        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)

    def _check_stream_read(self, bytestring, unistring):
        """Read bytestring through a StreamReader with various size hints.

        For every size hint the concatenated chunks must equal unistring.
        """
        reader = codecs.getreader("utf-8-sig")
        for sizehint in [None] + list(range(1, 11)) + \
                        [64, 128, 256, 512, 1024]:
            istream = reader(StringIO.StringIO(bytestring))
            ostream = StringIO.StringIO()
            while True:
                if sizehint is not None:
                    data = istream.read(sizehint)
                else:
                    data = istream.read()

                if not data:
                    break
                ostream.write(data)

            got = ostream.getvalue()
            self.assertEqual(got, unistring)

    def test_stream_bom(self):
        """Stream input that starts with a BOM decodes without the BOM."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = codecs.BOM_UTF8 + "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream_read(bytestring, unistring)

    def test_stream_bare(self):
        """Stream input without a BOM decodes to the same text."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream_read(bytestring, unistring)
648
Walter Dörwald8709a422002-09-03 13:53:40 +0000649class EscapeDecodeTest(unittest.TestCase):
Walter Dörwalde22d3392005-11-17 08:52:34 +0000650 def test_empty(self):
Walter Dörwald8709a422002-09-03 13:53:40 +0000651 self.assertEquals(codecs.escape_decode(""), ("", 0))
652
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000653class RecodingTest(unittest.TestCase):
654 def test_recoding(self):
655 f = StringIO.StringIO()
656 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
657 f2.write(u"a")
658 f2.close()
659 # Python used to crash on this at exit because of a refcount
660 # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000661
# From RFC 3492
# Each entry is a pair (unicode string, expected punycode encoding).
punycode_testcases = [
    # A Arabic (Egyptian):
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),
    # B Chinese (simplified):
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),
    # C Chinese (traditional):
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),
    # D Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),
    # E Hebrew:
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),
    # F Hindi (Devanagari):
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
     u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
     u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
     u"\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    #(G) Japanese (kanji and hiragana):
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
     u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables):
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic):
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    # T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
    # <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    #(L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]

# Sanity check at import time: report any entry that is not a 2-tuple
# (for example because a comma is missing between the two strings).
for i in punycode_testcases:
    if len(i)!=2:
        print(repr(i))
769
class PunycodeTest(unittest.TestCase):
    """Tests for the "punycode" codec, driven by punycode_testcases."""

    def test_encode(self):
        """Every unicode sample encodes to its expected punycode form."""
        for uni, puny in punycode_testcases:
            # Need to convert both strings to lower case, since
            # some of the extended encodings use upper case, but our
            # code produces only lower case. Converting just puny to
            # lower is also insufficient, since some of the input characters
            # are upper case.
            self.assertEqual(uni.encode("punycode").lower(), puny.lower())

    def test_decode(self):
        """Every punycode sample decodes back to its unicode form."""
        for uni, puny in punycode_testcases:
            self.assertEqual(uni, puny.decode("punycode"))
783
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000784class UnicodeInternalTest(unittest.TestCase):
785 def test_bug1251300(self):
786 # Decoding with unicode_internal used to not correctly handle "code
787 # points" above 0x10ffff on UCS-4 builds.
788 if sys.maxunicode > 0xffff:
789 ok = [
790 ("\x00\x10\xff\xff", u"\U0010ffff"),
791 ("\x00\x00\x01\x01", u"\U00000101"),
792 ("", u""),
793 ]
794 not_ok = [
795 "\x7f\xff\xff\xff",
796 "\x80\x00\x00\x00",
797 "\x81\x00\x00\x00",
798 "\x00",
799 "\x00\x00\x00\x00\x00",
800 ]
801 for internal, uni in ok:
802 if sys.byteorder == "little":
803 internal = "".join(reversed(internal))
804 self.assertEquals(uni, internal.decode("unicode_internal"))
805 for internal in not_ok:
806 if sys.byteorder == "little":
807 internal = "".join(reversed(internal))
808 self.assertRaises(UnicodeDecodeError, internal.decode,
809 "unicode_internal")
810
811 def test_decode_error_attributes(self):
812 if sys.maxunicode > 0xffff:
813 try:
814 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
815 except UnicodeDecodeError, ex:
816 self.assertEquals("unicode_internal", ex.encoding)
817 self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
818 self.assertEquals(4, ex.start)
819 self.assertEquals(8, ex.end)
820 else:
821 self.fail()
822
823 def test_decode_callback(self):
824 if sys.maxunicode > 0xffff:
825 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
826 decoder = codecs.getdecoder("unicode_internal")
827 ab = u"ab".encode("unicode_internal")
828 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
829 "UnicodeInternalTest")
830 self.assertEquals((u"ab", 12), ignored)
831
Walter Dörwalda7fb4082009-05-06 14:28:24 +0000832 def test_encode_length(self):
833 # Issue 3739
834 encoder = codecs.getencoder("unicode_internal")
835 self.assertEquals(encoder(u"a")[1], 1)
836 self.assertEquals(encoder(u"\xe9\u0142")[1], 2)
837
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000838 encoder = codecs.getencoder("string-escape")
839 self.assertEquals(encoder(r'\x00')[1], 4)
840
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
#
# Each entry is an (input, expected) pair of UTF-8 encoded byte strings and
# corresponds to vector 3.<index + 1> of that document:
#   * expected is None  -> nameprep() must reject the input with UnicodeError
#   * (None, None)      -> the vector is skipped, but keeps its slot so that
#                          the numbering of the following vectors stays right
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE',
     'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f',
     'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0',
     'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba',
     '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
     '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7',
     '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus; it says \xc7\xf0
    ('\xc7\xb0',
     '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90',
     '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0',
     '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96',
     '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96',
     '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ',
     ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0',
     ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80',
     None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80',
     ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b',
     ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80',
     ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f',
     '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85',
     None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e',
     None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf',
     ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5',
     None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3',
     None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4',
     None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4',
     None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe',
     None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf',
     None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82',
     None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd',
     None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5',
     None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81',
     '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e',
     None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa',
     None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81',
     None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82',
     None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar',
     None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar',
     None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    ('foo\xef\xb9\xb6bar',
     'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71',
     None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8',
     '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned code points
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
993
994
class NameprepTest(unittest.TestCase):
    """Check encodings.idna.nameprep against the libidn draft test vectors."""

    def test_nameprep(self):
        # Walks the module-level nameprep_tests table.  Every failure message
        # carries the vector number ("Test 3.N") so a broken vector can be
        # located without a debugger.  (Comments instead of a docstring, so
        # verbose unittest output keeps showing the test name.)
        from encodings.idna import nameprep
        for pos, (orig, prepped) in enumerate(nameprep_tests):
            if orig is None:
                # Skipped
                continue
            label = "Test 3.%d" % (pos + 1)
            # The Unicode strings are given in UTF-8
            orig = unicode(orig, "utf-8")
            if prepped is None:
                # Input contains prohibited characters: only UnicodeError
                # counts as a correct rejection, anything else propagates.
                try:
                    nameprep(orig)
                except UnicodeError:
                    continue
                self.fail("%s: nameprep(%r) did not raise UnicodeError"
                          % (label, orig))
            prepped = unicode(prepped, "utf-8")
            # Keep the try body down to the call under test so that only an
            # unexpected exception from nameprep() itself is relabelled; the
            # comparison below reports through the normal assertion path.
            try:
                result = nameprep(orig)
            except Exception as e:
                self.fail("%s: %s: %s" % (label, type(e).__name__, e))
            self.assertEqual(result, prepped,
                             "%s: %r != %r" % (label, result, prepped))
1013
Walter Dörwald78a0be62006-04-14 18:25:39 +00001014class IDNACodecTest(unittest.TestCase):
1015 def test_builtin_decode(self):
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001016 self.assertEquals(unicode("python.org", "idna"), u"python.org")
Walter Dörwald78a0be62006-04-14 18:25:39 +00001017 self.assertEquals(unicode("python.org.", "idna"), u"python.org.")
1018 self.assertEquals(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
1019 self.assertEquals(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
1020
1021 def test_builtin_encode(self):
1022 self.assertEquals(u"python.org".encode("idna"), "python.org")
1023 self.assertEquals("python.org.".encode("idna"), "python.org.")
1024 self.assertEquals(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
1025 self.assertEquals(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001026
Martin v. Löwis8b595142005-08-25 11:03:38 +00001027 def test_stream(self):
1028 import StringIO
1029 r = codecs.getreader("idna")(StringIO.StringIO("abc"))
1030 r.read(3)
1031 self.assertEquals(r.read(), u"")
1032
Walter Dörwald78a0be62006-04-14 18:25:39 +00001033 def test_incremental_decode(self):
1034 self.assertEquals(
1035 "".join(codecs.iterdecode("python.org", "idna")),
1036 u"python.org"
1037 )
1038 self.assertEquals(
1039 "".join(codecs.iterdecode("python.org.", "idna")),
1040 u"python.org."
1041 )
1042 self.assertEquals(
1043 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1044 u"pyth\xf6n.org."
1045 )
1046 self.assertEquals(
1047 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1048 u"pyth\xf6n.org."
1049 )
1050
1051 decoder = codecs.getincrementaldecoder("idna")()
1052 self.assertEquals(decoder.decode("xn--xam", ), u"")
1053 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1054 self.assertEquals(decoder.decode(u"rg"), u"")
1055 self.assertEquals(decoder.decode(u"", True), u"org")
1056
1057 decoder.reset()
1058 self.assertEquals(decoder.decode("xn--xam", ), u"")
1059 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1060 self.assertEquals(decoder.decode("rg."), u"org.")
1061 self.assertEquals(decoder.decode("", True), u"")
1062
1063 def test_incremental_encode(self):
1064 self.assertEquals(
1065 "".join(codecs.iterencode(u"python.org", "idna")),
1066 "python.org"
1067 )
1068 self.assertEquals(
1069 "".join(codecs.iterencode(u"python.org.", "idna")),
1070 "python.org."
1071 )
1072 self.assertEquals(
1073 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1074 "xn--pythn-mua.org."
1075 )
1076 self.assertEquals(
1077 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1078 "xn--pythn-mua.org."
1079 )
1080
1081 encoder = codecs.getincrementalencoder("idna")()
1082 self.assertEquals(encoder.encode(u"\xe4x"), "")
1083 self.assertEquals(encoder.encode(u"ample.org"), "xn--xample-9ta.")
1084 self.assertEquals(encoder.encode(u"", True), "org")
1085
1086 encoder.reset()
1087 self.assertEquals(encoder.encode(u"\xe4x"), "")
1088 self.assertEquals(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
1089 self.assertEquals(encoder.encode(u"", True), "")
1090
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001091class CodecsModuleTest(unittest.TestCase):
1092
1093 def test_decode(self):
1094 self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
1095 u'\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001096 self.assertRaises(TypeError, codecs.decode)
1097 self.assertEquals(codecs.decode('abc'), u'abc')
1098 self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
1099
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001100 def test_encode(self):
1101 self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
1102 '\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001103 self.assertRaises(TypeError, codecs.encode)
Walter Dörwald690402f2005-11-17 18:51:34 +00001104 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
Walter Dörwald063e1e82004-10-28 13:04:26 +00001105 self.assertEquals(codecs.encode(u'abc'), 'abc')
1106 self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
1107
1108 def test_register(self):
1109 self.assertRaises(TypeError, codecs.register)
Walter Dörwald690402f2005-11-17 18:51:34 +00001110 self.assertRaises(TypeError, codecs.register, 42)
Walter Dörwald063e1e82004-10-28 13:04:26 +00001111
1112 def test_lookup(self):
1113 self.assertRaises(TypeError, codecs.lookup)
1114 self.assertRaises(LookupError, codecs.lookup, "__spam__")
Walter Dörwald690402f2005-11-17 18:51:34 +00001115 self.assertRaises(LookupError, codecs.lookup, " ")
1116
1117 def test_getencoder(self):
1118 self.assertRaises(TypeError, codecs.getencoder)
1119 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
1120
1121 def test_getdecoder(self):
1122 self.assertRaises(TypeError, codecs.getdecoder)
1123 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
1124
1125 def test_getreader(self):
1126 self.assertRaises(TypeError, codecs.getreader)
1127 self.assertRaises(LookupError, codecs.getreader, "__spam__")
1128
1129 def test_getwriter(self):
1130 self.assertRaises(TypeError, codecs.getwriter)
1131 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001132
Hye-Shik Changaf5c7cf2004-10-17 23:51:21 +00001133class StreamReaderTest(unittest.TestCase):
1134
1135 def setUp(self):
1136 self.reader = codecs.getreader('utf-8')
1137 self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
1138
1139 def test_readlines(self):
1140 f = self.reader(self.stream)
1141 self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
1142
Georg Brandl8f99f812006-10-29 08:39:22 +00001143class EncodedFileTest(unittest.TestCase):
Tim Petersabd8a332006-11-03 02:32:46 +00001144
Georg Brandl8f99f812006-10-29 08:39:22 +00001145 def test_basic(self):
1146 f = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
Georg Brandl5b4e1c22006-10-29 09:32:16 +00001147 ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
1148 self.assertEquals(ef.read(), '\\\xd5\n\x00\x00\xae')
Georg Brandl8f99f812006-10-29 08:39:22 +00001149
1150 f = StringIO.StringIO()
1151 ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
1152 ef.write('\xc3\xbc')
1153 self.assertEquals(f.getvalue(), '\xfc')
1154
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001155class Str2StrTest(unittest.TestCase):
1156
1157 def test_read(self):
1158 sin = "\x80".encode("base64_codec")
1159 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1160 sout = reader.read()
1161 self.assertEqual(sout, "\x80")
Ezio Melottib0f5adc2010-01-24 16:58:36 +00001162 self.assertIsInstance(sout, str)
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001163
1164 def test_readline(self):
1165 sin = "\x80".encode("base64_codec")
1166 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1167 sout = reader.readline()
1168 self.assertEqual(sout, "\x80")
Ezio Melottib0f5adc2010-01-24 16:58:36 +00001169 self.assertIsInstance(sout, str)
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001170
# Codec names iterated by BasicUnicodeTest.  Entries are spelled with
# underscores; test_basics compares each one against the name reported by
# codecs.lookup() after normalising "_" to "-".  The list is extended further
# down in this module when optional codecs turn out to be available.
all_unicode_encodings = [
    "ascii",
    "base64_codec",
    "big5",
    "big5hkscs",
    "charmap",
    "cp037",
    "cp1006",
    "cp1026",
    "cp1140",
    "cp1250",
    "cp1251",
    "cp1252",
    "cp1253",
    "cp1254",
    "cp1255",
    "cp1256",
    "cp1257",
    "cp1258",
    "cp424",
    "cp437",
    "cp500",
    "cp720",
    "cp737",
    "cp775",
    "cp850",
    "cp852",
    "cp855",
    "cp856",
    "cp857",
    "cp858",
    "cp860",
    "cp861",
    "cp862",
    "cp863",
    "cp864",
    "cp865",
    "cp866",
    "cp869",
    "cp874",
    "cp875",
    "cp932",
    "cp949",
    "cp950",
    "euc_jis_2004",
    "euc_jisx0213",
    "euc_jp",
    "euc_kr",
    "gb18030",
    "gb2312",
    "gbk",
    "hex_codec",
    "hp_roman8",
    "hz",
    "idna",
    "iso2022_jp",
    "iso2022_jp_1",
    "iso2022_jp_2",
    "iso2022_jp_2004",
    "iso2022_jp_3",
    "iso2022_jp_ext",
    "iso2022_kr",
    "iso8859_1",
    "iso8859_10",
    "iso8859_11",
    "iso8859_13",
    "iso8859_14",
    "iso8859_15",
    "iso8859_16",
    "iso8859_2",
    "iso8859_3",
    "iso8859_4",
    "iso8859_5",
    "iso8859_6",
    "iso8859_7",
    "iso8859_8",
    "iso8859_9",
    "johab",
    "koi8_r",
    "koi8_u",
    "latin_1",
    "mac_cyrillic",
    "mac_greek",
    "mac_iceland",
    "mac_latin2",
    "mac_roman",
    "mac_turkish",
    "palmos",
    "ptcp154",
    "punycode",
    "raw_unicode_escape",
    "rot_13",
    "shift_jis",
    "shift_jis_2004",
    "shift_jisx0213",
    "tis_620",
    "unicode_escape",
    "unicode_internal",
    "utf_16",
    "utf_16_be",
    "utf_16_le",
    "utf_7",
    "utf_8",
]

# "mbcs" is only added when this build of codecs exposes mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings.append("mbcs")
1278
# The following encodings work only with str, not unicode
# (they are iterated by BasicStrTest).
all_string_encodings = [
    "quopri_codec",
    "string_escape",
    "uu_codec",
]

# The following encoding is not tested, because it's not supposed
# to work:
#    "undefined"

# The following encodings don't work in stateful mode, so the
# StreamReader/StreamWriter checks skip them.
broken_unicode_with_streams = [
    "base64_codec",
    "hex_codec",
    "punycode",
    "unicode_internal"
]
# A copy, not an alias: it is taken before bz2_codec/zlib_codec are appended
# to broken_unicode_with_streams later in this module, so those two codecs are
# excluded from the stream checks only, not from the incremental-coder checks.
broken_incremental_coders = broken_unicode_with_streams[:]
Walter Dörwaldee1d2472004-12-29 16:04:38 +00001298
# The following encodings only support "strict" mode; BasicUnicodeTest skips
# its errors="ignore" incremental round trip for them.
only_strict_mode = [
    "idna",
    "zlib_codec",
    "bz2_codec",
]

# bz2 and zlib are optional: their codecs are registered for testing only when
# the underlying module can be imported.  Both are also marked as not working
# with stream readers/writers.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings.append("bz2_codec")
    broken_unicode_with_streams.append("bz2_codec")

try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings.append("zlib_codec")
    broken_unicode_with_streams.append("zlib_codec")
1321
class BasicUnicodeTest(unittest.TestCase):
    """Generic checks run against every codec in all_unicode_encodings."""

    def test_basics(self):
        # Round-trips a short ASCII text through each codec in four ways:
        # stateless, StreamWriter/StreamReader, incremental coders (Python and
        # C API) and iterencode()/iterdecode().
        s = u"abc123" # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            # The registered codec name must match the list entry, modulo
            # "_" vs "-" and the two spelling exceptions handled here.
            name = codecs.lookup(encoding).name
            if encoding.endswith("_codec"):
                name += "_codec"
            elif encoding == "latin_1":
                name = "latin_1"
            self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
            # Stateless round trip; the encoder must report that it consumed
            # the whole input.
            (bytes, size) = codecs.getencoder(encoding)(s)
            self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(bytes)
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))

            if encoding not in broken_unicode_with_streams:
                # check stream reader/writer
                # (one character written / one byte fed at a time, through
                # the in-memory Queue)
                q = Queue()
                writer = codecs.getwriter(encoding)(q)
                encodedresult = ""
                for c in s:
                    writer.write(c)
                    encodedresult += q.read()
                q = Queue()
                reader = codecs.getreader(encoding)(q)
                decodedresult = u""
                for c in encodedresult:
                    q.write(c)
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

            if encoding not in broken_incremental_coders:
                # check incremental decoder/encoder (fetched via the Python
                # and C API) and iterencode()/iterdecode()
                try:
                    encoder = codecs.getincrementalencoder(encoding)()
                    cencoder = _testcapi.codec_incrementalencoder(encoding)
                except LookupError: # no IncrementalEncoder
                    pass
                else:
                    # check incremental decoder/encoder
                    encodedresult = ""
                    for c in s:
                        encodedresult += encoder.encode(c)
                    # final=True flushes whatever the coder still buffers
                    encodedresult += encoder.encode(u"", True)
                    decoder = codecs.getincrementaldecoder(encoding)()
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += decoder.decode(c)
                    decodedresult += decoder.decode("", True)
                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                    # check C API
                    encodedresult = ""
                    for c in s:
                        encodedresult += cencoder.encode(c)
                    encodedresult += cencoder.encode(u"", True)
                    cdecoder = _testcapi.codec_incrementaldecoder(encoding)
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += cdecoder.decode(c)
                    decodedresult += cdecoder.decode("", True)
                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                    # check iterencode()/iterdecode()
                    result = u"".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
                    self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))

                    # check iterencode()/iterdecode() with empty string
                    result = u"".join(codecs.iterdecode(codecs.iterencode(u"", encoding), encoding))
                    self.assertEqual(result, u"")

                if encoding not in only_strict_mode:
                    # check incremental decoder/encoder with errors argument
                    try:
                        encoder = codecs.getincrementalencoder(encoding)("ignore")
                        cencoder = _testcapi.codec_incrementalencoder(encoding, "ignore")
                    except LookupError: # no IncrementalEncoder
                        pass
                    else:
                        encodedresult = "".join(encoder.encode(c) for c in s)
                        decoder = codecs.getincrementaldecoder(encoding)("ignore")
                        decodedresult = u"".join(decoder.decode(c) for c in encodedresult)
                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                        encodedresult = "".join(cencoder.encode(c) for c in s)
                        cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
                        decodedresult = u"".join(cdecoder.decode(c) for c in encodedresult)
                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

    def test_seek(self):
        # all codecs should be able to encode these
        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
        for encoding in all_unicode_encodings:
            if encoding == "idna": # FIXME: See SF bug #1163178
                continue
            if encoding in broken_unicode_with_streams:
                continue
            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
            for t in xrange(5):
                # Test that calling seek resets the internal codec state and buffers
                reader.seek(0, 0)
                line = reader.readline()
                # whatever was read must be a prefix of the original text
                self.assertEqual(s[:len(line)], line)

    def test_bad_decode_args(self):
        # Every decoder rejects a call without arguments; all except idna and
        # punycode are also expected to reject an int argument.
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            self.assertRaises(TypeError, decoder)
            if encoding not in ("idna", "punycode"):
                self.assertRaises(TypeError, decoder, 42)

    def test_bad_encode_args(self):
        # Every encoder rejects a call without arguments.
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
            self.assertRaises(TypeError, encoder)

    def test_encoding_map_type_initialized(self):
        from encodings import cp1140
        # This used to crash, we are only verifying there's no crash.
        # (The comparison is trivially true on purpose.)
        table_type = type(cp1140.encoding_table)
        self.assertEqual(table_type, table_type)
1444
class BasicStrTest(unittest.TestCase):
    """Stateless round trip for the codecs that only work on str."""

    def test_basics(self):
        text = "abc123"
        for codec_name in all_string_encodings:
            encode = codecs.getencoder(codec_name)
            encoded, consumed = encode(text)
            # the encoder must report the whole input as consumed
            self.assertEqual(consumed, len(text))
            decode = codecs.getdecoder(codec_name)
            decoded, _ = decode(encoded)
            self.assertEqual(
                decoded, text,
                "%r != %r (encoding=%r)" % (decoded, text, codec_name))
1453
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001454class CharmapTest(unittest.TestCase):
1455 def test_decode_with_string_map(self):
1456 self.assertEquals(
1457 codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
1458 (u"abc", 3)
1459 )
1460
1461 self.assertEquals(
1462 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
1463 (u"ab\ufffd", 3)
1464 )
1465
1466 self.assertEquals(
1467 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
1468 (u"ab\ufffd", 3)
1469 )
1470
1471 self.assertEquals(
1472 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
1473 (u"ab", 3)
1474 )
1475
1476 self.assertEquals(
1477 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
1478 (u"ab", 3)
1479 )
1480
1481 allbytes = "".join(chr(i) for i in xrange(256))
1482 self.assertEquals(
1483 codecs.charmap_decode(allbytes, "ignore", u""),
1484 (u"", len(allbytes))
1485 )
1486
Georg Brandl8f99f812006-10-29 08:39:22 +00001487class WithStmtTest(unittest.TestCase):
1488 def test_encodedfile(self):
1489 f = StringIO.StringIO("\xc3\xbc")
1490 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
1491 self.assertEquals(ef.read(), "\xfc")
1492
1493 def test_streamreaderwriter(self):
1494 f = StringIO.StringIO("\xc3\xbc")
1495 info = codecs.lookup("utf-8")
1496 with codecs.StreamReaderWriter(f, info.streamreader,
1497 info.streamwriter, 'strict') as srw:
1498 self.assertEquals(srw.read(), u"\xfc")
1499
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001500
Victor Stinner262be5e2010-05-22 02:11:07 +00001501class BomTest(unittest.TestCase):
1502 def test_seek0(self):
Victor Stinner7df55da2010-05-22 13:37:56 +00001503 data = u"1234567890"
Victor Stinner262be5e2010-05-22 02:11:07 +00001504 tests = ("utf-16",
1505 "utf-16-le",
1506 "utf-16-be",
1507 "utf-32",
1508 "utf-32-le",
1509 "utf-32-be")
1510 for encoding in tests:
Victor Stinner7df55da2010-05-22 13:37:56 +00001511 # Check if the BOM is written only once
1512 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
Victor Stinner262be5e2010-05-22 02:11:07 +00001513 f.write(data)
1514 f.write(data)
1515 f.seek(0)
1516 self.assertEquals(f.read(), data * 2)
1517 f.seek(0)
1518 self.assertEquals(f.read(), data * 2)
1519
Victor Stinner7df55da2010-05-22 13:37:56 +00001520 # Check that the BOM is written after a seek(0)
1521 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1522 f.write(data[0])
1523 self.assertNotEquals(f.tell(), 0)
1524 f.seek(0)
1525 f.write(data)
1526 f.seek(0)
1527 self.assertEquals(f.read(), data)
1528
1529 # (StreamWriter) Check that the BOM is written after a seek(0)
1530 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1531 f.writer.write(data[0])
1532 self.assertNotEquals(f.writer.tell(), 0)
1533 f.writer.seek(0)
1534 f.writer.write(data)
1535 f.seek(0)
1536 self.assertEquals(f.read(), data)
1537
1538 # Check that the BOM is not written after a seek() at a position
1539 # different than the start
1540 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1541 f.write(data)
1542 f.seek(f.tell())
1543 f.write(data)
1544 f.seek(0)
1545 self.assertEquals(f.read(), data * 2)
1546
1547 # (StreamWriter) Check that the BOM is not written after a seek()
1548 # at a position different than the start
1549 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1550 f.writer.write(data)
1551 f.writer.seek(f.writer.tell())
1552 f.writer.write(data)
1553 f.seek(0)
1554 self.assertEquals(f.read(), data * 2)
1555
Victor Stinner262be5e2010-05-22 02:11:07 +00001556
def test_main():
    """Run every TestCase class of this module through test_support."""
    # Order is the order in which the classes are handed to run_unittest().
    test_classes = (
        UTF32Test,
        UTF32LETest,
        UTF32BETest,
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        EscapeDecodeTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        IDNACodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        EncodedFileTest,
        Str2StrTest,
        BasicUnicodeTest,
        BasicStrTest,
        CharmapTest,
        WithStmtTest,
        BomTest,
    )
    test_support.run_unittest(*test_classes)
Fred Drake2e2be372001-09-20 21:33:42 +00001587
1588
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()