blob: 4d03ae735ded55c055b5141bda0eb96c829cd219 [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
2import unittest
Marc-André Lemburga37171d2001-06-19 20:09:28 +00003import codecs
Walter Dörwald9ae019b2006-03-18 14:22:26 +00004import sys, StringIO, _testcapi
Marc-André Lemburga37171d2001-06-19 20:09:28 +00005
class Queue(object):
    """
    Simple in-memory FIFO: write() appends data at the tail, read()
    removes data from the head.
    """
    def __init__(self):
        self._buffer = ""

    def write(self, chars):
        """Append chars to the end of the pending data."""
        self._buffer = self._buffer + chars

    def read(self, size=-1):
        """
        Remove and return the first size items of the pending data.

        A negative size (the default) drains everything that is pending.
        """
        if size < 0:
            data, self._buffer = self._buffer, ""
            return data
        data = self._buffer[:size]
        self._buffer = self._buffer[size:]
        return data
25
class ReadTest(unittest.TestCase):
    """
    Reader checks shared by the per-codec test classes.

    Every method reads self.encoding; the subclasses in this file set it
    as a class attribute naming the codec under test.
    """

    def check_partial(self, input, partialresults):
        """
        Feed the encoded form of input to the codec one byte at a time and
        compare the text decoded so far with the matching entry of
        partialresults after every byte.

        The comparison is made with a StreamReader, with an incremental
        decoder, and with the same decoder again after reset().  Finally the
        whole input is round-tripped through codecs.iterdecode().
        """
        encoded = input.encode(self.encoding)

        # get a StreamReader for the encoding and feed the bytestring version
        # of input to the reader byte by byte. Read everything available from
        # the StreamReader and check that the results equal the appropriate
        # entries from partialresults.
        q = Queue()
        r = codecs.getreader(self.encoding)(q)
        result = u""
        for (c, partialresult) in zip(encoded, partialresults):
            q.write(c)
            result += r.read()
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(r.read(), u"")
        self.assertEqual(r.bytebuffer, "")
        self.assertEqual(r.charbuffer, u"")

        # do the check again, this time using an incremental decoder
        d = codecs.getincrementaldecoder(self.encoding)()
        result = u""
        for (c, partialresult) in zip(encoded, partialresults):
            result += d.decode(c)
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(d.decode("", True), u"")
        self.assertEqual(d.buffer, "")

        # Check whether the reset method works properly
        d.reset()
        result = u""
        for (c, partialresult) in zip(encoded, partialresults):
            result += d.decode(c)
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(d.decode("", True), u"")
        self.assertEqual(d.buffer, "")

        # check iterdecode()
        self.assertEqual(
            input,
            u"".join(codecs.iterdecode(encoded, self.encoding))
        )

    def test_readline(self):
        """Check StreamReader.readline() for every line-end style."""
        def getreader(input):
            stream = StringIO.StringIO(input.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        def readalllines(input, keepends=True, size=None):
            # Read input line by line and return the lines joined with "|"
            # so that the line boundaries show up in the result.
            reader = getreader(input)
            lines = []
            while True:
                line = reader.readline(size=size, keepends=keepends)
                if not line:
                    break
                lines.append(line)
            return "|".join(lines)

        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
        sexpectednoends = u"foo|bar|baz|spam|eggs"
        self.assertEqual(readalllines(s, True), sexpected)
        self.assertEqual(readalllines(s, False), sexpectednoends)
        self.assertEqual(readalllines(s, True, 10), sexpected)
        self.assertEqual(readalllines(s, False, 10), sexpectednoends)

        # The line ends are listed explicitly.  Splitting a unicode string
        # that consists only of line ends and spaces yields an empty list
        # (all of these characters are whitespace), which would silently
        # turn the loops below into no-ops.
        lineends = (u"\n", u"\r\n", u"\r", u"\u2028")

        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            # +200 so that even the first line is a long one
            vw.append((i*200+200)*u"\u3042" + lineend)
            vwo.append((i*200+200)*u"\u3042")
        # readalllines() joins with "|", so the expected value must too
        self.assertEqual(readalllines("".join(vw), True), "|".join(vw))
        self.assertEqual(readalllines("".join(vw), False), "|".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in xrange(80):
            for lineend in lineends:
                s = 10*(size*u"a" + lineend + u"xxx\n")
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=True),
                        size*u"a" + lineend,
                    )
                    # consume the filler line that follows each test line
                    self.assertEqual(
                        reader.readline(keepends=True),
                        u"xxx\n",
                    )
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=False),
                        size*u"a",
                    )
                    self.assertEqual(
                        reader.readline(keepends=False),
                        u"xxx",
                    )

    def test_bug1175396(self):
        """Iterating a StreamReader must return the lines of s unchanged."""
        # NOTE(review): this copy of the file came from a blame view that
        # appears to have collapsed runs of spaces; confirm the spacing
        # inside these literals against the repository copy.
        s = [
            '<%!--===================================================\r\n',
            ' BLOG index page: show recent articles,\r\n',
            ' today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            ' entryids=storageEngine.listBlogEntries(date)\r\n',
            ' entryids.reverse() # descending\r\n',
            ' if count:\r\n',
            ' entryids=entryids[:count]\r\n',
            ' try:\r\n',
            ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            ' except StorageError,x:\r\n',
            ' log.error("Error loading articles: "+str(x))\r\n',
            ' self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            ' #-------------------- TODAY\'S ARTICLES\r\n',
            ' self.write("<h2>Today\'s articles</h2>")\r\n',
            ' showdate = frog.util.isodatestr() \r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            ' #-------------------- ACTIVE ARTICLES redirect\r\n',
            ' self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            ' #-------------------- LOGIN PAGE redirect\r\n',
            ' self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            ' showdate = self.Request.getParameter("date")\r\n',
            ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            ' entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            ' #-------------------- RECENT ARTICLES\r\n',
            ' self.write("<h2>Recent articles</h2>")\r\n',
            ' dates=storageEngine.listBlogEntryDates()\r\n',
            ' if dates:\r\n',
            ' entries=[]\r\n',
            ' SHOWAMOUNT=10\r\n',
            ' for showdate in dates:\r\n',
            ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            ' if len(entries)>=SHOWAMOUNT:\r\n',
            ' break\r\n',
            ' \r\n',
        ]
        stream = StringIO.StringIO("".join(s).encode(self.encoding))
        reader = codecs.getreader(self.encoding)(stream)
        for (i, line) in enumerate(reader):
            self.assertEqual(line, s[i])

    def test_readlinequeue(self):
        """Check readline() when the data arrives in pieces through a Queue."""
        q = Queue()
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        # No lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=False), u"foo")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=False), u"")
        self.assertEqual(reader.readline(keepends=False), u"bar")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=False), u"baz")
        self.assertEqual(reader.readline(keepends=False), u"")

        # Lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=True), u"foo\r")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=True), u"\n")
        self.assertEqual(reader.readline(keepends=True), u"bar\r")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=True), u"baz")
        self.assertEqual(reader.readline(keepends=True), u"")
        writer.write(u"foo\r\n")
        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")

    def test_bug1098990_a(self):
        """Three CRLF-terminated lines must come back one per readline()."""
        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
        s3 = u"next line.\r\n"

        s = (s1+s2+s3).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), u"")

    def test_bug1098990_b(self):
        """Five CRLF-terminated lines must come back one per readline()."""
        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
        s3 = u"stillokay:bbbbxx\r\n"
        s4 = u"broken!!!!badbad\r\n"
        s5 = u"againokay.\r\n"

        s = (s1+s2+s3+s4+s5).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), s4)
        self.assertEqual(reader.readline(), s5)
        self.assertEqual(reader.readline(), u"")
246
class UTF32Test(ReadTest):
    """Tests for the "utf-32" codec (byte order signalled by a BOM)."""
    encoding = "utf-32"

    # u"spamspam" encoded with exactly one BOM, little and big endian
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must produce a single BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Input that does not start with a valid BOM must be rejected."""
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """Check the 'replace' and 'ignore' handlers on truncated input."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_32_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_32_decode('\x01', 'ignore', True))

    def test_errors(self):
        """Truncated input with final=True must raise in strict mode."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        encoded_le = '\xff\xfe\x00\x00' + '\x00\x00\x01\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_decode(encoded_le)[0])
        encoded_be = '\x00\x00\xfe\xff' + '\x00\x01\x00\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_decode(encoded_be)[0])
327
class UTF32LETest(ReadTest):
    """Tests for the "utf-32-le" codec."""
    encoding = "utf-32-le"

    def test_partial(self):
        text = u"\x00\xff\u0100\uffff"
        # One expected value per encoded byte: nothing for the first three
        # bytes, then each decoded prefix is repeated until the next
        # character is complete.
        expected = (
            [u""] * 3 +
            [u"\x00"] * 4 +
            [u"\x00\xff"] * 4 +
            [u"\x00\xff\u0100"] * 4 +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x03\x02\x01\x00")

    def test_errors(self):
        # A single byte with final=True cannot be decoded in strict mode.
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_32_le_decode, "\xff", "strict", True
        )

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        data = '\x00\x00\x01\x00' * 1024
        decoded = codecs.utf_32_le_decode(data)[0]
        self.assertEqual(u'\U00010000' * 1024, decoded)
367
class UTF32BETest(ReadTest):
    """Tests for the "utf-32-be" codec."""
    encoding = "utf-32-be"

    def test_partial(self):
        text = u"\x00\xff\u0100\uffff"
        # One expected value per encoded byte: nothing for the first three
        # bytes, then each decoded prefix is repeated until the next
        # character is complete.
        expected = (
            [u""] * 3 +
            [u"\x00"] * 4 +
            [u"\x00\xff"] * 4 +
            [u"\x00\xff\u0100"] * 4 +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x00\x01\x02\x03")

    def test_errors(self):
        # A single byte with final=True cannot be decoded in strict mode.
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_32_be_decode, "\xff", "strict", True
        )

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        data = '\x00\x01\x00\x00' * 1024
        decoded = codecs.utf_32_be_decode(data)[0]
        self.assertEqual(u'\U00010000' * 1024, decoded)
407
408
class UTF16Test(ReadTest):
    """Tests for the "utf-16" codec (byte order signalled by a BOM)."""
    encoding = "utf-16"

    # u"spamspam" encoded with exactly one BOM, little and big endian
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        """Two writes through one StreamWriter must produce a single BOM."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """Input that does not start with a valid BOM must be rejected."""
        s = StringIO.StringIO("\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO("\xff\xff\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read => byteorder known
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_handlers(self):
        """Check the 'replace' and 'ignore' handlers on truncated input."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_16_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_16_decode('\x01', 'ignore', True))

    def test_errors(self):
        """Truncated input with final=True must raise in strict mode."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, "\xff", "strict", True)

    def test_bug691291(self):
        # Files are always opened in binary mode, even if no binary mode was
        # specified. This means that no automatic conversion of '\n' is done
        # on reading and writing.
        s1 = u'Hello\r\nworld\r\n'

        s = s1.encode(self.encoding)
        try:
            with open(test_support.TESTFN, 'wb') as fp:
                fp.write(s)
            with codecs.open(test_support.TESTFN, 'U', encoding=self.encoding) as reader:
                self.assertEqual(reader.read(), s1)
        finally:
            # remove the scratch file even if the assertion failed
            test_support.unlink(test_support.TESTFN)
479
class UTF16LETest(ReadTest):
    """Tests for the "utf-16-le" codec."""
    encoding = "utf-16-le"

    def test_partial(self):
        text = u"\x00\xff\u0100\uffff"
        # One expected value per encoded byte: nothing for the first byte,
        # then each decoded prefix is repeated until the next character is
        # complete.
        expected = (
            [u""] +
            [u"\x00"] * 2 +
            [u"\x00\xff"] * 2 +
            [u"\x00\xff\u0100"] * 2 +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_errors(self):
        # A single byte with final=True cannot be decoded in strict mode.
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_le_decode, "\xff", "strict", True
        )
500
class UTF16BETest(ReadTest):
    """Tests for the "utf-16-be" codec."""
    encoding = "utf-16-be"

    def test_partial(self):
        text = u"\x00\xff\u0100\uffff"
        # One expected value per encoded byte: nothing for the first byte,
        # then each decoded prefix is repeated until the next character is
        # complete.
        expected = (
            [u""] +
            [u"\x00"] * 2 +
            [u"\x00\xff"] * 2 +
            [u"\x00\xff\u0100"] * 2 +
            [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_errors(self):
        # A single byte with final=True cannot be decoded in strict mode.
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_be_decode, "\xff", "strict", True
        )
521
class UTF8Test(ReadTest):
    """Tests for the "utf-8" codec."""
    encoding = "utf-8"

    def test_partial(self):
        text = u"\x00\xff\u07ff\u0800\uffff"
        # One expected value per encoded byte; a decoded prefix is repeated
        # while the bytes of the following character are still arriving.
        expected = (
            [u"\x00"] * 2 +
            [u"\x00\xff"] * 2 +
            [u"\x00\xff\u07ff"] * 3 +
            [u"\x00\xff\u07ff\u0800"] * 3 +
            [u"\x00\xff\u07ff\u0800\uffff"]
        )
        self.check_partial(text, expected)
542
class UTF7Test(ReadTest):
    """Tests for the "utf-7" codec."""
    encoding = "utf-7"

    def test_partial(self):
        text = u"a+-b"
        # expected decoder output after each successive encoded byte
        expected = [u"a", u"a", u"a+", u"a+-", u"a+-b"]
        self.check_partial(text, expected)
Walter Dörwalde22d3392005-11-17 08:52:34 +0000557
class UTF16ExTest(unittest.TestCase):
    """Tests for codecs.utf_16_ex_decode()."""

    def test_errors(self):
        # A single byte with final=True cannot be decoded in strict mode.
        self.assertRaises(
            UnicodeDecodeError,
            codecs.utf_16_ex_decode, b"\xff", "strict", 0, True
        )

    def test_bad_args(self):
        # Calling the decoder without any argument is a TypeError.
        self.assertRaises(TypeError, codecs.utf_16_ex_decode)
565
566class ReadBufferTest(unittest.TestCase):
567
568 def test_array(self):
569 import array
570 self.assertEqual(
571 codecs.readbuffer_encode(array.array("c", "spam")),
572 ("spam", 4)
573 )
574
575 def test_empty(self):
576 self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
577
578 def test_bad_args(self):
579 self.assertRaises(TypeError, codecs.readbuffer_encode)
580 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
581
582class CharBufferTest(unittest.TestCase):
583
584 def test_string(self):
585 self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
586
587 def test_empty(self):
588 self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
589
590 def test_bad_args(self):
591 self.assertRaises(TypeError, codecs.charbuffer_encode)
592 self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
593
class UTF8SigTest(ReadTest):
    """Tests for the "utf-8-sig" codec."""
    encoding = "utf-8-sig"

    def test_partial(self):
        self.check_partial(
            u"\ufeff\x00\xff\u07ff\u0800\uffff",
            [
                u"",
                u"",
                u"", # First BOM has been read and skipped
                u"",
                u"",
                u"\ufeff", # Second BOM has been read and emitted
                u"\ufeff\x00", # "\x00" read and emitted
                u"\ufeff\x00", # First byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800",
                u"\ufeff\x00\xff\u07ff\u0800\uffff",
            ]
        )

    def test_bug1601501(self):
        # SF bug #1601501: check that the codec works with a buffer
        unicode("\xef\xbb\xbf", "utf-8-sig")

    def test_bom(self):
        """The incremental decoder must strip the signature it is given."""
        d = codecs.getincrementaldecoder("utf-8-sig")()
        s = u"spam"
        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)

    def _check_stream(self, bytestring, unistring):
        """
        Read bytestring through a utf-8-sig StreamReader, once without a
        size argument and once for each of a range of size arguments, and
        check that the concatenated reads equal unistring every time.
        """
        reader = codecs.getreader("utf-8-sig")
        for sizehint in [None] + list(range(1, 11)) + \
                        [64, 128, 256, 512, 1024]:
            istream = reader(StringIO.StringIO(bytestring))
            ostream = StringIO.StringIO()
            while 1:
                if sizehint is not None:
                    data = istream.read(sizehint)
                else:
                    data = istream.read()

                if not data:
                    break
                ostream.write(data)

            got = ostream.getvalue()
            self.assertEqual(got, unistring)

    def test_stream_bom(self):
        """Stream input that starts with the UTF-8 BOM."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = codecs.BOM_UTF8 + "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream(bytestring, unistring)

    def test_stream_bare(self):
        """Stream input without a BOM."""
        unistring = u"ABC\u00A1\u2200XYZ"
        bytestring = "ABC\xC2\xA1\xE2\x88\x80XYZ"
        self._check_stream(bytestring, unistring)
673
class EscapeDecodeTest(unittest.TestCase):
    """Tests for codecs.escape_decode()."""

    def test_empty(self):
        # Empty input decodes to an empty result with a length of 0.
        self.assertEqual(codecs.escape_decode(b""), (b"", 0))
677
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000678class RecodingTest(unittest.TestCase):
679 def test_recoding(self):
680 f = StringIO.StringIO()
681 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
682 f2.write(u"a")
683 f2.close()
684 # Python used to crash on this at exit because of a refcount
685 # bug in _codecsmodule.c
Fred Drake2e2be372001-09-20 21:33:42 +0000686
# Sample strings from RFC 3492.
# Each entry is a pair (unicode text, punycode encoding of that text).
punycode_testcases = [
    # (A) Arabic (Egyptian):
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),
    # (B) Chinese (simplified):
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),
    # (C) Chinese (traditional):
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),
    # (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),
    # (E) Hebrew:
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),
    # (F) Hindi (Devanagari):
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
     u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
     u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
     u"\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    # (G) Japanese (kanji and hiragana):
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
     u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables):
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic):
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    #  T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
    #  <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    # (L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]

# Sanity check of the table above: report any entry that is not a pair
# (for example because a comma is missing between two string literals).
for i in punycode_testcases:
    if len(i) != 2:
        print(repr(i))
794
class PunycodeTest(unittest.TestCase):
    """Round-trip the punycode_testcases table through the punycode codec."""

    def test_encode(self):
        for uni, puny in punycode_testcases:
            # Need to convert both strings to lower case, since
            # some of the extended encodings use upper case, but our
            # code produces only lower case. Converting just puny to
            # lower is also insufficient, since some of the input characters
            # are upper case.
            self.assertEqual(uni.encode("punycode").lower(), puny.lower())

    def test_decode(self):
        for uni, puny in punycode_testcases:
            self.assertEqual(uni, puny.decode("punycode"))
808
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000809class UnicodeInternalTest(unittest.TestCase):
810 def test_bug1251300(self):
811 # Decoding with unicode_internal used to not correctly handle "code
812 # points" above 0x10ffff on UCS-4 builds.
813 if sys.maxunicode > 0xffff:
814 ok = [
815 ("\x00\x10\xff\xff", u"\U0010ffff"),
816 ("\x00\x00\x01\x01", u"\U00000101"),
817 ("", u""),
818 ]
819 not_ok = [
820 "\x7f\xff\xff\xff",
821 "\x80\x00\x00\x00",
822 "\x81\x00\x00\x00",
823 "\x00",
824 "\x00\x00\x00\x00\x00",
825 ]
826 for internal, uni in ok:
827 if sys.byteorder == "little":
828 internal = "".join(reversed(internal))
829 self.assertEquals(uni, internal.decode("unicode_internal"))
830 for internal in not_ok:
831 if sys.byteorder == "little":
832 internal = "".join(reversed(internal))
833 self.assertRaises(UnicodeDecodeError, internal.decode,
834 "unicode_internal")
835
836 def test_decode_error_attributes(self):
837 if sys.maxunicode > 0xffff:
838 try:
839 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
840 except UnicodeDecodeError, ex:
841 self.assertEquals("unicode_internal", ex.encoding)
842 self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
843 self.assertEquals(4, ex.start)
844 self.assertEquals(8, ex.end)
845 else:
846 self.fail()
847
848 def test_decode_callback(self):
849 if sys.maxunicode > 0xffff:
850 codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
851 decoder = codecs.getdecoder("unicode_internal")
852 ab = u"ab".encode("unicode_internal")
853 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
854 "UnicodeInternalTest")
855 self.assertEquals((u"ab", 12), ignored)
856
Philip Jenveybc3376f2010-06-09 17:55:28 +0000857 encoder = codecs.getencoder("string-escape")
858 self.assertEquals(encoder(r'\x00')[1], 4)
859
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
#
# Each entry is an (input, expected) pair consumed by
# NameprepTest.test_nameprep:
#   * both members are byte strings holding UTF-8 text;
#   * expected is None when nameprep() must raise UnicodeError for input;
#   * (None, None) marks a skipped vector; it is kept as a placeholder so
#     that list position + 1 still equals the "3.N" number in the comments
#     (the test reports failures as "Test 3.<position + 1>").
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE',
     'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f',
     'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0',
     'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba',
     '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
     '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7',
     '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    ('\xc7\xb0',
     '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90',
     '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0',
     '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96',
     '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96',
     '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ',
     ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0',
     ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80',
     None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80',
     ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b',
     ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80',
     ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f',
     '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85',
     None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e',
     None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf',
     ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5',
     None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3',
     None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4',
     None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4',
     None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe',
     None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf',
     None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82',
     None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd',
     None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5',
     None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81',
     '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e',
     None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa',
     None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81',
     None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82',
     None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar',
     None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar',
     None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    # NOTE(review): the title above names U+FB38, but the bytes
    # \xef\xb9\xb6 used below are the UTF-8 encoding of U+FE76.
    ('foo\xef\xb9\xb6bar',
     'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71',
     None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8',
     '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
1012
1013
1014class NameprepTest(unittest.TestCase):
1015 def test_nameprep(self):
1016 from encodings.idna import nameprep
1017 for pos, (orig, prepped) in enumerate(nameprep_tests):
1018 if orig is None:
1019 # Skipped
1020 continue
1021 # The Unicode strings are given in UTF-8
1022 orig = unicode(orig, "utf-8")
1023 if prepped is None:
1024 # Input contains prohibited characters
1025 self.assertRaises(UnicodeError, nameprep, orig)
1026 else:
1027 prepped = unicode(prepped, "utf-8")
1028 try:
1029 self.assertEquals(nameprep(orig), prepped)
1030 except Exception,e:
1031 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
1032
Walter Dörwald78a0be62006-04-14 18:25:39 +00001033class IDNACodecTest(unittest.TestCase):
1034 def test_builtin_decode(self):
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001035 self.assertEquals(unicode("python.org", "idna"), u"python.org")
Walter Dörwald78a0be62006-04-14 18:25:39 +00001036 self.assertEquals(unicode("python.org.", "idna"), u"python.org.")
1037 self.assertEquals(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
1038 self.assertEquals(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
1039
1040 def test_builtin_encode(self):
1041 self.assertEquals(u"python.org".encode("idna"), "python.org")
1042 self.assertEquals("python.org.".encode("idna"), "python.org.")
1043 self.assertEquals(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
1044 self.assertEquals(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
Martin v. Löwisa1dde132004-03-24 16:48:24 +00001045
Martin v. Löwis8b595142005-08-25 11:03:38 +00001046 def test_stream(self):
1047 import StringIO
1048 r = codecs.getreader("idna")(StringIO.StringIO("abc"))
1049 r.read(3)
1050 self.assertEquals(r.read(), u"")
1051
Walter Dörwald78a0be62006-04-14 18:25:39 +00001052 def test_incremental_decode(self):
1053 self.assertEquals(
1054 "".join(codecs.iterdecode("python.org", "idna")),
1055 u"python.org"
1056 )
1057 self.assertEquals(
1058 "".join(codecs.iterdecode("python.org.", "idna")),
1059 u"python.org."
1060 )
1061 self.assertEquals(
1062 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1063 u"pyth\xf6n.org."
1064 )
1065 self.assertEquals(
1066 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
1067 u"pyth\xf6n.org."
1068 )
1069
1070 decoder = codecs.getincrementaldecoder("idna")()
1071 self.assertEquals(decoder.decode("xn--xam", ), u"")
1072 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1073 self.assertEquals(decoder.decode(u"rg"), u"")
1074 self.assertEquals(decoder.decode(u"", True), u"org")
1075
1076 decoder.reset()
1077 self.assertEquals(decoder.decode("xn--xam", ), u"")
1078 self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
1079 self.assertEquals(decoder.decode("rg."), u"org.")
1080 self.assertEquals(decoder.decode("", True), u"")
1081
1082 def test_incremental_encode(self):
1083 self.assertEquals(
1084 "".join(codecs.iterencode(u"python.org", "idna")),
1085 "python.org"
1086 )
1087 self.assertEquals(
1088 "".join(codecs.iterencode(u"python.org.", "idna")),
1089 "python.org."
1090 )
1091 self.assertEquals(
1092 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1093 "xn--pythn-mua.org."
1094 )
1095 self.assertEquals(
1096 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
1097 "xn--pythn-mua.org."
1098 )
1099
1100 encoder = codecs.getincrementalencoder("idna")()
1101 self.assertEquals(encoder.encode(u"\xe4x"), "")
1102 self.assertEquals(encoder.encode(u"ample.org"), "xn--xample-9ta.")
1103 self.assertEquals(encoder.encode(u"", True), "org")
1104
1105 encoder.reset()
1106 self.assertEquals(encoder.encode(u"\xe4x"), "")
1107 self.assertEquals(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
1108 self.assertEquals(encoder.encode(u"", True), "")
1109
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001110class CodecsModuleTest(unittest.TestCase):
1111
1112 def test_decode(self):
1113 self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
1114 u'\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001115 self.assertRaises(TypeError, codecs.decode)
1116 self.assertEquals(codecs.decode('abc'), u'abc')
1117 self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
1118
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001119 def test_encode(self):
1120 self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
1121 '\xe4\xf6\xfc')
Walter Dörwald063e1e82004-10-28 13:04:26 +00001122 self.assertRaises(TypeError, codecs.encode)
Walter Dörwald690402f2005-11-17 18:51:34 +00001123 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
Walter Dörwald063e1e82004-10-28 13:04:26 +00001124 self.assertEquals(codecs.encode(u'abc'), 'abc')
1125 self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
1126
1127 def test_register(self):
1128 self.assertRaises(TypeError, codecs.register)
Walter Dörwald690402f2005-11-17 18:51:34 +00001129 self.assertRaises(TypeError, codecs.register, 42)
Walter Dörwald063e1e82004-10-28 13:04:26 +00001130
1131 def test_lookup(self):
1132 self.assertRaises(TypeError, codecs.lookup)
1133 self.assertRaises(LookupError, codecs.lookup, "__spam__")
Walter Dörwald690402f2005-11-17 18:51:34 +00001134 self.assertRaises(LookupError, codecs.lookup, " ")
1135
1136 def test_getencoder(self):
1137 self.assertRaises(TypeError, codecs.getencoder)
1138 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
1139
1140 def test_getdecoder(self):
1141 self.assertRaises(TypeError, codecs.getdecoder)
1142 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
1143
1144 def test_getreader(self):
1145 self.assertRaises(TypeError, codecs.getreader)
1146 self.assertRaises(LookupError, codecs.getreader, "__spam__")
1147
1148 def test_getwriter(self):
1149 self.assertRaises(TypeError, codecs.getwriter)
1150 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001151
Hye-Shik Changaf5c7cf2004-10-17 23:51:21 +00001152class StreamReaderTest(unittest.TestCase):
1153
1154 def setUp(self):
1155 self.reader = codecs.getreader('utf-8')
1156 self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
1157
1158 def test_readlines(self):
1159 f = self.reader(self.stream)
1160 self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
1161
Georg Brandl8f99f812006-10-29 08:39:22 +00001162class EncodedFileTest(unittest.TestCase):
Tim Petersabd8a332006-11-03 02:32:46 +00001163
Georg Brandl8f99f812006-10-29 08:39:22 +00001164 def test_basic(self):
1165 f = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
Georg Brandl5b4e1c22006-10-29 09:32:16 +00001166 ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
1167 self.assertEquals(ef.read(), '\\\xd5\n\x00\x00\xae')
Georg Brandl8f99f812006-10-29 08:39:22 +00001168
1169 f = StringIO.StringIO()
1170 ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
1171 ef.write('\xc3\xbc')
1172 self.assertEquals(f.getvalue(), '\xfc')
1173
Walter Dörwaldc9878e12005-07-20 22:15:39 +00001174class Str2StrTest(unittest.TestCase):
1175
1176 def test_read(self):
1177 sin = "\x80".encode("base64_codec")
1178 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1179 sout = reader.read()
1180 self.assertEqual(sout, "\x80")
1181 self.assert_(isinstance(sout, str))
1182
1183 def test_readline(self):
1184 sin = "\x80".encode("base64_codec")
1185 reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
1186 sout = reader.readline()
1187 self.assertEqual(sout, "\x80")
1188 self.assert_(isinstance(sout, str))
1189
# Encodings iterated by the BasicUnicodeTest methods below.  Optional
# entries ("mbcs", "bz2_codec", "zlib_codec") are appended further down
# when the corresponding support is available.
all_unicode_encodings = [
    "ascii",
    "base64_codec",
    "big5",
    "big5hkscs",
    "charmap",
    "cp037",
    "cp1006",
    "cp1026",
    "cp1140",
    "cp1250",
    "cp1251",
    "cp1252",
    "cp1253",
    "cp1254",
    "cp1255",
    "cp1256",
    "cp1257",
    "cp1258",
    "cp424",
    "cp437",
    "cp500",
    "cp737",
    "cp775",
    "cp850",
    "cp852",
    "cp855",
    "cp856",
    "cp857",
    "cp860",
    "cp861",
    "cp862",
    "cp863",
    "cp864",
    "cp865",
    "cp866",
    "cp869",
    "cp874",
    "cp875",
    "cp932",
    "cp949",
    "cp950",
    "euc_jis_2004",
    "euc_jisx0213",
    "euc_jp",
    "euc_kr",
    "gb18030",
    "gb2312",
    "gbk",
    "hex_codec",
    "hp_roman8",
    "hz",
    "idna",
    "iso2022_jp",
    "iso2022_jp_1",
    "iso2022_jp_2",
    "iso2022_jp_2004",
    "iso2022_jp_3",
    "iso2022_jp_ext",
    "iso2022_kr",
    "iso8859_1",
    "iso8859_10",
    "iso8859_11",
    "iso8859_13",
    "iso8859_14",
    "iso8859_15",
    "iso8859_16",
    "iso8859_2",
    "iso8859_3",
    "iso8859_4",
    "iso8859_5",
    "iso8859_6",
    "iso8859_7",
    "iso8859_8",
    "iso8859_9",
    "johab",
    "koi8_r",
    "koi8_u",
    "latin_1",
    "mac_cyrillic",
    "mac_greek",
    "mac_iceland",
    "mac_latin2",
    "mac_roman",
    "mac_turkish",
    "palmos",
    "ptcp154",
    "punycode",
    "raw_unicode_escape",
    "rot_13",
    "shift_jis",
    "shift_jis_2004",
    "shift_jisx0213",
    "tis_620",
    "unicode_escape",
    "unicode_internal",
    "utf_16",
    "utf_16_be",
    "utf_16_le",
    "utf_7",
    "utf_8",
]

# "mbcs" is only tested when the codecs module exposes mbcs_encode.
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings.append("mbcs")

# The following encodings work only with str, not unicode
all_string_encodings = [
    "quopri_codec",
    "string_escape",
    "uu_codec",
]

# The following encoding is not tested, because it's not supposed
# to work:
#    "undefined"

# The following encodings don't work in stateful mode
broken_unicode_with_streams = [
    "base64_codec",
    "hex_codec",
    "punycode",
    "unicode_internal"
]
# This is a copy taken *before* bz2_codec/zlib_codec are appended to
# broken_unicode_with_streams below, so those two are excluded from the
# stream checks but not from the incremental coder checks.
broken_incremental_coders = broken_unicode_with_streams[:]

# The following encodings only support "strict" mode
only_strict_mode = [
    "idna",
    "zlib_codec",
    "bz2_codec",
]

# bz2_codec and zlib_codec are only tested when the underlying
# compression module can be imported.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings.append("bz2_codec")
    broken_unicode_with_streams.append("bz2_codec")

try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings.append("zlib_codec")
    broken_unicode_with_streams.append("zlib_codec")
1338
class BasicUnicodeTest(unittest.TestCase):
    """Generic checks run against every name in all_unicode_encodings.

    NOTE(review): the block nesting in test_basics was reconstructed from
    a source whose indentation was lost -- confirm against the original.
    """

    def test_basics(self):
        """Round-trip a short ASCII string through every codec API."""
        s = u"abc123" # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            # The name reported by lookup() must match the list entry,
            # ignoring "_" vs "-".  The "_codec" suffix is re-added and
            # "latin_1" is special-cased before comparing -- presumably
            # because the registered names differ in those cases.
            name = codecs.lookup(encoding).name
            if encoding.endswith("_codec"):
                name += "_codec"
            elif encoding == "latin_1":
                name = "latin_1"
            self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
            # Stateless encoder/decoder round trip.
            (bytes, size) = codecs.getencoder(encoding)(s)
            if encoding != "unicode_internal":
                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
            (chars, size) = codecs.getdecoder(encoding)(bytes)
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))

            if encoding not in broken_unicode_with_streams:
                # check stream reader/writer
                # Write one character at a time and collect the bytes,
                # then feed those bytes back one at a time.
                q = Queue()
                writer = codecs.getwriter(encoding)(q)
                encodedresult = ""
                for c in s:
                    writer.write(c)
                    encodedresult += q.read()
                q = Queue()
                reader = codecs.getreader(encoding)(q)
                decodedresult = u""
                for c in encodedresult:
                    q.write(c)
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

            if encoding not in broken_incremental_coders:
                # check incremental decoder/encoder (fetched via the Python
                # and C API) and iterencode()/iterdecode()
                try:
                    encoder = codecs.getincrementalencoder(encoding)()
                    cencoder = _testcapi.codec_incrementalencoder(encoding)
                except LookupError: # no IncrementalEncoder
                    pass
                else:
                    # check incremental decoder/encoder
                    encodedresult = ""
                    for c in s:
                        encodedresult += encoder.encode(c)
                    # Final call with empty input flushes the encoder.
                    encodedresult += encoder.encode(u"", True)
                    decoder = codecs.getincrementaldecoder(encoding)()
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += decoder.decode(c)
                    decodedresult += decoder.decode("", True)
                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                    # check C API
                    encodedresult = ""
                    for c in s:
                        encodedresult += cencoder.encode(c)
                    encodedresult += cencoder.encode(u"", True)
                    cdecoder = _testcapi.codec_incrementaldecoder(encoding)
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += cdecoder.decode(c)
                    decodedresult += cdecoder.decode("", True)
                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                    # check iterencode()/iterdecode()
                    result = u"".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
                    self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))

                    # check iterencode()/iterdecode() with empty string
                    result = u"".join(codecs.iterdecode(codecs.iterencode(u"", encoding), encoding))
                    self.assertEqual(result, u"")

                if encoding not in only_strict_mode:
                    # check incremental decoder/encoder with errors argument
                    try:
                        encoder = codecs.getincrementalencoder(encoding)("ignore")
                        cencoder = _testcapi.codec_incrementalencoder(encoding, "ignore")
                    except LookupError: # no IncrementalEncoder
                        pass
                    else:
                        encodedresult = "".join(encoder.encode(c) for c in s)
                        decoder = codecs.getincrementaldecoder(encoding)("ignore")
                        decodedresult = u"".join(decoder.decode(c) for c in encodedresult)
                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

                        encodedresult = "".join(cencoder.encode(c) for c in s)
                        cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
                        decodedresult = u"".join(cdecoder.decode(c) for c in encodedresult)
                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))

    def test_seek(self):
        """seek(0, 0) on a stream reader must allow re-reading the first line."""
        # all codecs should be able to encode these
        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
        for encoding in all_unicode_encodings:
            if encoding == "idna": # FIXME: See SF bug #1163178
                continue
            if encoding in broken_unicode_with_streams:
                continue
            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
            for t in xrange(5):
                # Test that calling seek resets the internal codec state and buffers
                reader.seek(0, 0)
                line = reader.readline()
                self.assertEqual(s[:len(line)], line)

    def test_bad_decode_args(self):
        """Decoders must reject missing arguments and (mostly) non-strings."""
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            self.assertRaises(TypeError, decoder)
            # "idna" and "punycode" are exempt from the integer check.
            if encoding not in ("idna", "punycode"):
                self.assertRaises(TypeError, decoder, 42)

    def test_bad_encode_args(self):
        """Encoders must reject being called without arguments."""
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
            self.assertRaises(TypeError, encoder)

    def test_encoding_map_type_initialized(self):
        from encodings import cp1140
        # This used to crash, we are only verifying there's no crash.
        table_type = type(cp1140.encoding_table)
        # Deliberately trivial comparison: it only has to run.
        self.assertEqual(table_type, table_type)
1462
class BasicStrTest(unittest.TestCase):
    """Round-trip check for the str-only codecs in all_string_encodings."""

    def test_basics(self):
        s = "abc123"
        for encoding in all_string_encodings:
            encode = codecs.getencoder(encoding)
            decode = codecs.getdecoder(encoding)
            # The encoder must report the whole input as consumed.
            encoded, consumed = encode(s)
            self.assertEqual(consumed, len(s))
            # Decoding the result must give back the original string.
            chars = decode(encoded)[0]
            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
1471
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001472class CharmapTest(unittest.TestCase):
1473 def test_decode_with_string_map(self):
1474 self.assertEquals(
1475 codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
1476 (u"abc", 3)
1477 )
1478
1479 self.assertEquals(
1480 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
1481 (u"ab\ufffd", 3)
1482 )
1483
1484 self.assertEquals(
1485 codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
1486 (u"ab\ufffd", 3)
1487 )
1488
1489 self.assertEquals(
1490 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
1491 (u"ab", 3)
1492 )
1493
1494 self.assertEquals(
1495 codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
1496 (u"ab", 3)
1497 )
1498
1499 allbytes = "".join(chr(i) for i in xrange(256))
1500 self.assertEquals(
1501 codecs.charmap_decode(allbytes, "ignore", u""),
1502 (u"", len(allbytes))
1503 )
1504
Georg Brandl8f99f812006-10-29 08:39:22 +00001505class WithStmtTest(unittest.TestCase):
1506 def test_encodedfile(self):
1507 f = StringIO.StringIO("\xc3\xbc")
1508 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
1509 self.assertEquals(ef.read(), "\xfc")
1510
1511 def test_streamreaderwriter(self):
1512 f = StringIO.StringIO("\xc3\xbc")
1513 info = codecs.lookup("utf-8")
1514 with codecs.StreamReaderWriter(f, info.streamreader,
1515 info.streamwriter, 'strict') as srw:
1516 self.assertEquals(srw.read(), u"\xfc")
1517
Walter Dörwaldd1c1e102005-10-06 20:29:57 +00001518
Victor Stinnerf5a3eaf2010-05-22 02:12:28 +00001519class BomTest(unittest.TestCase):
1520 def test_seek0(self):
Victor Stinner09c0f242010-05-22 16:52:13 +00001521 data = u"1234567890"
Victor Stinnerf5a3eaf2010-05-22 02:12:28 +00001522 tests = ("utf-16",
1523 "utf-16-le",
1524 "utf-16-be",
1525 "utf-32",
1526 "utf-32-le",
1527 "utf-32-be")
1528 for encoding in tests:
Victor Stinner09c0f242010-05-22 16:52:13 +00001529 # Check if the BOM is written only once
1530 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
Victor Stinnerf5a3eaf2010-05-22 02:12:28 +00001531 f.write(data)
1532 f.write(data)
1533 f.seek(0)
1534 self.assertEquals(f.read(), data * 2)
1535 f.seek(0)
1536 self.assertEquals(f.read(), data * 2)
1537
Victor Stinner09c0f242010-05-22 16:52:13 +00001538 # Check that the BOM is written after a seek(0)
1539 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1540 f.write(data[0])
1541 self.assertNotEquals(f.tell(), 0)
1542 f.seek(0)
1543 f.write(data)
1544 f.seek(0)
1545 self.assertEquals(f.read(), data)
1546
1547 # (StreamWriter) Check that the BOM is written after a seek(0)
1548 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1549 f.writer.write(data[0])
1550 self.assertNotEquals(f.writer.tell(), 0)
1551 f.writer.seek(0)
1552 f.writer.write(data)
1553 f.seek(0)
1554 self.assertEquals(f.read(), data)
1555
1556 # Check that the BOM is not written after a seek() at a position
1557 # different than the start
1558 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1559 f.write(data)
1560 f.seek(f.tell())
1561 f.write(data)
1562 f.seek(0)
1563 self.assertEquals(f.read(), data * 2)
1564
1565 # (StreamWriter) Check that the BOM is not written after a seek()
1566 # at a position different than the start
1567 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
1568 f.writer.write(data)
1569 f.writer.seek(f.writer.tell())
1570 f.writer.write(data)
1571 f.seek(0)
1572 self.assertEquals(f.read(), data * 2)
1573
Victor Stinnerf5a3eaf2010-05-22 02:12:28 +00001574
def test_main():
    """Run all test case classes of this module, in the listed order."""
    test_classes = (
        UTF32Test,
        UTF32LETest,
        UTF32BETest,
        UTF16Test,
        UTF16LETest,
        UTF16BETest,
        UTF8Test,
        UTF8SigTest,
        UTF7Test,
        UTF16ExTest,
        ReadBufferTest,
        CharBufferTest,
        EscapeDecodeTest,
        RecodingTest,
        PunycodeTest,
        UnicodeInternalTest,
        NameprepTest,
        IDNACodecTest,
        CodecsModuleTest,
        StreamReaderTest,
        EncodedFileTest,
        Str2StrTest,
        BasicUnicodeTest,
        BasicStrTest,
        CharmapTest,
        WithStmtTest,
        BomTest,
    )
    test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()