blob: bf9887b45a0f38876591fe3dae632668ddb759b8 [file] [log] [blame]
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001from test import support
Nadeem Vawda3b4a4f52012-10-08 19:20:49 +02002from test.support import bigmemtest, _4G
Tim Peters499d09a2002-11-09 06:31:56 +00003
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00004import unittest
Guido van Rossum34d19282007-08-09 01:03:29 +00005from io import BytesIO
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00006import os
Nadeem Vawda37970652013-10-28 21:35:23 +01007import pickle
Antoine Pitroue71258a2015-02-26 13:08:07 +01008import glob
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +02009import random
Guido van Rossum360e4b82007-05-14 22:51:27 +000010import subprocess
Tim Peters499d09a2002-11-09 06:31:56 +000011import sys
Nadeem Vawda8a9e99c2013-10-19 00:11:06 +020012from test.support import unlink
Victor Stinner45df8202010-04-28 22:31:17 +000013
14try:
15 import threading
16except ImportError:
17 threading = None
Tim Peters499d09a2002-11-09 06:31:56 +000018
R. David Murraya21e4ca2009-03-31 23:16:50 +000019# Skip tests if the bz2 module doesn't exist.
20bz2 = support.import_module('bz2')
Tim Peters499d09a2002-11-09 06:31:56 +000021from bz2 import BZ2File, BZ2Compressor, BZ2Decompressor
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000022
Tim Peters9de06bd2002-11-09 06:45:31 +000023
class BaseTest(unittest.TestCase):
    "Base for other testcases."

    # Sample plain text, kept both as individual lines and as one blob.
    TEXT_LINES = [
        b'root:x:0:0:root:/root:/bin/bash\n',
        b'bin:x:1:1:bin:/bin:\n',
        b'daemon:x:2:2:daemon:/sbin:\n',
        b'adm:x:3:4:adm:/var/adm:\n',
        b'lp:x:4:7:lp:/var/spool/lpd:\n',
        b'sync:x:5:0:sync:/sbin:/bin/sync\n',
        b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n',
        b'halt:x:7:0:halt:/sbin:/sbin/halt\n',
        b'mail:x:8:12:mail:/var/spool/mail:\n',
        b'news:x:9:13:news:/var/spool/news:\n',
        b'uucp:x:10:14:uucp:/var/spool/uucp:\n',
        b'operator:x:11:0:operator:/root:\n',
        b'games:x:12:100:games:/usr/games:\n',
        b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n',
        b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n',
        b'nobody:x:65534:65534:Nobody:/home:\n',
        b'postfix:x:100:101:postfix:/var/spool/postfix:\n',
        b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n',
        b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n',
        b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n',
        b'www:x:103:104::/var/www:/bin/false\n',
        ]
    TEXT = b''.join(TEXT_LINES)
    # TEXT compressed as a single bzip2 stream.
    DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2<Q\xb5\x0fH\xd3\xd4\xdd\xd5\x87\xbb\xf8\x94\r\x8f\xafI\x12\xe1\xc9\xf8/E\x00pu\x89\x12]\xc9\xbbDL\nQ\x0e\t1\x12\xdf\xa0\xc0\x97\xac2O9\x89\x13\x94\x0e\x1c7\x0ed\x95I\x0c\xaaJ\xa4\x18L\x10\x05#\x9c\xaf\xba\xbc/\x97\x8a#C\xc8\xe1\x8cW\xf9\xe2\xd0\xd6M\xa7\x8bXa<e\x84t\xcbL\xb3\xa7\xd9\xcd\xd1\xcb\x84.\xaf\xb3\xab\xab\xad`n}\xa0lh\tE,\x8eZ\x15\x17VH>\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`'
    # Result of compressing the empty byte string.
    EMPTY_DATA = b'BZh9\x17rE8P\x90\x00\x00\x00\x00'
    BAD_DATA = b'this is not a valid bzip2 file'

    # Some tests need more than one block of uncompressed data.  Since one block
    # is at least 100 kB, we gather some data dynamically and compress it.
    # Note that this assumes that compression works correctly, so we cannot
    # simply use the bigger test data for all tests.
    test_size = 0
    BIG_TEXT = bytearray(128*1024)
    # Escape the directory part so that glob metacharacters in the path of
    # this file are matched literally.
    for fname in glob.glob(os.path.join(
            glob.escape(os.path.dirname(__file__)), '*.py')):
        with open(fname, 'rb') as fh:
            test_size += fh.readinto(memoryview(BIG_TEXT)[test_size:])
        # readinto() can never overfill the buffer, so stop as soon as it is
        # exactly full instead of opening every remaining file for nothing.
        if test_size >= 128*1024:
            break
    BIG_DATA = bz2.compress(BIG_TEXT, compresslevel=1)

    def setUp(self):
        # Every test works on the same scratch file name.
        self.filename = support.TESTFN

    def tearDown(self):
        # Remove the scratch file if the test left one behind.
        if os.path.isfile(self.filename):
            os.unlink(self.filename)

    if sys.platform == "win32":
        # bunzip2 isn't available to run on Windows.
        def decompress(self, data):
            return bz2.decompress(data)
    else:
        def decompress(self, data):
            # Prefer the external bunzip2 program so that compressed output
            # is checked by an independent implementation; fall back to the
            # bz2 module when the program is missing or reports a failure.
            try:
                proc = subprocess.Popen(["bunzip2"],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.DEVNULL)
            except OSError:
                # bunzip2 could not be started at all.
                return bz2.decompress(data)
            # communicate() feeds stdin and drains stdout concurrently, so a
            # large payload cannot deadlock on a full pipe, and both pipes
            # are closed before it returns.
            output, _ = proc.communicate(data)
            if proc.returncode != 0:
                return bz2.decompress(data)
            return output
Tim Peters499d09a2002-11-09 06:31:56 +000092
Tim Peters9de06bd2002-11-09 06:45:31 +000093
class BZ2FileTest(BaseTest):
    "Test the BZ2File class."

    def createTempFile(self, streams=1, suffix=b""):
        # Write `streams` copies of the compressed sample, followed by
        # `suffix`, to the scratch file.
        with open(self.filename, "wb") as f:
            f.write(self.DATA * streams)
            f.write(suffix)

    def testBadArgs(self):
        self.assertRaises(TypeError, BZ2File, 123.456)
        self.assertRaises(ValueError, BZ2File, os.devnull, "z")
        self.assertRaises(ValueError, BZ2File, os.devnull, "rx")
        self.assertRaises(ValueError, BZ2File, os.devnull, "rbt")
        self.assertRaises(ValueError, BZ2File, os.devnull, compresslevel=0)
        self.assertRaises(ValueError, BZ2File, os.devnull, compresslevel=10)

    def testRead(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.read, None)
            self.assertEqual(bz2f.read(), self.TEXT)

    def testReadBadFile(self):
        self.createTempFile(streams=0, suffix=self.BAD_DATA)
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(OSError, bz2f.read)

    def testReadMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.read, None)
            self.assertEqual(bz2f.read(), self.TEXT * 5)

    def testReadMonkeyMultiStream(self):
        # Test BZ2File.read() on a multi-stream archive where a stream
        # boundary coincides with the end of the raw read buffer.
        # swap_attr() restores the original buffer size on exit, even if
        # an assertion fails.
        with support.swap_attr(bz2, "_BUFFER_SIZE", len(self.DATA)):
            self.createTempFile(streams=5)
            with BZ2File(self.filename) as bz2f:
                self.assertRaises(TypeError, bz2f.read, None)
                self.assertEqual(bz2f.read(), self.TEXT * 5)

    def testReadTrailingJunk(self):
        self.createTempFile(suffix=self.BAD_DATA)
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(bz2f.read(), self.TEXT)

    def testReadMultiStreamTrailingJunk(self):
        self.createTempFile(streams=5, suffix=self.BAD_DATA)
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(bz2f.read(), self.TEXT * 5)

    def testRead0(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.read, None)
            self.assertEqual(bz2f.read(0), b"")

    def testReadChunk10(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            # Read 10 bytes at a time until read() returns an empty result.
            text = b''.join(iter(lambda: bz2f.read(10), b''))
            self.assertEqual(text, self.TEXT)

    def testReadChunk10MultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            # Read 10 bytes at a time until read() returns an empty result.
            text = b''.join(iter(lambda: bz2f.read(10), b''))
            self.assertEqual(text, self.TEXT * 5)

    def testRead100(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(bz2f.read(100), self.TEXT[:100])

    def testPeek(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            pdata = bz2f.peek()
            self.assertNotEqual(len(pdata), 0)
            self.assertTrue(self.TEXT.startswith(pdata))
            # peek() must not have consumed anything.
            self.assertEqual(bz2f.read(), self.TEXT)

    def testReadInto(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            n = 128
            b = bytearray(n)
            self.assertEqual(bz2f.readinto(b), n)
            self.assertEqual(b, self.TEXT[:n])
            # The second buffer is larger than what is left, so only the
            # remaining n bytes are filled in.
            n = len(self.TEXT) - n
            b = bytearray(len(self.TEXT))
            self.assertEqual(bz2f.readinto(b), n)
            self.assertEqual(b[:n], self.TEXT[-n:])

    def testReadLine(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.readline, None)
            for line in self.TEXT_LINES:
                self.assertEqual(bz2f.readline(), line)

    def testReadLineMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.readline, None)
            for line in self.TEXT_LINES * 5:
                self.assertEqual(bz2f.readline(), line)

    def testReadLines(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.readlines, None)
            self.assertEqual(bz2f.readlines(), self.TEXT_LINES)

    def testReadLinesMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.readlines, None)
            self.assertEqual(bz2f.readlines(), self.TEXT_LINES * 5)

    def testIterator(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(list(iter(bz2f)), self.TEXT_LINES)

    def testIteratorMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(list(iter(bz2f)), self.TEXT_LINES * 5)

    def testClosedIteratorDeadlock(self):
        # Issue #3309: Iteration on a closed BZ2File should release the lock.
        self.createTempFile()
        bz2f = BZ2File(self.filename)
        bz2f.close()
        self.assertRaises(ValueError, next, bz2f)
        # This call will deadlock if the above call failed to release the lock.
        self.assertRaises(ValueError, bz2f.readlines)

    def testWrite(self):
        with BZ2File(self.filename, "w") as bz2f:
            self.assertRaises(TypeError, bz2f.write)
            bz2f.write(self.TEXT)
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.decompress(f.read()), self.TEXT)

    def testWriteChunks10(self):
        with BZ2File(self.filename, "w") as bz2f:
            # Feed the text to the file 10 bytes at a time.
            for start in range(0, len(self.TEXT), 10):
                bz2f.write(self.TEXT[start:start + 10])
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.decompress(f.read()), self.TEXT)

    def testWriteNonDefaultCompressLevel(self):
        expected = bz2.compress(self.TEXT, compresslevel=5)
        with BZ2File(self.filename, "w", compresslevel=5) as bz2f:
            bz2f.write(self.TEXT)
        with open(self.filename, "rb") as f:
            self.assertEqual(f.read(), expected)

    def testWriteLines(self):
        with BZ2File(self.filename, "w") as bz2f:
            self.assertRaises(TypeError, bz2f.writelines)
            bz2f.writelines(self.TEXT_LINES)
        # Issue #1535500: Calling writelines() on a closed BZ2File
        # should raise an exception.
        self.assertRaises(ValueError, bz2f.writelines, ["a"])
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.decompress(f.read()), self.TEXT)

    def testWriteMethodsOnReadOnlyFile(self):
        with BZ2File(self.filename, "w") as bz2f:
            bz2f.write(b"abc")

        with BZ2File(self.filename, "r") as bz2f:
            self.assertRaises(OSError, bz2f.write, b"a")
            self.assertRaises(OSError, bz2f.writelines, [b"a"])

    def testAppend(self):
        with BZ2File(self.filename, "w") as bz2f:
            self.assertRaises(TypeError, bz2f.write)
            bz2f.write(self.TEXT)
        with BZ2File(self.filename, "a") as bz2f:
            self.assertRaises(TypeError, bz2f.write)
            bz2f.write(self.TEXT)
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.decompress(f.read()), self.TEXT * 2)

    def testSeekForward(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.seek)
            bz2f.seek(150)
            self.assertEqual(bz2f.read(), self.TEXT[150:])

    def testSeekForwardAcrossStreams(self):
        self.createTempFile(streams=2)
        with BZ2File(self.filename) as bz2f:
            self.assertRaises(TypeError, bz2f.seek)
            bz2f.seek(len(self.TEXT) + 150)
            self.assertEqual(bz2f.read(), self.TEXT[150:])

    def testSeekBackwards(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            bz2f.read(500)
            bz2f.seek(-150, 1)
            self.assertEqual(bz2f.read(), self.TEXT[500-150:])

    def testSeekBackwardsAcrossStreams(self):
        self.createTempFile(streams=2)
        with BZ2File(self.filename) as bz2f:
            # A single read() may return fewer bytes than requested, so
            # keep reading until 100 bytes into the second stream.
            readto = len(self.TEXT) + 100
            while readto > 0:
                readto -= len(bz2f.read(readto))
            bz2f.seek(-150, 1)
            self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)

    def testSeekBackwardsFromEnd(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(-150, 2)
            self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])

    def testSeekBackwardsFromEndAcrossStreams(self):
        self.createTempFile(streams=2)
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(-1000, 2)
            self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])

    def testSeekPostEnd(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(150000)
            self.assertEqual(bz2f.tell(), len(self.TEXT))
            self.assertEqual(bz2f.read(), b"")

    def testSeekPostEndMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(150000)
            self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
            self.assertEqual(bz2f.read(), b"")

    def testSeekPostEndTwice(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(150000)
            bz2f.seek(150000)
            self.assertEqual(bz2f.tell(), len(self.TEXT))
            self.assertEqual(bz2f.read(), b"")

    def testSeekPostEndTwiceMultiStream(self):
        self.createTempFile(streams=5)
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(150000)
            bz2f.seek(150000)
            self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
            self.assertEqual(bz2f.read(), b"")

    def testSeekPreStart(self):
        self.createTempFile()
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(-150)
            self.assertEqual(bz2f.tell(), 0)
            self.assertEqual(bz2f.read(), self.TEXT)

    def testSeekPreStartMultiStream(self):
        self.createTempFile(streams=2)
        with BZ2File(self.filename) as bz2f:
            bz2f.seek(-150)
            self.assertEqual(bz2f.tell(), 0)
            self.assertEqual(bz2f.read(), self.TEXT * 2)

    def testFileno(self):
        self.createTempFile()
        with open(self.filename, 'rb') as rawf:
            bz2f = BZ2File(rawf)
            try:
                self.assertEqual(bz2f.fileno(), rawf.fileno())
            finally:
                bz2f.close()
        # Once closed, fileno() must refuse to answer.
        self.assertRaises(ValueError, bz2f.fileno)

    def testSeekable(self):
        # Reading from a seekable source.
        bz2f = BZ2File(BytesIO(self.DATA))
        try:
            self.assertTrue(bz2f.seekable())
            bz2f.read()
            self.assertTrue(bz2f.seekable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.seekable)

        # Files opened for writing are never seekable.
        bz2f = BZ2File(BytesIO(), "w")
        try:
            self.assertFalse(bz2f.seekable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.seekable)

        # Reading from a source that reports itself as non-seekable.
        src = BytesIO(self.DATA)
        src.seekable = lambda: False
        bz2f = BZ2File(src)
        try:
            self.assertFalse(bz2f.seekable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.seekable)

    def testReadable(self):
        bz2f = BZ2File(BytesIO(self.DATA))
        try:
            self.assertTrue(bz2f.readable())
            bz2f.read()
            self.assertTrue(bz2f.readable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.readable)

        bz2f = BZ2File(BytesIO(), "w")
        try:
            self.assertFalse(bz2f.readable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.readable)

    def testWritable(self):
        bz2f = BZ2File(BytesIO(self.DATA))
        try:
            self.assertFalse(bz2f.writable())
            bz2f.read()
            self.assertFalse(bz2f.writable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.writable)

        bz2f = BZ2File(BytesIO(), "w")
        try:
            self.assertTrue(bz2f.writable())
        finally:
            bz2f.close()
        self.assertRaises(ValueError, bz2f.writable)

    def testOpenDel(self):
        # Repeatedly open the file and drop the only reference without
        # calling close().
        self.createTempFile()
        for _ in range(10000):
            o = BZ2File(self.filename)
            del o

    def testOpenNonexistent(self):
        self.assertRaises(OSError, BZ2File, "/non/existent")

    def testReadlinesNoNewline(self):
        # Issue #1191043: readlines() fails on a file containing no newline.
        data = b'BZh91AY&SY\xd9b\x89]\x00\x00\x00\x03\x80\x04\x00\x02\x00\x0c\x00 \x00!\x9ah3M\x13<]\xc9\x14\xe1BCe\x8a%t'
        with open(self.filename, "wb") as f:
            f.write(data)
        with BZ2File(self.filename) as bz2f:
            lines = bz2f.readlines()
        self.assertEqual(lines, [b'Test'])
        with BZ2File(self.filename) as bz2f:
            xlines = list(bz2f.readlines())
        self.assertEqual(xlines, [b'Test'])

    def testContextProtocol(self):
        with BZ2File(self.filename, "wb") as f:
            f.write(b"xxx")
        f = BZ2File(self.filename, "rb")
        f.close()
        # Entering an already-closed file must fail.
        with self.assertRaises(
                ValueError,
                msg="__enter__ on a closed file didn't raise an exception"):
            with f:
                pass
        # An exception raised inside the block must propagate out of it...
        with self.assertRaises(ZeroDivisionError,
                               msg="1/0 didn't raise an exception"):
            with BZ2File(self.filename, "wb") as f:
                1/0
        # ...and the file must still have been closed on the way out.
        self.assertTrue(f.closed)

    @unittest.skipUnless(threading, 'Threading required for this test.')
    def testThreading(self):
        # Issue #7205: Using a BZ2File from several threads shouldn't deadlock.
        data = b"1" * 2**20
        nthreads = 10
        with BZ2File(self.filename, 'wb') as f:
            def comp():
                for _ in range(5):
                    f.write(data)
            threads = [threading.Thread(target=comp) for _ in range(nthreads)]
            # start_threads() starts every thread on entry and joins them
            # on exit, so the file stays open until all writers finish.
            with support.start_threads(threads):
                pass

    def testWithoutThreading(self):
        # The module must still work when threading cannot be imported.
        module = support.import_fresh_module("bz2", blocked=("threading",))
        with module.BZ2File(self.filename, "wb") as f:
            f.write(b"abc")
        with module.BZ2File(self.filename, "rb") as f:
            self.assertEqual(f.read(), b"abc")

    def testMixedIterationAndReads(self):
        self.createTempFile()
        linelen = len(self.TEXT_LINES[0])
        halflen = linelen // 2
        with BZ2File(self.filename) as bz2f:
            # next() after a partial read returns the rest of that line.
            bz2f.read(halflen)
            self.assertEqual(next(bz2f), self.TEXT_LINES[0][halflen:])
            self.assertEqual(bz2f.read(), self.TEXT[linelen:])
        with BZ2File(self.filename) as bz2f:
            bz2f.readline()
            self.assertEqual(next(bz2f), self.TEXT_LINES[1])
            self.assertEqual(bz2f.readline(), self.TEXT_LINES[2])
        with BZ2File(self.filename) as bz2f:
            bz2f.readlines()
            self.assertRaises(StopIteration, next, bz2f)
            self.assertEqual(bz2f.readlines(), [])

    def testMultiStreamOrdering(self):
        # Test the ordering of streams when reading a multi-stream archive.
        data1 = b"foo" * 1000
        data2 = b"bar" * 1000
        with BZ2File(self.filename, "w") as bz2f:
            bz2f.write(data1)
        with BZ2File(self.filename, "a") as bz2f:
            bz2f.write(data2)
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(bz2f.read(), data1 + data2)

    def testOpenBytesFilename(self):
        str_filename = self.filename
        try:
            bytes_filename = str_filename.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        with BZ2File(bytes_filename, "wb") as f:
            f.write(self.DATA)
        with BZ2File(bytes_filename, "rb") as f:
            self.assertEqual(f.read(), self.DATA)
        # Sanity check that we are actually operating on the right file.
        with BZ2File(str_filename, "rb") as f:
            self.assertEqual(f.read(), self.DATA)


    # Tests for a BZ2File wrapping another file object:

    def testReadBytesIO(self):
        with BytesIO(self.DATA) as bio:
            with BZ2File(bio) as bz2f:
                self.assertRaises(TypeError, bz2f.read, None)
                self.assertEqual(bz2f.read(), self.TEXT)
            # Closing the BZ2File must leave the wrapped object open.
            self.assertFalse(bio.closed)

    def testPeekBytesIO(self):
        with BytesIO(self.DATA) as bio:
            with BZ2File(bio) as bz2f:
                pdata = bz2f.peek()
                self.assertNotEqual(len(pdata), 0)
                self.assertTrue(self.TEXT.startswith(pdata))
                self.assertEqual(bz2f.read(), self.TEXT)

    def testWriteBytesIO(self):
        with BytesIO() as bio:
            with BZ2File(bio, "w") as bz2f:
                self.assertRaises(TypeError, bz2f.write)
                bz2f.write(self.TEXT)
            self.assertEqual(self.decompress(bio.getvalue()), self.TEXT)
            # Closing the BZ2File must leave the wrapped object open.
            self.assertFalse(bio.closed)

    def testSeekForwardBytesIO(self):
        with BytesIO(self.DATA) as bio:
            with BZ2File(bio) as bz2f:
                self.assertRaises(TypeError, bz2f.seek)
                bz2f.seek(150)
                self.assertEqual(bz2f.read(), self.TEXT[150:])

    def testSeekBackwardsBytesIO(self):
        with BytesIO(self.DATA) as bio:
            with BZ2File(bio) as bz2f:
                bz2f.read(500)
                bz2f.seek(-150, 1)
                self.assertEqual(bz2f.read(), self.TEXT[500-150:])

    def test_read_truncated(self):
        # Drop the eos_magic field (6 bytes) and CRC (4 bytes).
        truncated = self.DATA[:-10]
        with BZ2File(BytesIO(truncated)) as f:
            self.assertRaises(EOFError, f.read)
        with BZ2File(BytesIO(truncated)) as f:
            self.assertEqual(f.read(len(self.TEXT)), self.TEXT)
            self.assertRaises(EOFError, f.read, 1)
        # Incomplete 4-byte file header, and block header of at least 146 bits.
        for i in range(22):
            with BZ2File(BytesIO(truncated[:i])) as f:
                self.assertRaises(EOFError, f.read, 1)
614
Nadeem Vawda3b4a4f52012-10-08 19:20:49 +0200615
class BZ2CompressorTest(BaseTest):
    "Test the incremental BZ2Compressor object."

    def testCompress(self):
        # compress() requires an argument; compress() followed by flush()
        # must yield a stream that decompresses back to the input.
        bz2c = BZ2Compressor()
        self.assertRaises(TypeError, bz2c.compress)
        data = bz2c.compress(self.TEXT)
        data += bz2c.flush()
        self.assertEqual(self.decompress(data), self.TEXT)

    def testCompressEmptyString(self):
        # Compressing nothing must produce exactly the EMPTY_DATA fixture.
        bz2c = BZ2Compressor()
        data = bz2c.compress(b'')
        data += bz2c.flush()
        self.assertEqual(data, self.EMPTY_DATA)

    def testCompressChunks10(self):
        # Feed the text to one compressor in consecutive 10-byte slices.
        bz2c = BZ2Compressor()
        pieces = [bz2c.compress(self.TEXT[start:start + 10])
                  for start in range(0, len(self.TEXT), 10)]
        pieces.append(bz2c.flush())
        self.assertEqual(self.decompress(b''.join(pieces)), self.TEXT)

    @bigmemtest(size=_4G + 100, memuse=2)
    def testCompress4G(self, size):
        # "Test BZ2Compressor.compress()/flush() with >4GiB input"
        bz2c = BZ2Compressor()
        data = b"x" * size
        try:
            compressed = bz2c.compress(data)
            compressed += bz2c.flush()
        finally:
            data = None  # Release memory
        data = bz2.decompress(compressed)
        try:
            self.assertEqual(len(data), size)
            # Stripping every b"x" must leave nothing behind.
            self.assertEqual(len(data.strip(b"x")), 0)
        finally:
            data = None  # Release memory

    def testPickle(self):
        # Compressor objects must refuse to be pickled under every protocol.
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            # subTest reports the protocol number if one of them fails.
            with self.subTest(proto=proto):
                with self.assertRaises(TypeError):
                    pickle.dumps(BZ2Compressor(), proto)
Nadeem Vawda37970652013-10-28 21:35:23 +0100664
665
class BZ2DecompressorTest(BaseTest):
    "Test the incremental BZ2Decompressor object."

    def test_Constructor(self):
        # Passing a positional argument to the constructor is rejected.
        self.assertRaises(TypeError, BZ2Decompressor, 42)

    def testDecompress(self):
        # decompress() requires an argument; one call on the complete
        # stream must return the complete text.
        bz2d = BZ2Decompressor()
        self.assertRaises(TypeError, bz2d.decompress)
        text = bz2d.decompress(self.DATA)
        self.assertEqual(text, self.TEXT)

    def testDecompressChunks10(self):
        # Feed the stream to one decompressor in consecutive 10-byte slices.
        bz2d = BZ2Decompressor()
        pieces = [bz2d.decompress(self.DATA[start:start + 10])
                  for start in range(0, len(self.DATA), 10)]
        self.assertEqual(b''.join(pieces), self.TEXT)

    def testDecompressUnusedData(self):
        # Bytes following the end of the stream are exposed unchanged
        # through the unused_data attribute.
        bz2d = BZ2Decompressor()
        unused_data = b"this is unused data"
        text = bz2d.decompress(self.DATA+unused_data)
        self.assertEqual(text, self.TEXT)
        self.assertEqual(bz2d.unused_data, unused_data)

    def testEOFError(self):
        # Once the end of the stream has been consumed, every further
        # decompress() call -- even with empty input -- raises EOFError.
        bz2d = BZ2Decompressor()
        bz2d.decompress(self.DATA)
        self.assertRaises(EOFError, bz2d.decompress, b"anything")
        self.assertRaises(EOFError, bz2d.decompress, b"")

    @bigmemtest(size=_4G + 100, memuse=3.3)
    def testDecompress4G(self, size):
        # "Test BZ2Decompressor.decompress() with >4GiB input"
        blocksize = 10 * 1024 * 1024
        block = random.getrandbits(blocksize * 8).to_bytes(blocksize, 'little')
        try:
            data = block * (size // blocksize + 1)
            compressed = bz2.compress(data)
            bz2d = BZ2Decompressor()
            decompressed = bz2d.decompress(compressed)
            # NOTE(review): plain == inside assertTrue presumably avoids
            # building a failure message from multi-GiB operands -- confirm.
            self.assertTrue(decompressed == data)
        finally:
            # Drop the references to the large buffers.
            data = None
            compressed = None
            decompressed = None

    def testPickle(self):
        # Decompressor objects must refuse to be pickled under every protocol.
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            # subTest reports the protocol number if one of them fails.
            with self.subTest(proto=proto):
                with self.assertRaises(TypeError):
                    pickle.dumps(BZ2Decompressor(), proto)

    def testDecompressorChunksMaxsize(self):
        bzd = BZ2Decompressor()
        max_length = 100
        out = []

        # Feed some input
        len_ = len(self.BIG_DATA) - 64
        out.append(bzd.decompress(self.BIG_DATA[:len_],
                                  max_length=max_length))
        self.assertFalse(bzd.needs_input)
        self.assertEqual(len(out[-1]), max_length)

        # Retrieve more data without providing more input
        out.append(bzd.decompress(b'', max_length=max_length))
        self.assertFalse(bzd.needs_input)
        self.assertEqual(len(out[-1]), max_length)

        # Retrieve more data while providing more input
        out.append(bzd.decompress(self.BIG_DATA[len_:],
                                  max_length=max_length))
        self.assertLessEqual(len(out[-1]), max_length)

        # Retrieve remaining uncompressed data
        while not bzd.eof:
            out.append(bzd.decompress(b'', max_length=max_length))
            self.assertLessEqual(len(out[-1]), max_length)

        out = b"".join(out)
        self.assertEqual(out, self.BIG_TEXT)
        self.assertEqual(bzd.unused_data, b"")

    def test_decompressor_inputbuf_1(self):
        # Test reusing input buffer after moving existing
        # contents to beginning
        bzd = BZ2Decompressor()
        out = []

        # Create input buffer and fill it
        self.assertEqual(bzd.decompress(self.DATA[:100],
                                        max_length=0), b'')

        # Retrieve some results, freeing capacity at beginning
        # of input buffer
        out.append(bzd.decompress(b'', 2))

        # Add more data that fits into input buffer after
        # moving existing data to beginning
        out.append(bzd.decompress(self.DATA[100:105], 15))

        # Decompress rest of data
        out.append(bzd.decompress(self.DATA[105:]))
        self.assertEqual(b''.join(out), self.TEXT)

    def test_decompressor_inputbuf_2(self):
        # Test reusing input buffer by appending data at the
        # end right away
        bzd = BZ2Decompressor()
        out = []

        # Create input buffer and empty it
        self.assertEqual(bzd.decompress(self.DATA[:200],
                                        max_length=0), b'')
        out.append(bzd.decompress(b''))

        # Fill buffer with new data
        out.append(bzd.decompress(self.DATA[200:280], 2))

        # Append some more data, not enough to require resize
        out.append(bzd.decompress(self.DATA[280:300], 2))

        # Decompress rest of data
        out.append(bzd.decompress(self.DATA[300:]))
        self.assertEqual(b''.join(out), self.TEXT)

    def test_decompressor_inputbuf_3(self):
        # Test reusing input buffer after extending it

        bzd = BZ2Decompressor()
        out = []

        # Create almost full input buffer
        out.append(bzd.decompress(self.DATA[:200], 5))

        # Add even more data to it, requiring resize
        out.append(bzd.decompress(self.DATA[200:300], 5))

        # Decompress rest of data
        out.append(bzd.decompress(self.DATA[300:]))
        self.assertEqual(b''.join(out), self.TEXT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000811
class CompressDecompressTest(BaseTest):
    # Exercises the one-shot bz2.compress() / bz2.decompress() functions.

    def testCompress(self):
        # Output of compress() must decompress back to the input.
        compressed = bz2.compress(self.TEXT)
        roundtrip = self.decompress(compressed)
        self.assertEqual(roundtrip, self.TEXT)

    def testCompressEmptyString(self):
        # Compressing nothing yields exactly the EMPTY_DATA fixture.
        self.assertEqual(bz2.compress(b''), self.EMPTY_DATA)

    def testDecompress(self):
        self.assertEqual(bz2.decompress(self.DATA), self.TEXT)

    def testDecompressEmpty(self):
        # Zero bytes of input decompress to zero bytes of output.
        self.assertEqual(bz2.decompress(b""), b"")

    def testDecompressToEmptyString(self):
        self.assertEqual(bz2.decompress(self.EMPTY_DATA), b'')

    def testDecompressIncomplete(self):
        # A stream with its last 10 bytes cut off is a ValueError.
        clipped = self.DATA[:-10]
        with self.assertRaises(ValueError):
            bz2.decompress(clipped)

    def testDecompressBadData(self):
        with self.assertRaises(OSError):
            bz2.decompress(self.BAD_DATA)

    def testDecompressMultiStream(self):
        # Back-to-back streams decompress to the concatenated texts.
        copies = 5
        result = bz2.decompress(self.DATA * copies)
        self.assertEqual(result, self.TEXT * copies)

    def testDecompressTrailingJunk(self):
        # Invalid bytes after a complete stream are ignored.
        payload = self.DATA + self.BAD_DATA
        self.assertEqual(bz2.decompress(payload), self.TEXT)

    def testDecompressMultiStreamTrailingJunk(self):
        copies = 5
        payload = self.DATA * copies + self.BAD_DATA
        self.assertEqual(bz2.decompress(payload), self.TEXT * copies)
850
Nadeem Vawdaaf518c12012-06-04 23:32:38 +0200851
class OpenTest(BaseTest):
    "Test the open function."

    def open(self, *args, **kwargs):
        # Single entry point for the function under test.  Inside this
        # class the bare name open() is still the builtin and is used to
        # read the raw compressed bytes back.
        return bz2.open(*args, **kwargs)

    def test_binary_modes(self):
        for mode in ("wb", "xb"):
            # subTest reports which mode failed and lets the other one run.
            with self.subTest(mode=mode):
                if mode == "xb":
                    # Exclusive creation needs the file to be absent.
                    unlink(self.filename)
                with self.open(self.filename, mode) as f:
                    f.write(self.TEXT)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read())
                    self.assertEqual(file_data, self.TEXT)
                with self.open(self.filename, "rb") as f:
                    self.assertEqual(f.read(), self.TEXT)
                with self.open(self.filename, "ab") as f:
                    f.write(self.TEXT)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read())
                    self.assertEqual(file_data, self.TEXT * 2)

    def test_implicit_binary_modes(self):
        # Test implicit binary modes (no "b" or "t" in mode string).
        for mode in ("w", "x"):
            with self.subTest(mode=mode):
                if mode == "x":
                    # Exclusive creation needs the file to be absent.
                    unlink(self.filename)
                with self.open(self.filename, mode) as f:
                    f.write(self.TEXT)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read())
                    self.assertEqual(file_data, self.TEXT)
                with self.open(self.filename, "r") as f:
                    self.assertEqual(f.read(), self.TEXT)
                with self.open(self.filename, "a") as f:
                    f.write(self.TEXT)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read())
                    self.assertEqual(file_data, self.TEXT * 2)

    def test_text_modes(self):
        text = self.TEXT.decode("ascii")
        # What the stored bytes are compared against: the text with "\n"
        # replaced by the platform line separator.
        text_native_eol = text.replace("\n", os.linesep)
        for mode in ("wt", "xt"):
            with self.subTest(mode=mode):
                if mode == "xt":
                    # Exclusive creation needs the file to be absent.
                    unlink(self.filename)
                with self.open(self.filename, mode) as f:
                    f.write(text)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read()).decode("ascii")
                    self.assertEqual(file_data, text_native_eol)
                with self.open(self.filename, "rt") as f:
                    self.assertEqual(f.read(), text)
                with self.open(self.filename, "at") as f:
                    f.write(text)
                with open(self.filename, "rb") as f:
                    file_data = self.decompress(f.read()).decode("ascii")
                    self.assertEqual(file_data, text_native_eol * 2)

    def test_x_mode(self):
        # The first exclusive open creates the file; a second one on the
        # same path must fail with FileExistsError.
        for mode in ("x", "xb", "xt"):
            with self.subTest(mode=mode):
                unlink(self.filename)
                with self.open(self.filename, mode):
                    pass
                with self.assertRaises(FileExistsError):
                    with self.open(self.filename, mode):
                        pass

    def test_fileobj(self):
        # open() also accepts an existing file object instead of a path.
        with self.open(BytesIO(self.DATA), "r") as f:
            self.assertEqual(f.read(), self.TEXT)
        with self.open(BytesIO(self.DATA), "rb") as f:
            self.assertEqual(f.read(), self.TEXT)
        text = self.TEXT.decode("ascii")
        with self.open(BytesIO(self.DATA), "rt") as f:
            self.assertEqual(f.read(), text)

    def test_bad_params(self):
        # Test invalid parameter combinations.
        # "b" and "t" together in one mode string:
        self.assertRaises(ValueError,
                          self.open, self.filename, "wbt")
        self.assertRaises(ValueError,
                          self.open, self.filename, "xbt")
        # Text-layer arguments combined with a binary mode:
        self.assertRaises(ValueError,
                          self.open, self.filename, "rb", encoding="utf-8")
        self.assertRaises(ValueError,
                          self.open, self.filename, "rb", errors="ignore")
        self.assertRaises(ValueError,
                          self.open, self.filename, "rb", newline="\n")

    def test_encoding(self):
        # Test non-default encoding.
        text = self.TEXT.decode("ascii")
        text_native_eol = text.replace("\n", os.linesep)
        with self.open(self.filename, "wt", encoding="utf-16-le") as f:
            f.write(text)
        with open(self.filename, "rb") as f:
            file_data = self.decompress(f.read()).decode("utf-16-le")
            self.assertEqual(file_data, text_native_eol)
        with self.open(self.filename, "rt", encoding="utf-16-le") as f:
            self.assertEqual(f.read(), text)

    def test_encoding_error_handler(self):
        # Test with non-default encoding error handler.
        with self.open(self.filename, "wb") as f:
            f.write(b"foo\xffbar")
        # errors="ignore" drops the byte that is not valid ASCII.
        with self.open(self.filename, "rt",
                       encoding="ascii", errors="ignore") as f:
            self.assertEqual(f.read(), "foobar")

    def test_newline(self):
        # Test with explicit newline (universal newline mode disabled).
        text = self.TEXT.decode("ascii")
        with self.open(self.filename, "wt", newline="\n") as f:
            f.write(text)
        # Reading with "\r" as the only line terminator: readlines() must
        # return the whole text as one item.
        with self.open(self.filename, "rt", newline="\r") as f:
            self.assertEqual(f.readlines(), [text])
970
971
def test_main():
    # Run every test class of this module through the regrtest helper,
    # then reap any child processes that were left behind.
    test_classes = (
        BZ2FileTest,
        BZ2CompressorTest,
        BZ2DecompressorTest,
        CompressDecompressTest,
        OpenTest,
    )
    support.run_unittest(*test_classes)
    support.reap_children()


# Run the suite when the file is executed as a script.
if __name__ == '__main__':
    test_main()