"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

from builtins import open as _builtin_open
import io
import warnings

try:
    from threading import RLock
except ImportError:
    from dummy_threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor


_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2
_MODE_WRITE = 3

_BUFFER_SIZE = 8192


class BZ2File(io.BufferedIOBase):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name
        of the file to be opened. Otherwise, it should be a file object,
        which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if buffering is not None:
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes)):
            self._fp = _builtin_open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

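    # A minimal usage sketch (not part of the original class): the constructor
    # accepts either a filename or an existing file object. "archive.bz2" and
    # buf below are hypothetical examples.
    #
    #     with BZ2File("archive.bz2", "w") as f:   # open by name; the file is
    #         f.write(b"payload")                  # closed together with f
    #
    #     buf = io.BytesIO()
    #     with BZ2File(buf, "w") as f:             # wrap an existing file
    #         f.write(b"payload")                  # object; buf stays open
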
    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode in (_MODE_READ, _MODE_READ_EOF):
                    self._decompressor = None
                elif self._mode == _MODE_WRITE:
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = b""
                    self._buffer_offset = 0

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file. We're good.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            if self._decompressor.eof:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(rawblock)
                except OSError:
                    # Trailing data isn't a valid bzip2 stream. We're done here.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
            else:
                self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            if not self._fill_buffer():
                return b""
            return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size == 0:
                return b""
            elif size < 0:
                return self._read_all()
            else:
                return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b'' if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        with self._lock:
            self._check_can_read()
            if (size == 0 or
                # Only call _fill_buffer() if the buffer is actually empty.
                # This gives a significant speedup if *size* is small.
                (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
                return b""
            if size > 0:
                data = self._buffer[self._buffer_offset :
                                    self._buffer_offset + size]
                self._buffer_offset += len(data)
            else:
                data = self._buffer[self._buffer_offset:]
                self._buffer = b""
                self._buffer_offset = 0
            self._pos += len(data)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            return io.BufferedIOBase.readinto(self, b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            # Shortcut for the common case - the whole line is in the buffer.
            if size < 0:
                end = self._buffer.find(b"\n", self._buffer_offset) + 1
                if end > 0:
                    line = self._buffer[self._buffer_offset : end]
                    self._buffer_offset = end
                    self._pos += len(line)
                    return line
            return io.BufferedIOBase.readline(self, size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            return io.BufferedIOBase.readlines(self, size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return io.BufferedIOBase.writelines(self, seq)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()

            # Recalculate offset as an absolute file position.
            if whence == 0:
                pass
            elif whence == 1:
                offset = self._pos + offset
            elif whence == 2:
                # Seeking relative to EOF - we need to know the file's size.
                if self._size < 0:
                    self._read_all(return_data=False)
                offset = self._size + offset
            else:
                raise ValueError("Invalid value for whence: %s" % (whence,))

            # Make it so that offset is the number of bytes to skip forward.
            if offset < self._pos:
                self._rewind()
            else:
                offset -= self._pos

            # Read and discard data until we reach the desired position.
            self._read_block(offset, return_data=False)

            return self._pos

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            return self._pos

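
# A usage sketch for BZ2File (illustrative only; "multi.bz2" is a hypothetical
# path). Because mode 'r' accepts the concatenation of multiple compressed
# streams, appending a second stream and reading the file back returns the
# combined data:
#
#     with BZ2File("multi.bz2", "w") as f:
#         f.write(b"first stream\n")
#     with BZ2File("multi.bz2", "a") as f:
#         f.write(b"second stream\n")
#     with BZ2File("multi.bz2") as f:
#         assert f.read() == b"first stream\nsecond stream\n"
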

def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str or bytes
    object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file

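
# A usage sketch for open() (illustrative only; "notes.bz2" is a hypothetical
# path). In text mode the BZ2File is wrapped in an io.TextIOWrapper, so str
# data can be written and read directly:
#
#     with open("notes.bz2", "wt", encoding="utf-8") as f:
#         f.write("first line\nsecond line\n")
#     with open("notes.bz2", "rt", encoding="utf-8") as f:
#         assert f.readlines() == ["first line\n", "second line\n"]
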

def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    return comp.compress(data) + comp.flush()

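
# An incremental-compression sketch (illustrative only), following the
# docstring's pointer to BZ2Compressor: data can be fed in chunks and the
# stream finished with flush(); the result decompresses to the same bytes as
# one-shot compression of the concatenated input.
#
#     comp = BZ2Compressor(9)
#     stream = comp.compress(b"chunk one") + comp.compress(b"chunk two")
#     stream += comp.flush()
#     assert decompress(stream) == b"chunk onechunk two"
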

def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
    return b"".join(results)
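

if __name__ == "__main__":
    # Illustrative self-test sketch (not part of the original module):
    # exercise the one-shot and incremental APIs defined above.
    payload = b"the quick brown fox jumps over the lazy dog\n" * 100

    # One-shot round trip.
    assert decompress(compress(payload)) == payload

    # Incremental compression: feed fixed-size chunks, then flush.
    comp = BZ2Compressor(5)
    stream = b"".join(comp.compress(payload[i:i + 256])
                      for i in range(0, len(payload), 256))
    stream += comp.flush()

    # Incremental decompression of the stream produced above.
    decomp = BZ2Decompressor()
    assert decomp.decompress(stream) == payload
    assert decomp.eof

    # decompress() transparently handles concatenated streams.
    assert decompress(stream + compress(payload)) == payload * 2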