"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

import io
import warnings

try:
    from threading import RLock
except ImportError:
    from dummy_threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor


_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2
_MODE_WRITE = 3

_BUFFER_SIZE = 8192

_builtin_open = open


class BZ2File(io.BufferedIOBase):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name
        of the file to be opened. Otherwise, it should be a file object,
        which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if buffering is not None:
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes)):
            self._fp = _builtin_open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode in (_MODE_READ, _MODE_READ_EOF):
                    self._decompressor = None
                elif self._mode == _MODE_WRITE:
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = b""
                    self._buffer_offset = 0

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file. We're good.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            if self._decompressor.eof:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(rawblock)
                except OSError:
                    # Trailing data isn't a valid bzip2 stream. We're done here.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
            else:
                self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            if not self._fill_buffer():
                return b""
            return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size == 0:
                return b""
            elif size < 0:
                return self._read_all()
            else:
                return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b'' if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        with self._lock:
            self._check_can_read()
            if (size == 0 or
                # Only call _fill_buffer() if the buffer is actually empty.
                # This gives a significant speedup if *size* is small.
                (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
                return b""
            if size > 0:
                data = self._buffer[self._buffer_offset :
                                    self._buffer_offset + size]
                self._buffer_offset += len(data)
            else:
                data = self._buffer[self._buffer_offset:]
                self._buffer = b""
                self._buffer_offset = 0
            self._pos += len(data)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            return io.BufferedIOBase.readinto(self, b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            # Shortcut for the common case - the whole line is in the buffer.
            if size < 0:
                end = self._buffer.find(b"\n", self._buffer_offset) + 1
                if end > 0:
                    line = self._buffer[self._buffer_offset : end]
                    self._buffer_offset = end
                    self._pos += len(line)
                    return line
            return io.BufferedIOBase.readline(self, size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            return io.BufferedIOBase.readlines(self, size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return io.BufferedIOBase.writelines(self, seq)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()

            # Recalculate offset as an absolute file position.
            if whence == 0:
                pass
            elif whence == 1:
                offset = self._pos + offset
            elif whence == 2:
                # Seeking relative to EOF - we need to know the file's size.
                if self._size < 0:
                    self._read_all(return_data=False)
                offset = self._size + offset
            else:
                raise ValueError("Invalid value for whence: %s" % (whence,))

            # Make it so that offset is the number of bytes to skip forward.
            if offset < self._pos:
                self._rewind()
            else:
                offset -= self._pos

            # Read and discard data until we reach the desired position.
            self._read_block(offset, return_data=False)

            return self._pos

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            return self._pos

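# A minimal usage sketch for the BZ2File class above; the filename
# "example.bz2" is hypothetical. BZ2File is a binary interface, so reads
# return bytes:
#
#     with BZ2File("example.bz2", "r") as f:
#         head = f.peek(16)   # buffered look-ahead; position is unchanged
#         data = f.read()     # all remaining decompressed bytes
#
# Instead of a filename, an existing binary file object (e.g. an in-memory
# buffer) can be wrapped:
#
#     buf = io.BytesIO(compress(b"payload"))
#     with BZ2File(buf, "r") as f:
#         assert f.read() == b"payload"
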
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str or bytes
    object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file

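# A minimal usage sketch for open() above; the filename "notes.txt.bz2" is
# hypothetical. In text mode the BZ2File is wrapped in an io.TextIOWrapper:
#
#     with open("notes.txt.bz2", "wt", encoding="utf-8") as f:
#         f.write("hello\n")                # str in, compressed bytes on disk
#
#     with open("notes.txt.bz2", "rt", encoding="utf-8") as f:
#         assert f.readline() == "hello\n"  # decoded back to str
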
def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    return comp.compress(data) + comp.flush()


def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
    return b"".join(results)
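
# A minimal usage sketch for the one-shot functions above. decompress() also
# accepts the concatenation of multiple bzip2 streams, mirroring the loop in
# its implementation:
#
#     blob = compress(b"x" * 1000, compresslevel=1)
#     assert decompress(blob) == b"x" * 1000
#
#     two = compress(b"first") + compress(b"second")
#     assert decompress(two) == b"firstsecond"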