blob: 6e6a2b99487d96b21ee562156e5ccaf2e10d06c0 [file] [log] [blame]
"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

# Public names exported by a star-import of this module.
__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11
12import io
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020013import warnings
14
Nadeem Vawda72750a82012-01-18 01:57:14 +020015try:
16 from threading import RLock
17except ImportError:
18 from dummy_threading import RLock
19
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020020from _bz2 import BZ2Compressor, BZ2Decompressor
21
22
# Values stored in BZ2File._mode to track the state of the file object.
_MODE_CLOSED = 0
_MODE_READ = 1
# Still a read-mode file, but the end of the compressed data has been reached.
_MODE_READ_EOF = 2
_MODE_WRITE = 3

# Number of bytes requested from the underlying file per read when
# refilling BZ2File's readahead buffer.
_BUFFER_SIZE = 8192

# Keep a reference to the builtin open(), because this module defines its
# own open() function further down, which shadows the builtin name.
_builtin_open = open
31
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020032
class BZ2File(io.BufferedIOBase):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name
        of the file to be opened. Otherwise, it should be a file object,
        which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        or 'a' for appending. These can equivalently be given as 'rb',
        'wb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an invalid mode or compresslevel, and
        TypeError if filename is neither a str/bytes object nor an object
        with a read() or write() method.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        # Current position in the uncompressed data.
        self._pos = 0
        # Total uncompressed size; -1 until EOF has been reached once.
        self._size = -1

        if buffering is not None:
            # stacklevel=2 attributes the warning to the caller's line
            # rather than to this module.
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning, stacklevel=2)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode in (_MODE_READ, _MODE_READ_EOF):
                    self._decompressor = None
                elif self._mode == _MODE_WRITE:
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                # Release the underlying file and reset our state even if
                # the final flush/write above failed.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = b""
                    self._buffer_offset = 0

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            # A closed file reports ValueError in preference to
            # UnsupportedOperation.
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            # Data left over after the end of a stream takes priority over
            # fresh data from the underlying file.
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file. We're good.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            if self._decompressor.eof:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()

            self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # Nothing to read. This guard matters for negative n (which seek()
        # passes when asked for a position before the start of the stream):
        # without it the fast path below would store a negative
        # _buffer_offset and later reads would wrongly report EOF.
        if n <= 0:
            return b"" if return_data else None

        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified; n is accepted
        but not used by this implementation.
        """
        with self._lock:
            self._check_can_read()
            if not self._fill_buffer():
                return b""
            return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative, None or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            # None means "read everything", as for other binary streams.
            if size is None or size < 0:
                return self._read_all()
            elif size == 0:
                return b""
            else:
                return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b'' if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        with self._lock:
            self._check_can_read()
            if (size == 0 or
                # Only call _fill_buffer() if the buffer is actually empty.
                # This gives a significant speedup if *size* is small.
                (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
                return b""
            if size > 0:
                data = self._buffer[self._buffer_offset :
                                    self._buffer_offset + size]
                self._buffer_offset += len(data)
            else:
                # Negative size: hand over everything currently buffered.
                data = self._buffer[self._buffer_offset:]
                self._buffer = b""
                self._buffer_offset = 0
            self._pos += len(data)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            return io.BufferedIOBase.readinto(self, b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.

        Raises TypeError if size is not an integer and has no __index__.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            # Shortcut for the common case - the whole line is in the buffer.
            if size < 0:
                # find() returns -1 when there is no newline, making end 0.
                end = self._buffer.find(b"\n", self._buffer_offset) + 1
                if end > 0:
                    line = self._buffer[self._buffer_offset : end]
                    self._buffer_offset = end
                    self._pos += len(line)
                    return line
            return io.BufferedIOBase.readline(self, size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.

        Raises TypeError if size is not an integer and has no __index__.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            return io.BufferedIOBase.readlines(self, size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return io.BufferedIOBase.writelines(self, seq)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()

            # Recalculate offset as an absolute file position.
            if whence == 0:
                pass
            elif whence == 1:
                offset = self._pos + offset
            elif whence == 2:
                # Seeking relative to EOF - we need to know the file's size.
                if self._size < 0:
                    self._read_all(return_data=False)
                offset = self._size + offset
            else:
                raise ValueError("Invalid value for whence: %s" % (whence,))

            # Make it so that offset is the number of bytes to skip forward.
            if offset < self._pos:
                self._rewind()
            else:
                offset -= self._pos

            # Read and discard data until we reach the desired position.
            # A target before the start of the stream leaves offset negative
            # here; _read_block() treats that as "skip nothing", so the file
            # ends up positioned at 0.
            self._read_block(offset, return_data=False)

            return self._pos

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            return self._pos
437
438
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str or bytes
    object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for
    binary mode, or "rt", "wt" or "at" for text mode. The default mode
    is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if any of
    encoding, errors or newline is given in binary mode.
    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # BZ2File only understands binary modes, so drop the text flag.
    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        try:
            return io.TextIOWrapper(binary_file, encoding, errors, newline)
        except BaseException:
            # The wrapper could not be built (e.g. unknown encoding or
            # illegal newline value); don't leave the BZ2File we just
            # created open until garbage collection.
            binary_file.close()
            raise
    else:
        return binary_file
477
478
def compress(data, compresslevel=9):
    """Compress data in one shot and return the complete bzip2 stream.

    compresslevel, if given, must be a number between 1 and 9.

    Use a BZ2Compressor object instead when the input arrives
    incrementally.
    """
    compressor = BZ2Compressor(compresslevel)
    body = compressor.compress(data)
    # flush() emits whatever the compressor is still holding back,
    # finishing the stream.
    trailer = compressor.flush()
    return body + trailer
488
489
def decompress(data):
    """Decompress data in one shot and return the uncompressed bytes.

    data may hold several bzip2 streams back to back; their contents
    are concatenated. Empty input yields b"". Raises ValueError if a
    stream ends before its end-of-stream marker.

    Use a BZ2Decompressor object instead when the input arrives
    incrementally.
    """
    if not len(data):
        return b""

    pieces = []
    pending = data
    while True:
        # Each stream needs its own decompressor object.
        stream = BZ2Decompressor()
        pieces.append(stream.decompress(pending))
        if not stream.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the end-of-stream marker is the next stream.
        pending = stream.unused_data
        if not pending:
            break
    return b"".join(pieces)