# blob: 6bc611e71fc7c1f58ba512fccf24e1098a48adf7 [file] [log] [blame]
"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

# Names exported by "from bz2 import *".
__all__ = [
    "BZ2File",
    "BZ2Compressor",
    "BZ2Decompressor",
    "open",
    "compress",
    "decompress",
]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

import io
import warnings

# Fall back to the dummy lock when the interpreter has no thread support.
try:
    from threading import RLock
except ImportError:
    from dummy_threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor


# States stored in BZ2File._mode.
_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2
_MODE_WRITE = 3

# Number of compressed bytes requested from the underlying file per read.
_BUFFER_SIZE = 8192

# Keep a handle on the builtin, since this module defines its own open().
_builtin_open = open
31
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020032
class BZ2File(io.BufferedIOBase):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name
        of the file to be opened. Otherwise, it should be a file object,
        which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an invalid mode or compresslevel, and
        TypeError if filename is neither a name nor a file object.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0       # Current position in the uncompressed data.
        self._size = -1     # Uncompressed size; -1 until EOF has been seen.

        if buffering is not None:
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning, stacklevel=2)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb", "x", "xb", "a", "ab"):
            # All writing modes differ only in how the file itself is opened.
            mode = mode[0] + "b"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file, so we are responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode in (_MODE_READ, _MODE_READ_EOF):
                    self._decompressor = None
                elif self._mode == _MODE_WRITE:
                    # Emit whatever the compressor is still holding back.
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                # Reset our state even if flushing or closing fails.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = b""
                    self._buffer_offset = 0

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            # A closed file raises ValueError in preference to the
            # UnsupportedOperation below.
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Record that the end of the uncompressed data has been reached.
    def _set_eof(self):
        self._mode = _MODE_READ_EOF
        self._size = self._pos

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            # Data left over after the previous stream takes priority over
            # reading more from the underlying file.
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file. We're good.
                    self._set_eof()
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            if self._decompressor.eof:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(rawblock)
                except OSError:
                    # What follows the last complete stream is not bzip2
                    # data. Ignore it and report EOF, as the bzip2 tool does.
                    self._set_eof()
                    return False
            else:
                self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            if not self._fill_buffer():
                return b""
            return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative, None or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size is None or size < 0:
                return self._read_all()
            if size == 0:
                return b""
            return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b'' if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        with self._lock:
            self._check_can_read()
            if (size == 0 or
                # Only call _fill_buffer() if the buffer is actually empty.
                # This gives a significant speedup if *size* is small.
                (self._buffer_offset == len(self._buffer) and
                 not self._fill_buffer())):
                return b""
            if size > 0:
                data = self._buffer[self._buffer_offset :
                                    self._buffer_offset + size]
                self._buffer_offset += len(data)
            else:
                # Negative size: hand over everything currently buffered.
                data = self._buffer[self._buffer_offset:]
                self._buffer = b""
                self._buffer_offset = 0
            self._pos += len(data)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            return io.BufferedIOBase.readinto(self, b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            # Shortcut for the common case - the whole line is in the buffer.
            if size < 0:
                # find() returns -1 when there is no newline, making end 0.
                end = self._buffer.find(b"\n", self._buffer_offset) + 1
                if end > 0:
                    line = self._buffer[self._buffer_offset : end]
                    self._buffer_offset = end
                    self._pos += len(line)
                    return line
            return io.BufferedIOBase.readline(self, size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            return io.BufferedIOBase.readlines(self, size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return io.BufferedIOBase.writelines(self, seq)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position. A target before the start of the
        stream leaves the file positioned at 0.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()

            # Recalculate offset as an absolute file position.
            if whence == 0:
                pass
            elif whence == 1:
                offset = self._pos + offset
            elif whence == 2:
                # Seeking relative to EOF - we need to know the file's size.
                if self._size < 0:
                    self._read_all(return_data=False)
                offset = self._size + offset
            else:
                raise ValueError("Invalid value for whence: %s" % (whence,))

            # Make it so that offset is the number of bytes to skip forward.
            if offset < self._pos:
                self._rewind()
            else:
                offset -= self._pos

            # Read and discard data until we reach the desired position.
            # A non-positive count must not reach _read_block(): a negative
            # one would leave _buffer_offset negative and break later reads.
            if offset > 0:
                self._read_block(offset, return_data=False)

            return self._pos

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            return self._pos
442
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    filename is either the name of a file (str or bytes) or an
    existing file object to read from or write to.

    mode is one of "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. It defaults
    to "rb"; compresslevel defaults to 9.

    In binary mode the result is BZ2File(filename, mode, compresslevel),
    and encoding, errors and newline must be left unset (a ValueError is
    raised otherwise).

    In text mode the BZ2File is wrapped in an io.TextIOWrapper built
    with the given encoding, error handling behavior and line ending(s).
    """
    text_mode = "t" in mode

    if text_mode:
        # "t" and "b" contradict each other.
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # Text-only options are rejected in binary mode, checked in order.
        text_only = (("encoding", encoding),
                     ("errors", errors),
                     ("newline", newline))
        for arg_name, arg_value in text_only:
            if arg_value is not None:
                raise ValueError("Argument %r not supported in binary mode"
                                 % arg_name)

    # BZ2File itself only understands binary modes.
    raw = BZ2File(filename, mode.replace("t", ""),
                  compresslevel=compresslevel)
    if not text_mode:
        return raw
    return io.TextIOWrapper(raw, encoding, errors, newline)
481
482
def compress(data, compresslevel=9):
    """Compress data in one shot and return the compressed bytes.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    compressor = BZ2Compressor(compresslevel)
    body = compressor.compress(data)
    # flush() finishes the stream; without it the output is incomplete.
    trailer = compressor.flush()
    return body + trailer
492
493
def decompress(data):
    """Decompress a block of data.

    data may be the concatenation of several compressed streams; their
    contents are joined in order. Bytes following the last complete
    stream that do not form a valid bzip2 stream are ignored.

    Returns the decompressed bytes (b"" for empty input).

    Raises OSError if data does not start with a valid bzip2 stream, and
    ValueError if a stream ends before its end-of-stream marker.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while len(data) > 0:
        # Each stream needs a fresh decompressor.
        decomp = BZ2Decompressor()
        try:
            chunk = decomp.decompress(data)
        except OSError:
            if results:
                # Leftover data is not a valid bzip2 stream; ignore it and
                # keep what has already been decompressed.
                break
            # Error on the very first stream: the input is not bzip2 data.
            raise
        results.append(chunk)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the finished stream is the next stream, if any.
        data = decomp.unused_data
    return b"".join(results)