blob: 51da7aeaec18d70642bf8acc664301da4e072b3c [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* Compatibility aliases: when <bzlib.h> does not define BZ_CONFIG_ERROR,
   map the BZ2_-prefixed entry points used in this file onto the unprefixed
   names.  (Presumably this targets an older libbzip2 release that predates
   the BZ2_ prefix -- TODO confirm.) */
16#ifndef BZ_CONFIG_ERROR
17#define BZ2_bzCompress bzCompress
18#define BZ2_bzCompressInit bzCompressInit
19#define BZ2_bzCompressEnd bzCompressEnd
20#define BZ2_bzDecompress bzDecompress
21#define BZ2_bzDecompressInit bzDecompressInit
22#define BZ2_bzDecompressEnd bzDecompressEnd
23#endif /* ! BZ_CONFIG_ERROR */
24
25
/* Per-object locking helpers.  `obj` must have a `lock` member. */
26#ifdef WITH_THREAD
/* ACQUIRE_LOCK first attempts the acquire with a wait flag of 0; only when
   that attempt fails does it release the GIL (Py_BEGIN_ALLOW_THREADS) and
   retry with a wait flag of 1, so the GIL is not held while blocking. */
27#define ACQUIRE_LOCK(obj) do { \
28 if (!PyThread_acquire_lock((obj)->lock, 0)) { \
29 Py_BEGIN_ALLOW_THREADS \
30 PyThread_acquire_lock((obj)->lock, 1); \
31 Py_END_ALLOW_THREADS \
32 } } while (0)
/* RELEASE_LOCK releases the lock taken by ACQUIRE_LOCK. */
33#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
34#else
/* Without WITH_THREAD both macros expand to nothing. */
35#define ACQUIRE_LOCK(obj)
36#define RELEASE_LOCK(obj)
37#endif
38
39
/* Instance layout of _bz2.BZ2Compressor. */
40typedef struct {
41 PyObject_HEAD
/* libbzip2 stream state, set up by BZ2_bzCompressInit() in __init__ and
   released by BZ2_bzCompressEnd() in the deallocator. */
42 bz_stream bzs;
/* Set to 1 by flush(); compress() and flush() raise ValueError once set. */
43 int flushed;
44#ifdef WITH_THREAD
/* Taken around compress() and flush() via ACQUIRE_LOCK/RELEASE_LOCK. */
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
/* Instance layout of _bz2.BZ2Decompressor. */
49typedef struct {
50 PyObject_HEAD
/* libbzip2 stream state, set up by BZ2_bzDecompressInit() in __init__ and
   released by BZ2_bzDecompressEnd() in the deallocator. */
51 bz_stream bzs;
/* Set to 1 when BZ2_bzDecompress() reports BZ_STREAM_END; exposed as the
   read-only `eof` member. */
52 char eof; /* T_BOOL expects a char */
/* Bytes left in the input after the end of the stream; starts out as an
   empty bytes object. */
53 PyObject *unused_data;
/* 1 when all supplied input has been consumed and the stream has not
   ended, 0 otherwise; exposed as the read-only `needs_input` member. */
Antoine Pitroue71258a2015-02-26 13:08:07 +010054 char needs_input;
/* PyMem-allocated buffer holding input that has not been consumed yet
   (NULL until first needed), and its allocated size in bytes. */
55 char *input_buffer;
56 size_t input_buffer_size;
57
58 /* bzs->avail_in is only 32 bit, so we store the true length
59 separately. Conversion and looping is encapsulated in
60 decompress_buf() */
61 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020062#ifdef WITH_THREAD
/* Taken around decompress() via ACQUIRE_LOCK/RELEASE_LOCK. */
63 PyThread_type_lock lock;
64#endif
65} BZ2Decompressor;
66
Larry Hastingsf256c222014-01-25 21:30:37 -080067static PyTypeObject BZ2Compressor_Type;
68static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020069
70/* Helper functions. */
71
72static int
73catch_bz2_error(int bzerror)
74{
75 switch(bzerror) {
76 case BZ_OK:
77 case BZ_RUN_OK:
78 case BZ_FLUSH_OK:
79 case BZ_FINISH_OK:
80 case BZ_STREAM_END:
81 return 0;
82
83#ifdef BZ_CONFIG_ERROR
84 case BZ_CONFIG_ERROR:
85 PyErr_SetString(PyExc_SystemError,
86 "libbzip2 was not compiled correctly");
87 return 1;
88#endif
89 case BZ_PARAM_ERROR:
90 PyErr_SetString(PyExc_ValueError,
91 "Internal error - "
92 "invalid parameters passed to libbzip2");
93 return 1;
94 case BZ_MEM_ERROR:
95 PyErr_NoMemory();
96 return 1;
97 case BZ_DATA_ERROR:
98 case BZ_DATA_ERROR_MAGIC:
99 PyErr_SetString(PyExc_IOError, "Invalid data stream");
100 return 1;
101 case BZ_IO_ERROR:
102 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
103 return 1;
104 case BZ_UNEXPECTED_EOF:
105 PyErr_SetString(PyExc_EOFError,
106 "Compressed file ended before the logical "
107 "end-of-stream was detected");
108 return 1;
109 case BZ_SEQUENCE_ERROR:
110 PyErr_SetString(PyExc_RuntimeError,
111 "Internal error - "
112 "Invalid sequence of commands sent to libbzip2");
113 return 1;
114 default:
115 PyErr_Format(PyExc_IOError,
116 "Unrecognized error from libbzip2: %d", bzerror);
117 return 1;
118 }
119}
120
/* Initial size, in bytes, of a freshly allocated output buffer: BUFSIZ from
   <stdio.h>, but never less than 8192. */
121#if BUFSIZ < 8192
Antoine Pitroue71258a2015-02-26 13:08:07 +0100122#define INITIAL_BUFFER_SIZE 8192
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200123#else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100124#define INITIAL_BUFFER_SIZE BUFSIZ
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200125#endif
126
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100128grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200129{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200130 /* Expand the buffer by an amount proportional to the current size,
131 giving us amortized linear-time behavior. Use a less-than-double
132 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200134 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100135
136 if (max_length > 0 && new_size > (size_t) max_length)
137 new_size = (size_t) max_length;
138
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200139 if (new_size > size) {
140 return _PyBytes_Resize(buf, new_size);
141 } else { /* overflow */
142 PyErr_SetString(PyExc_OverflowError,
143 "Unable to allocate buffer - output too large");
144 return -1;
145 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146}
147
148
149/* BZ2Compressor class. */
150
151static PyObject *
152compress(BZ2Compressor *c, char *data, size_t len, int action)
153{
154 size_t data_size = 0;
155 PyObject *result;
156
Antoine Pitroue71258a2015-02-26 13:08:07 +0100157 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 if (result == NULL)
159 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100160
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200161 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100162 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200163 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100164 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 for (;;) {
166 char *this_out;
167 int bzerror;
168
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100169 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
170 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200171 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200172 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200173 len -= c->bzs.avail_in;
174 }
175
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100176 /* In regular compression mode, stop when input data is exhausted. */
177 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 break;
179
180 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
182 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100183 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200184 goto error;
185 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
186 buffer_left = PyBytes_GET_SIZE(result) - data_size;
187 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200188 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100190
191 Py_BEGIN_ALLOW_THREADS
192 this_out = c->bzs.next_out;
193 bzerror = BZ2_bzCompress(&c->bzs, action);
194 data_size += c->bzs.next_out - this_out;
195 Py_END_ALLOW_THREADS
196 if (catch_bz2_error(bzerror))
197 goto error;
198
199 /* In flushing mode, stop when all buffered data has been flushed. */
200 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
201 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200202 }
Victor Stinner706768c2014-08-16 01:03:39 +0200203 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 if (_PyBytes_Resize(&result, data_size) < 0)
205 goto error;
206 return result;
207
208error:
209 Py_XDECREF(result);
210 return NULL;
211}
212
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200213/*[clinic input]
214output preset file
215module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800216class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
217class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200218[clinic start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800219/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e3b139924f5e18cc]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200220
Larry Hastingsf256c222014-01-25 21:30:37 -0800221#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200222
223/*[clinic input]
224_bz2.BZ2Compressor.compress
225
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200226 data: Py_buffer
227 /
228
229Provide data to the compressor object.
230
231Returns a chunk of compressed data if possible, or b'' otherwise.
232
233When you have finished providing data to the compressor, call the
234flush() method to finish the compression process.
235[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200236
237static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200238_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800239/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200240{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200241 PyObject *result = NULL;
242
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200243 ACQUIRE_LOCK(self);
244 if (self->flushed)
245 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
246 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200247 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200248 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200249 return result;
250}
251
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200252/*[clinic input]
253_bz2.BZ2Compressor.flush
254
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200255Finish the compression process.
256
257Returns the compressed data left in internal buffers.
258
259The compressor object may not be used after this method is called.
260[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200261
262static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200263_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800264/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200265{
266 PyObject *result = NULL;
267
268 ACQUIRE_LOCK(self);
269 if (self->flushed)
270 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
271 else {
272 self->flushed = 1;
273 result = compress(self, NULL, 0, BZ_FINISH);
274 }
275 RELEASE_LOCK(self);
276 return result;
277}
278
Nadeem Vawda37970652013-10-28 21:35:23 +0100279static PyObject *
280BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
281{
282 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
283 Py_TYPE(self)->tp_name);
284 return NULL;
285}
286
Victor Stinner5064a522013-07-07 16:50:27 +0200287static void*
288BZ2_Malloc(void* ctx, int items, int size)
289{
290 if (items < 0 || size < 0)
291 return NULL;
292 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
293 return NULL;
294 /* PyMem_Malloc() cannot be used: compress() and decompress()
295 release the GIL */
296 return PyMem_RawMalloc(items * size);
297}
298
/* Deallocation callback installed as bz_stream.bzfree; releases a block
   obtained from BZ2_Malloc().  The context pointer is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
304
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200305/*[clinic input]
306_bz2.BZ2Compressor.__init__
307
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200308 compresslevel: int = 9
309 Compression level, as a number between 1 and 9.
310 /
311
312Create a compressor object for compressing data incrementally.
313
314For one-shot compression, use the compress() function instead.
315[clinic start generated code]*/
316
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200317static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200318_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800319/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200320{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200321 int bzerror;
322
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200323 if (!(1 <= compresslevel && compresslevel <= 9)) {
324 PyErr_SetString(PyExc_ValueError,
325 "compresslevel must be between 1 and 9");
326 return -1;
327 }
328
329#ifdef WITH_THREAD
330 self->lock = PyThread_allocate_lock();
331 if (self->lock == NULL) {
332 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
333 return -1;
334 }
335#endif
336
Victor Stinner5064a522013-07-07 16:50:27 +0200337 self->bzs.opaque = NULL;
338 self->bzs.bzalloc = BZ2_Malloc;
339 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200340 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
341 if (catch_bz2_error(bzerror))
342 goto error;
343
344 return 0;
345
346error:
347#ifdef WITH_THREAD
348 PyThread_free_lock(self->lock);
349 self->lock = NULL;
350#endif
351 return -1;
352}
353
354static void
355BZ2Compressor_dealloc(BZ2Compressor *self)
356{
357 BZ2_bzCompressEnd(&self->bzs);
358#ifdef WITH_THREAD
359 if (self->lock != NULL)
360 PyThread_free_lock(self->lock);
361#endif
362 Py_TYPE(self)->tp_free((PyObject *)self);
363}
364
365static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200366 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
367 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100368 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200369 {NULL}
370};
371
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200372
373static PyTypeObject BZ2Compressor_Type = {
374 PyVarObject_HEAD_INIT(NULL, 0)
375 "_bz2.BZ2Compressor", /* tp_name */
376 sizeof(BZ2Compressor), /* tp_basicsize */
377 0, /* tp_itemsize */
378 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
379 0, /* tp_print */
380 0, /* tp_getattr */
381 0, /* tp_setattr */
382 0, /* tp_reserved */
383 0, /* tp_repr */
384 0, /* tp_as_number */
385 0, /* tp_as_sequence */
386 0, /* tp_as_mapping */
387 0, /* tp_hash */
388 0, /* tp_call */
389 0, /* tp_str */
390 0, /* tp_getattro */
391 0, /* tp_setattro */
392 0, /* tp_as_buffer */
393 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200394 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200395 0, /* tp_traverse */
396 0, /* tp_clear */
397 0, /* tp_richcompare */
398 0, /* tp_weaklistoffset */
399 0, /* tp_iter */
400 0, /* tp_iternext */
401 BZ2Compressor_methods, /* tp_methods */
402 0, /* tp_members */
403 0, /* tp_getset */
404 0, /* tp_base */
405 0, /* tp_dict */
406 0, /* tp_descr_get */
407 0, /* tp_descr_set */
408 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200409 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200410 0, /* tp_alloc */
411 PyType_GenericNew, /* tp_new */
412};
413
414
415/* BZ2Decompressor class. */
416
Antoine Pitroue71258a2015-02-26 13:08:07 +0100417/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
418 buffer is allocated dynamically and returned. At most max_length bytes are
419 returned, so some of the input may not be consumed. d->bzs.next_in and
420 d->bzs_avail_in_real are updated to reflect the consumed input. */
421static PyObject*
422decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200423{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100424 /* data_size is strictly positive, but because we repeatedly have to
425 compare against max_length and PyBytes_GET_SIZE we declare it as
426 signed */
427 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200428 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100429 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200430
Antoine Pitroue71258a2015-02-26 13:08:07 +0100431 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
432 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
433 else
434 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200435 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100436 return NULL;
437
438 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200439 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100440 int bzret;
441 size_t avail;
442
443 /* On a 64-bit system, buffer length might not fit in avail_out, so we
444 do decompression in chunks of no more than UINT_MAX bytes
445 each. Note that the expression for `avail` is guaranteed to be
446 positive, so the cast is safe. */
447 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
448 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
449 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
450 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200451
452 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100453 bzret = BZ2_bzDecompress(bzs);
454 data_size = bzs->next_out - PyBytes_AS_STRING(result);
455 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200456 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100457 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200458 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100459 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200461 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100462 } else if (d->bzs_avail_in_real == 0) {
463 break;
464 } else if (bzs->avail_out == 0) {
465 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200466 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100467 if (data_size == PyBytes_GET_SIZE(result) &&
468 grow_buffer(&result, max_length) == -1)
469 goto error;
470 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200471 }
472 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100473 if (data_size != PyBytes_GET_SIZE(result))
474 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200475 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100476
477 return result;
478
479error:
480 Py_XDECREF(result);
481 return NULL;
482}
483
484
485static PyObject *
486decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
487{
488 char input_buffer_in_use;
489 PyObject *result;
490 bz_stream *bzs = &d->bzs;
491
492 /* Prepend unconsumed input if necessary */
493 if (bzs->next_in != NULL) {
494 size_t avail_now, avail_total;
495
496 /* Number of bytes we can append to input buffer */
497 avail_now = (d->input_buffer + d->input_buffer_size)
498 - (bzs->next_in + d->bzs_avail_in_real);
499
500 /* Number of bytes we can append if we move existing
501 contents to beginning of buffer (overwriting
502 consumed input) */
503 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
504
505 if (avail_total < len) {
506 size_t offset = bzs->next_in - d->input_buffer;
507 char *tmp;
508 size_t new_size = d->input_buffer_size + len - avail_now;
509
510 /* Assign to temporary variable first, so we don't
511 lose address of allocated buffer if realloc fails */
512 tmp = PyMem_Realloc(d->input_buffer, new_size);
513 if (tmp == NULL) {
514 PyErr_SetNone(PyExc_MemoryError);
515 return NULL;
516 }
517 d->input_buffer = tmp;
518 d->input_buffer_size = new_size;
519
520 bzs->next_in = d->input_buffer + offset;
521 }
522 else if (avail_now < len) {
523 memmove(d->input_buffer, bzs->next_in,
524 d->bzs_avail_in_real);
525 bzs->next_in = d->input_buffer;
526 }
527 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
528 d->bzs_avail_in_real += len;
529 input_buffer_in_use = 1;
530 }
531 else {
532 bzs->next_in = data;
533 d->bzs_avail_in_real = len;
534 input_buffer_in_use = 0;
535 }
536
537 result = decompress_buf(d, max_length);
538 if(result == NULL)
539 return NULL;
540
541 if (d->eof) {
542 d->needs_input = 0;
543 if (d->bzs_avail_in_real > 0) {
544 Py_CLEAR(d->unused_data);
545 d->unused_data = PyBytes_FromStringAndSize(
546 bzs->next_in, d->bzs_avail_in_real);
547 if (d->unused_data == NULL)
548 goto error;
549 }
550 }
551 else if (d->bzs_avail_in_real == 0) {
552 bzs->next_in = NULL;
553 d->needs_input = 1;
554 }
555 else {
556 d->needs_input = 0;
557
558 /* If we did not use the input buffer, we now have
559 to copy the tail from the caller's buffer into the
560 input buffer */
561 if (!input_buffer_in_use) {
562
563 /* Discard buffer if it's too small
564 (resizing it may needlessly copy the current contents) */
565 if (d->input_buffer != NULL &&
566 d->input_buffer_size < d->bzs_avail_in_real) {
567 PyMem_Free(d->input_buffer);
568 d->input_buffer = NULL;
569 }
570
571 /* Allocate if necessary */
572 if (d->input_buffer == NULL) {
573 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
574 if (d->input_buffer == NULL) {
575 PyErr_SetNone(PyExc_MemoryError);
576 goto error;
577 }
578 d->input_buffer_size = d->bzs_avail_in_real;
579 }
580
581 /* Copy tail */
582 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
583 bzs->next_in = d->input_buffer;
584 }
585 }
586
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200587 return result;
588
589error:
590 Py_XDECREF(result);
591 return NULL;
592}
593
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200594/*[clinic input]
595_bz2.BZ2Decompressor.decompress
596
Antoine Pitroue71258a2015-02-26 13:08:07 +0100597 self: self(type="BZ2Decompressor *")
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200598 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100599 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200600
Antoine Pitroue71258a2015-02-26 13:08:07 +0100601Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200602
Antoine Pitroue71258a2015-02-26 13:08:07 +0100603If *max_length* is nonnegative, returns at most *max_length* bytes of
604decompressed data. If this limit is reached and further output can be
605produced, *self.needs_input* will be set to ``False``. In this case, the next
606call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200607
Antoine Pitroue71258a2015-02-26 13:08:07 +0100608If all of the input data was decompressed and returned (either because this
609was less than *max_length* bytes, or because *max_length* was negative),
610*self.needs_input* will be set to True.
611
612Attempting to decompress data after the end of stream is reached raises an
613EOFError. Any data found after the end of the stream is ignored and saved in
614the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200615[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200616
617static PyObject *
Antoine Pitroue71258a2015-02-26 13:08:07 +0100618_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, Py_ssize_t max_length)
619/*[clinic end generated code: output=7eeb5794035a2ca3 input=9558b424c8b00516]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200620{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200621 PyObject *result = NULL;
622
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200623 ACQUIRE_LOCK(self);
624 if (self->eof)
625 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
626 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100627 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200629 return result;
630}
631
Nadeem Vawda37970652013-10-28 21:35:23 +0100632static PyObject *
633BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
634{
635 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
636 Py_TYPE(self)->tp_name);
637 return NULL;
638}
639
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200640/*[clinic input]
641_bz2.BZ2Decompressor.__init__
642
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200643Create a decompressor object for decompressing data incrementally.
644
645For one-shot decompression, use the decompress() function instead.
646[clinic start generated code]*/
647
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200648static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200649_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800650/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200651{
652 int bzerror;
653
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200654#ifdef WITH_THREAD
655 self->lock = PyThread_allocate_lock();
656 if (self->lock == NULL) {
657 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
658 return -1;
659 }
660#endif
661
Antoine Pitroue71258a2015-02-26 13:08:07 +0100662 self->needs_input = 1;
663 self->bzs_avail_in_real = 0;
664 self->input_buffer = NULL;
665 self->input_buffer_size = 0;
666 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200667 if (self->unused_data == NULL)
668 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200669
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200670 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
671 if (catch_bz2_error(bzerror))
672 goto error;
673
674 return 0;
675
676error:
677 Py_CLEAR(self->unused_data);
678#ifdef WITH_THREAD
679 PyThread_free_lock(self->lock);
680 self->lock = NULL;
681#endif
682 return -1;
683}
684
685static void
686BZ2Decompressor_dealloc(BZ2Decompressor *self)
687{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100688 if(self->input_buffer != NULL)
689 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200690 BZ2_bzDecompressEnd(&self->bzs);
691 Py_CLEAR(self->unused_data);
692#ifdef WITH_THREAD
693 if (self->lock != NULL)
694 PyThread_free_lock(self->lock);
695#endif
696 Py_TYPE(self)->tp_free((PyObject *)self);
697}
698
699static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200700 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100701 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200702 {NULL}
703};
704
705PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
706"True if the end-of-stream marker has been reached.");
707
708PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
709"Data found after the end of the compressed stream.");
710
Antoine Pitroue71258a2015-02-26 13:08:07 +0100711PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
712"True if more input is needed before more decompressed data can be produced.");
713
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200714static PyMemberDef BZ2Decompressor_members[] = {
715 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
716 READONLY, BZ2Decompressor_eof__doc__},
717 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
718 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100719 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
720 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200721 {NULL}
722};
723
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200724static PyTypeObject BZ2Decompressor_Type = {
725 PyVarObject_HEAD_INIT(NULL, 0)
726 "_bz2.BZ2Decompressor", /* tp_name */
727 sizeof(BZ2Decompressor), /* tp_basicsize */
728 0, /* tp_itemsize */
729 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
730 0, /* tp_print */
731 0, /* tp_getattr */
732 0, /* tp_setattr */
733 0, /* tp_reserved */
734 0, /* tp_repr */
735 0, /* tp_as_number */
736 0, /* tp_as_sequence */
737 0, /* tp_as_mapping */
738 0, /* tp_hash */
739 0, /* tp_call */
740 0, /* tp_str */
741 0, /* tp_getattro */
742 0, /* tp_setattro */
743 0, /* tp_as_buffer */
744 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200745 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200746 0, /* tp_traverse */
747 0, /* tp_clear */
748 0, /* tp_richcompare */
749 0, /* tp_weaklistoffset */
750 0, /* tp_iter */
751 0, /* tp_iternext */
752 BZ2Decompressor_methods, /* tp_methods */
753 BZ2Decompressor_members, /* tp_members */
754 0, /* tp_getset */
755 0, /* tp_base */
756 0, /* tp_dict */
757 0, /* tp_descr_get */
758 0, /* tp_descr_set */
759 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200760 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200761 0, /* tp_alloc */
762 PyType_GenericNew, /* tp_new */
763};
764
765
766/* Module initialization. */
767
768static struct PyModuleDef _bz2module = {
769 PyModuleDef_HEAD_INIT,
770 "_bz2",
771 NULL,
772 -1,
773 NULL,
774 NULL,
775 NULL,
776 NULL,
777 NULL
778};
779
780PyMODINIT_FUNC
781PyInit__bz2(void)
782{
783 PyObject *m;
784
785 if (PyType_Ready(&BZ2Compressor_Type) < 0)
786 return NULL;
787 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
788 return NULL;
789
790 m = PyModule_Create(&_bz2module);
791 if (m == NULL)
792 return NULL;
793
794 Py_INCREF(&BZ2Compressor_Type);
795 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
796
797 Py_INCREF(&BZ2Decompressor_Type);
798 PyModule_AddObject(m, "BZ2Decompressor",
799 (PyObject *)&BZ2Decompressor_Type);
800
801 return m;
802}