blob: 5cea42cc6b2f8151b834df31d2280af110a78e2c [file] [log] [blame]
/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When the libbzip2 header does not define BZ_CONFIG_ERROR, map the
   BZ2_-prefixed entry points used in this file onto their unprefixed
   names.  (Presumably this targets an older libbzip2 API -- confirm.) */
#ifndef BZ_CONFIG_ERROR
/* Compression entry points. */
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressEnd bzCompressEnd
/* Decompression entry points. */
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressEnd bzDecompressEnd
#endif /* ! BZ_CONFIG_ERROR */
24
25
/* Per-object locking helpers.  Without thread support they expand to
   nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
/* First attempt a non-blocking acquire; only when that fails do we wrap
   a blocking acquire in Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so
   other Python threads can run while we wait. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif /* WITH_THREAD */
38
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 int flushed;
44#ifdef WITH_THREAD
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
49typedef struct {
50 PyObject_HEAD
51 bz_stream bzs;
52 char eof; /* T_BOOL expects a char */
53 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010054 char needs_input;
55 char *input_buffer;
56 size_t input_buffer_size;
57
58 /* bzs->avail_in is only 32 bit, so we store the true length
59 separately. Conversion and looping is encapsulated in
60 decompress_buf() */
61 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020062#ifdef WITH_THREAD
63 PyThread_type_lock lock;
64#endif
65} BZ2Decompressor;
66
Larry Hastingsf256c222014-01-25 21:30:37 -080067static PyTypeObject BZ2Compressor_Type;
68static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020069
70/* Helper functions. */
71
72static int
73catch_bz2_error(int bzerror)
74{
75 switch(bzerror) {
76 case BZ_OK:
77 case BZ_RUN_OK:
78 case BZ_FLUSH_OK:
79 case BZ_FINISH_OK:
80 case BZ_STREAM_END:
81 return 0;
82
83#ifdef BZ_CONFIG_ERROR
84 case BZ_CONFIG_ERROR:
85 PyErr_SetString(PyExc_SystemError,
86 "libbzip2 was not compiled correctly");
87 return 1;
88#endif
89 case BZ_PARAM_ERROR:
90 PyErr_SetString(PyExc_ValueError,
91 "Internal error - "
92 "invalid parameters passed to libbzip2");
93 return 1;
94 case BZ_MEM_ERROR:
95 PyErr_NoMemory();
96 return 1;
97 case BZ_DATA_ERROR:
98 case BZ_DATA_ERROR_MAGIC:
99 PyErr_SetString(PyExc_IOError, "Invalid data stream");
100 return 1;
101 case BZ_IO_ERROR:
102 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
103 return 1;
104 case BZ_UNEXPECTED_EOF:
105 PyErr_SetString(PyExc_EOFError,
106 "Compressed file ended before the logical "
107 "end-of-stream was detected");
108 return 1;
109 case BZ_SEQUENCE_ERROR:
110 PyErr_SetString(PyExc_RuntimeError,
111 "Internal error - "
112 "Invalid sequence of commands sent to libbzip2");
113 return 1;
114 default:
115 PyErr_Format(PyExc_IOError,
116 "Unrecognized error from libbzip2: %d", bzerror);
117 return 1;
118 }
119}
120
/* Initial size of freshly allocated output buffers: BUFSIZ, but never
   less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
126
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100128grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200129{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200130 /* Expand the buffer by an amount proportional to the current size,
131 giving us amortized linear-time behavior. Use a less-than-double
132 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200134 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100135
136 if (max_length > 0 && new_size > (size_t) max_length)
137 new_size = (size_t) max_length;
138
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200139 if (new_size > size) {
140 return _PyBytes_Resize(buf, new_size);
141 } else { /* overflow */
142 PyErr_SetString(PyExc_OverflowError,
143 "Unable to allocate buffer - output too large");
144 return -1;
145 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146}
147
148
149/* BZ2Compressor class. */
150
151static PyObject *
152compress(BZ2Compressor *c, char *data, size_t len, int action)
153{
154 size_t data_size = 0;
155 PyObject *result;
156
Antoine Pitroue71258a2015-02-26 13:08:07 +0100157 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 if (result == NULL)
159 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100160
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200161 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100162 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200163 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100164 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 for (;;) {
166 char *this_out;
167 int bzerror;
168
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100169 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
170 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200171 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200172 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200173 len -= c->bzs.avail_in;
174 }
175
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100176 /* In regular compression mode, stop when input data is exhausted. */
177 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 break;
179
180 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
182 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100183 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200184 goto error;
185 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
186 buffer_left = PyBytes_GET_SIZE(result) - data_size;
187 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200188 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100190
191 Py_BEGIN_ALLOW_THREADS
192 this_out = c->bzs.next_out;
193 bzerror = BZ2_bzCompress(&c->bzs, action);
194 data_size += c->bzs.next_out - this_out;
195 Py_END_ALLOW_THREADS
196 if (catch_bz2_error(bzerror))
197 goto error;
198
199 /* In flushing mode, stop when all buffered data has been flushed. */
200 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
201 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200202 }
Victor Stinner706768c2014-08-16 01:03:39 +0200203 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 if (_PyBytes_Resize(&result, data_size) < 0)
205 goto error;
206 return result;
207
208error:
209 Py_XDECREF(result);
210 return NULL;
211}
212
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200213/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800215class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
216class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200217[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300218/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200219
Larry Hastingsf256c222014-01-25 21:30:37 -0800220#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200221
222/*[clinic input]
223_bz2.BZ2Compressor.compress
224
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200225 data: Py_buffer
226 /
227
228Provide data to the compressor object.
229
230Returns a chunk of compressed data if possible, or b'' otherwise.
231
232When you have finished providing data to the compressor, call the
233flush() method to finish the compression process.
234[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200235
236static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200237_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800238/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200240 PyObject *result = NULL;
241
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200242 ACQUIRE_LOCK(self);
243 if (self->flushed)
244 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
245 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200246 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200247 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200248 return result;
249}
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251/*[clinic input]
252_bz2.BZ2Compressor.flush
253
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200254Finish the compression process.
255
256Returns the compressed data left in internal buffers.
257
258The compressor object may not be used after this method is called.
259[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200260
261static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200262_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800263/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200264{
265 PyObject *result = NULL;
266
267 ACQUIRE_LOCK(self);
268 if (self->flushed)
269 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
270 else {
271 self->flushed = 1;
272 result = compress(self, NULL, 0, BZ_FINISH);
273 }
274 RELEASE_LOCK(self);
275 return result;
276}
277
Nadeem Vawda37970652013-10-28 21:35:23 +0100278static PyObject *
279BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
280{
281 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
282 Py_TYPE(self)->tp_name);
283 return NULL;
284}
285
Victor Stinner5064a522013-07-07 16:50:27 +0200286static void*
287BZ2_Malloc(void* ctx, int items, int size)
288{
289 if (items < 0 || size < 0)
290 return NULL;
291 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
292 return NULL;
293 /* PyMem_Malloc() cannot be used: compress() and decompress()
294 release the GIL */
295 return PyMem_RawMalloc(items * size);
296}
297
/* Deallocator callback installed as bz_stream.bzfree; the counterpart of
   BZ2_Malloc().  ctx is the stream's opaque pointer and is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
303
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200304/*[clinic input]
305_bz2.BZ2Compressor.__init__
306
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200307 compresslevel: int = 9
308 Compression level, as a number between 1 and 9.
309 /
310
311Create a compressor object for compressing data incrementally.
312
313For one-shot compression, use the compress() function instead.
314[clinic start generated code]*/
315
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200317_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800318/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200320 int bzerror;
321
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322 if (!(1 <= compresslevel && compresslevel <= 9)) {
323 PyErr_SetString(PyExc_ValueError,
324 "compresslevel must be between 1 and 9");
325 return -1;
326 }
327
328#ifdef WITH_THREAD
329 self->lock = PyThread_allocate_lock();
330 if (self->lock == NULL) {
331 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
332 return -1;
333 }
334#endif
335
Victor Stinner5064a522013-07-07 16:50:27 +0200336 self->bzs.opaque = NULL;
337 self->bzs.bzalloc = BZ2_Malloc;
338 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200339 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
340 if (catch_bz2_error(bzerror))
341 goto error;
342
343 return 0;
344
345error:
346#ifdef WITH_THREAD
347 PyThread_free_lock(self->lock);
348 self->lock = NULL;
349#endif
350 return -1;
351}
352
353static void
354BZ2Compressor_dealloc(BZ2Compressor *self)
355{
356 BZ2_bzCompressEnd(&self->bzs);
357#ifdef WITH_THREAD
358 if (self->lock != NULL)
359 PyThread_free_lock(self->lock);
360#endif
361 Py_TYPE(self)->tp_free((PyObject *)self);
362}
363
364static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200365 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
366 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100367 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200368 {NULL}
369};
370
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200371
372static PyTypeObject BZ2Compressor_Type = {
373 PyVarObject_HEAD_INIT(NULL, 0)
374 "_bz2.BZ2Compressor", /* tp_name */
375 sizeof(BZ2Compressor), /* tp_basicsize */
376 0, /* tp_itemsize */
377 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
378 0, /* tp_print */
379 0, /* tp_getattr */
380 0, /* tp_setattr */
381 0, /* tp_reserved */
382 0, /* tp_repr */
383 0, /* tp_as_number */
384 0, /* tp_as_sequence */
385 0, /* tp_as_mapping */
386 0, /* tp_hash */
387 0, /* tp_call */
388 0, /* tp_str */
389 0, /* tp_getattro */
390 0, /* tp_setattro */
391 0, /* tp_as_buffer */
392 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200393 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394 0, /* tp_traverse */
395 0, /* tp_clear */
396 0, /* tp_richcompare */
397 0, /* tp_weaklistoffset */
398 0, /* tp_iter */
399 0, /* tp_iternext */
400 BZ2Compressor_methods, /* tp_methods */
401 0, /* tp_members */
402 0, /* tp_getset */
403 0, /* tp_base */
404 0, /* tp_dict */
405 0, /* tp_descr_get */
406 0, /* tp_descr_set */
407 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200408 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200409 0, /* tp_alloc */
410 PyType_GenericNew, /* tp_new */
411};
412
413
414/* BZ2Decompressor class. */
415
Antoine Pitroue71258a2015-02-26 13:08:07 +0100416/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
417 buffer is allocated dynamically and returned. At most max_length bytes are
418 returned, so some of the input may not be consumed. d->bzs.next_in and
419 d->bzs_avail_in_real are updated to reflect the consumed input. */
420static PyObject*
421decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200422{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100423 /* data_size is strictly positive, but because we repeatedly have to
424 compare against max_length and PyBytes_GET_SIZE we declare it as
425 signed */
426 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200427 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100428 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
431 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
432 else
433 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200434 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 return NULL;
436
437 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200438 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100439 int bzret;
440 size_t avail;
441
442 /* On a 64-bit system, buffer length might not fit in avail_out, so we
443 do decompression in chunks of no more than UINT_MAX bytes
444 each. Note that the expression for `avail` is guaranteed to be
445 positive, so the cast is safe. */
446 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
447 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
448 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
449 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200450
451 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100452 bzret = BZ2_bzDecompress(bzs);
453 data_size = bzs->next_out - PyBytes_AS_STRING(result);
454 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200455 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100456 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200459 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100461 } else if (d->bzs_avail_in_real == 0) {
462 break;
463 } else if (bzs->avail_out == 0) {
464 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200465 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (data_size == PyBytes_GET_SIZE(result) &&
467 grow_buffer(&result, max_length) == -1)
468 goto error;
469 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200470 }
471 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100472 if (data_size != PyBytes_GET_SIZE(result))
473 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200474 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100475
476 return result;
477
478error:
479 Py_XDECREF(result);
480 return NULL;
481}
482
483
484static PyObject *
485decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
486{
487 char input_buffer_in_use;
488 PyObject *result;
489 bz_stream *bzs = &d->bzs;
490
491 /* Prepend unconsumed input if necessary */
492 if (bzs->next_in != NULL) {
493 size_t avail_now, avail_total;
494
495 /* Number of bytes we can append to input buffer */
496 avail_now = (d->input_buffer + d->input_buffer_size)
497 - (bzs->next_in + d->bzs_avail_in_real);
498
499 /* Number of bytes we can append if we move existing
500 contents to beginning of buffer (overwriting
501 consumed input) */
502 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
503
504 if (avail_total < len) {
505 size_t offset = bzs->next_in - d->input_buffer;
506 char *tmp;
507 size_t new_size = d->input_buffer_size + len - avail_now;
508
509 /* Assign to temporary variable first, so we don't
510 lose address of allocated buffer if realloc fails */
511 tmp = PyMem_Realloc(d->input_buffer, new_size);
512 if (tmp == NULL) {
513 PyErr_SetNone(PyExc_MemoryError);
514 return NULL;
515 }
516 d->input_buffer = tmp;
517 d->input_buffer_size = new_size;
518
519 bzs->next_in = d->input_buffer + offset;
520 }
521 else if (avail_now < len) {
522 memmove(d->input_buffer, bzs->next_in,
523 d->bzs_avail_in_real);
524 bzs->next_in = d->input_buffer;
525 }
526 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
527 d->bzs_avail_in_real += len;
528 input_buffer_in_use = 1;
529 }
530 else {
531 bzs->next_in = data;
532 d->bzs_avail_in_real = len;
533 input_buffer_in_use = 0;
534 }
535
536 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000537 if(result == NULL) {
538 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100539 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000540 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100541
542 if (d->eof) {
543 d->needs_input = 0;
544 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300545 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200546 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100547 if (d->unused_data == NULL)
548 goto error;
549 }
550 }
551 else if (d->bzs_avail_in_real == 0) {
552 bzs->next_in = NULL;
553 d->needs_input = 1;
554 }
555 else {
556 d->needs_input = 0;
557
558 /* If we did not use the input buffer, we now have
559 to copy the tail from the caller's buffer into the
560 input buffer */
561 if (!input_buffer_in_use) {
562
563 /* Discard buffer if it's too small
564 (resizing it may needlessly copy the current contents) */
565 if (d->input_buffer != NULL &&
566 d->input_buffer_size < d->bzs_avail_in_real) {
567 PyMem_Free(d->input_buffer);
568 d->input_buffer = NULL;
569 }
570
571 /* Allocate if necessary */
572 if (d->input_buffer == NULL) {
573 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
574 if (d->input_buffer == NULL) {
575 PyErr_SetNone(PyExc_MemoryError);
576 goto error;
577 }
578 d->input_buffer_size = d->bzs_avail_in_real;
579 }
580
581 /* Copy tail */
582 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
583 bzs->next_in = d->input_buffer;
584 }
585 }
586
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200587 return result;
588
589error:
590 Py_XDECREF(result);
591 return NULL;
592}
593
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200594/*[clinic input]
595_bz2.BZ2Decompressor.decompress
596
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200597 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100598 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200599
Antoine Pitroue71258a2015-02-26 13:08:07 +0100600Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200601
Antoine Pitroue71258a2015-02-26 13:08:07 +0100602If *max_length* is nonnegative, returns at most *max_length* bytes of
603decompressed data. If this limit is reached and further output can be
604produced, *self.needs_input* will be set to ``False``. In this case, the next
605call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200606
Antoine Pitroue71258a2015-02-26 13:08:07 +0100607If all of the input data was decompressed and returned (either because this
608was less than *max_length* bytes, or because *max_length* was negative),
609*self.needs_input* will be set to True.
610
611Attempting to decompress data after the end of stream is reached raises an
612EOFError. Any data found after the end of the stream is ignored and saved in
613the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200614[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200615
616static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400617_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
618 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300619/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200620{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200621 PyObject *result = NULL;
622
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200623 ACQUIRE_LOCK(self);
624 if (self->eof)
625 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
626 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100627 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200629 return result;
630}
631
Nadeem Vawda37970652013-10-28 21:35:23 +0100632static PyObject *
633BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
634{
635 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
636 Py_TYPE(self)->tp_name);
637 return NULL;
638}
639
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200640/*[clinic input]
641_bz2.BZ2Decompressor.__init__
642
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200643Create a decompressor object for decompressing data incrementally.
644
645For one-shot decompression, use the decompress() function instead.
646[clinic start generated code]*/
647
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200648static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200649_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800650/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200651{
652 int bzerror;
653
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200654#ifdef WITH_THREAD
655 self->lock = PyThread_allocate_lock();
656 if (self->lock == NULL) {
657 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
658 return -1;
659 }
660#endif
661
Antoine Pitroue71258a2015-02-26 13:08:07 +0100662 self->needs_input = 1;
663 self->bzs_avail_in_real = 0;
664 self->input_buffer = NULL;
665 self->input_buffer_size = 0;
666 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200667 if (self->unused_data == NULL)
668 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200669
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200670 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
671 if (catch_bz2_error(bzerror))
672 goto error;
673
674 return 0;
675
676error:
677 Py_CLEAR(self->unused_data);
678#ifdef WITH_THREAD
679 PyThread_free_lock(self->lock);
680 self->lock = NULL;
681#endif
682 return -1;
683}
684
685static void
686BZ2Decompressor_dealloc(BZ2Decompressor *self)
687{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100688 if(self->input_buffer != NULL)
689 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200690 BZ2_bzDecompressEnd(&self->bzs);
691 Py_CLEAR(self->unused_data);
692#ifdef WITH_THREAD
693 if (self->lock != NULL)
694 PyThread_free_lock(self->lock);
695#endif
696 Py_TYPE(self)->tp_free((PyObject *)self);
697}
698
699static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200700 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100701 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200702 {NULL}
703};
704
705PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
706"True if the end-of-stream marker has been reached.");
707
708PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
709"Data found after the end of the compressed stream.");
710
Antoine Pitroue71258a2015-02-26 13:08:07 +0100711PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
712"True if more input is needed before more decompressed data can be produced.");
713
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200714static PyMemberDef BZ2Decompressor_members[] = {
715 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
716 READONLY, BZ2Decompressor_eof__doc__},
717 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
718 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100719 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
720 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200721 {NULL}
722};
723
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200724static PyTypeObject BZ2Decompressor_Type = {
725 PyVarObject_HEAD_INIT(NULL, 0)
726 "_bz2.BZ2Decompressor", /* tp_name */
727 sizeof(BZ2Decompressor), /* tp_basicsize */
728 0, /* tp_itemsize */
729 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
730 0, /* tp_print */
731 0, /* tp_getattr */
732 0, /* tp_setattr */
733 0, /* tp_reserved */
734 0, /* tp_repr */
735 0, /* tp_as_number */
736 0, /* tp_as_sequence */
737 0, /* tp_as_mapping */
738 0, /* tp_hash */
739 0, /* tp_call */
740 0, /* tp_str */
741 0, /* tp_getattro */
742 0, /* tp_setattro */
743 0, /* tp_as_buffer */
744 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200745 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200746 0, /* tp_traverse */
747 0, /* tp_clear */
748 0, /* tp_richcompare */
749 0, /* tp_weaklistoffset */
750 0, /* tp_iter */
751 0, /* tp_iternext */
752 BZ2Decompressor_methods, /* tp_methods */
753 BZ2Decompressor_members, /* tp_members */
754 0, /* tp_getset */
755 0, /* tp_base */
756 0, /* tp_dict */
757 0, /* tp_descr_get */
758 0, /* tp_descr_set */
759 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200760 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200761 0, /* tp_alloc */
762 PyType_GenericNew, /* tp_new */
763};
764
765
766/* Module initialization. */
767
768static struct PyModuleDef _bz2module = {
769 PyModuleDef_HEAD_INIT,
770 "_bz2",
771 NULL,
772 -1,
773 NULL,
774 NULL,
775 NULL,
776 NULL,
777 NULL
778};
779
780PyMODINIT_FUNC
781PyInit__bz2(void)
782{
783 PyObject *m;
784
785 if (PyType_Ready(&BZ2Compressor_Type) < 0)
786 return NULL;
787 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
788 return NULL;
789
790 m = PyModule_Create(&_bz2module);
791 if (m == NULL)
792 return NULL;
793
794 Py_INCREF(&BZ2Compressor_Type);
795 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
796
797 Py_INCREF(&BZ2Decompressor_Type);
798 PyModule_AddObject(m, "BZ2Decompressor",
799 (PyObject *)&BZ2Decompressor_Type);
800
801 return m;
802}