blob: b5e5a79d50a5bb48300b40f19e781eb634675f6f [file] [log] [blame]
/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include "pythread.h"
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02009
10#include <bzlib.h>
11#include <stdio.h>
12
13
/* When bzlib.h does not define BZ_CONFIG_ERROR, alias the BZ2_-prefixed
   entry points used throughout this file to their unprefixed names
   (presumably the API of an older libbzip2 -- confirm against bzlib.h). */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
22
23
/* Take (obj)->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquisition performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads can
   run while this one waits. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release (obj)->lock previously taken with ACQUIRE_LOCK. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020031
32
33typedef struct {
34 PyObject_HEAD
35 bz_stream bzs;
36 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020037 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020038} BZ2Compressor;
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 char eof; /* T_BOOL expects a char */
44 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010045 char needs_input;
46 char *input_buffer;
47 size_t input_buffer_size;
48
49 /* bzs->avail_in is only 32 bit, so we store the true length
50 separately. Conversion and looping is encapsulated in
51 decompress_buf() */
52 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020053 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020054} BZ2Decompressor;
55
Larry Hastingsf256c222014-01-25 21:30:37 -080056static PyTypeObject BZ2Compressor_Type;
57static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020058
59/* Helper functions. */
60
61static int
62catch_bz2_error(int bzerror)
63{
64 switch(bzerror) {
65 case BZ_OK:
66 case BZ_RUN_OK:
67 case BZ_FLUSH_OK:
68 case BZ_FINISH_OK:
69 case BZ_STREAM_END:
70 return 0;
71
72#ifdef BZ_CONFIG_ERROR
73 case BZ_CONFIG_ERROR:
74 PyErr_SetString(PyExc_SystemError,
75 "libbzip2 was not compiled correctly");
76 return 1;
77#endif
78 case BZ_PARAM_ERROR:
79 PyErr_SetString(PyExc_ValueError,
80 "Internal error - "
81 "invalid parameters passed to libbzip2");
82 return 1;
83 case BZ_MEM_ERROR:
84 PyErr_NoMemory();
85 return 1;
86 case BZ_DATA_ERROR:
87 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030088 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020089 return 1;
90 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030091 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020092 return 1;
93 case BZ_UNEXPECTED_EOF:
94 PyErr_SetString(PyExc_EOFError,
95 "Compressed file ended before the logical "
96 "end-of-stream was detected");
97 return 1;
98 case BZ_SEQUENCE_ERROR:
99 PyErr_SetString(PyExc_RuntimeError,
100 "Internal error - "
101 "Invalid sequence of commands sent to libbzip2");
102 return 1;
103 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300104 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200105 "Unrecognized error from libbzip2: %d", bzerror);
106 return 1;
107 }
108}
109
/* Initial size of a freshly allocated output buffer: BUFSIZ, but never
   less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
115
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200116static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100117grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200118{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200119 /* Expand the buffer by an amount proportional to the current size,
120 giving us amortized linear-time behavior. Use a less-than-double
121 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200122 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200123 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100124
125 if (max_length > 0 && new_size > (size_t) max_length)
126 new_size = (size_t) max_length;
127
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200128 if (new_size > size) {
129 return _PyBytes_Resize(buf, new_size);
130 } else { /* overflow */
131 PyErr_SetString(PyExc_OverflowError,
132 "Unable to allocate buffer - output too large");
133 return -1;
134 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200135}
136
137
138/* BZ2Compressor class. */
139
140static PyObject *
141compress(BZ2Compressor *c, char *data, size_t len, int action)
142{
143 size_t data_size = 0;
144 PyObject *result;
145
Antoine Pitroue71258a2015-02-26 13:08:07 +0100146 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200147 if (result == NULL)
148 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100149
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200150 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100151 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200152 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100153 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200154 for (;;) {
155 char *this_out;
156 int bzerror;
157
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100158 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
159 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200160 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200161 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200162 len -= c->bzs.avail_in;
163 }
164
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100165 /* In regular compression mode, stop when input data is exhausted. */
166 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200167 break;
168
169 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200170 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
171 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100172 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200173 goto error;
174 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
175 buffer_left = PyBytes_GET_SIZE(result) - data_size;
176 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200177 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100179
180 Py_BEGIN_ALLOW_THREADS
181 this_out = c->bzs.next_out;
182 bzerror = BZ2_bzCompress(&c->bzs, action);
183 data_size += c->bzs.next_out - this_out;
184 Py_END_ALLOW_THREADS
185 if (catch_bz2_error(bzerror))
186 goto error;
187
188 /* In flushing mode, stop when all buffered data has been flushed. */
189 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
190 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200191 }
Victor Stinner706768c2014-08-16 01:03:39 +0200192 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200193 if (_PyBytes_Resize(&result, data_size) < 0)
194 goto error;
195 return result;
196
197error:
198 Py_XDECREF(result);
199 return NULL;
200}
201
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200202/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200203module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800204class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
205class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200206[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300207/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200208
Larry Hastingsf256c222014-01-25 21:30:37 -0800209#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200210
211/*[clinic input]
212_bz2.BZ2Compressor.compress
213
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214 data: Py_buffer
215 /
216
217Provide data to the compressor object.
218
219Returns a chunk of compressed data if possible, or b'' otherwise.
220
221When you have finished providing data to the compressor, call the
222flush() method to finish the compression process.
223[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200224
225static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200226_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800227/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200228{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200229 PyObject *result = NULL;
230
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200231 ACQUIRE_LOCK(self);
232 if (self->flushed)
233 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
234 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200235 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200236 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200237 return result;
238}
239
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200240/*[clinic input]
241_bz2.BZ2Compressor.flush
242
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200243Finish the compression process.
244
245Returns the compressed data left in internal buffers.
246
247The compressor object may not be used after this method is called.
248[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200249
250static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800252/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200253{
254 PyObject *result = NULL;
255
256 ACQUIRE_LOCK(self);
257 if (self->flushed)
258 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
259 else {
260 self->flushed = 1;
261 result = compress(self, NULL, 0, BZ_FINISH);
262 }
263 RELEASE_LOCK(self);
264 return result;
265}
266
Victor Stinner5064a522013-07-07 16:50:27 +0200267static void*
268BZ2_Malloc(void* ctx, int items, int size)
269{
270 if (items < 0 || size < 0)
271 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300272 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200273 return NULL;
274 /* PyMem_Malloc() cannot be used: compress() and decompress()
275 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300276 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200277}
278
/* Deallocation callback installed as bz_stream.bzfree; the counterpart of
   BZ2_Malloc().  `ctx` is unused. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
284
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200285/*[clinic input]
286_bz2.BZ2Compressor.__init__
287
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200288 compresslevel: int = 9
289 Compression level, as a number between 1 and 9.
290 /
291
292Create a compressor object for compressing data incrementally.
293
294For one-shot compression, use the compress() function instead.
295[clinic start generated code]*/
296
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200297static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200298_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800299/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200300{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200301 int bzerror;
302
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200303 if (!(1 <= compresslevel && compresslevel <= 9)) {
304 PyErr_SetString(PyExc_ValueError,
305 "compresslevel must be between 1 and 9");
306 return -1;
307 }
308
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200309 self->lock = PyThread_allocate_lock();
310 if (self->lock == NULL) {
311 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
312 return -1;
313 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200314
Victor Stinner5064a522013-07-07 16:50:27 +0200315 self->bzs.opaque = NULL;
316 self->bzs.bzalloc = BZ2_Malloc;
317 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200318 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
319 if (catch_bz2_error(bzerror))
320 goto error;
321
322 return 0;
323
324error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200325 PyThread_free_lock(self->lock);
326 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200327 return -1;
328}
329
330static void
331BZ2Compressor_dealloc(BZ2Compressor *self)
332{
333 BZ2_bzCompressEnd(&self->bzs);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200334 if (self->lock != NULL)
335 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200336 Py_TYPE(self)->tp_free((PyObject *)self);
337}
338
339static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200340 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
341 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200342 {NULL}
343};
344
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200345
346static PyTypeObject BZ2Compressor_Type = {
347 PyVarObject_HEAD_INIT(NULL, 0)
348 "_bz2.BZ2Compressor", /* tp_name */
349 sizeof(BZ2Compressor), /* tp_basicsize */
350 0, /* tp_itemsize */
351 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
352 0, /* tp_print */
353 0, /* tp_getattr */
354 0, /* tp_setattr */
355 0, /* tp_reserved */
356 0, /* tp_repr */
357 0, /* tp_as_number */
358 0, /* tp_as_sequence */
359 0, /* tp_as_mapping */
360 0, /* tp_hash */
361 0, /* tp_call */
362 0, /* tp_str */
363 0, /* tp_getattro */
364 0, /* tp_setattro */
365 0, /* tp_as_buffer */
366 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200367 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200368 0, /* tp_traverse */
369 0, /* tp_clear */
370 0, /* tp_richcompare */
371 0, /* tp_weaklistoffset */
372 0, /* tp_iter */
373 0, /* tp_iternext */
374 BZ2Compressor_methods, /* tp_methods */
375 0, /* tp_members */
376 0, /* tp_getset */
377 0, /* tp_base */
378 0, /* tp_dict */
379 0, /* tp_descr_get */
380 0, /* tp_descr_set */
381 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200382 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200383 0, /* tp_alloc */
384 PyType_GenericNew, /* tp_new */
385};
386
387
388/* BZ2Decompressor class. */
389
Antoine Pitroue71258a2015-02-26 13:08:07 +0100390/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
391 buffer is allocated dynamically and returned. At most max_length bytes are
392 returned, so some of the input may not be consumed. d->bzs.next_in and
393 d->bzs_avail_in_real are updated to reflect the consumed input. */
394static PyObject*
395decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200396{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100397 /* data_size is strictly positive, but because we repeatedly have to
398 compare against max_length and PyBytes_GET_SIZE we declare it as
399 signed */
400 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200401 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100402 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200403
Antoine Pitroue71258a2015-02-26 13:08:07 +0100404 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
405 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
406 else
407 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200408 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100409 return NULL;
410
411 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200412 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100413 int bzret;
414 size_t avail;
415
416 /* On a 64-bit system, buffer length might not fit in avail_out, so we
417 do decompression in chunks of no more than UINT_MAX bytes
418 each. Note that the expression for `avail` is guaranteed to be
419 positive, so the cast is safe. */
420 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
421 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
422 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
423 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200424
425 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100426 bzret = BZ2_bzDecompress(bzs);
427 data_size = bzs->next_out - PyBytes_AS_STRING(result);
428 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200431 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100432 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200433 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200434 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 } else if (d->bzs_avail_in_real == 0) {
436 break;
437 } else if (bzs->avail_out == 0) {
438 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200439 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100440 if (data_size == PyBytes_GET_SIZE(result) &&
441 grow_buffer(&result, max_length) == -1)
442 goto error;
443 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200444 }
445 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100446 if (data_size != PyBytes_GET_SIZE(result))
447 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200448 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100449
450 return result;
451
452error:
453 Py_XDECREF(result);
454 return NULL;
455}
456
457
458static PyObject *
459decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
460{
461 char input_buffer_in_use;
462 PyObject *result;
463 bz_stream *bzs = &d->bzs;
464
465 /* Prepend unconsumed input if necessary */
466 if (bzs->next_in != NULL) {
467 size_t avail_now, avail_total;
468
469 /* Number of bytes we can append to input buffer */
470 avail_now = (d->input_buffer + d->input_buffer_size)
471 - (bzs->next_in + d->bzs_avail_in_real);
472
473 /* Number of bytes we can append if we move existing
474 contents to beginning of buffer (overwriting
475 consumed input) */
476 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
477
478 if (avail_total < len) {
479 size_t offset = bzs->next_in - d->input_buffer;
480 char *tmp;
481 size_t new_size = d->input_buffer_size + len - avail_now;
482
483 /* Assign to temporary variable first, so we don't
484 lose address of allocated buffer if realloc fails */
485 tmp = PyMem_Realloc(d->input_buffer, new_size);
486 if (tmp == NULL) {
487 PyErr_SetNone(PyExc_MemoryError);
488 return NULL;
489 }
490 d->input_buffer = tmp;
491 d->input_buffer_size = new_size;
492
493 bzs->next_in = d->input_buffer + offset;
494 }
495 else if (avail_now < len) {
496 memmove(d->input_buffer, bzs->next_in,
497 d->bzs_avail_in_real);
498 bzs->next_in = d->input_buffer;
499 }
500 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
501 d->bzs_avail_in_real += len;
502 input_buffer_in_use = 1;
503 }
504 else {
505 bzs->next_in = data;
506 d->bzs_avail_in_real = len;
507 input_buffer_in_use = 0;
508 }
509
510 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000511 if(result == NULL) {
512 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100513 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000514 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100515
516 if (d->eof) {
517 d->needs_input = 0;
518 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300519 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200520 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100521 if (d->unused_data == NULL)
522 goto error;
523 }
524 }
525 else if (d->bzs_avail_in_real == 0) {
526 bzs->next_in = NULL;
527 d->needs_input = 1;
528 }
529 else {
530 d->needs_input = 0;
531
532 /* If we did not use the input buffer, we now have
533 to copy the tail from the caller's buffer into the
534 input buffer */
535 if (!input_buffer_in_use) {
536
537 /* Discard buffer if it's too small
538 (resizing it may needlessly copy the current contents) */
539 if (d->input_buffer != NULL &&
540 d->input_buffer_size < d->bzs_avail_in_real) {
541 PyMem_Free(d->input_buffer);
542 d->input_buffer = NULL;
543 }
544
545 /* Allocate if necessary */
546 if (d->input_buffer == NULL) {
547 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
548 if (d->input_buffer == NULL) {
549 PyErr_SetNone(PyExc_MemoryError);
550 goto error;
551 }
552 d->input_buffer_size = d->bzs_avail_in_real;
553 }
554
555 /* Copy tail */
556 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
557 bzs->next_in = d->input_buffer;
558 }
559 }
560
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200561 return result;
562
563error:
564 Py_XDECREF(result);
565 return NULL;
566}
567
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200568/*[clinic input]
569_bz2.BZ2Decompressor.decompress
570
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200571 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100572 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200573
Antoine Pitroue71258a2015-02-26 13:08:07 +0100574Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200575
Antoine Pitroue71258a2015-02-26 13:08:07 +0100576If *max_length* is nonnegative, returns at most *max_length* bytes of
577decompressed data. If this limit is reached and further output can be
578produced, *self.needs_input* will be set to ``False``. In this case, the next
579call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200580
Antoine Pitroue71258a2015-02-26 13:08:07 +0100581If all of the input data was decompressed and returned (either because this
582was less than *max_length* bytes, or because *max_length* was negative),
583*self.needs_input* will be set to True.
584
585Attempting to decompress data after the end of stream is reached raises an
586EOFError. Any data found after the end of the stream is ignored and saved in
587the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200588[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200589
590static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400591_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
592 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300593/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200594{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200595 PyObject *result = NULL;
596
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200597 ACQUIRE_LOCK(self);
598 if (self->eof)
599 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
600 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100601 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200602 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200603 return result;
604}
605
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200606/*[clinic input]
607_bz2.BZ2Decompressor.__init__
608
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200609Create a decompressor object for decompressing data incrementally.
610
611For one-shot decompression, use the decompress() function instead.
612[clinic start generated code]*/
613
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200614static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200615_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800616/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200617{
618 int bzerror;
619
Victor Stinner9b7cf752018-06-23 10:35:23 +0200620 PyThread_type_lock lock = PyThread_allocate_lock();
621 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200622 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
623 return -1;
624 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200625 if (self->lock != NULL) {
626 PyThread_free_lock(self->lock);
627 }
628 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200629
Antoine Pitroue71258a2015-02-26 13:08:07 +0100630 self->needs_input = 1;
631 self->bzs_avail_in_real = 0;
632 self->input_buffer = NULL;
633 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200634 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200635 if (self->unused_data == NULL)
636 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200637
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200638 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
639 if (catch_bz2_error(bzerror))
640 goto error;
641
642 return 0;
643
644error:
645 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200646 PyThread_free_lock(self->lock);
647 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200648 return -1;
649}
650
651static void
652BZ2Decompressor_dealloc(BZ2Decompressor *self)
653{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100654 if(self->input_buffer != NULL)
655 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200656 BZ2_bzDecompressEnd(&self->bzs);
657 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200658 if (self->lock != NULL)
659 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200660 Py_TYPE(self)->tp_free((PyObject *)self);
661}
662
663static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200664 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200665 {NULL}
666};
667
668PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
669"True if the end-of-stream marker has been reached.");
670
671PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
672"Data found after the end of the compressed stream.");
673
Antoine Pitroue71258a2015-02-26 13:08:07 +0100674PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
675"True if more input is needed before more decompressed data can be produced.");
676
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200677static PyMemberDef BZ2Decompressor_members[] = {
678 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
679 READONLY, BZ2Decompressor_eof__doc__},
680 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
681 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100682 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
683 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200684 {NULL}
685};
686
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200687static PyTypeObject BZ2Decompressor_Type = {
688 PyVarObject_HEAD_INIT(NULL, 0)
689 "_bz2.BZ2Decompressor", /* tp_name */
690 sizeof(BZ2Decompressor), /* tp_basicsize */
691 0, /* tp_itemsize */
692 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
693 0, /* tp_print */
694 0, /* tp_getattr */
695 0, /* tp_setattr */
696 0, /* tp_reserved */
697 0, /* tp_repr */
698 0, /* tp_as_number */
699 0, /* tp_as_sequence */
700 0, /* tp_as_mapping */
701 0, /* tp_hash */
702 0, /* tp_call */
703 0, /* tp_str */
704 0, /* tp_getattro */
705 0, /* tp_setattro */
706 0, /* tp_as_buffer */
707 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200708 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200709 0, /* tp_traverse */
710 0, /* tp_clear */
711 0, /* tp_richcompare */
712 0, /* tp_weaklistoffset */
713 0, /* tp_iter */
714 0, /* tp_iternext */
715 BZ2Decompressor_methods, /* tp_methods */
716 BZ2Decompressor_members, /* tp_members */
717 0, /* tp_getset */
718 0, /* tp_base */
719 0, /* tp_dict */
720 0, /* tp_descr_get */
721 0, /* tp_descr_set */
722 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200723 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200724 0, /* tp_alloc */
725 PyType_GenericNew, /* tp_new */
726};
727
728
729/* Module initialization. */
730
731static struct PyModuleDef _bz2module = {
732 PyModuleDef_HEAD_INIT,
733 "_bz2",
734 NULL,
735 -1,
736 NULL,
737 NULL,
738 NULL,
739 NULL,
740 NULL
741};
742
743PyMODINIT_FUNC
744PyInit__bz2(void)
745{
746 PyObject *m;
747
748 if (PyType_Ready(&BZ2Compressor_Type) < 0)
749 return NULL;
750 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
751 return NULL;
752
753 m = PyModule_Create(&_bz2module);
754 if (m == NULL)
755 return NULL;
756
757 Py_INCREF(&BZ2Compressor_Type);
758 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
759
760 Py_INCREF(&BZ2Decompressor_Type);
761 PyModule_AddObject(m, "BZ2Decompressor",
762 (PyObject *)&BZ2Decompressor_Type);
763
764 return m;
765}