/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include "pythread.h"
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02009
10#include <bzlib.h>
11#include <stdio.h>
12
13
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   entry points used in this file onto the unprefixed names.
   NOTE(review): presumably this targets an older libbzip2 API - confirm. */
#ifndef BZ_CONFIG_ERROR
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
22
23
/* Take obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)

/* Give back the lock taken with ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020031
32
33typedef struct {
34 PyObject_HEAD
35 bz_stream bzs;
36 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020037 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020038} BZ2Compressor;
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 char eof; /* T_BOOL expects a char */
44 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010045 char needs_input;
46 char *input_buffer;
47 size_t input_buffer_size;
48
49 /* bzs->avail_in is only 32 bit, so we store the true length
50 separately. Conversion and looping is encapsulated in
51 decompress_buf() */
52 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020053 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020054} BZ2Decompressor;
55
Larry Hastingsf256c222014-01-25 21:30:37 -080056static PyTypeObject BZ2Compressor_Type;
57static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020058
59/* Helper functions. */
60
61static int
62catch_bz2_error(int bzerror)
63{
64 switch(bzerror) {
65 case BZ_OK:
66 case BZ_RUN_OK:
67 case BZ_FLUSH_OK:
68 case BZ_FINISH_OK:
69 case BZ_STREAM_END:
70 return 0;
71
72#ifdef BZ_CONFIG_ERROR
73 case BZ_CONFIG_ERROR:
74 PyErr_SetString(PyExc_SystemError,
75 "libbzip2 was not compiled correctly");
76 return 1;
77#endif
78 case BZ_PARAM_ERROR:
79 PyErr_SetString(PyExc_ValueError,
80 "Internal error - "
81 "invalid parameters passed to libbzip2");
82 return 1;
83 case BZ_MEM_ERROR:
84 PyErr_NoMemory();
85 return 1;
86 case BZ_DATA_ERROR:
87 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030088 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020089 return 1;
90 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030091 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020092 return 1;
93 case BZ_UNEXPECTED_EOF:
94 PyErr_SetString(PyExc_EOFError,
95 "Compressed file ended before the logical "
96 "end-of-stream was detected");
97 return 1;
98 case BZ_SEQUENCE_ERROR:
99 PyErr_SetString(PyExc_RuntimeError,
100 "Internal error - "
101 "Invalid sequence of commands sent to libbzip2");
102 return 1;
103 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300104 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200105 "Unrecognized error from libbzip2: %d", bzerror);
106 return 1;
107 }
108}
109
/* Initial size of freshly allocated output buffers: BUFSIZ, but never
   less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
115
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200116static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100117grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200118{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200119 /* Expand the buffer by an amount proportional to the current size,
120 giving us amortized linear-time behavior. Use a less-than-double
121 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200122 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200123 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100124
125 if (max_length > 0 && new_size > (size_t) max_length)
126 new_size = (size_t) max_length;
127
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200128 if (new_size > size) {
129 return _PyBytes_Resize(buf, new_size);
130 } else { /* overflow */
131 PyErr_SetString(PyExc_OverflowError,
132 "Unable to allocate buffer - output too large");
133 return -1;
134 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200135}
136
137
138/* BZ2Compressor class. */
139
140static PyObject *
141compress(BZ2Compressor *c, char *data, size_t len, int action)
142{
143 size_t data_size = 0;
144 PyObject *result;
145
Antoine Pitroue71258a2015-02-26 13:08:07 +0100146 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200147 if (result == NULL)
148 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100149
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200150 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100151 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200152 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100153 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200154 for (;;) {
155 char *this_out;
156 int bzerror;
157
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100158 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
159 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200160 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200161 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200162 len -= c->bzs.avail_in;
163 }
164
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100165 /* In regular compression mode, stop when input data is exhausted. */
166 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200167 break;
168
169 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200170 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
171 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100172 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200173 goto error;
174 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
175 buffer_left = PyBytes_GET_SIZE(result) - data_size;
176 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200177 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100179
180 Py_BEGIN_ALLOW_THREADS
181 this_out = c->bzs.next_out;
182 bzerror = BZ2_bzCompress(&c->bzs, action);
183 data_size += c->bzs.next_out - this_out;
184 Py_END_ALLOW_THREADS
185 if (catch_bz2_error(bzerror))
186 goto error;
187
188 /* In flushing mode, stop when all buffered data has been flushed. */
189 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
190 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200191 }
Victor Stinner706768c2014-08-16 01:03:39 +0200192 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200193 if (_PyBytes_Resize(&result, data_size) < 0)
194 goto error;
195 return result;
196
197error:
198 Py_XDECREF(result);
199 return NULL;
200}
201
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200202/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200203module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800204class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
205class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200206[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300207/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200208
Larry Hastingsf256c222014-01-25 21:30:37 -0800209#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200210
211/*[clinic input]
212_bz2.BZ2Compressor.compress
213
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214 data: Py_buffer
215 /
216
217Provide data to the compressor object.
218
219Returns a chunk of compressed data if possible, or b'' otherwise.
220
221When you have finished providing data to the compressor, call the
222flush() method to finish the compression process.
223[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200224
225static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200226_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800227/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200228{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200229 PyObject *result = NULL;
230
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200231 ACQUIRE_LOCK(self);
232 if (self->flushed)
233 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
234 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200235 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200236 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200237 return result;
238}
239
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200240/*[clinic input]
241_bz2.BZ2Compressor.flush
242
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200243Finish the compression process.
244
245Returns the compressed data left in internal buffers.
246
247The compressor object may not be used after this method is called.
248[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200249
250static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800252/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200253{
254 PyObject *result = NULL;
255
256 ACQUIRE_LOCK(self);
257 if (self->flushed)
258 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
259 else {
260 self->flushed = 1;
261 result = compress(self, NULL, 0, BZ_FINISH);
262 }
263 RELEASE_LOCK(self);
264 return result;
265}
266
Nadeem Vawda37970652013-10-28 21:35:23 +0100267static PyObject *
268BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
269{
270 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
271 Py_TYPE(self)->tp_name);
272 return NULL;
273}
274
Victor Stinner5064a522013-07-07 16:50:27 +0200275static void*
276BZ2_Malloc(void* ctx, int items, int size)
277{
278 if (items < 0 || size < 0)
279 return NULL;
280 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
281 return NULL;
282 /* PyMem_Malloc() cannot be used: compress() and decompress()
283 release the GIL */
284 return PyMem_RawMalloc(items * size);
285}
286
/* Deallocator callback installed as bz_stream.bzfree; releases a block
   obtained from BZ2_Malloc().  `ctx` is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
292
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200293/*[clinic input]
294_bz2.BZ2Compressor.__init__
295
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200296 compresslevel: int = 9
297 Compression level, as a number between 1 and 9.
298 /
299
300Create a compressor object for compressing data incrementally.
301
302For one-shot compression, use the compress() function instead.
303[clinic start generated code]*/
304
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200305static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200306_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800307/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200308{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200309 int bzerror;
310
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200311 if (!(1 <= compresslevel && compresslevel <= 9)) {
312 PyErr_SetString(PyExc_ValueError,
313 "compresslevel must be between 1 and 9");
314 return -1;
315 }
316
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200317 self->lock = PyThread_allocate_lock();
318 if (self->lock == NULL) {
319 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
320 return -1;
321 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322
Victor Stinner5064a522013-07-07 16:50:27 +0200323 self->bzs.opaque = NULL;
324 self->bzs.bzalloc = BZ2_Malloc;
325 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200326 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
327 if (catch_bz2_error(bzerror))
328 goto error;
329
330 return 0;
331
332error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200333 PyThread_free_lock(self->lock);
334 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200335 return -1;
336}
337
338static void
339BZ2Compressor_dealloc(BZ2Compressor *self)
340{
341 BZ2_bzCompressEnd(&self->bzs);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200342 if (self->lock != NULL)
343 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200344 Py_TYPE(self)->tp_free((PyObject *)self);
345}
346
347static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200348 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
349 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100350 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200351 {NULL}
352};
353
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200354
355static PyTypeObject BZ2Compressor_Type = {
356 PyVarObject_HEAD_INIT(NULL, 0)
357 "_bz2.BZ2Compressor", /* tp_name */
358 sizeof(BZ2Compressor), /* tp_basicsize */
359 0, /* tp_itemsize */
360 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
361 0, /* tp_print */
362 0, /* tp_getattr */
363 0, /* tp_setattr */
364 0, /* tp_reserved */
365 0, /* tp_repr */
366 0, /* tp_as_number */
367 0, /* tp_as_sequence */
368 0, /* tp_as_mapping */
369 0, /* tp_hash */
370 0, /* tp_call */
371 0, /* tp_str */
372 0, /* tp_getattro */
373 0, /* tp_setattro */
374 0, /* tp_as_buffer */
375 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200376 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200377 0, /* tp_traverse */
378 0, /* tp_clear */
379 0, /* tp_richcompare */
380 0, /* tp_weaklistoffset */
381 0, /* tp_iter */
382 0, /* tp_iternext */
383 BZ2Compressor_methods, /* tp_methods */
384 0, /* tp_members */
385 0, /* tp_getset */
386 0, /* tp_base */
387 0, /* tp_dict */
388 0, /* tp_descr_get */
389 0, /* tp_descr_set */
390 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200391 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200392 0, /* tp_alloc */
393 PyType_GenericNew, /* tp_new */
394};
395
396
397/* BZ2Decompressor class. */
398
Antoine Pitroue71258a2015-02-26 13:08:07 +0100399/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
400 buffer is allocated dynamically and returned. At most max_length bytes are
401 returned, so some of the input may not be consumed. d->bzs.next_in and
402 d->bzs_avail_in_real are updated to reflect the consumed input. */
403static PyObject*
404decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200405{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100406 /* data_size is strictly positive, but because we repeatedly have to
407 compare against max_length and PyBytes_GET_SIZE we declare it as
408 signed */
409 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200410 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100411 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200412
Antoine Pitroue71258a2015-02-26 13:08:07 +0100413 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
414 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
415 else
416 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200417 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100418 return NULL;
419
420 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200421 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100422 int bzret;
423 size_t avail;
424
425 /* On a 64-bit system, buffer length might not fit in avail_out, so we
426 do decompression in chunks of no more than UINT_MAX bytes
427 each. Note that the expression for `avail` is guaranteed to be
428 positive, so the cast is safe. */
429 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
430 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
431 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
432 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200433
434 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 bzret = BZ2_bzDecompress(bzs);
436 data_size = bzs->next_out - PyBytes_AS_STRING(result);
437 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200438 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100439 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200440 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100441 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200442 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200443 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100444 } else if (d->bzs_avail_in_real == 0) {
445 break;
446 } else if (bzs->avail_out == 0) {
447 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200448 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100449 if (data_size == PyBytes_GET_SIZE(result) &&
450 grow_buffer(&result, max_length) == -1)
451 goto error;
452 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200453 }
454 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100455 if (data_size != PyBytes_GET_SIZE(result))
456 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458
459 return result;
460
461error:
462 Py_XDECREF(result);
463 return NULL;
464}
465
466
467static PyObject *
468decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
469{
470 char input_buffer_in_use;
471 PyObject *result;
472 bz_stream *bzs = &d->bzs;
473
474 /* Prepend unconsumed input if necessary */
475 if (bzs->next_in != NULL) {
476 size_t avail_now, avail_total;
477
478 /* Number of bytes we can append to input buffer */
479 avail_now = (d->input_buffer + d->input_buffer_size)
480 - (bzs->next_in + d->bzs_avail_in_real);
481
482 /* Number of bytes we can append if we move existing
483 contents to beginning of buffer (overwriting
484 consumed input) */
485 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
486
487 if (avail_total < len) {
488 size_t offset = bzs->next_in - d->input_buffer;
489 char *tmp;
490 size_t new_size = d->input_buffer_size + len - avail_now;
491
492 /* Assign to temporary variable first, so we don't
493 lose address of allocated buffer if realloc fails */
494 tmp = PyMem_Realloc(d->input_buffer, new_size);
495 if (tmp == NULL) {
496 PyErr_SetNone(PyExc_MemoryError);
497 return NULL;
498 }
499 d->input_buffer = tmp;
500 d->input_buffer_size = new_size;
501
502 bzs->next_in = d->input_buffer + offset;
503 }
504 else if (avail_now < len) {
505 memmove(d->input_buffer, bzs->next_in,
506 d->bzs_avail_in_real);
507 bzs->next_in = d->input_buffer;
508 }
509 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
510 d->bzs_avail_in_real += len;
511 input_buffer_in_use = 1;
512 }
513 else {
514 bzs->next_in = data;
515 d->bzs_avail_in_real = len;
516 input_buffer_in_use = 0;
517 }
518
519 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000520 if(result == NULL) {
521 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100522 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000523 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100524
525 if (d->eof) {
526 d->needs_input = 0;
527 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300528 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200529 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100530 if (d->unused_data == NULL)
531 goto error;
532 }
533 }
534 else if (d->bzs_avail_in_real == 0) {
535 bzs->next_in = NULL;
536 d->needs_input = 1;
537 }
538 else {
539 d->needs_input = 0;
540
541 /* If we did not use the input buffer, we now have
542 to copy the tail from the caller's buffer into the
543 input buffer */
544 if (!input_buffer_in_use) {
545
546 /* Discard buffer if it's too small
547 (resizing it may needlessly copy the current contents) */
548 if (d->input_buffer != NULL &&
549 d->input_buffer_size < d->bzs_avail_in_real) {
550 PyMem_Free(d->input_buffer);
551 d->input_buffer = NULL;
552 }
553
554 /* Allocate if necessary */
555 if (d->input_buffer == NULL) {
556 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
557 if (d->input_buffer == NULL) {
558 PyErr_SetNone(PyExc_MemoryError);
559 goto error;
560 }
561 d->input_buffer_size = d->bzs_avail_in_real;
562 }
563
564 /* Copy tail */
565 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
566 bzs->next_in = d->input_buffer;
567 }
568 }
569
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200570 return result;
571
572error:
573 Py_XDECREF(result);
574 return NULL;
575}
576
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200577/*[clinic input]
578_bz2.BZ2Decompressor.decompress
579
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200580 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100581 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200582
Antoine Pitroue71258a2015-02-26 13:08:07 +0100583Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200584
Antoine Pitroue71258a2015-02-26 13:08:07 +0100585If *max_length* is nonnegative, returns at most *max_length* bytes of
586decompressed data. If this limit is reached and further output can be
587produced, *self.needs_input* will be set to ``False``. In this case, the next
588call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200589
Antoine Pitroue71258a2015-02-26 13:08:07 +0100590If all of the input data was decompressed and returned (either because this
591was less than *max_length* bytes, or because *max_length* was negative),
592*self.needs_input* will be set to True.
593
594Attempting to decompress data after the end of stream is reached raises an
595EOFError. Any data found after the end of the stream is ignored and saved in
596the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200597[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200598
599static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400600_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
601 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300602/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200603{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200604 PyObject *result = NULL;
605
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200606 ACQUIRE_LOCK(self);
607 if (self->eof)
608 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
609 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100610 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200611 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200612 return result;
613}
614
Nadeem Vawda37970652013-10-28 21:35:23 +0100615static PyObject *
616BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
617{
618 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
619 Py_TYPE(self)->tp_name);
620 return NULL;
621}
622
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200623/*[clinic input]
624_bz2.BZ2Decompressor.__init__
625
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200626Create a decompressor object for decompressing data incrementally.
627
628For one-shot decompression, use the decompress() function instead.
629[clinic start generated code]*/
630
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200631static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200632_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800633/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200634{
635 int bzerror;
636
Victor Stinner9b7cf752018-06-23 10:35:23 +0200637 PyThread_type_lock lock = PyThread_allocate_lock();
638 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200639 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
640 return -1;
641 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200642 if (self->lock != NULL) {
643 PyThread_free_lock(self->lock);
644 }
645 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200646
Antoine Pitroue71258a2015-02-26 13:08:07 +0100647 self->needs_input = 1;
648 self->bzs_avail_in_real = 0;
649 self->input_buffer = NULL;
650 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200651 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200652 if (self->unused_data == NULL)
653 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200654
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200655 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
656 if (catch_bz2_error(bzerror))
657 goto error;
658
659 return 0;
660
661error:
662 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200663 PyThread_free_lock(self->lock);
664 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200665 return -1;
666}
667
668static void
669BZ2Decompressor_dealloc(BZ2Decompressor *self)
670{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100671 if(self->input_buffer != NULL)
672 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200673 BZ2_bzDecompressEnd(&self->bzs);
674 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200675 if (self->lock != NULL)
676 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200677 Py_TYPE(self)->tp_free((PyObject *)self);
678}
679
680static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200681 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100682 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200683 {NULL}
684};
685
686PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
687"True if the end-of-stream marker has been reached.");
688
689PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
690"Data found after the end of the compressed stream.");
691
Antoine Pitroue71258a2015-02-26 13:08:07 +0100692PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
693"True if more input is needed before more decompressed data can be produced.");
694
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200695static PyMemberDef BZ2Decompressor_members[] = {
696 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
697 READONLY, BZ2Decompressor_eof__doc__},
698 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
699 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100700 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
701 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200702 {NULL}
703};
704
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200705static PyTypeObject BZ2Decompressor_Type = {
706 PyVarObject_HEAD_INIT(NULL, 0)
707 "_bz2.BZ2Decompressor", /* tp_name */
708 sizeof(BZ2Decompressor), /* tp_basicsize */
709 0, /* tp_itemsize */
710 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
711 0, /* tp_print */
712 0, /* tp_getattr */
713 0, /* tp_setattr */
714 0, /* tp_reserved */
715 0, /* tp_repr */
716 0, /* tp_as_number */
717 0, /* tp_as_sequence */
718 0, /* tp_as_mapping */
719 0, /* tp_hash */
720 0, /* tp_call */
721 0, /* tp_str */
722 0, /* tp_getattro */
723 0, /* tp_setattro */
724 0, /* tp_as_buffer */
725 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200726 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200727 0, /* tp_traverse */
728 0, /* tp_clear */
729 0, /* tp_richcompare */
730 0, /* tp_weaklistoffset */
731 0, /* tp_iter */
732 0, /* tp_iternext */
733 BZ2Decompressor_methods, /* tp_methods */
734 BZ2Decompressor_members, /* tp_members */
735 0, /* tp_getset */
736 0, /* tp_base */
737 0, /* tp_dict */
738 0, /* tp_descr_get */
739 0, /* tp_descr_set */
740 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200741 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200742 0, /* tp_alloc */
743 PyType_GenericNew, /* tp_new */
744};
745
746
747/* Module initialization. */
748
749static struct PyModuleDef _bz2module = {
750 PyModuleDef_HEAD_INIT,
751 "_bz2",
752 NULL,
753 -1,
754 NULL,
755 NULL,
756 NULL,
757 NULL,
758 NULL
759};
760
761PyMODINIT_FUNC
762PyInit__bz2(void)
763{
764 PyObject *m;
765
766 if (PyType_Ready(&BZ2Compressor_Type) < 0)
767 return NULL;
768 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
769 return NULL;
770
771 m = PyModule_Create(&_bz2module);
772 if (m == NULL)
773 return NULL;
774
775 Py_INCREF(&BZ2Compressor_Type);
776 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
777
778 Py_INCREF(&BZ2Decompressor_Type);
779 PyModule_AddObject(m, "BZ2Decompressor",
780 (PyObject *)&BZ2Decompressor_Type);
781
782 return m;
783}