/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02006#include "structmember.h" // PyMemberDef
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02007
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include <bzlib.h>
9#include <stdio.h>
10
11
/* When the bzlib.h in use does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   entry points used in this file onto the unprefixed names.
   NOTE(review): presumably this targets older libbzip2 headers that predate
   the BZ2_ prefix -- confirm. */
#ifndef BZ_CONFIG_ERROR
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
20
21
/* Take (obj)->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, with the GIL released around it
   (Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS). */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } \
} while (0)

/* Give back the lock taken by ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020029
30
31typedef struct {
Dong-hee Naec689182020-06-20 00:56:13 +090032 PyTypeObject *bz2_compressor_type;
33 PyTypeObject *bz2_decompressor_type;
34} _bz2_state;
35
36static inline _bz2_state*
37get_bz2_state(PyObject *module)
38{
39 void *state = PyModule_GetState(module);
40 assert(state != NULL);
41 return (_bz2_state *)state;
42}
43
44typedef struct {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020045 PyObject_HEAD
46 bz_stream bzs;
47 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020048 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020049} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010056 char needs_input;
57 char *input_buffer;
58 size_t input_buffer_size;
59
60 /* bzs->avail_in is only 32 bit, so we store the true length
61 separately. Conversion and looping is encapsulated in
62 decompress_buf() */
63 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020064 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020065} BZ2Decompressor;
66
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020067/* Helper functions. */
68
69static int
70catch_bz2_error(int bzerror)
71{
72 switch(bzerror) {
73 case BZ_OK:
74 case BZ_RUN_OK:
75 case BZ_FLUSH_OK:
76 case BZ_FINISH_OK:
77 case BZ_STREAM_END:
78 return 0;
79
80#ifdef BZ_CONFIG_ERROR
81 case BZ_CONFIG_ERROR:
82 PyErr_SetString(PyExc_SystemError,
83 "libbzip2 was not compiled correctly");
84 return 1;
85#endif
86 case BZ_PARAM_ERROR:
87 PyErr_SetString(PyExc_ValueError,
88 "Internal error - "
89 "invalid parameters passed to libbzip2");
90 return 1;
91 case BZ_MEM_ERROR:
92 PyErr_NoMemory();
93 return 1;
94 case BZ_DATA_ERROR:
95 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030096 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020097 return 1;
98 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030099 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200100 return 1;
101 case BZ_UNEXPECTED_EOF:
102 PyErr_SetString(PyExc_EOFError,
103 "Compressed file ended before the logical "
104 "end-of-stream was detected");
105 return 1;
106 case BZ_SEQUENCE_ERROR:
107 PyErr_SetString(PyExc_RuntimeError,
108 "Internal error - "
109 "Invalid sequence of commands sent to libbzip2");
110 return 1;
111 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300112 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200113 "Unrecognized error from libbzip2: %d", bzerror);
114 return 1;
115 }
116}
117
/* Initial size of an output buffer: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
123
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200124static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100125grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200126{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200127 /* Expand the buffer by an amount proportional to the current size,
128 giving us amortized linear-time behavior. Use a less-than-double
129 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200130 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200131 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100132
133 if (max_length > 0 && new_size > (size_t) max_length)
134 new_size = (size_t) max_length;
135
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200136 if (new_size > size) {
137 return _PyBytes_Resize(buf, new_size);
138 } else { /* overflow */
139 PyErr_SetString(PyExc_OverflowError,
140 "Unable to allocate buffer - output too large");
141 return -1;
142 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200143}
144
145
146/* BZ2Compressor class. */
147
148static PyObject *
149compress(BZ2Compressor *c, char *data, size_t len, int action)
150{
151 size_t data_size = 0;
152 PyObject *result;
153
Antoine Pitroue71258a2015-02-26 13:08:07 +0100154 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200155 if (result == NULL)
156 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100157
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100159 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200160 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100161 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200162 for (;;) {
163 char *this_out;
164 int bzerror;
165
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100166 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
167 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200168 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200169 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200170 len -= c->bzs.avail_in;
171 }
172
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100173 /* In regular compression mode, stop when input data is exhausted. */
174 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200175 break;
176
177 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200178 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
179 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100180 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 goto error;
182 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
183 buffer_left = PyBytes_GET_SIZE(result) - data_size;
184 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200185 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200186 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100187
188 Py_BEGIN_ALLOW_THREADS
189 this_out = c->bzs.next_out;
190 bzerror = BZ2_bzCompress(&c->bzs, action);
191 data_size += c->bzs.next_out - this_out;
192 Py_END_ALLOW_THREADS
193 if (catch_bz2_error(bzerror))
194 goto error;
195
196 /* In flushing mode, stop when all buffered data has been flushed. */
197 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
198 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200199 }
Victor Stinner706768c2014-08-16 01:03:39 +0200200 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200201 if (_PyBytes_Resize(&result, data_size) < 0)
202 goto error;
203 return result;
204
205error:
206 Py_XDECREF(result);
207 return NULL;
208}
209
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200210/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200211module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800212class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
213class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300215/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200216
Larry Hastingsf256c222014-01-25 21:30:37 -0800217#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200218
219/*[clinic input]
220_bz2.BZ2Compressor.compress
221
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200222 data: Py_buffer
223 /
224
225Provide data to the compressor object.
226
227Returns a chunk of compressed data if possible, or b'' otherwise.
228
229When you have finished providing data to the compressor, call the
230flush() method to finish the compression process.
231[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200232
233static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200234_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800235/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200236{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200237 PyObject *result = NULL;
238
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239 ACQUIRE_LOCK(self);
240 if (self->flushed)
241 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
242 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200243 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200244 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200245 return result;
246}
247
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200248/*[clinic input]
249_bz2.BZ2Compressor.flush
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251Finish the compression process.
252
253Returns the compressed data left in internal buffers.
254
255The compressor object may not be used after this method is called.
256[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200257
258static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200259_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800260/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200261{
262 PyObject *result = NULL;
263
264 ACQUIRE_LOCK(self);
265 if (self->flushed)
266 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
267 else {
268 self->flushed = 1;
269 result = compress(self, NULL, 0, BZ_FINISH);
270 }
271 RELEASE_LOCK(self);
272 return result;
273}
274
Victor Stinner5064a522013-07-07 16:50:27 +0200275static void*
276BZ2_Malloc(void* ctx, int items, int size)
277{
278 if (items < 0 || size < 0)
279 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300280 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200281 return NULL;
282 /* PyMem_Malloc() cannot be used: compress() and decompress()
283 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300284 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200285}
286
/* Deallocator installed as bz_stream.bzfree; counterpart of BZ2_Malloc().
   `ctx` is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
292
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200293
Dong-hee Naec689182020-06-20 00:56:13 +0900294/* Argument Clinic is not used since the Argument Clinic always want to
295 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200296static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200297_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200298{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200299 int bzerror;
300
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200301 if (!(1 <= compresslevel && compresslevel <= 9)) {
302 PyErr_SetString(PyExc_ValueError,
303 "compresslevel must be between 1 and 9");
304 return -1;
305 }
306
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200307 self->lock = PyThread_allocate_lock();
308 if (self->lock == NULL) {
309 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
310 return -1;
311 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200312
Victor Stinner5064a522013-07-07 16:50:27 +0200313 self->bzs.opaque = NULL;
314 self->bzs.bzalloc = BZ2_Malloc;
315 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
317 if (catch_bz2_error(bzerror))
318 goto error;
319
320 return 0;
321
322error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200323 PyThread_free_lock(self->lock);
324 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200325 return -1;
326}
327
Dong-hee Naec689182020-06-20 00:56:13 +0900328PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
329"BZ2Compressor(compresslevel=9, /)\n"
330"--\n"
331"\n"
332"Create a compressor object for compressing data incrementally.\n"
333"\n"
334" compresslevel\n"
335" Compression level, as a number between 1 and 9.\n"
336"\n"
337"For one-shot compression, use the compress() function instead.");
338
339static int
340_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
341{
342 int return_value = -1;
343 int compresslevel = 9;
344
345 if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
346 goto exit;
347 }
348 if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
349 goto exit;
350 }
351 if (PyTuple_GET_SIZE(args) < 1) {
352 goto skip_optional;
353 }
354 compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
355 if (compresslevel == -1 && PyErr_Occurred()) {
356 goto exit;
357 }
358skip_optional:
359 return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
360
361exit:
362 return return_value;
363}
364
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200365static void
366BZ2Compressor_dealloc(BZ2Compressor *self)
367{
368 BZ2_bzCompressEnd(&self->bzs);
Dong-hee Naec689182020-06-20 00:56:13 +0900369 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200370 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900371 }
372 PyTypeObject *tp = Py_TYPE(self);
373 tp->tp_free((PyObject *)self);
374 Py_DECREF(tp);
375}
376
377static int
378BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
379{
380 Py_VISIT(Py_TYPE(self));
381 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200382}
383
384static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200385 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
386 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200387 {NULL}
388};
389
Dong-hee Naec689182020-06-20 00:56:13 +0900390static PyType_Slot bz2_compressor_type_slots[] = {
391 {Py_tp_dealloc, BZ2Compressor_dealloc},
392 {Py_tp_methods, BZ2Compressor_methods},
393 {Py_tp_init, _bz2_BZ2Compressor___init__},
394 {Py_tp_new, PyType_GenericNew},
395 {Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
396 {Py_tp_traverse, BZ2Compressor_traverse},
397 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200398};
399
Dong-hee Naec689182020-06-20 00:56:13 +0900400static PyType_Spec bz2_compressor_type_spec = {
401 .name = "_bz2.BZ2Compressor",
402 .basicsize = sizeof(BZ2Compressor),
403 // Calling PyType_GetModuleState() on a subclass is not safe.
404 // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
405 // which prevents to create a subclass.
406 // So calling PyType_GetModuleState() in this file is always safe.
407 .flags = Py_TPFLAGS_DEFAULT,
408 .slots = bz2_compressor_type_slots,
409};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200410
411/* BZ2Decompressor class. */
412
Antoine Pitroue71258a2015-02-26 13:08:07 +0100413/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
414 buffer is allocated dynamically and returned. At most max_length bytes are
415 returned, so some of the input may not be consumed. d->bzs.next_in and
416 d->bzs_avail_in_real are updated to reflect the consumed input. */
417static PyObject*
418decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200419{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100420 /* data_size is strictly positive, but because we repeatedly have to
421 compare against max_length and PyBytes_GET_SIZE we declare it as
422 signed */
423 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200424 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100425 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200426
Antoine Pitroue71258a2015-02-26 13:08:07 +0100427 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
428 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
429 else
430 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200431 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100432 return NULL;
433
434 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200435 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100436 int bzret;
437 size_t avail;
438
439 /* On a 64-bit system, buffer length might not fit in avail_out, so we
440 do decompression in chunks of no more than UINT_MAX bytes
441 each. Note that the expression for `avail` is guaranteed to be
442 positive, so the cast is safe. */
443 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
444 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
445 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
446 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200447
448 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100449 bzret = BZ2_bzDecompress(bzs);
450 data_size = bzs->next_out - PyBytes_AS_STRING(result);
451 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200452 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100453 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200454 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100455 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200456 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458 } else if (d->bzs_avail_in_real == 0) {
459 break;
460 } else if (bzs->avail_out == 0) {
461 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200462 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100463 if (data_size == PyBytes_GET_SIZE(result) &&
464 grow_buffer(&result, max_length) == -1)
465 goto error;
466 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200467 }
468 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100469 if (data_size != PyBytes_GET_SIZE(result))
470 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200471 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100472
473 return result;
474
475error:
476 Py_XDECREF(result);
477 return NULL;
478}
479
480
481static PyObject *
482decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
483{
484 char input_buffer_in_use;
485 PyObject *result;
486 bz_stream *bzs = &d->bzs;
487
488 /* Prepend unconsumed input if necessary */
489 if (bzs->next_in != NULL) {
490 size_t avail_now, avail_total;
491
492 /* Number of bytes we can append to input buffer */
493 avail_now = (d->input_buffer + d->input_buffer_size)
494 - (bzs->next_in + d->bzs_avail_in_real);
495
496 /* Number of bytes we can append if we move existing
497 contents to beginning of buffer (overwriting
498 consumed input) */
499 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
500
501 if (avail_total < len) {
502 size_t offset = bzs->next_in - d->input_buffer;
503 char *tmp;
504 size_t new_size = d->input_buffer_size + len - avail_now;
505
506 /* Assign to temporary variable first, so we don't
507 lose address of allocated buffer if realloc fails */
508 tmp = PyMem_Realloc(d->input_buffer, new_size);
509 if (tmp == NULL) {
510 PyErr_SetNone(PyExc_MemoryError);
511 return NULL;
512 }
513 d->input_buffer = tmp;
514 d->input_buffer_size = new_size;
515
516 bzs->next_in = d->input_buffer + offset;
517 }
518 else if (avail_now < len) {
519 memmove(d->input_buffer, bzs->next_in,
520 d->bzs_avail_in_real);
521 bzs->next_in = d->input_buffer;
522 }
523 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
524 d->bzs_avail_in_real += len;
525 input_buffer_in_use = 1;
526 }
527 else {
528 bzs->next_in = data;
529 d->bzs_avail_in_real = len;
530 input_buffer_in_use = 0;
531 }
532
533 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000534 if(result == NULL) {
535 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100536 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000537 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100538
539 if (d->eof) {
540 d->needs_input = 0;
541 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300542 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200543 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100544 if (d->unused_data == NULL)
545 goto error;
546 }
547 }
548 else if (d->bzs_avail_in_real == 0) {
549 bzs->next_in = NULL;
550 d->needs_input = 1;
551 }
552 else {
553 d->needs_input = 0;
554
555 /* If we did not use the input buffer, we now have
556 to copy the tail from the caller's buffer into the
557 input buffer */
558 if (!input_buffer_in_use) {
559
560 /* Discard buffer if it's too small
561 (resizing it may needlessly copy the current contents) */
562 if (d->input_buffer != NULL &&
563 d->input_buffer_size < d->bzs_avail_in_real) {
564 PyMem_Free(d->input_buffer);
565 d->input_buffer = NULL;
566 }
567
568 /* Allocate if necessary */
569 if (d->input_buffer == NULL) {
570 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
571 if (d->input_buffer == NULL) {
572 PyErr_SetNone(PyExc_MemoryError);
573 goto error;
574 }
575 d->input_buffer_size = d->bzs_avail_in_real;
576 }
577
578 /* Copy tail */
579 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
580 bzs->next_in = d->input_buffer;
581 }
582 }
583
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200584 return result;
585
586error:
587 Py_XDECREF(result);
588 return NULL;
589}
590
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200591/*[clinic input]
592_bz2.BZ2Decompressor.decompress
593
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200594 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100595 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200596
Antoine Pitroue71258a2015-02-26 13:08:07 +0100597Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200598
Antoine Pitroue71258a2015-02-26 13:08:07 +0100599If *max_length* is nonnegative, returns at most *max_length* bytes of
600decompressed data. If this limit is reached and further output can be
601produced, *self.needs_input* will be set to ``False``. In this case, the next
602call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200603
Antoine Pitroue71258a2015-02-26 13:08:07 +0100604If all of the input data was decompressed and returned (either because this
605was less than *max_length* bytes, or because *max_length* was negative),
606*self.needs_input* will be set to True.
607
608Attempting to decompress data after the end of stream is reached raises an
609EOFError. Any data found after the end of the stream is ignored and saved in
610the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200611[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200612
613static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400614_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
615 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300616/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200617{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200618 PyObject *result = NULL;
619
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200620 ACQUIRE_LOCK(self);
621 if (self->eof)
622 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
623 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100624 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200625 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200626 return result;
627}
628
Dong-hee Naec689182020-06-20 00:56:13 +0900629/* Argument Clinic is not used since the Argument Clinic always want to
630 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200631static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200632_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200633{
634 int bzerror;
635
Victor Stinner9b7cf752018-06-23 10:35:23 +0200636 PyThread_type_lock lock = PyThread_allocate_lock();
637 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200638 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
639 return -1;
640 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200641 if (self->lock != NULL) {
642 PyThread_free_lock(self->lock);
643 }
644 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200645
Antoine Pitroue71258a2015-02-26 13:08:07 +0100646 self->needs_input = 1;
647 self->bzs_avail_in_real = 0;
648 self->input_buffer = NULL;
649 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200650 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200651 if (self->unused_data == NULL)
652 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200653
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200654 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
655 if (catch_bz2_error(bzerror))
656 goto error;
657
658 return 0;
659
660error:
661 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200662 PyThread_free_lock(self->lock);
663 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200664 return -1;
665}
666
Dong-hee Naec689182020-06-20 00:56:13 +0900667static int
668_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
669{
670 int return_value = -1;
671
672 if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
673 goto exit;
674 }
675 if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
676 goto exit;
677 }
678 return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
679
680exit:
681 return return_value;
682}
683
684PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
685"BZ2Decompressor()\n"
686"--\n"
687"\n"
688"Create a decompressor object for decompressing data incrementally.\n"
689"\n"
690"For one-shot decompression, use the decompress() function instead.");
691
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200692static void
693BZ2Decompressor_dealloc(BZ2Decompressor *self)
694{
Dong-hee Naec689182020-06-20 00:56:13 +0900695 if(self->input_buffer != NULL) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100696 PyMem_Free(self->input_buffer);
Dong-hee Naec689182020-06-20 00:56:13 +0900697 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200698 BZ2_bzDecompressEnd(&self->bzs);
699 Py_CLEAR(self->unused_data);
Dong-hee Naec689182020-06-20 00:56:13 +0900700 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200701 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900702 }
703
704 PyTypeObject *tp = Py_TYPE(self);
705 tp->tp_free((PyObject *)self);
706 Py_DECREF(tp);
707}
708
709static int
710BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
711{
712 Py_VISIT(Py_TYPE(self));
713 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200714}
715
716static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200717 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200718 {NULL}
719};
720
721PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
722"True if the end-of-stream marker has been reached.");
723
724PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
725"Data found after the end of the compressed stream.");
726
Antoine Pitroue71258a2015-02-26 13:08:07 +0100727PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
728"True if more input is needed before more decompressed data can be produced.");
729
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200730static PyMemberDef BZ2Decompressor_members[] = {
731 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
732 READONLY, BZ2Decompressor_eof__doc__},
733 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
734 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100735 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
736 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200737 {NULL}
738};
739
Dong-hee Naec689182020-06-20 00:56:13 +0900740static PyType_Slot bz2_decompressor_type_slots[] = {
741 {Py_tp_dealloc, BZ2Decompressor_dealloc},
742 {Py_tp_methods, BZ2Decompressor_methods},
743 {Py_tp_init, _bz2_BZ2Decompressor___init__},
744 {Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
745 {Py_tp_members, BZ2Decompressor_members},
746 {Py_tp_new, PyType_GenericNew},
747 {Py_tp_traverse, BZ2Decompressor_traverse},
748 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200749};
750
Dong-hee Naec689182020-06-20 00:56:13 +0900751static PyType_Spec bz2_decompressor_type_spec = {
752 .name = "_bz2.BZ2Decompressor",
753 .basicsize = sizeof(BZ2Decompressor),
754 // Calling PyType_GetModuleState() on a subclass is not safe.
755 // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
756 // which prevents to create a subclass.
757 // So calling PyType_GetModuleState() in this file is always safe.
758 .flags = Py_TPFLAGS_DEFAULT,
759 .slots = bz2_decompressor_type_slots,
760};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200761
762/* Module initialization. */
763
Hai Shi5d385172020-02-18 19:17:39 +0800764static int
765_bz2_exec(PyObject *module)
766{
Dong-hee Naec689182020-06-20 00:56:13 +0900767 _bz2_state *state = get_bz2_state(module);
768 state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
769 &bz2_compressor_type_spec, NULL);
770 if (state->bz2_compressor_type == NULL) {
Hai Shi5d385172020-02-18 19:17:39 +0800771 return -1;
772 }
773
Dong-hee Naec689182020-06-20 00:56:13 +0900774 if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
775 return -1;
776 }
777
778 state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
779 &bz2_decompressor_type_spec, NULL);
780 if (state->bz2_decompressor_type == NULL) {
781 return -1;
782 }
783
784 if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800785 return -1;
786 }
787
788 return 0;
789}
790
Dong-hee Naec689182020-06-20 00:56:13 +0900791static int
792_bz2_traverse(PyObject *module, visitproc visit, void *arg)
793{
794 _bz2_state *state = get_bz2_state(module);
795 Py_VISIT(state->bz2_compressor_type);
796 Py_VISIT(state->bz2_decompressor_type);
797 return 0;
798}
799
800static int
801_bz2_clear(PyObject *module)
802{
803 _bz2_state *state = get_bz2_state(module);
804 Py_CLEAR(state->bz2_compressor_type);
805 Py_CLEAR(state->bz2_decompressor_type);
806 return 0;
807}
808
809static void
810_bz2_free(void *module)
811{
812 _bz2_clear((PyObject *)module);
813}
814
Hai Shi5d385172020-02-18 19:17:39 +0800815static struct PyModuleDef_Slot _bz2_slots[] = {
816 {Py_mod_exec, _bz2_exec},
817 {0, NULL}
818};
819
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200820static struct PyModuleDef _bz2module = {
821 PyModuleDef_HEAD_INIT,
Dong-hee Naec689182020-06-20 00:56:13 +0900822 .m_name = "_bz2",
823 .m_size = sizeof(_bz2_state),
824 .m_slots = _bz2_slots,
825 .m_traverse = _bz2_traverse,
826 .m_clear = _bz2_clear,
827 .m_free = _bz2_free,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200828};
829
830PyMODINIT_FUNC
831PyInit__bz2(void)
832{
Hai Shi5d385172020-02-18 19:17:39 +0800833 return PyModuleDef_Init(&_bz2module);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200834}