/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02006#include "structmember.h" // PyMemberDef
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02007
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include <bzlib.h>
9#include <stdio.h>
10
11
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   entry points used in this file onto the unprefixed names.
   NOTE(review): presumably this targets an older libbzip2 API that lacked
   the BZ2_ prefix -- confirm. */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompress bzCompress
#  define BZ2_bzCompressInit bzCompressInit
#  define BZ2_bzCompressEnd bzCompressEnd
#  define BZ2_bzDecompress bzDecompress
#  define BZ2_bzDecompressInit bzDecompressInit
#  define BZ2_bzDecompressEnd bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
20
21
/* Take (obj)->lock.  A non-blocking attempt is made first; only when that
   fails does the thread block on the lock, and it does so inside a
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Give back (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020029
30
31typedef struct {
Dong-hee Naec689182020-06-20 00:56:13 +090032 PyTypeObject *bz2_compressor_type;
33 PyTypeObject *bz2_decompressor_type;
34} _bz2_state;
35
36static inline _bz2_state*
37get_bz2_state(PyObject *module)
38{
39 void *state = PyModule_GetState(module);
40 assert(state != NULL);
41 return (_bz2_state *)state;
42}
43
44typedef struct {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020045 PyObject_HEAD
46 bz_stream bzs;
47 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020048 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020049} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010056 char needs_input;
57 char *input_buffer;
58 size_t input_buffer_size;
59
60 /* bzs->avail_in is only 32 bit, so we store the true length
61 separately. Conversion and looping is encapsulated in
62 decompress_buf() */
63 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020064 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020065} BZ2Decompressor;
66
/* Helper functions. */
68
69static int
70catch_bz2_error(int bzerror)
71{
72 switch(bzerror) {
73 case BZ_OK:
74 case BZ_RUN_OK:
75 case BZ_FLUSH_OK:
76 case BZ_FINISH_OK:
77 case BZ_STREAM_END:
78 return 0;
79
80#ifdef BZ_CONFIG_ERROR
81 case BZ_CONFIG_ERROR:
82 PyErr_SetString(PyExc_SystemError,
83 "libbzip2 was not compiled correctly");
84 return 1;
85#endif
86 case BZ_PARAM_ERROR:
87 PyErr_SetString(PyExc_ValueError,
88 "Internal error - "
89 "invalid parameters passed to libbzip2");
90 return 1;
91 case BZ_MEM_ERROR:
92 PyErr_NoMemory();
93 return 1;
94 case BZ_DATA_ERROR:
95 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030096 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020097 return 1;
98 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030099 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200100 return 1;
101 case BZ_UNEXPECTED_EOF:
102 PyErr_SetString(PyExc_EOFError,
103 "Compressed file ended before the logical "
104 "end-of-stream was detected");
105 return 1;
106 case BZ_SEQUENCE_ERROR:
107 PyErr_SetString(PyExc_RuntimeError,
108 "Internal error - "
109 "Invalid sequence of commands sent to libbzip2");
110 return 1;
111 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300112 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200113 "Unrecognized error from libbzip2: %d", bzerror);
114 return 1;
115 }
116}
117
/* Size of a freshly allocated output buffer: BUFSIZ, but never less
   than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
123
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200124static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100125grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200126{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200127 /* Expand the buffer by an amount proportional to the current size,
128 giving us amortized linear-time behavior. Use a less-than-double
129 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200130 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200131 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100132
133 if (max_length > 0 && new_size > (size_t) max_length)
134 new_size = (size_t) max_length;
135
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200136 if (new_size > size) {
137 return _PyBytes_Resize(buf, new_size);
138 } else { /* overflow */
139 PyErr_SetString(PyExc_OverflowError,
140 "Unable to allocate buffer - output too large");
141 return -1;
142 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200143}
144
145
/* BZ2Compressor class. */
147
148static PyObject *
149compress(BZ2Compressor *c, char *data, size_t len, int action)
150{
151 size_t data_size = 0;
152 PyObject *result;
153
Antoine Pitroue71258a2015-02-26 13:08:07 +0100154 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200155 if (result == NULL)
156 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100157
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100159 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200160 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100161 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200162 for (;;) {
163 char *this_out;
164 int bzerror;
165
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100166 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
167 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200168 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200169 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200170 len -= c->bzs.avail_in;
171 }
172
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100173 /* In regular compression mode, stop when input data is exhausted. */
174 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200175 break;
176
177 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200178 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
179 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100180 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 goto error;
182 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
183 buffer_left = PyBytes_GET_SIZE(result) - data_size;
184 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200185 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200186 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100187
188 Py_BEGIN_ALLOW_THREADS
189 this_out = c->bzs.next_out;
190 bzerror = BZ2_bzCompress(&c->bzs, action);
191 data_size += c->bzs.next_out - this_out;
192 Py_END_ALLOW_THREADS
193 if (catch_bz2_error(bzerror))
194 goto error;
195
196 /* In flushing mode, stop when all buffered data has been flushed. */
197 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
198 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200199 }
Victor Stinner706768c2014-08-16 01:03:39 +0200200 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200201 if (_PyBytes_Resize(&result, data_size) < 0)
202 goto error;
203 return result;
204
205error:
206 Py_XDECREF(result);
207 return NULL;
208}
209
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200210/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200211module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800212class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
213class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300215/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200216
Larry Hastingsf256c222014-01-25 21:30:37 -0800217#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200218
219/*[clinic input]
220_bz2.BZ2Compressor.compress
221
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200222 data: Py_buffer
223 /
224
225Provide data to the compressor object.
226
227Returns a chunk of compressed data if possible, or b'' otherwise.
228
229When you have finished providing data to the compressor, call the
230flush() method to finish the compression process.
231[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200232
233static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200234_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800235/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200236{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200237 PyObject *result = NULL;
238
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239 ACQUIRE_LOCK(self);
240 if (self->flushed)
241 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
242 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200243 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200244 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200245 return result;
246}
247
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200248/*[clinic input]
249_bz2.BZ2Compressor.flush
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251Finish the compression process.
252
253Returns the compressed data left in internal buffers.
254
255The compressor object may not be used after this method is called.
256[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200257
258static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200259_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800260/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200261{
262 PyObject *result = NULL;
263
264 ACQUIRE_LOCK(self);
265 if (self->flushed)
266 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
267 else {
268 self->flushed = 1;
269 result = compress(self, NULL, 0, BZ_FINISH);
270 }
271 RELEASE_LOCK(self);
272 return result;
273}
274
Dong-hee Naec689182020-06-20 00:56:13 +0900275/*[clinic input]
276_bz2.BZ2Compressor.__reduce__
277
278[clinic start generated code]*/
279
280static PyObject *
281_bz2_BZ2Compressor___reduce___impl(BZ2Compressor *self)
282/*[clinic end generated code: output=d13db66ae043e141 input=e09bccef0e6731b2]*/
283{
284 PyErr_Format(PyExc_TypeError,
285 "cannot pickle %s object",
286 Py_TYPE(self)->tp_name);
287 return NULL;
288}
289
Victor Stinner5064a522013-07-07 16:50:27 +0200290static void*
291BZ2_Malloc(void* ctx, int items, int size)
292{
293 if (items < 0 || size < 0)
294 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300295 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200296 return NULL;
297 /* PyMem_Malloc() cannot be used: compress() and decompress()
298 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300299 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200300}
301
/* Deallocation callback installed as bz_stream.bzfree; counterpart of
   BZ2_Malloc().  The context argument is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
307
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200308
Dong-hee Naec689182020-06-20 00:56:13 +0900309/* Argument Clinic is not used since the Argument Clinic always want to
310 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200311static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200312_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200313{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200314 int bzerror;
315
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316 if (!(1 <= compresslevel && compresslevel <= 9)) {
317 PyErr_SetString(PyExc_ValueError,
318 "compresslevel must be between 1 and 9");
319 return -1;
320 }
321
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322 self->lock = PyThread_allocate_lock();
323 if (self->lock == NULL) {
324 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
325 return -1;
326 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200327
Victor Stinner5064a522013-07-07 16:50:27 +0200328 self->bzs.opaque = NULL;
329 self->bzs.bzalloc = BZ2_Malloc;
330 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200331 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
332 if (catch_bz2_error(bzerror))
333 goto error;
334
335 return 0;
336
337error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200338 PyThread_free_lock(self->lock);
339 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200340 return -1;
341}
342
Dong-hee Naec689182020-06-20 00:56:13 +0900343PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
344"BZ2Compressor(compresslevel=9, /)\n"
345"--\n"
346"\n"
347"Create a compressor object for compressing data incrementally.\n"
348"\n"
349" compresslevel\n"
350" Compression level, as a number between 1 and 9.\n"
351"\n"
352"For one-shot compression, use the compress() function instead.");
353
354static int
355_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
356{
357 int return_value = -1;
358 int compresslevel = 9;
359
360 if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
361 goto exit;
362 }
363 if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
364 goto exit;
365 }
366 if (PyTuple_GET_SIZE(args) < 1) {
367 goto skip_optional;
368 }
369 compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
370 if (compresslevel == -1 && PyErr_Occurred()) {
371 goto exit;
372 }
373skip_optional:
374 return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
375
376exit:
377 return return_value;
378}
379
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200380static void
381BZ2Compressor_dealloc(BZ2Compressor *self)
382{
383 BZ2_bzCompressEnd(&self->bzs);
Dong-hee Naec689182020-06-20 00:56:13 +0900384 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200385 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900386 }
387 PyTypeObject *tp = Py_TYPE(self);
388 tp->tp_free((PyObject *)self);
389 Py_DECREF(tp);
390}
391
392static int
393BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
394{
395 Py_VISIT(Py_TYPE(self));
396 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200397}
398
399static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200400 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
401 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Dong-hee Naec689182020-06-20 00:56:13 +0900402 _BZ2_BZ2COMPRESSOR___REDUCE___METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200403 {NULL}
404};
405
Dong-hee Naec689182020-06-20 00:56:13 +0900406static PyType_Slot bz2_compressor_type_slots[] = {
407 {Py_tp_dealloc, BZ2Compressor_dealloc},
408 {Py_tp_methods, BZ2Compressor_methods},
409 {Py_tp_init, _bz2_BZ2Compressor___init__},
410 {Py_tp_new, PyType_GenericNew},
411 {Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
412 {Py_tp_traverse, BZ2Compressor_traverse},
413 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200414};
415
Dong-hee Naec689182020-06-20 00:56:13 +0900416static PyType_Spec bz2_compressor_type_spec = {
417 .name = "_bz2.BZ2Compressor",
418 .basicsize = sizeof(BZ2Compressor),
419 // Calling PyType_GetModuleState() on a subclass is not safe.
420 // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
421 // which prevents to create a subclass.
422 // So calling PyType_GetModuleState() in this file is always safe.
423 .flags = Py_TPFLAGS_DEFAULT,
424 .slots = bz2_compressor_type_slots,
425};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200426
/* BZ2Decompressor class. */
428
Antoine Pitroue71258a2015-02-26 13:08:07 +0100429/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
430 buffer is allocated dynamically and returned. At most max_length bytes are
431 returned, so some of the input may not be consumed. d->bzs.next_in and
432 d->bzs_avail_in_real are updated to reflect the consumed input. */
433static PyObject*
434decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200435{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100436 /* data_size is strictly positive, but because we repeatedly have to
437 compare against max_length and PyBytes_GET_SIZE we declare it as
438 signed */
439 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200440 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100441 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200442
Antoine Pitroue71258a2015-02-26 13:08:07 +0100443 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
444 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
445 else
446 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200447 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100448 return NULL;
449
450 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200451 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100452 int bzret;
453 size_t avail;
454
455 /* On a 64-bit system, buffer length might not fit in avail_out, so we
456 do decompression in chunks of no more than UINT_MAX bytes
457 each. Note that the expression for `avail` is guaranteed to be
458 positive, so the cast is safe. */
459 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
460 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
461 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
462 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200463
464 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100465 bzret = BZ2_bzDecompress(bzs);
466 data_size = bzs->next_out - PyBytes_AS_STRING(result);
467 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200468 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100469 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200470 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100471 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200472 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200473 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100474 } else if (d->bzs_avail_in_real == 0) {
475 break;
476 } else if (bzs->avail_out == 0) {
477 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200478 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100479 if (data_size == PyBytes_GET_SIZE(result) &&
480 grow_buffer(&result, max_length) == -1)
481 goto error;
482 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200483 }
484 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100485 if (data_size != PyBytes_GET_SIZE(result))
486 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200487 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100488
489 return result;
490
491error:
492 Py_XDECREF(result);
493 return NULL;
494}
495
496
497static PyObject *
498decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
499{
500 char input_buffer_in_use;
501 PyObject *result;
502 bz_stream *bzs = &d->bzs;
503
504 /* Prepend unconsumed input if necessary */
505 if (bzs->next_in != NULL) {
506 size_t avail_now, avail_total;
507
508 /* Number of bytes we can append to input buffer */
509 avail_now = (d->input_buffer + d->input_buffer_size)
510 - (bzs->next_in + d->bzs_avail_in_real);
511
512 /* Number of bytes we can append if we move existing
513 contents to beginning of buffer (overwriting
514 consumed input) */
515 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
516
517 if (avail_total < len) {
518 size_t offset = bzs->next_in - d->input_buffer;
519 char *tmp;
520 size_t new_size = d->input_buffer_size + len - avail_now;
521
522 /* Assign to temporary variable first, so we don't
523 lose address of allocated buffer if realloc fails */
524 tmp = PyMem_Realloc(d->input_buffer, new_size);
525 if (tmp == NULL) {
526 PyErr_SetNone(PyExc_MemoryError);
527 return NULL;
528 }
529 d->input_buffer = tmp;
530 d->input_buffer_size = new_size;
531
532 bzs->next_in = d->input_buffer + offset;
533 }
534 else if (avail_now < len) {
535 memmove(d->input_buffer, bzs->next_in,
536 d->bzs_avail_in_real);
537 bzs->next_in = d->input_buffer;
538 }
539 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
540 d->bzs_avail_in_real += len;
541 input_buffer_in_use = 1;
542 }
543 else {
544 bzs->next_in = data;
545 d->bzs_avail_in_real = len;
546 input_buffer_in_use = 0;
547 }
548
549 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000550 if(result == NULL) {
551 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100552 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000553 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100554
555 if (d->eof) {
556 d->needs_input = 0;
557 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300558 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200559 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100560 if (d->unused_data == NULL)
561 goto error;
562 }
563 }
564 else if (d->bzs_avail_in_real == 0) {
565 bzs->next_in = NULL;
566 d->needs_input = 1;
567 }
568 else {
569 d->needs_input = 0;
570
571 /* If we did not use the input buffer, we now have
572 to copy the tail from the caller's buffer into the
573 input buffer */
574 if (!input_buffer_in_use) {
575
576 /* Discard buffer if it's too small
577 (resizing it may needlessly copy the current contents) */
578 if (d->input_buffer != NULL &&
579 d->input_buffer_size < d->bzs_avail_in_real) {
580 PyMem_Free(d->input_buffer);
581 d->input_buffer = NULL;
582 }
583
584 /* Allocate if necessary */
585 if (d->input_buffer == NULL) {
586 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
587 if (d->input_buffer == NULL) {
588 PyErr_SetNone(PyExc_MemoryError);
589 goto error;
590 }
591 d->input_buffer_size = d->bzs_avail_in_real;
592 }
593
594 /* Copy tail */
595 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
596 bzs->next_in = d->input_buffer;
597 }
598 }
599
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200600 return result;
601
602error:
603 Py_XDECREF(result);
604 return NULL;
605}
606
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200607/*[clinic input]
608_bz2.BZ2Decompressor.decompress
609
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200610 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100611 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200612
Antoine Pitroue71258a2015-02-26 13:08:07 +0100613Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200614
Antoine Pitroue71258a2015-02-26 13:08:07 +0100615If *max_length* is nonnegative, returns at most *max_length* bytes of
616decompressed data. If this limit is reached and further output can be
617produced, *self.needs_input* will be set to ``False``. In this case, the next
618call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200619
Antoine Pitroue71258a2015-02-26 13:08:07 +0100620If all of the input data was decompressed and returned (either because this
621was less than *max_length* bytes, or because *max_length* was negative),
622*self.needs_input* will be set to True.
623
624Attempting to decompress data after the end of stream is reached raises an
625EOFError. Any data found after the end of the stream is ignored and saved in
626the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200627[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628
629static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400630_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
631 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300632/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200633{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200634 PyObject *result = NULL;
635
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200636 ACQUIRE_LOCK(self);
637 if (self->eof)
638 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
639 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100640 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200641 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200642 return result;
643}
644
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200645/*[clinic input]
Dong-hee Naec689182020-06-20 00:56:13 +0900646_bz2.BZ2Decompressor.__reduce__
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200647
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200648[clinic start generated code]*/
649
Dong-hee Naec689182020-06-20 00:56:13 +0900650static PyObject *
651_bz2_BZ2Decompressor___reduce___impl(BZ2Decompressor *self)
652/*[clinic end generated code: output=f6a40650813f482e input=8db9175a609fdd43]*/
653{
654 PyErr_Format(PyExc_TypeError,
655 "cannot pickle %s object",
656 Py_TYPE(self)->tp_name);
657 return NULL;
658}
659
660/* Argument Clinic is not used since the Argument Clinic always want to
661 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200662static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200663_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200664{
665 int bzerror;
666
Victor Stinner9b7cf752018-06-23 10:35:23 +0200667 PyThread_type_lock lock = PyThread_allocate_lock();
668 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200669 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
670 return -1;
671 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200672 if (self->lock != NULL) {
673 PyThread_free_lock(self->lock);
674 }
675 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200676
Antoine Pitroue71258a2015-02-26 13:08:07 +0100677 self->needs_input = 1;
678 self->bzs_avail_in_real = 0;
679 self->input_buffer = NULL;
680 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200681 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200682 if (self->unused_data == NULL)
683 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200684
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200685 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
686 if (catch_bz2_error(bzerror))
687 goto error;
688
689 return 0;
690
691error:
692 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200693 PyThread_free_lock(self->lock);
694 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200695 return -1;
696}
697
Dong-hee Naec689182020-06-20 00:56:13 +0900698static int
699_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
700{
701 int return_value = -1;
702
703 if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
704 goto exit;
705 }
706 if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
707 goto exit;
708 }
709 return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
710
711exit:
712 return return_value;
713}
714
715PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
716"BZ2Decompressor()\n"
717"--\n"
718"\n"
719"Create a decompressor object for decompressing data incrementally.\n"
720"\n"
721"For one-shot decompression, use the decompress() function instead.");
722
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200723static void
724BZ2Decompressor_dealloc(BZ2Decompressor *self)
725{
Dong-hee Naec689182020-06-20 00:56:13 +0900726 if(self->input_buffer != NULL) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100727 PyMem_Free(self->input_buffer);
Dong-hee Naec689182020-06-20 00:56:13 +0900728 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200729 BZ2_bzDecompressEnd(&self->bzs);
730 Py_CLEAR(self->unused_data);
Dong-hee Naec689182020-06-20 00:56:13 +0900731 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200732 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900733 }
734
735 PyTypeObject *tp = Py_TYPE(self);
736 tp->tp_free((PyObject *)self);
737 Py_DECREF(tp);
738}
739
740static int
741BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
742{
743 Py_VISIT(Py_TYPE(self));
744 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200745}
746
747static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200748 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Dong-hee Naec689182020-06-20 00:56:13 +0900749 _BZ2_BZ2DECOMPRESSOR___REDUCE___METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200750 {NULL}
751};
752
753PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
754"True if the end-of-stream marker has been reached.");
755
756PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
757"Data found after the end of the compressed stream.");
758
Antoine Pitroue71258a2015-02-26 13:08:07 +0100759PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
760"True if more input is needed before more decompressed data can be produced.");
761
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200762static PyMemberDef BZ2Decompressor_members[] = {
763 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
764 READONLY, BZ2Decompressor_eof__doc__},
765 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
766 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100767 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
768 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200769 {NULL}
770};
771
Dong-hee Naec689182020-06-20 00:56:13 +0900772static PyType_Slot bz2_decompressor_type_slots[] = {
773 {Py_tp_dealloc, BZ2Decompressor_dealloc},
774 {Py_tp_methods, BZ2Decompressor_methods},
775 {Py_tp_init, _bz2_BZ2Decompressor___init__},
776 {Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
777 {Py_tp_members, BZ2Decompressor_members},
778 {Py_tp_new, PyType_GenericNew},
779 {Py_tp_traverse, BZ2Decompressor_traverse},
780 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200781};
782
Dong-hee Naec689182020-06-20 00:56:13 +0900783static PyType_Spec bz2_decompressor_type_spec = {
784 .name = "_bz2.BZ2Decompressor",
785 .basicsize = sizeof(BZ2Decompressor),
786 // Calling PyType_GetModuleState() on a subclass is not safe.
787 // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
788 // which prevents to create a subclass.
789 // So calling PyType_GetModuleState() in this file is always safe.
790 .flags = Py_TPFLAGS_DEFAULT,
791 .slots = bz2_decompressor_type_slots,
792};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200793
/* Module initialization. */
795
Hai Shi5d385172020-02-18 19:17:39 +0800796static int
797_bz2_exec(PyObject *module)
798{
Dong-hee Naec689182020-06-20 00:56:13 +0900799 _bz2_state *state = get_bz2_state(module);
800 state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
801 &bz2_compressor_type_spec, NULL);
802 if (state->bz2_compressor_type == NULL) {
Hai Shi5d385172020-02-18 19:17:39 +0800803 return -1;
804 }
805
Dong-hee Naec689182020-06-20 00:56:13 +0900806 if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
807 return -1;
808 }
809
810 state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
811 &bz2_decompressor_type_spec, NULL);
812 if (state->bz2_decompressor_type == NULL) {
813 return -1;
814 }
815
816 if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800817 return -1;
818 }
819
820 return 0;
821}
822
Dong-hee Naec689182020-06-20 00:56:13 +0900823static int
824_bz2_traverse(PyObject *module, visitproc visit, void *arg)
825{
826 _bz2_state *state = get_bz2_state(module);
827 Py_VISIT(state->bz2_compressor_type);
828 Py_VISIT(state->bz2_decompressor_type);
829 return 0;
830}
831
832static int
833_bz2_clear(PyObject *module)
834{
835 _bz2_state *state = get_bz2_state(module);
836 Py_CLEAR(state->bz2_compressor_type);
837 Py_CLEAR(state->bz2_decompressor_type);
838 return 0;
839}
840
841static void
842_bz2_free(void *module)
843{
844 _bz2_clear((PyObject *)module);
845}
846
Hai Shi5d385172020-02-18 19:17:39 +0800847static struct PyModuleDef_Slot _bz2_slots[] = {
848 {Py_mod_exec, _bz2_exec},
849 {0, NULL}
850};
851
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200852static struct PyModuleDef _bz2module = {
853 PyModuleDef_HEAD_INIT,
Dong-hee Naec689182020-06-20 00:56:13 +0900854 .m_name = "_bz2",
855 .m_size = sizeof(_bz2_state),
856 .m_slots = _bz2_slots,
857 .m_traverse = _bz2_traverse,
858 .m_clear = _bz2_clear,
859 .m_free = _bz2_free,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200860};
861
862PyMODINIT_FUNC
863PyInit__bz2(void)
864{
Hai Shi5d385172020-02-18 19:17:39 +0800865 return PyModuleDef_Init(&_bz2module);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200866}