blob: 798e9efc628f0567e913223232319efad6b5f7f5 [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02006#include "structmember.h" // PyMemberDef
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02007
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include <bzlib.h>
9#include <stdio.h>
10
Ma Linf9bedb62021-04-28 14:58:54 +080011// Blocks output buffer wrappers
12#include "pycore_blocks_output_buffer.h"
13
14#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
15 #error "The maximum block size accepted by libbzip2 is UINT32_MAX."
16#endif
17
18/* On success, return value >= 0
19 On failure, return -1 */
20static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080021OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
22 char **next_out, uint32_t *avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080023{
24 Py_ssize_t allocated;
25
26 allocated = _BlocksOutputBuffer_InitAndGrow(
27 buffer, max_length, (void**) next_out);
28 *avail_out = (uint32_t) allocated;
29 return allocated;
30}
31
32/* On success, return value >= 0
33 On failure, return -1 */
34static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080035OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
36 char **next_out, uint32_t *avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080037{
38 Py_ssize_t allocated;
39
40 allocated = _BlocksOutputBuffer_Grow(
41 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
42 *avail_out = (uint32_t) allocated;
43 return allocated;
44}
45
46static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080047OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080048{
49 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
50}
51
52static inline PyObject *
Ma Lin251ffa92021-05-01 07:32:49 +080053OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080054{
55 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
56}
57
58static inline void
Ma Lin251ffa92021-05-01 07:32:49 +080059OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
Ma Linf9bedb62021-04-28 14:58:54 +080060{
61 _BlocksOutputBuffer_OnError(buffer);
62}
63
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020064
65#ifndef BZ_CONFIG_ERROR
66#define BZ2_bzCompress bzCompress
67#define BZ2_bzCompressInit bzCompressInit
68#define BZ2_bzCompressEnd bzCompressEnd
69#define BZ2_bzDecompress bzDecompress
70#define BZ2_bzDecompressInit bzDecompressInit
71#define BZ2_bzDecompressEnd bzDecompressEnd
72#endif /* ! BZ_CONFIG_ERROR */
73
74
/* Take obj->lock.  A non-blocking attempt (second argument 0) is made
   first; only when that fails is the blocking acquire (second argument 1)
   performed, bracketed by Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock previously taken with ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020082
83
84typedef struct {
Dong-hee Naec689182020-06-20 00:56:13 +090085 PyTypeObject *bz2_compressor_type;
86 PyTypeObject *bz2_decompressor_type;
87} _bz2_state;
88
89static inline _bz2_state*
90get_bz2_state(PyObject *module)
91{
92 void *state = PyModule_GetState(module);
93 assert(state != NULL);
94 return (_bz2_state *)state;
95}
96
97typedef struct {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020098 PyObject_HEAD
99 bz_stream bzs;
100 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200101 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200102} BZ2Compressor;
103
104typedef struct {
105 PyObject_HEAD
106 bz_stream bzs;
107 char eof; /* T_BOOL expects a char */
108 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100109 char needs_input;
110 char *input_buffer;
111 size_t input_buffer_size;
112
113 /* bzs->avail_in is only 32 bit, so we store the true length
114 separately. Conversion and looping is encapsulated in
115 decompress_buf() */
116 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200117 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200118} BZ2Decompressor;
119
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200120/* Helper functions. */
121
122static int
123catch_bz2_error(int bzerror)
124{
125 switch(bzerror) {
126 case BZ_OK:
127 case BZ_RUN_OK:
128 case BZ_FLUSH_OK:
129 case BZ_FINISH_OK:
130 case BZ_STREAM_END:
131 return 0;
132
133#ifdef BZ_CONFIG_ERROR
134 case BZ_CONFIG_ERROR:
135 PyErr_SetString(PyExc_SystemError,
136 "libbzip2 was not compiled correctly");
137 return 1;
138#endif
139 case BZ_PARAM_ERROR:
140 PyErr_SetString(PyExc_ValueError,
141 "Internal error - "
142 "invalid parameters passed to libbzip2");
143 return 1;
144 case BZ_MEM_ERROR:
145 PyErr_NoMemory();
146 return 1;
147 case BZ_DATA_ERROR:
148 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300149 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200150 return 1;
151 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300152 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200153 return 1;
154 case BZ_UNEXPECTED_EOF:
155 PyErr_SetString(PyExc_EOFError,
156 "Compressed file ended before the logical "
157 "end-of-stream was detected");
158 return 1;
159 case BZ_SEQUENCE_ERROR:
160 PyErr_SetString(PyExc_RuntimeError,
161 "Internal error - "
162 "Invalid sequence of commands sent to libbzip2");
163 return 1;
164 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300165 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200166 "Unrecognized error from libbzip2: %d", bzerror);
167 return 1;
168 }
169}
170
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200171
172/* BZ2Compressor class. */
173
174static PyObject *
175compress(BZ2Compressor *c, char *data, size_t len, int action)
176{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200177 PyObject *result;
Ma Linf9bedb62021-04-28 14:58:54 +0800178 _BlocksOutputBuffer buffer = {.list = NULL};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200179
Ma Lin251ffa92021-05-01 07:32:49 +0800180 if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800181 goto error;
182 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200183 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100184 c->bzs.avail_in = 0;
Ma Linf9bedb62021-04-28 14:58:54 +0800185
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200186 for (;;) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200187 int bzerror;
188
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100189 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
190 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200191 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200192 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200193 len -= c->bzs.avail_in;
194 }
195
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100196 /* In regular compression mode, stop when input data is exhausted. */
197 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200198 break;
199
200 if (c->bzs.avail_out == 0) {
Ma Lin251ffa92021-05-01 07:32:49 +0800201 if (OutputBuffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800202 goto error;
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200203 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100205
206 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100207 bzerror = BZ2_bzCompress(&c->bzs, action);
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100208 Py_END_ALLOW_THREADS
Ma Linf9bedb62021-04-28 14:58:54 +0800209
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100210 if (catch_bz2_error(bzerror))
211 goto error;
212
213 /* In flushing mode, stop when all buffered data has been flushed. */
214 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
215 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200216 }
Ma Linf9bedb62021-04-28 14:58:54 +0800217
Ma Lin251ffa92021-05-01 07:32:49 +0800218 result = OutputBuffer_Finish(&buffer, c->bzs.avail_out);
Ma Linf9bedb62021-04-28 14:58:54 +0800219 if (result != NULL) {
220 return result;
221 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200222
223error:
Ma Lin251ffa92021-05-01 07:32:49 +0800224 OutputBuffer_OnError(&buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200225 return NULL;
226}
227
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200228/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200229module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800230class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
231class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200232[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300233/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200234
Larry Hastingsf256c222014-01-25 21:30:37 -0800235#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200236
237/*[clinic input]
238_bz2.BZ2Compressor.compress
239
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200240 data: Py_buffer
241 /
242
243Provide data to the compressor object.
244
245Returns a chunk of compressed data if possible, or b'' otherwise.
246
247When you have finished providing data to the compressor, call the
248flush() method to finish the compression process.
249[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200250
251static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200252_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800253/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200254{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200255 PyObject *result = NULL;
256
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200257 ACQUIRE_LOCK(self);
258 if (self->flushed)
259 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
260 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200261 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200262 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200263 return result;
264}
265
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200266/*[clinic input]
267_bz2.BZ2Compressor.flush
268
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200269Finish the compression process.
270
271Returns the compressed data left in internal buffers.
272
273The compressor object may not be used after this method is called.
274[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200275
276static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200277_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800278/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200279{
280 PyObject *result = NULL;
281
282 ACQUIRE_LOCK(self);
283 if (self->flushed)
284 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
285 else {
286 self->flushed = 1;
287 result = compress(self, NULL, 0, BZ_FINISH);
288 }
289 RELEASE_LOCK(self);
290 return result;
291}
292
Victor Stinner5064a522013-07-07 16:50:27 +0200293static void*
294BZ2_Malloc(void* ctx, int items, int size)
295{
296 if (items < 0 || size < 0)
297 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300298 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200299 return NULL;
300 /* PyMem_Malloc() cannot be used: compress() and decompress()
301 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300302 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200303}
304
/* Deallocator installed as bz_stream.bzfree; the counterpart of
   BZ2_Malloc().  `ctx` is not used. */
static void
BZ2_Free(void *ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
310
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200311
Dong-hee Naec689182020-06-20 00:56:13 +0900312/* Argument Clinic is not used since the Argument Clinic always want to
313 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200314static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200315_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200317 int bzerror;
318
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319 if (!(1 <= compresslevel && compresslevel <= 9)) {
320 PyErr_SetString(PyExc_ValueError,
321 "compresslevel must be between 1 and 9");
322 return -1;
323 }
324
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200325 self->lock = PyThread_allocate_lock();
326 if (self->lock == NULL) {
327 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
328 return -1;
329 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200330
Victor Stinner5064a522013-07-07 16:50:27 +0200331 self->bzs.opaque = NULL;
332 self->bzs.bzalloc = BZ2_Malloc;
333 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200334 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
335 if (catch_bz2_error(bzerror))
336 goto error;
337
338 return 0;
339
340error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200341 PyThread_free_lock(self->lock);
342 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200343 return -1;
344}
345
Dong-hee Naec689182020-06-20 00:56:13 +0900346PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
347"BZ2Compressor(compresslevel=9, /)\n"
348"--\n"
349"\n"
350"Create a compressor object for compressing data incrementally.\n"
351"\n"
352" compresslevel\n"
353" Compression level, as a number between 1 and 9.\n"
354"\n"
355"For one-shot compression, use the compress() function instead.");
356
357static int
358_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
359{
360 int return_value = -1;
361 int compresslevel = 9;
362
363 if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
364 goto exit;
365 }
366 if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
367 goto exit;
368 }
369 if (PyTuple_GET_SIZE(args) < 1) {
370 goto skip_optional;
371 }
372 compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
373 if (compresslevel == -1 && PyErr_Occurred()) {
374 goto exit;
375 }
376skip_optional:
377 return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
378
379exit:
380 return return_value;
381}
382
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200383static void
384BZ2Compressor_dealloc(BZ2Compressor *self)
385{
386 BZ2_bzCompressEnd(&self->bzs);
Dong-hee Naec689182020-06-20 00:56:13 +0900387 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200388 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900389 }
390 PyTypeObject *tp = Py_TYPE(self);
391 tp->tp_free((PyObject *)self);
392 Py_DECREF(tp);
393}
394
395static int
396BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
397{
398 Py_VISIT(Py_TYPE(self));
399 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200400}
401
402static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200403 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
404 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200405 {NULL}
406};
407
Dong-hee Naec689182020-06-20 00:56:13 +0900408static PyType_Slot bz2_compressor_type_slots[] = {
409 {Py_tp_dealloc, BZ2Compressor_dealloc},
410 {Py_tp_methods, BZ2Compressor_methods},
411 {Py_tp_init, _bz2_BZ2Compressor___init__},
412 {Py_tp_new, PyType_GenericNew},
413 {Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
414 {Py_tp_traverse, BZ2Compressor_traverse},
415 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200416};
417
Dong-hee Naec689182020-06-20 00:56:13 +0900418static PyType_Spec bz2_compressor_type_spec = {
419 .name = "_bz2.BZ2Compressor",
420 .basicsize = sizeof(BZ2Compressor),
421 // Calling PyType_GetModuleState() on a subclass is not safe.
422 // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
423 // which prevents to create a subclass.
424 // So calling PyType_GetModuleState() in this file is always safe.
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700425 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
Dong-hee Naec689182020-06-20 00:56:13 +0900426 .slots = bz2_compressor_type_slots,
427};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200428
429/* BZ2Decompressor class. */
430
Antoine Pitroue71258a2015-02-26 13:08:07 +0100431/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
432 buffer is allocated dynamically and returned. At most max_length bytes are
433 returned, so some of the input may not be consumed. d->bzs.next_in and
434 d->bzs_avail_in_real are updated to reflect the consumed input. */
435static PyObject*
436decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200437{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100438 /* data_size is strictly positive, but because we repeatedly have to
439 compare against max_length and PyBytes_GET_SIZE we declare it as
440 signed */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200441 PyObject *result;
Ma Linf9bedb62021-04-28 14:58:54 +0800442 _BlocksOutputBuffer buffer = {.list = NULL};
Antoine Pitroue71258a2015-02-26 13:08:07 +0100443 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200444
Ma Lin251ffa92021-05-01 07:32:49 +0800445 if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800446 goto error;
447 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100448
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200449 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100450 int bzret;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100451 /* On a 64-bit system, buffer length might not fit in avail_out, so we
452 do decompression in chunks of no more than UINT_MAX bytes
453 each. Note that the expression for `avail` is guaranteed to be
454 positive, so the cast is safe. */
Antoine Pitroue71258a2015-02-26 13:08:07 +0100455 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
456 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457
458 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100459 bzret = BZ2_bzDecompress(bzs);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 Py_END_ALLOW_THREADS
Ma Linf9bedb62021-04-28 14:58:54 +0800461
462 d->bzs_avail_in_real += bzs->avail_in;
463
Antoine Pitroue71258a2015-02-26 13:08:07 +0100464 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200465 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200467 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200468 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100469 } else if (d->bzs_avail_in_real == 0) {
470 break;
471 } else if (bzs->avail_out == 0) {
Ma Lin251ffa92021-05-01 07:32:49 +0800472 if (OutputBuffer_GetDataSize(&buffer, bzs->avail_out) == max_length) {
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200473 break;
Ma Lin251ffa92021-05-01 07:32:49 +0800474 }
475 if (OutputBuffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100476 goto error;
Ma Linf9bedb62021-04-28 14:58:54 +0800477 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200478 }
479 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100480
Ma Lin251ffa92021-05-01 07:32:49 +0800481 result = OutputBuffer_Finish(&buffer, bzs->avail_out);
Ma Linf9bedb62021-04-28 14:58:54 +0800482 if (result != NULL) {
483 return result;
484 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100485
486error:
Ma Lin251ffa92021-05-01 07:32:49 +0800487 OutputBuffer_OnError(&buffer);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100488 return NULL;
489}
490
491
492static PyObject *
493decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
494{
495 char input_buffer_in_use;
496 PyObject *result;
497 bz_stream *bzs = &d->bzs;
498
499 /* Prepend unconsumed input if necessary */
500 if (bzs->next_in != NULL) {
501 size_t avail_now, avail_total;
502
503 /* Number of bytes we can append to input buffer */
504 avail_now = (d->input_buffer + d->input_buffer_size)
505 - (bzs->next_in + d->bzs_avail_in_real);
506
507 /* Number of bytes we can append if we move existing
508 contents to beginning of buffer (overwriting
509 consumed input) */
510 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
511
512 if (avail_total < len) {
513 size_t offset = bzs->next_in - d->input_buffer;
514 char *tmp;
515 size_t new_size = d->input_buffer_size + len - avail_now;
516
517 /* Assign to temporary variable first, so we don't
518 lose address of allocated buffer if realloc fails */
519 tmp = PyMem_Realloc(d->input_buffer, new_size);
520 if (tmp == NULL) {
521 PyErr_SetNone(PyExc_MemoryError);
522 return NULL;
523 }
524 d->input_buffer = tmp;
525 d->input_buffer_size = new_size;
526
527 bzs->next_in = d->input_buffer + offset;
528 }
529 else if (avail_now < len) {
530 memmove(d->input_buffer, bzs->next_in,
531 d->bzs_avail_in_real);
532 bzs->next_in = d->input_buffer;
533 }
534 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
535 d->bzs_avail_in_real += len;
536 input_buffer_in_use = 1;
537 }
538 else {
539 bzs->next_in = data;
540 d->bzs_avail_in_real = len;
541 input_buffer_in_use = 0;
542 }
543
544 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000545 if(result == NULL) {
546 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100547 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000548 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100549
550 if (d->eof) {
551 d->needs_input = 0;
552 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300553 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200554 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100555 if (d->unused_data == NULL)
556 goto error;
557 }
558 }
559 else if (d->bzs_avail_in_real == 0) {
560 bzs->next_in = NULL;
561 d->needs_input = 1;
562 }
563 else {
564 d->needs_input = 0;
565
566 /* If we did not use the input buffer, we now have
567 to copy the tail from the caller's buffer into the
568 input buffer */
569 if (!input_buffer_in_use) {
570
571 /* Discard buffer if it's too small
572 (resizing it may needlessly copy the current contents) */
573 if (d->input_buffer != NULL &&
574 d->input_buffer_size < d->bzs_avail_in_real) {
575 PyMem_Free(d->input_buffer);
576 d->input_buffer = NULL;
577 }
578
579 /* Allocate if necessary */
580 if (d->input_buffer == NULL) {
581 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
582 if (d->input_buffer == NULL) {
583 PyErr_SetNone(PyExc_MemoryError);
584 goto error;
585 }
586 d->input_buffer_size = d->bzs_avail_in_real;
587 }
588
589 /* Copy tail */
590 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
591 bzs->next_in = d->input_buffer;
592 }
593 }
594
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200595 return result;
596
597error:
598 Py_XDECREF(result);
599 return NULL;
600}
601
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200602/*[clinic input]
603_bz2.BZ2Decompressor.decompress
604
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200605 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100606 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200607
Antoine Pitroue71258a2015-02-26 13:08:07 +0100608Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200609
Antoine Pitroue71258a2015-02-26 13:08:07 +0100610If *max_length* is nonnegative, returns at most *max_length* bytes of
611decompressed data. If this limit is reached and further output can be
612produced, *self.needs_input* will be set to ``False``. In this case, the next
613call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200614
Antoine Pitroue71258a2015-02-26 13:08:07 +0100615If all of the input data was decompressed and returned (either because this
616was less than *max_length* bytes, or because *max_length* was negative),
617*self.needs_input* will be set to True.
618
619Attempting to decompress data after the end of stream is reached raises an
620EOFError. Any data found after the end of the stream is ignored and saved in
621the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200622[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200623
624static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400625_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
626 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300627/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200629 PyObject *result = NULL;
630
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200631 ACQUIRE_LOCK(self);
632 if (self->eof)
633 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
634 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100635 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200636 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200637 return result;
638}
639
Dong-hee Naec689182020-06-20 00:56:13 +0900640/* Argument Clinic is not used since the Argument Clinic always want to
641 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200642static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200643_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200644{
645 int bzerror;
646
Victor Stinner9b7cf752018-06-23 10:35:23 +0200647 PyThread_type_lock lock = PyThread_allocate_lock();
648 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200649 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
650 return -1;
651 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200652 if (self->lock != NULL) {
653 PyThread_free_lock(self->lock);
654 }
655 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200656
Antoine Pitroue71258a2015-02-26 13:08:07 +0100657 self->needs_input = 1;
658 self->bzs_avail_in_real = 0;
659 self->input_buffer = NULL;
660 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200661 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200662 if (self->unused_data == NULL)
663 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200664
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200665 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
666 if (catch_bz2_error(bzerror))
667 goto error;
668
669 return 0;
670
671error:
672 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200673 PyThread_free_lock(self->lock);
674 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200675 return -1;
676}
677
Dong-hee Naec689182020-06-20 00:56:13 +0900678static int
679_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
680{
681 int return_value = -1;
Ma Linf9bedb62021-04-28 14:58:54 +0800682
Dong-hee Naec689182020-06-20 00:56:13 +0900683 if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
684 goto exit;
685 }
686 if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
687 goto exit;
688 }
689 return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
690
691exit:
692 return return_value;
693}
694
695PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
696"BZ2Decompressor()\n"
697"--\n"
698"\n"
699"Create a decompressor object for decompressing data incrementally.\n"
700"\n"
701"For one-shot decompression, use the decompress() function instead.");
702
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200703static void
704BZ2Decompressor_dealloc(BZ2Decompressor *self)
705{
Dong-hee Naec689182020-06-20 00:56:13 +0900706 if(self->input_buffer != NULL) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100707 PyMem_Free(self->input_buffer);
Dong-hee Naec689182020-06-20 00:56:13 +0900708 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200709 BZ2_bzDecompressEnd(&self->bzs);
710 Py_CLEAR(self->unused_data);
Dong-hee Naec689182020-06-20 00:56:13 +0900711 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200712 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900713 }
714
715 PyTypeObject *tp = Py_TYPE(self);
716 tp->tp_free((PyObject *)self);
717 Py_DECREF(tp);
718}
719
720static int
721BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
722{
723 Py_VISIT(Py_TYPE(self));
724 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200725}
726
727static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200728 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200729 {NULL}
730};
731
732PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
733"True if the end-of-stream marker has been reached.");
734
735PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
736"Data found after the end of the compressed stream.");
737
Antoine Pitroue71258a2015-02-26 13:08:07 +0100738PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
739"True if more input is needed before more decompressed data can be produced.");
740
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200741static PyMemberDef BZ2Decompressor_members[] = {
742 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
743 READONLY, BZ2Decompressor_eof__doc__},
744 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
745 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100746 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
747 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200748 {NULL}
749};
750
Dong-hee Naec689182020-06-20 00:56:13 +0900751static PyType_Slot bz2_decompressor_type_slots[] = {
752 {Py_tp_dealloc, BZ2Decompressor_dealloc},
753 {Py_tp_methods, BZ2Decompressor_methods},
754 {Py_tp_init, _bz2_BZ2Decompressor___init__},
755 {Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
756 {Py_tp_members, BZ2Decompressor_members},
757 {Py_tp_new, PyType_GenericNew},
758 {Py_tp_traverse, BZ2Decompressor_traverse},
759 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200760};
761
Dong-hee Naec689182020-06-20 00:56:13 +0900762static PyType_Spec bz2_decompressor_type_spec = {
763 .name = "_bz2.BZ2Decompressor",
764 .basicsize = sizeof(BZ2Decompressor),
765 // Calling PyType_GetModuleState() on a subclass is not safe.
766 // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
767 // which prevents to create a subclass.
768 // So calling PyType_GetModuleState() in this file is always safe.
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700769 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
Dong-hee Naec689182020-06-20 00:56:13 +0900770 .slots = bz2_decompressor_type_slots,
771};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200772
773/* Module initialization. */
774
Hai Shi5d385172020-02-18 19:17:39 +0800775static int
776_bz2_exec(PyObject *module)
777{
Dong-hee Naec689182020-06-20 00:56:13 +0900778 _bz2_state *state = get_bz2_state(module);
779 state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
780 &bz2_compressor_type_spec, NULL);
781 if (state->bz2_compressor_type == NULL) {
Hai Shi5d385172020-02-18 19:17:39 +0800782 return -1;
783 }
784
Dong-hee Naec689182020-06-20 00:56:13 +0900785 if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
786 return -1;
787 }
788
789 state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
790 &bz2_decompressor_type_spec, NULL);
791 if (state->bz2_decompressor_type == NULL) {
792 return -1;
793 }
794
795 if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800796 return -1;
797 }
798
799 return 0;
800}
801
Dong-hee Naec689182020-06-20 00:56:13 +0900802static int
803_bz2_traverse(PyObject *module, visitproc visit, void *arg)
804{
805 _bz2_state *state = get_bz2_state(module);
806 Py_VISIT(state->bz2_compressor_type);
807 Py_VISIT(state->bz2_decompressor_type);
808 return 0;
809}
810
811static int
812_bz2_clear(PyObject *module)
813{
814 _bz2_state *state = get_bz2_state(module);
815 Py_CLEAR(state->bz2_compressor_type);
816 Py_CLEAR(state->bz2_decompressor_type);
817 return 0;
818}
819
820static void
821_bz2_free(void *module)
822{
823 _bz2_clear((PyObject *)module);
824}
825
Hai Shi5d385172020-02-18 19:17:39 +0800826static struct PyModuleDef_Slot _bz2_slots[] = {
827 {Py_mod_exec, _bz2_exec},
828 {0, NULL}
829};
830
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200831static struct PyModuleDef _bz2module = {
832 PyModuleDef_HEAD_INIT,
Dong-hee Naec689182020-06-20 00:56:13 +0900833 .m_name = "_bz2",
834 .m_size = sizeof(_bz2_state),
835 .m_slots = _bz2_slots,
836 .m_traverse = _bz2_traverse,
837 .m_clear = _bz2_clear,
838 .m_free = _bz2_free,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200839};
840
841PyMODINIT_FUNC
842PyInit__bz2(void)
843{
Hai Shi5d385172020-02-18 19:17:39 +0800844 return PyModuleDef_Init(&_bz2module);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200845}