blob: 9893a637262915716ebfc10e0ff5a1acfb13abb7 [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02006#include "structmember.h" // PyMemberDef
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02007
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include <bzlib.h>
9#include <stdio.h>
10
Ma Linf9bedb62021-04-28 14:58:54 +080011// Blocks output buffer wrappers
12#include "pycore_blocks_output_buffer.h"
13
14#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
15 #error "The maximum block size accepted by libbzip2 is UINT32_MAX."
16#endif
17
18/* On success, return value >= 0
19 On failure, return -1 */
20static inline Py_ssize_t
21Buffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
22 char **next_out, uint32_t *avail_out)
23{
24 Py_ssize_t allocated;
25
26 allocated = _BlocksOutputBuffer_InitAndGrow(
27 buffer, max_length, (void**) next_out);
28 *avail_out = (uint32_t) allocated;
29 return allocated;
30}
31
32/* On success, return value >= 0
33 On failure, return -1 */
34static inline Py_ssize_t
35Buffer_Grow(_BlocksOutputBuffer *buffer,
36 char **next_out, uint32_t *avail_out)
37{
38 Py_ssize_t allocated;
39
40 allocated = _BlocksOutputBuffer_Grow(
41 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
42 *avail_out = (uint32_t) allocated;
43 return allocated;
44}
45
46static inline Py_ssize_t
47Buffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
48{
49 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
50}
51
52static inline PyObject *
53Buffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
54{
55 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
56}
57
58static inline void
59Buffer_OnError(_BlocksOutputBuffer *buffer)
60{
61 _BlocksOutputBuffer_OnError(buffer);
62}
63
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020064
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   function names used in this file onto their unprefixed equivalents.
   NOTE(review): presumably this targets libbzip2 releases that predate the
   BZ2_ prefix -- confirm. */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
73
74
/* Take (obj)->lock.  A non-blocking attempt (second argument 0) is tried
   first; only if that fails is a blocking acquire (second argument 1)
   performed, inside a Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS
   section. */
#define ACQUIRE_LOCK(obj)                                   \
    do {                                                    \
        if (!PyThread_acquire_lock((obj)->lock, 0)) {       \
            Py_BEGIN_ALLOW_THREADS                          \
            PyThread_acquire_lock((obj)->lock, 1);          \
            Py_END_ALLOW_THREADS                            \
        }                                                   \
    } while (0)

/* Release a lock previously taken with ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020082
83
84typedef struct {
Dong-hee Naec689182020-06-20 00:56:13 +090085 PyTypeObject *bz2_compressor_type;
86 PyTypeObject *bz2_decompressor_type;
87} _bz2_state;
88
89static inline _bz2_state*
90get_bz2_state(PyObject *module)
91{
92 void *state = PyModule_GetState(module);
93 assert(state != NULL);
94 return (_bz2_state *)state;
95}
96
97typedef struct {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020098 PyObject_HEAD
99 bz_stream bzs;
100 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200101 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200102} BZ2Compressor;
103
104typedef struct {
105 PyObject_HEAD
106 bz_stream bzs;
107 char eof; /* T_BOOL expects a char */
108 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100109 char needs_input;
110 char *input_buffer;
111 size_t input_buffer_size;
112
113 /* bzs->avail_in is only 32 bit, so we store the true length
114 separately. Conversion and looping is encapsulated in
115 decompress_buf() */
116 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200117 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200118} BZ2Decompressor;
119
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200120/* Helper functions. */
121
122static int
123catch_bz2_error(int bzerror)
124{
125 switch(bzerror) {
126 case BZ_OK:
127 case BZ_RUN_OK:
128 case BZ_FLUSH_OK:
129 case BZ_FINISH_OK:
130 case BZ_STREAM_END:
131 return 0;
132
133#ifdef BZ_CONFIG_ERROR
134 case BZ_CONFIG_ERROR:
135 PyErr_SetString(PyExc_SystemError,
136 "libbzip2 was not compiled correctly");
137 return 1;
138#endif
139 case BZ_PARAM_ERROR:
140 PyErr_SetString(PyExc_ValueError,
141 "Internal error - "
142 "invalid parameters passed to libbzip2");
143 return 1;
144 case BZ_MEM_ERROR:
145 PyErr_NoMemory();
146 return 1;
147 case BZ_DATA_ERROR:
148 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300149 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200150 return 1;
151 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300152 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200153 return 1;
154 case BZ_UNEXPECTED_EOF:
155 PyErr_SetString(PyExc_EOFError,
156 "Compressed file ended before the logical "
157 "end-of-stream was detected");
158 return 1;
159 case BZ_SEQUENCE_ERROR:
160 PyErr_SetString(PyExc_RuntimeError,
161 "Internal error - "
162 "Invalid sequence of commands sent to libbzip2");
163 return 1;
164 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300165 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200166 "Unrecognized error from libbzip2: %d", bzerror);
167 return 1;
168 }
169}
170
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200171
172/* BZ2Compressor class. */
173
174static PyObject *
175compress(BZ2Compressor *c, char *data, size_t len, int action)
176{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200177 PyObject *result;
Ma Linf9bedb62021-04-28 14:58:54 +0800178 _BlocksOutputBuffer buffer = {.list = NULL};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200179
Ma Linf9bedb62021-04-28 14:58:54 +0800180 if (Buffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
181 goto error;
182 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200183 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100184 c->bzs.avail_in = 0;
Ma Linf9bedb62021-04-28 14:58:54 +0800185
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200186 for (;;) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200187 int bzerror;
188
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100189 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
190 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200191 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200192 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200193 len -= c->bzs.avail_in;
194 }
195
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100196 /* In regular compression mode, stop when input data is exhausted. */
197 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200198 break;
199
200 if (c->bzs.avail_out == 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800201 if (Buffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
202 goto error;
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200203 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100205
206 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100207 bzerror = BZ2_bzCompress(&c->bzs, action);
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100208 Py_END_ALLOW_THREADS
Ma Linf9bedb62021-04-28 14:58:54 +0800209
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100210 if (catch_bz2_error(bzerror))
211 goto error;
212
213 /* In flushing mode, stop when all buffered data has been flushed. */
214 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
215 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200216 }
Ma Linf9bedb62021-04-28 14:58:54 +0800217
218 result = Buffer_Finish(&buffer, c->bzs.avail_out);
219 if (result != NULL) {
220 return result;
221 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200222
223error:
Ma Linf9bedb62021-04-28 14:58:54 +0800224 Buffer_OnError(&buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200225 return NULL;
226}
227
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200228/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200229module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800230class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
231class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200232[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300233/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200234
Larry Hastingsf256c222014-01-25 21:30:37 -0800235#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200236
237/*[clinic input]
238_bz2.BZ2Compressor.compress
239
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200240 data: Py_buffer
241 /
242
243Provide data to the compressor object.
244
245Returns a chunk of compressed data if possible, or b'' otherwise.
246
247When you have finished providing data to the compressor, call the
248flush() method to finish the compression process.
249[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200250
251static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200252_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800253/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200254{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200255 PyObject *result = NULL;
256
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200257 ACQUIRE_LOCK(self);
258 if (self->flushed)
259 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
260 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200261 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200262 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200263 return result;
264}
265
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200266/*[clinic input]
267_bz2.BZ2Compressor.flush
268
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200269Finish the compression process.
270
271Returns the compressed data left in internal buffers.
272
273The compressor object may not be used after this method is called.
274[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200275
276static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200277_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800278/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200279{
280 PyObject *result = NULL;
281
282 ACQUIRE_LOCK(self);
283 if (self->flushed)
284 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
285 else {
286 self->flushed = 1;
287 result = compress(self, NULL, 0, BZ_FINISH);
288 }
289 RELEASE_LOCK(self);
290 return result;
291}
292
Victor Stinner5064a522013-07-07 16:50:27 +0200293static void*
294BZ2_Malloc(void* ctx, int items, int size)
295{
296 if (items < 0 || size < 0)
297 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300298 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200299 return NULL;
300 /* PyMem_Malloc() cannot be used: compress() and decompress()
301 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300302 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200303}
304
/* Deallocation callback installed as bz_stream.bzfree; pairs with
   BZ2_Malloc().  `ctx` is unused. */
static void
BZ2_Free(void *ctx, void *ptr)
{
    PyMem_RawFree(ptr);
    return;
}
310
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200311
Dong-hee Naec689182020-06-20 00:56:13 +0900312/* Argument Clinic is not used since the Argument Clinic always want to
313 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200314static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200315_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200317 int bzerror;
318
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319 if (!(1 <= compresslevel && compresslevel <= 9)) {
320 PyErr_SetString(PyExc_ValueError,
321 "compresslevel must be between 1 and 9");
322 return -1;
323 }
324
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200325 self->lock = PyThread_allocate_lock();
326 if (self->lock == NULL) {
327 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
328 return -1;
329 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200330
Victor Stinner5064a522013-07-07 16:50:27 +0200331 self->bzs.opaque = NULL;
332 self->bzs.bzalloc = BZ2_Malloc;
333 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200334 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
335 if (catch_bz2_error(bzerror))
336 goto error;
337
338 return 0;
339
340error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200341 PyThread_free_lock(self->lock);
342 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200343 return -1;
344}
345
Dong-hee Naec689182020-06-20 00:56:13 +0900346PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
347"BZ2Compressor(compresslevel=9, /)\n"
348"--\n"
349"\n"
350"Create a compressor object for compressing data incrementally.\n"
351"\n"
352" compresslevel\n"
353" Compression level, as a number between 1 and 9.\n"
354"\n"
355"For one-shot compression, use the compress() function instead.");
356
357static int
358_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
359{
360 int return_value = -1;
361 int compresslevel = 9;
362
363 if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
364 goto exit;
365 }
366 if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
367 goto exit;
368 }
369 if (PyTuple_GET_SIZE(args) < 1) {
370 goto skip_optional;
371 }
372 compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
373 if (compresslevel == -1 && PyErr_Occurred()) {
374 goto exit;
375 }
376skip_optional:
377 return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
378
379exit:
380 return return_value;
381}
382
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200383static void
384BZ2Compressor_dealloc(BZ2Compressor *self)
385{
386 BZ2_bzCompressEnd(&self->bzs);
Dong-hee Naec689182020-06-20 00:56:13 +0900387 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200388 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900389 }
390 PyTypeObject *tp = Py_TYPE(self);
391 tp->tp_free((PyObject *)self);
392 Py_DECREF(tp);
393}
394
395static int
396BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
397{
398 Py_VISIT(Py_TYPE(self));
399 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200400}
401
402static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200403 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
404 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200405 {NULL}
406};
407
Dong-hee Naec689182020-06-20 00:56:13 +0900408static PyType_Slot bz2_compressor_type_slots[] = {
409 {Py_tp_dealloc, BZ2Compressor_dealloc},
410 {Py_tp_methods, BZ2Compressor_methods},
411 {Py_tp_init, _bz2_BZ2Compressor___init__},
412 {Py_tp_new, PyType_GenericNew},
413 {Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
414 {Py_tp_traverse, BZ2Compressor_traverse},
415 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200416};
417
Dong-hee Naec689182020-06-20 00:56:13 +0900418static PyType_Spec bz2_compressor_type_spec = {
419 .name = "_bz2.BZ2Compressor",
420 .basicsize = sizeof(BZ2Compressor),
421 // Calling PyType_GetModuleState() on a subclass is not safe.
422 // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
423 // which prevents to create a subclass.
424 // So calling PyType_GetModuleState() in this file is always safe.
425 .flags = Py_TPFLAGS_DEFAULT,
426 .slots = bz2_compressor_type_slots,
427};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200428
429/* BZ2Decompressor class. */
430
Antoine Pitroue71258a2015-02-26 13:08:07 +0100431/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
432 buffer is allocated dynamically and returned. At most max_length bytes are
433 returned, so some of the input may not be consumed. d->bzs.next_in and
434 d->bzs_avail_in_real are updated to reflect the consumed input. */
435static PyObject*
436decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200437{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100438 /* data_size is strictly positive, but because we repeatedly have to
439 compare against max_length and PyBytes_GET_SIZE we declare it as
440 signed */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200441 PyObject *result;
Ma Linf9bedb62021-04-28 14:58:54 +0800442 _BlocksOutputBuffer buffer = {.list = NULL};
Antoine Pitroue71258a2015-02-26 13:08:07 +0100443 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200444
Ma Linf9bedb62021-04-28 14:58:54 +0800445 if (Buffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
446 goto error;
447 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100448
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200449 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100450 int bzret;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100451 /* On a 64-bit system, buffer length might not fit in avail_out, so we
452 do decompression in chunks of no more than UINT_MAX bytes
453 each. Note that the expression for `avail` is guaranteed to be
454 positive, so the cast is safe. */
Antoine Pitroue71258a2015-02-26 13:08:07 +0100455 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
456 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457
458 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100459 bzret = BZ2_bzDecompress(bzs);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 Py_END_ALLOW_THREADS
Ma Linf9bedb62021-04-28 14:58:54 +0800461
462 d->bzs_avail_in_real += bzs->avail_in;
463
Antoine Pitroue71258a2015-02-26 13:08:07 +0100464 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200465 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200467 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200468 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100469 } else if (d->bzs_avail_in_real == 0) {
470 break;
471 } else if (bzs->avail_out == 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800472 if (Buffer_GetDataSize(&buffer, bzs->avail_out) == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200473 break;
Ma Linf9bedb62021-04-28 14:58:54 +0800474 if (Buffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100475 goto error;
Ma Linf9bedb62021-04-28 14:58:54 +0800476 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200477 }
478 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100479
Ma Linf9bedb62021-04-28 14:58:54 +0800480 result = Buffer_Finish(&buffer, bzs->avail_out);
481 if (result != NULL) {
482 return result;
483 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100484
485error:
Ma Linf9bedb62021-04-28 14:58:54 +0800486 Buffer_OnError(&buffer);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100487 return NULL;
488}
489
490
491static PyObject *
492decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
493{
494 char input_buffer_in_use;
495 PyObject *result;
496 bz_stream *bzs = &d->bzs;
497
498 /* Prepend unconsumed input if necessary */
499 if (bzs->next_in != NULL) {
500 size_t avail_now, avail_total;
501
502 /* Number of bytes we can append to input buffer */
503 avail_now = (d->input_buffer + d->input_buffer_size)
504 - (bzs->next_in + d->bzs_avail_in_real);
505
506 /* Number of bytes we can append if we move existing
507 contents to beginning of buffer (overwriting
508 consumed input) */
509 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
510
511 if (avail_total < len) {
512 size_t offset = bzs->next_in - d->input_buffer;
513 char *tmp;
514 size_t new_size = d->input_buffer_size + len - avail_now;
515
516 /* Assign to temporary variable first, so we don't
517 lose address of allocated buffer if realloc fails */
518 tmp = PyMem_Realloc(d->input_buffer, new_size);
519 if (tmp == NULL) {
520 PyErr_SetNone(PyExc_MemoryError);
521 return NULL;
522 }
523 d->input_buffer = tmp;
524 d->input_buffer_size = new_size;
525
526 bzs->next_in = d->input_buffer + offset;
527 }
528 else if (avail_now < len) {
529 memmove(d->input_buffer, bzs->next_in,
530 d->bzs_avail_in_real);
531 bzs->next_in = d->input_buffer;
532 }
533 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
534 d->bzs_avail_in_real += len;
535 input_buffer_in_use = 1;
536 }
537 else {
538 bzs->next_in = data;
539 d->bzs_avail_in_real = len;
540 input_buffer_in_use = 0;
541 }
542
543 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000544 if(result == NULL) {
545 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100546 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000547 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100548
549 if (d->eof) {
550 d->needs_input = 0;
551 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300552 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200553 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100554 if (d->unused_data == NULL)
555 goto error;
556 }
557 }
558 else if (d->bzs_avail_in_real == 0) {
559 bzs->next_in = NULL;
560 d->needs_input = 1;
561 }
562 else {
563 d->needs_input = 0;
564
565 /* If we did not use the input buffer, we now have
566 to copy the tail from the caller's buffer into the
567 input buffer */
568 if (!input_buffer_in_use) {
569
570 /* Discard buffer if it's too small
571 (resizing it may needlessly copy the current contents) */
572 if (d->input_buffer != NULL &&
573 d->input_buffer_size < d->bzs_avail_in_real) {
574 PyMem_Free(d->input_buffer);
575 d->input_buffer = NULL;
576 }
577
578 /* Allocate if necessary */
579 if (d->input_buffer == NULL) {
580 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
581 if (d->input_buffer == NULL) {
582 PyErr_SetNone(PyExc_MemoryError);
583 goto error;
584 }
585 d->input_buffer_size = d->bzs_avail_in_real;
586 }
587
588 /* Copy tail */
589 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
590 bzs->next_in = d->input_buffer;
591 }
592 }
593
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200594 return result;
595
596error:
597 Py_XDECREF(result);
598 return NULL;
599}
600
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200601/*[clinic input]
602_bz2.BZ2Decompressor.decompress
603
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200604 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100605 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200606
Antoine Pitroue71258a2015-02-26 13:08:07 +0100607Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200608
Antoine Pitroue71258a2015-02-26 13:08:07 +0100609If *max_length* is nonnegative, returns at most *max_length* bytes of
610decompressed data. If this limit is reached and further output can be
611produced, *self.needs_input* will be set to ``False``. In this case, the next
612call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200613
Antoine Pitroue71258a2015-02-26 13:08:07 +0100614If all of the input data was decompressed and returned (either because this
615was less than *max_length* bytes, or because *max_length* was negative),
616*self.needs_input* will be set to True.
617
618Attempting to decompress data after the end of stream is reached raises an
619EOFError. Any data found after the end of the stream is ignored and saved in
620the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200621[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200622
623static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400624_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
625 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300626/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200627{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628 PyObject *result = NULL;
629
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200630 ACQUIRE_LOCK(self);
631 if (self->eof)
632 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
633 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100634 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200635 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200636 return result;
637}
638
Dong-hee Naec689182020-06-20 00:56:13 +0900639/* Argument Clinic is not used since the Argument Clinic always want to
640 check the type which would be wrong here */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200641static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200642_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200643{
644 int bzerror;
645
Victor Stinner9b7cf752018-06-23 10:35:23 +0200646 PyThread_type_lock lock = PyThread_allocate_lock();
647 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200648 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
649 return -1;
650 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200651 if (self->lock != NULL) {
652 PyThread_free_lock(self->lock);
653 }
654 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200655
Antoine Pitroue71258a2015-02-26 13:08:07 +0100656 self->needs_input = 1;
657 self->bzs_avail_in_real = 0;
658 self->input_buffer = NULL;
659 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200660 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200661 if (self->unused_data == NULL)
662 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200663
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200664 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
665 if (catch_bz2_error(bzerror))
666 goto error;
667
668 return 0;
669
670error:
671 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200672 PyThread_free_lock(self->lock);
673 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200674 return -1;
675}
676
Dong-hee Naec689182020-06-20 00:56:13 +0900677static int
678_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
679{
680 int return_value = -1;
Ma Linf9bedb62021-04-28 14:58:54 +0800681
Dong-hee Naec689182020-06-20 00:56:13 +0900682 if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
683 goto exit;
684 }
685 if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
686 goto exit;
687 }
688 return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
689
690exit:
691 return return_value;
692}
693
694PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
695"BZ2Decompressor()\n"
696"--\n"
697"\n"
698"Create a decompressor object for decompressing data incrementally.\n"
699"\n"
700"For one-shot decompression, use the decompress() function instead.");
701
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200702static void
703BZ2Decompressor_dealloc(BZ2Decompressor *self)
704{
Dong-hee Naec689182020-06-20 00:56:13 +0900705 if(self->input_buffer != NULL) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100706 PyMem_Free(self->input_buffer);
Dong-hee Naec689182020-06-20 00:56:13 +0900707 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200708 BZ2_bzDecompressEnd(&self->bzs);
709 Py_CLEAR(self->unused_data);
Dong-hee Naec689182020-06-20 00:56:13 +0900710 if (self->lock != NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200711 PyThread_free_lock(self->lock);
Dong-hee Naec689182020-06-20 00:56:13 +0900712 }
713
714 PyTypeObject *tp = Py_TYPE(self);
715 tp->tp_free((PyObject *)self);
716 Py_DECREF(tp);
717}
718
719static int
720BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
721{
722 Py_VISIT(Py_TYPE(self));
723 return 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200724}
725
726static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200727 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200728 {NULL}
729};
730
731PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
732"True if the end-of-stream marker has been reached.");
733
734PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
735"Data found after the end of the compressed stream.");
736
Antoine Pitroue71258a2015-02-26 13:08:07 +0100737PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
738"True if more input is needed before more decompressed data can be produced.");
739
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200740static PyMemberDef BZ2Decompressor_members[] = {
741 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
742 READONLY, BZ2Decompressor_eof__doc__},
743 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
744 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100745 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
746 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200747 {NULL}
748};
749
Dong-hee Naec689182020-06-20 00:56:13 +0900750static PyType_Slot bz2_decompressor_type_slots[] = {
751 {Py_tp_dealloc, BZ2Decompressor_dealloc},
752 {Py_tp_methods, BZ2Decompressor_methods},
753 {Py_tp_init, _bz2_BZ2Decompressor___init__},
754 {Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
755 {Py_tp_members, BZ2Decompressor_members},
756 {Py_tp_new, PyType_GenericNew},
757 {Py_tp_traverse, BZ2Decompressor_traverse},
758 {0, 0}
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200759};
760
Dong-hee Naec689182020-06-20 00:56:13 +0900761static PyType_Spec bz2_decompressor_type_spec = {
762 .name = "_bz2.BZ2Decompressor",
763 .basicsize = sizeof(BZ2Decompressor),
764 // Calling PyType_GetModuleState() on a subclass is not safe.
765 // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
766 // which prevents to create a subclass.
767 // So calling PyType_GetModuleState() in this file is always safe.
768 .flags = Py_TPFLAGS_DEFAULT,
769 .slots = bz2_decompressor_type_slots,
770};
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200771
772/* Module initialization. */
773
Hai Shi5d385172020-02-18 19:17:39 +0800774static int
775_bz2_exec(PyObject *module)
776{
Dong-hee Naec689182020-06-20 00:56:13 +0900777 _bz2_state *state = get_bz2_state(module);
778 state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
779 &bz2_compressor_type_spec, NULL);
780 if (state->bz2_compressor_type == NULL) {
Hai Shi5d385172020-02-18 19:17:39 +0800781 return -1;
782 }
783
Dong-hee Naec689182020-06-20 00:56:13 +0900784 if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
785 return -1;
786 }
787
788 state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
789 &bz2_decompressor_type_spec, NULL);
790 if (state->bz2_decompressor_type == NULL) {
791 return -1;
792 }
793
794 if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800795 return -1;
796 }
797
798 return 0;
799}
800
Dong-hee Naec689182020-06-20 00:56:13 +0900801static int
802_bz2_traverse(PyObject *module, visitproc visit, void *arg)
803{
804 _bz2_state *state = get_bz2_state(module);
805 Py_VISIT(state->bz2_compressor_type);
806 Py_VISIT(state->bz2_decompressor_type);
807 return 0;
808}
809
810static int
811_bz2_clear(PyObject *module)
812{
813 _bz2_state *state = get_bz2_state(module);
814 Py_CLEAR(state->bz2_compressor_type);
815 Py_CLEAR(state->bz2_decompressor_type);
816 return 0;
817}
818
819static void
820_bz2_free(void *module)
821{
822 _bz2_clear((PyObject *)module);
823}
824
Hai Shi5d385172020-02-18 19:17:39 +0800825static struct PyModuleDef_Slot _bz2_slots[] = {
826 {Py_mod_exec, _bz2_exec},
827 {0, NULL}
828};
829
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200830static struct PyModuleDef _bz2module = {
831 PyModuleDef_HEAD_INIT,
Dong-hee Naec689182020-06-20 00:56:13 +0900832 .m_name = "_bz2",
833 .m_size = sizeof(_bz2_state),
834 .m_slots = _bz2_slots,
835 .m_traverse = _bz2_traverse,
836 .m_clear = _bz2_clear,
837 .m_free = _bz2_free,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200838};
839
840PyMODINIT_FUNC
841PyInit__bz2(void)
842{
Hai Shi5d385172020-02-18 19:17:39 +0800843 return PyModuleDef_Init(&_bz2module);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200844}