blob: 880632c62349f18d78815e26b442f597991ff6b0 [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02006#include "structmember.h" // PyMemberDef
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02007
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02008#include <bzlib.h>
9#include <stdio.h>
10
11
12#ifndef BZ_CONFIG_ERROR
13#define BZ2_bzCompress bzCompress
14#define BZ2_bzCompressInit bzCompressInit
15#define BZ2_bzCompressEnd bzCompressEnd
16#define BZ2_bzDecompress bzDecompress
17#define BZ2_bzDecompressInit bzDecompressInit
18#define BZ2_bzDecompressEnd bzDecompressEnd
19#endif /* ! BZ_CONFIG_ERROR */
20
21
/* Take obj->lock.  A non-blocking attempt is made first; only if that
   fails do we block, and the blocking wait is wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Give back the lock taken by ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020029
30
31typedef struct {
32 PyObject_HEAD
33 bz_stream bzs;
34 int flushed;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020035 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020036} BZ2Compressor;
37
38typedef struct {
39 PyObject_HEAD
40 bz_stream bzs;
41 char eof; /* T_BOOL expects a char */
42 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010043 char needs_input;
44 char *input_buffer;
45 size_t input_buffer_size;
46
47 /* bzs->avail_in is only 32 bit, so we store the true length
48 separately. Conversion and looping is encapsulated in
49 decompress_buf() */
50 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020051 PyThread_type_lock lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020052} BZ2Decompressor;
53
Larry Hastingsf256c222014-01-25 21:30:37 -080054static PyTypeObject BZ2Compressor_Type;
55static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020056
57/* Helper functions. */
58
59static int
60catch_bz2_error(int bzerror)
61{
62 switch(bzerror) {
63 case BZ_OK:
64 case BZ_RUN_OK:
65 case BZ_FLUSH_OK:
66 case BZ_FINISH_OK:
67 case BZ_STREAM_END:
68 return 0;
69
70#ifdef BZ_CONFIG_ERROR
71 case BZ_CONFIG_ERROR:
72 PyErr_SetString(PyExc_SystemError,
73 "libbzip2 was not compiled correctly");
74 return 1;
75#endif
76 case BZ_PARAM_ERROR:
77 PyErr_SetString(PyExc_ValueError,
78 "Internal error - "
79 "invalid parameters passed to libbzip2");
80 return 1;
81 case BZ_MEM_ERROR:
82 PyErr_NoMemory();
83 return 1;
84 case BZ_DATA_ERROR:
85 case BZ_DATA_ERROR_MAGIC:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030086 PyErr_SetString(PyExc_OSError, "Invalid data stream");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020087 return 1;
88 case BZ_IO_ERROR:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +030089 PyErr_SetString(PyExc_OSError, "Unknown I/O error");
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020090 return 1;
91 case BZ_UNEXPECTED_EOF:
92 PyErr_SetString(PyExc_EOFError,
93 "Compressed file ended before the logical "
94 "end-of-stream was detected");
95 return 1;
96 case BZ_SEQUENCE_ERROR:
97 PyErr_SetString(PyExc_RuntimeError,
98 "Internal error - "
99 "Invalid sequence of commands sent to libbzip2");
100 return 1;
101 default:
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +0300102 PyErr_Format(PyExc_OSError,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200103 "Unrecognized error from libbzip2: %d", bzerror);
104 return 1;
105 }
106}
107
/* Initial size of a freshly allocated output buffer: BUFSIZ, but never
   less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
113
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200114static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100115grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200116{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200117 /* Expand the buffer by an amount proportional to the current size,
118 giving us amortized linear-time behavior. Use a less-than-double
119 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200120 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200121 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100122
123 if (max_length > 0 && new_size > (size_t) max_length)
124 new_size = (size_t) max_length;
125
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200126 if (new_size > size) {
127 return _PyBytes_Resize(buf, new_size);
128 } else { /* overflow */
129 PyErr_SetString(PyExc_OverflowError,
130 "Unable to allocate buffer - output too large");
131 return -1;
132 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133}
134
135
136/* BZ2Compressor class. */
137
138static PyObject *
139compress(BZ2Compressor *c, char *data, size_t len, int action)
140{
141 size_t data_size = 0;
142 PyObject *result;
143
Antoine Pitroue71258a2015-02-26 13:08:07 +0100144 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200145 if (result == NULL)
146 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100147
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200148 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100149 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200150 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100151 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200152 for (;;) {
153 char *this_out;
154 int bzerror;
155
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100156 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
157 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200158 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200159 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200160 len -= c->bzs.avail_in;
161 }
162
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100163 /* In regular compression mode, stop when input data is exhausted. */
164 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 break;
166
167 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200168 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
169 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100170 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200171 goto error;
172 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
173 buffer_left = PyBytes_GET_SIZE(result) - data_size;
174 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200175 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200176 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100177
178 Py_BEGIN_ALLOW_THREADS
179 this_out = c->bzs.next_out;
180 bzerror = BZ2_bzCompress(&c->bzs, action);
181 data_size += c->bzs.next_out - this_out;
182 Py_END_ALLOW_THREADS
183 if (catch_bz2_error(bzerror))
184 goto error;
185
186 /* In flushing mode, stop when all buffered data has been flushed. */
187 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
188 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Victor Stinner706768c2014-08-16 01:03:39 +0200190 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200191 if (_PyBytes_Resize(&result, data_size) < 0)
192 goto error;
193 return result;
194
195error:
196 Py_XDECREF(result);
197 return NULL;
198}
199
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200200/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200201module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800202class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
203class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200204[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300205/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200206
Larry Hastingsf256c222014-01-25 21:30:37 -0800207#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200208
209/*[clinic input]
210_bz2.BZ2Compressor.compress
211
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200212 data: Py_buffer
213 /
214
215Provide data to the compressor object.
216
217Returns a chunk of compressed data if possible, or b'' otherwise.
218
219When you have finished providing data to the compressor, call the
220flush() method to finish the compression process.
221[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200222
223static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200224_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800225/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200226{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200227 PyObject *result = NULL;
228
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200229 ACQUIRE_LOCK(self);
230 if (self->flushed)
231 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
232 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200233 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200234 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200235 return result;
236}
237
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200238/*[clinic input]
239_bz2.BZ2Compressor.flush
240
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200241Finish the compression process.
242
243Returns the compressed data left in internal buffers.
244
245The compressor object may not be used after this method is called.
246[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200247
248static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200249_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800250/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200251{
252 PyObject *result = NULL;
253
254 ACQUIRE_LOCK(self);
255 if (self->flushed)
256 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
257 else {
258 self->flushed = 1;
259 result = compress(self, NULL, 0, BZ_FINISH);
260 }
261 RELEASE_LOCK(self);
262 return result;
263}
264
Victor Stinner5064a522013-07-07 16:50:27 +0200265static void*
266BZ2_Malloc(void* ctx, int items, int size)
267{
268 if (items < 0 || size < 0)
269 return NULL;
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300270 if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
Victor Stinner5064a522013-07-07 16:50:27 +0200271 return NULL;
272 /* PyMem_Malloc() cannot be used: compress() and decompress()
273 release the GIL */
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300274 return PyMem_RawMalloc((size_t)items * (size_t)size);
Victor Stinner5064a522013-07-07 16:50:27 +0200275}
276
/* Deallocation callback installed in bz_stream.bzfree; counterpart of
   BZ2_Malloc().  ctx is unused. */
static void
BZ2_Free(void *ctx, void *ptr)
{
    PyMem_RawFree(ptr);
}
282
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200283/*[clinic input]
284_bz2.BZ2Compressor.__init__
285
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200286 compresslevel: int = 9
287 Compression level, as a number between 1 and 9.
288 /
289
290Create a compressor object for compressing data incrementally.
291
292For one-shot compression, use the compress() function instead.
293[clinic start generated code]*/
294
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200295static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200296_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800297/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200298{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200299 int bzerror;
300
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200301 if (!(1 <= compresslevel && compresslevel <= 9)) {
302 PyErr_SetString(PyExc_ValueError,
303 "compresslevel must be between 1 and 9");
304 return -1;
305 }
306
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200307 self->lock = PyThread_allocate_lock();
308 if (self->lock == NULL) {
309 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
310 return -1;
311 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200312
Victor Stinner5064a522013-07-07 16:50:27 +0200313 self->bzs.opaque = NULL;
314 self->bzs.bzalloc = BZ2_Malloc;
315 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
317 if (catch_bz2_error(bzerror))
318 goto error;
319
320 return 0;
321
322error:
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200323 PyThread_free_lock(self->lock);
324 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200325 return -1;
326}
327
328static void
329BZ2Compressor_dealloc(BZ2Compressor *self)
330{
331 BZ2_bzCompressEnd(&self->bzs);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200332 if (self->lock != NULL)
333 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200334 Py_TYPE(self)->tp_free((PyObject *)self);
335}
336
337static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200338 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
339 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200340 {NULL}
341};
342
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200343
344static PyTypeObject BZ2Compressor_Type = {
345 PyVarObject_HEAD_INIT(NULL, 0)
346 "_bz2.BZ2Compressor", /* tp_name */
347 sizeof(BZ2Compressor), /* tp_basicsize */
348 0, /* tp_itemsize */
349 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200350 0, /* tp_vectorcall_offset */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200351 0, /* tp_getattr */
352 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200353 0, /* tp_as_async */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200354 0, /* tp_repr */
355 0, /* tp_as_number */
356 0, /* tp_as_sequence */
357 0, /* tp_as_mapping */
358 0, /* tp_hash */
359 0, /* tp_call */
360 0, /* tp_str */
361 0, /* tp_getattro */
362 0, /* tp_setattro */
363 0, /* tp_as_buffer */
364 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200365 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200366 0, /* tp_traverse */
367 0, /* tp_clear */
368 0, /* tp_richcompare */
369 0, /* tp_weaklistoffset */
370 0, /* tp_iter */
371 0, /* tp_iternext */
372 BZ2Compressor_methods, /* tp_methods */
373 0, /* tp_members */
374 0, /* tp_getset */
375 0, /* tp_base */
376 0, /* tp_dict */
377 0, /* tp_descr_get */
378 0, /* tp_descr_set */
379 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200380 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200381 0, /* tp_alloc */
382 PyType_GenericNew, /* tp_new */
383};
384
385
386/* BZ2Decompressor class. */
387
Antoine Pitroue71258a2015-02-26 13:08:07 +0100388/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
389 buffer is allocated dynamically and returned. At most max_length bytes are
390 returned, so some of the input may not be consumed. d->bzs.next_in and
391 d->bzs_avail_in_real are updated to reflect the consumed input. */
392static PyObject*
393decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100395 /* data_size is strictly positive, but because we repeatedly have to
396 compare against max_length and PyBytes_GET_SIZE we declare it as
397 signed */
398 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200399 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100400 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200401
Antoine Pitroue71258a2015-02-26 13:08:07 +0100402 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
403 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
404 else
405 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200406 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100407 return NULL;
408
409 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200410 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100411 int bzret;
412 size_t avail;
413
414 /* On a 64-bit system, buffer length might not fit in avail_out, so we
415 do decompression in chunks of no more than UINT_MAX bytes
416 each. Note that the expression for `avail` is guaranteed to be
417 positive, so the cast is safe. */
418 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
419 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
420 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
421 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200422
423 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100424 bzret = BZ2_bzDecompress(bzs);
425 data_size = bzs->next_out - PyBytes_AS_STRING(result);
426 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200427 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100428 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200431 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200432 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100433 } else if (d->bzs_avail_in_real == 0) {
434 break;
435 } else if (bzs->avail_out == 0) {
436 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200437 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100438 if (data_size == PyBytes_GET_SIZE(result) &&
439 grow_buffer(&result, max_length) == -1)
440 goto error;
441 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200442 }
443 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100444 if (data_size != PyBytes_GET_SIZE(result))
445 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200446 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100447
448 return result;
449
450error:
451 Py_XDECREF(result);
452 return NULL;
453}
454
455
456static PyObject *
457decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
458{
459 char input_buffer_in_use;
460 PyObject *result;
461 bz_stream *bzs = &d->bzs;
462
463 /* Prepend unconsumed input if necessary */
464 if (bzs->next_in != NULL) {
465 size_t avail_now, avail_total;
466
467 /* Number of bytes we can append to input buffer */
468 avail_now = (d->input_buffer + d->input_buffer_size)
469 - (bzs->next_in + d->bzs_avail_in_real);
470
471 /* Number of bytes we can append if we move existing
472 contents to beginning of buffer (overwriting
473 consumed input) */
474 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
475
476 if (avail_total < len) {
477 size_t offset = bzs->next_in - d->input_buffer;
478 char *tmp;
479 size_t new_size = d->input_buffer_size + len - avail_now;
480
481 /* Assign to temporary variable first, so we don't
482 lose address of allocated buffer if realloc fails */
483 tmp = PyMem_Realloc(d->input_buffer, new_size);
484 if (tmp == NULL) {
485 PyErr_SetNone(PyExc_MemoryError);
486 return NULL;
487 }
488 d->input_buffer = tmp;
489 d->input_buffer_size = new_size;
490
491 bzs->next_in = d->input_buffer + offset;
492 }
493 else if (avail_now < len) {
494 memmove(d->input_buffer, bzs->next_in,
495 d->bzs_avail_in_real);
496 bzs->next_in = d->input_buffer;
497 }
498 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
499 d->bzs_avail_in_real += len;
500 input_buffer_in_use = 1;
501 }
502 else {
503 bzs->next_in = data;
504 d->bzs_avail_in_real = len;
505 input_buffer_in_use = 0;
506 }
507
508 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000509 if(result == NULL) {
510 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100511 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000512 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100513
514 if (d->eof) {
515 d->needs_input = 0;
516 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300517 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200518 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100519 if (d->unused_data == NULL)
520 goto error;
521 }
522 }
523 else if (d->bzs_avail_in_real == 0) {
524 bzs->next_in = NULL;
525 d->needs_input = 1;
526 }
527 else {
528 d->needs_input = 0;
529
530 /* If we did not use the input buffer, we now have
531 to copy the tail from the caller's buffer into the
532 input buffer */
533 if (!input_buffer_in_use) {
534
535 /* Discard buffer if it's too small
536 (resizing it may needlessly copy the current contents) */
537 if (d->input_buffer != NULL &&
538 d->input_buffer_size < d->bzs_avail_in_real) {
539 PyMem_Free(d->input_buffer);
540 d->input_buffer = NULL;
541 }
542
543 /* Allocate if necessary */
544 if (d->input_buffer == NULL) {
545 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
546 if (d->input_buffer == NULL) {
547 PyErr_SetNone(PyExc_MemoryError);
548 goto error;
549 }
550 d->input_buffer_size = d->bzs_avail_in_real;
551 }
552
553 /* Copy tail */
554 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
555 bzs->next_in = d->input_buffer;
556 }
557 }
558
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200559 return result;
560
561error:
562 Py_XDECREF(result);
563 return NULL;
564}
565
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200566/*[clinic input]
567_bz2.BZ2Decompressor.decompress
568
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200569 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100570 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200571
Antoine Pitroue71258a2015-02-26 13:08:07 +0100572Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200573
Antoine Pitroue71258a2015-02-26 13:08:07 +0100574If *max_length* is nonnegative, returns at most *max_length* bytes of
575decompressed data. If this limit is reached and further output can be
576produced, *self.needs_input* will be set to ``False``. In this case, the next
577call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200578
Antoine Pitroue71258a2015-02-26 13:08:07 +0100579If all of the input data was decompressed and returned (either because this
580was less than *max_length* bytes, or because *max_length* was negative),
581*self.needs_input* will be set to True.
582
583Attempting to decompress data after the end of stream is reached raises an
584EOFError. Any data found after the end of the stream is ignored and saved in
585the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200586[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200587
588static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400589_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
590 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300591/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200592{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200593 PyObject *result = NULL;
594
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200595 ACQUIRE_LOCK(self);
596 if (self->eof)
597 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
598 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100599 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200600 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200601 return result;
602}
603
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200604/*[clinic input]
605_bz2.BZ2Decompressor.__init__
606
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200607Create a decompressor object for decompressing data incrementally.
608
609For one-shot decompression, use the decompress() function instead.
610[clinic start generated code]*/
611
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200612static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200613_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800614/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200615{
616 int bzerror;
617
Victor Stinner9b7cf752018-06-23 10:35:23 +0200618 PyThread_type_lock lock = PyThread_allocate_lock();
619 if (lock == NULL) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200620 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
621 return -1;
622 }
Victor Stinner9b7cf752018-06-23 10:35:23 +0200623 if (self->lock != NULL) {
624 PyThread_free_lock(self->lock);
625 }
626 self->lock = lock;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200627
Antoine Pitroue71258a2015-02-26 13:08:07 +0100628 self->needs_input = 1;
629 self->bzs_avail_in_real = 0;
630 self->input_buffer = NULL;
631 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +0200632 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200633 if (self->unused_data == NULL)
634 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200635
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200636 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
637 if (catch_bz2_error(bzerror))
638 goto error;
639
640 return 0;
641
642error:
643 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200644 PyThread_free_lock(self->lock);
645 self->lock = NULL;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200646 return -1;
647}
648
649static void
650BZ2Decompressor_dealloc(BZ2Decompressor *self)
651{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100652 if(self->input_buffer != NULL)
653 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200654 BZ2_bzDecompressEnd(&self->bzs);
655 Py_CLEAR(self->unused_data);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200656 if (self->lock != NULL)
657 PyThread_free_lock(self->lock);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200658 Py_TYPE(self)->tp_free((PyObject *)self);
659}
660
661static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200662 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200663 {NULL}
664};
665
666PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
667"True if the end-of-stream marker has been reached.");
668
669PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
670"Data found after the end of the compressed stream.");
671
Antoine Pitroue71258a2015-02-26 13:08:07 +0100672PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
673"True if more input is needed before more decompressed data can be produced.");
674
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200675static PyMemberDef BZ2Decompressor_members[] = {
676 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
677 READONLY, BZ2Decompressor_eof__doc__},
678 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
679 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100680 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
681 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200682 {NULL}
683};
684
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200685static PyTypeObject BZ2Decompressor_Type = {
686 PyVarObject_HEAD_INIT(NULL, 0)
687 "_bz2.BZ2Decompressor", /* tp_name */
688 sizeof(BZ2Decompressor), /* tp_basicsize */
689 0, /* tp_itemsize */
690 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200691 0, /* tp_vectorcall_offset */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200692 0, /* tp_getattr */
693 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200694 0, /* tp_as_async */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200695 0, /* tp_repr */
696 0, /* tp_as_number */
697 0, /* tp_as_sequence */
698 0, /* tp_as_mapping */
699 0, /* tp_hash */
700 0, /* tp_call */
701 0, /* tp_str */
702 0, /* tp_getattro */
703 0, /* tp_setattro */
704 0, /* tp_as_buffer */
705 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200706 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200707 0, /* tp_traverse */
708 0, /* tp_clear */
709 0, /* tp_richcompare */
710 0, /* tp_weaklistoffset */
711 0, /* tp_iter */
712 0, /* tp_iternext */
713 BZ2Decompressor_methods, /* tp_methods */
714 BZ2Decompressor_members, /* tp_members */
715 0, /* tp_getset */
716 0, /* tp_base */
717 0, /* tp_dict */
718 0, /* tp_descr_get */
719 0, /* tp_descr_set */
720 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200721 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200722 0, /* tp_alloc */
723 PyType_GenericNew, /* tp_new */
724};
725
726
727/* Module initialization. */
728
Hai Shi5d385172020-02-18 19:17:39 +0800729static int
730_bz2_exec(PyObject *module)
731{
Dong-hee Na37fcbb62020-03-25 07:08:51 +0900732 if (PyModule_AddType(module, &BZ2Compressor_Type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800733 return -1;
734 }
735
Dong-hee Na37fcbb62020-03-25 07:08:51 +0900736 if (PyModule_AddType(module, &BZ2Decompressor_Type) < 0) {
Hai Shi5d385172020-02-18 19:17:39 +0800737 return -1;
738 }
739
740 return 0;
741}
742
743static struct PyModuleDef_Slot _bz2_slots[] = {
744 {Py_mod_exec, _bz2_exec},
745 {0, NULL}
746};
747
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200748static struct PyModuleDef _bz2module = {
749 PyModuleDef_HEAD_INIT,
750 "_bz2",
751 NULL,
Hai Shi5d385172020-02-18 19:17:39 +0800752 0,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200753 NULL,
Hai Shi5d385172020-02-18 19:17:39 +0800754 _bz2_slots,
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200755 NULL,
756 NULL,
757 NULL
758};
759
760PyMODINIT_FUNC
761PyInit__bz2(void)
762{
Hai Shi5d385172020-02-18 19:17:39 +0800763 return PyModuleDef_Init(&_bz2module);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200764}