blob: 67e1b657f6f3142b63576b863ebab973b60ba489 [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   entry points used in this file onto the unprefixed names.
   NOTE(review): presumably this targets old libbzip2 releases that
   predate the BZ2_ prefix -- confirm against the libbzip2 changelog. */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
24
25
#ifdef WITH_THREAD
/* Take obj->lock.  A non-blocking attempt is made first; only when that
   fails is the GIL released around a blocking acquire. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Give obj->lock back. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Without thread support both macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
38
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 int flushed;
44#ifdef WITH_THREAD
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
49typedef struct {
50 PyObject_HEAD
51 bz_stream bzs;
52 char eof; /* T_BOOL expects a char */
53 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010054 char needs_input;
55 char *input_buffer;
56 size_t input_buffer_size;
57
58 /* bzs->avail_in is only 32 bit, so we store the true length
59 separately. Conversion and looping is encapsulated in
60 decompress_buf() */
61 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020062#ifdef WITH_THREAD
63 PyThread_type_lock lock;
64#endif
65} BZ2Decompressor;
66
Larry Hastingsf256c222014-01-25 21:30:37 -080067static PyTypeObject BZ2Compressor_Type;
68static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020069
70/* Helper functions. */
71
72static int
73catch_bz2_error(int bzerror)
74{
75 switch(bzerror) {
76 case BZ_OK:
77 case BZ_RUN_OK:
78 case BZ_FLUSH_OK:
79 case BZ_FINISH_OK:
80 case BZ_STREAM_END:
81 return 0;
82
83#ifdef BZ_CONFIG_ERROR
84 case BZ_CONFIG_ERROR:
85 PyErr_SetString(PyExc_SystemError,
86 "libbzip2 was not compiled correctly");
87 return 1;
88#endif
89 case BZ_PARAM_ERROR:
90 PyErr_SetString(PyExc_ValueError,
91 "Internal error - "
92 "invalid parameters passed to libbzip2");
93 return 1;
94 case BZ_MEM_ERROR:
95 PyErr_NoMemory();
96 return 1;
97 case BZ_DATA_ERROR:
98 case BZ_DATA_ERROR_MAGIC:
99 PyErr_SetString(PyExc_IOError, "Invalid data stream");
100 return 1;
101 case BZ_IO_ERROR:
102 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
103 return 1;
104 case BZ_UNEXPECTED_EOF:
105 PyErr_SetString(PyExc_EOFError,
106 "Compressed file ended before the logical "
107 "end-of-stream was detected");
108 return 1;
109 case BZ_SEQUENCE_ERROR:
110 PyErr_SetString(PyExc_RuntimeError,
111 "Internal error - "
112 "Invalid sequence of commands sent to libbzip2");
113 return 1;
114 default:
115 PyErr_Format(PyExc_IOError,
116 "Unrecognized error from libbzip2: %d", bzerror);
117 return 1;
118 }
119}
120
/* Size of a freshly allocated output buffer: BUFSIZ, but never less
   than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
126
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100128grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200129{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200130 /* Expand the buffer by an amount proportional to the current size,
131 giving us amortized linear-time behavior. Use a less-than-double
132 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200134 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100135
136 if (max_length > 0 && new_size > (size_t) max_length)
137 new_size = (size_t) max_length;
138
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200139 if (new_size > size) {
140 return _PyBytes_Resize(buf, new_size);
141 } else { /* overflow */
142 PyErr_SetString(PyExc_OverflowError,
143 "Unable to allocate buffer - output too large");
144 return -1;
145 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146}
147
148
149/* BZ2Compressor class. */
150
151static PyObject *
152compress(BZ2Compressor *c, char *data, size_t len, int action)
153{
154 size_t data_size = 0;
155 PyObject *result;
156
Antoine Pitroue71258a2015-02-26 13:08:07 +0100157 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 if (result == NULL)
159 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100160
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200161 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100162 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200163 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100164 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 for (;;) {
166 char *this_out;
167 int bzerror;
168
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100169 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
170 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200171 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200172 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200173 len -= c->bzs.avail_in;
174 }
175
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100176 /* In regular compression mode, stop when input data is exhausted. */
177 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 break;
179
180 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
182 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100183 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200184 goto error;
185 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
186 buffer_left = PyBytes_GET_SIZE(result) - data_size;
187 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200188 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100190
191 Py_BEGIN_ALLOW_THREADS
192 this_out = c->bzs.next_out;
193 bzerror = BZ2_bzCompress(&c->bzs, action);
194 data_size += c->bzs.next_out - this_out;
195 Py_END_ALLOW_THREADS
196 if (catch_bz2_error(bzerror))
197 goto error;
198
199 /* In flushing mode, stop when all buffered data has been flushed. */
200 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
201 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200202 }
Victor Stinner706768c2014-08-16 01:03:39 +0200203 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 if (_PyBytes_Resize(&result, data_size) < 0)
205 goto error;
206 return result;
207
208error:
209 Py_XDECREF(result);
210 return NULL;
211}
212
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200213/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800215class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
216class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200217[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300218/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200219
Larry Hastingsf256c222014-01-25 21:30:37 -0800220#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200221
222/*[clinic input]
223_bz2.BZ2Compressor.compress
224
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200225 data: Py_buffer
226 /
227
228Provide data to the compressor object.
229
230Returns a chunk of compressed data if possible, or b'' otherwise.
231
232When you have finished providing data to the compressor, call the
233flush() method to finish the compression process.
234[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200235
236static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200237_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800238/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200240 PyObject *result = NULL;
241
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200242 ACQUIRE_LOCK(self);
243 if (self->flushed)
244 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
245 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200246 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200247 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200248 return result;
249}
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251/*[clinic input]
252_bz2.BZ2Compressor.flush
253
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200254Finish the compression process.
255
256Returns the compressed data left in internal buffers.
257
258The compressor object may not be used after this method is called.
259[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200260
261static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200262_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800263/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200264{
265 PyObject *result = NULL;
266
267 ACQUIRE_LOCK(self);
268 if (self->flushed)
269 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
270 else {
271 self->flushed = 1;
272 result = compress(self, NULL, 0, BZ_FINISH);
273 }
274 RELEASE_LOCK(self);
275 return result;
276}
277
Nadeem Vawda37970652013-10-28 21:35:23 +0100278static PyObject *
279BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
280{
281 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
282 Py_TYPE(self)->tp_name);
283 return NULL;
284}
285
Victor Stinner5064a522013-07-07 16:50:27 +0200286static void*
287BZ2_Malloc(void* ctx, int items, int size)
288{
289 if (items < 0 || size < 0)
290 return NULL;
291 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
292 return NULL;
293 /* PyMem_Malloc() cannot be used: compress() and decompress()
294 release the GIL */
295 return PyMem_RawMalloc(items * size);
296}
297
/* Deallocation callback installed as bz_stream.bzfree; the counterpart
   of BZ2_Malloc().  ctx is unused. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
303
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200304/*[clinic input]
305_bz2.BZ2Compressor.__init__
306
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200307 compresslevel: int = 9
308 Compression level, as a number between 1 and 9.
309 /
310
311Create a compressor object for compressing data incrementally.
312
313For one-shot compression, use the compress() function instead.
314[clinic start generated code]*/
315
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200317_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800318/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200320 int bzerror;
321
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322 if (!(1 <= compresslevel && compresslevel <= 9)) {
323 PyErr_SetString(PyExc_ValueError,
324 "compresslevel must be between 1 and 9");
325 return -1;
326 }
327
328#ifdef WITH_THREAD
329 self->lock = PyThread_allocate_lock();
330 if (self->lock == NULL) {
331 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
332 return -1;
333 }
334#endif
335
Victor Stinner5064a522013-07-07 16:50:27 +0200336 self->bzs.opaque = NULL;
337 self->bzs.bzalloc = BZ2_Malloc;
338 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200339 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
340 if (catch_bz2_error(bzerror))
341 goto error;
342
343 return 0;
344
345error:
346#ifdef WITH_THREAD
347 PyThread_free_lock(self->lock);
348 self->lock = NULL;
349#endif
350 return -1;
351}
352
353static void
354BZ2Compressor_dealloc(BZ2Compressor *self)
355{
356 BZ2_bzCompressEnd(&self->bzs);
357#ifdef WITH_THREAD
358 if (self->lock != NULL)
359 PyThread_free_lock(self->lock);
360#endif
361 Py_TYPE(self)->tp_free((PyObject *)self);
362}
363
364static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200365 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
366 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100367 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200368 {NULL}
369};
370
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200371
372static PyTypeObject BZ2Compressor_Type = {
373 PyVarObject_HEAD_INIT(NULL, 0)
374 "_bz2.BZ2Compressor", /* tp_name */
375 sizeof(BZ2Compressor), /* tp_basicsize */
376 0, /* tp_itemsize */
377 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
378 0, /* tp_print */
379 0, /* tp_getattr */
380 0, /* tp_setattr */
381 0, /* tp_reserved */
382 0, /* tp_repr */
383 0, /* tp_as_number */
384 0, /* tp_as_sequence */
385 0, /* tp_as_mapping */
386 0, /* tp_hash */
387 0, /* tp_call */
388 0, /* tp_str */
389 0, /* tp_getattro */
390 0, /* tp_setattro */
391 0, /* tp_as_buffer */
392 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200393 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394 0, /* tp_traverse */
395 0, /* tp_clear */
396 0, /* tp_richcompare */
397 0, /* tp_weaklistoffset */
398 0, /* tp_iter */
399 0, /* tp_iternext */
400 BZ2Compressor_methods, /* tp_methods */
401 0, /* tp_members */
402 0, /* tp_getset */
403 0, /* tp_base */
404 0, /* tp_dict */
405 0, /* tp_descr_get */
406 0, /* tp_descr_set */
407 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200408 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200409 0, /* tp_alloc */
410 PyType_GenericNew, /* tp_new */
411};
412
413
414/* BZ2Decompressor class. */
415
Antoine Pitroue71258a2015-02-26 13:08:07 +0100416/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
417 buffer is allocated dynamically and returned. At most max_length bytes are
418 returned, so some of the input may not be consumed. d->bzs.next_in and
419 d->bzs_avail_in_real are updated to reflect the consumed input. */
420static PyObject*
421decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200422{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100423 /* data_size is strictly positive, but because we repeatedly have to
424 compare against max_length and PyBytes_GET_SIZE we declare it as
425 signed */
426 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200427 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100428 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
431 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
432 else
433 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200434 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 return NULL;
436
437 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200438 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100439 int bzret;
440 size_t avail;
441
442 /* On a 64-bit system, buffer length might not fit in avail_out, so we
443 do decompression in chunks of no more than UINT_MAX bytes
444 each. Note that the expression for `avail` is guaranteed to be
445 positive, so the cast is safe. */
446 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
447 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
448 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
449 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200450
451 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100452 bzret = BZ2_bzDecompress(bzs);
453 data_size = bzs->next_out - PyBytes_AS_STRING(result);
454 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200455 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100456 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200459 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100461 } else if (d->bzs_avail_in_real == 0) {
462 break;
463 } else if (bzs->avail_out == 0) {
464 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200465 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (data_size == PyBytes_GET_SIZE(result) &&
467 grow_buffer(&result, max_length) == -1)
468 goto error;
469 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200470 }
471 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100472 if (data_size != PyBytes_GET_SIZE(result))
473 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200474 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100475
476 return result;
477
478error:
479 Py_XDECREF(result);
480 return NULL;
481}
482
483
484static PyObject *
485decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
486{
487 char input_buffer_in_use;
488 PyObject *result;
489 bz_stream *bzs = &d->bzs;
490
491 /* Prepend unconsumed input if necessary */
492 if (bzs->next_in != NULL) {
493 size_t avail_now, avail_total;
494
495 /* Number of bytes we can append to input buffer */
496 avail_now = (d->input_buffer + d->input_buffer_size)
497 - (bzs->next_in + d->bzs_avail_in_real);
498
499 /* Number of bytes we can append if we move existing
500 contents to beginning of buffer (overwriting
501 consumed input) */
502 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
503
504 if (avail_total < len) {
505 size_t offset = bzs->next_in - d->input_buffer;
506 char *tmp;
507 size_t new_size = d->input_buffer_size + len - avail_now;
508
509 /* Assign to temporary variable first, so we don't
510 lose address of allocated buffer if realloc fails */
511 tmp = PyMem_Realloc(d->input_buffer, new_size);
512 if (tmp == NULL) {
513 PyErr_SetNone(PyExc_MemoryError);
514 return NULL;
515 }
516 d->input_buffer = tmp;
517 d->input_buffer_size = new_size;
518
519 bzs->next_in = d->input_buffer + offset;
520 }
521 else if (avail_now < len) {
522 memmove(d->input_buffer, bzs->next_in,
523 d->bzs_avail_in_real);
524 bzs->next_in = d->input_buffer;
525 }
526 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
527 d->bzs_avail_in_real += len;
528 input_buffer_in_use = 1;
529 }
530 else {
531 bzs->next_in = data;
532 d->bzs_avail_in_real = len;
533 input_buffer_in_use = 0;
534 }
535
536 result = decompress_buf(d, max_length);
Martin Panter38317d32016-10-01 02:45:17 +0000537 if(result == NULL) {
538 bzs->next_in = NULL;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100539 return NULL;
Martin Panter38317d32016-10-01 02:45:17 +0000540 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100541
542 if (d->eof) {
543 d->needs_input = 0;
544 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300545 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200546 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100547 if (d->unused_data == NULL)
548 goto error;
549 }
550 }
551 else if (d->bzs_avail_in_real == 0) {
552 bzs->next_in = NULL;
553 d->needs_input = 1;
554 }
555 else {
556 d->needs_input = 0;
557
558 /* If we did not use the input buffer, we now have
559 to copy the tail from the caller's buffer into the
560 input buffer */
561 if (!input_buffer_in_use) {
562
563 /* Discard buffer if it's too small
564 (resizing it may needlessly copy the current contents) */
565 if (d->input_buffer != NULL &&
566 d->input_buffer_size < d->bzs_avail_in_real) {
567 PyMem_Free(d->input_buffer);
568 d->input_buffer = NULL;
569 }
570
571 /* Allocate if necessary */
572 if (d->input_buffer == NULL) {
573 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
574 if (d->input_buffer == NULL) {
575 PyErr_SetNone(PyExc_MemoryError);
576 goto error;
577 }
578 d->input_buffer_size = d->bzs_avail_in_real;
579 }
580
581 /* Copy tail */
582 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
583 bzs->next_in = d->input_buffer;
584 }
585 }
586
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200587 return result;
588
589error:
590 Py_XDECREF(result);
591 return NULL;
592}
593
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200594/*[clinic input]
595_bz2.BZ2Decompressor.decompress
596
Antoine Pitroue71258a2015-02-26 13:08:07 +0100597 self: self(type="BZ2Decompressor *")
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200598 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100599 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200600
Antoine Pitroue71258a2015-02-26 13:08:07 +0100601Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200602
Antoine Pitroue71258a2015-02-26 13:08:07 +0100603If *max_length* is nonnegative, returns at most *max_length* bytes of
604decompressed data. If this limit is reached and further output can be
605produced, *self.needs_input* will be set to ``False``. In this case, the next
606call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200607
Antoine Pitroue71258a2015-02-26 13:08:07 +0100608If all of the input data was decompressed and returned (either because this
609was less than *max_length* bytes, or because *max_length* was negative),
610*self.needs_input* will be set to True.
611
612Attempting to decompress data after the end of stream is reached raises an
613EOFError. Any data found after the end of the stream is ignored and saved in
614the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200615[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200616
617static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400618_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
619 Py_ssize_t max_length)
620/*[clinic end generated code: output=23e41045deb240a3 input=9558b424c8b00516]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200621{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200622 PyObject *result = NULL;
623
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200624 ACQUIRE_LOCK(self);
625 if (self->eof)
626 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
627 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100628 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200629 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200630 return result;
631}
632
Nadeem Vawda37970652013-10-28 21:35:23 +0100633static PyObject *
634BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
635{
636 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
637 Py_TYPE(self)->tp_name);
638 return NULL;
639}
640
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200641/*[clinic input]
642_bz2.BZ2Decompressor.__init__
643
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200644Create a decompressor object for decompressing data incrementally.
645
646For one-shot decompression, use the decompress() function instead.
647[clinic start generated code]*/
648
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200649static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200650_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800651/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200652{
653 int bzerror;
654
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200655#ifdef WITH_THREAD
656 self->lock = PyThread_allocate_lock();
657 if (self->lock == NULL) {
658 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
659 return -1;
660 }
661#endif
662
Antoine Pitroue71258a2015-02-26 13:08:07 +0100663 self->needs_input = 1;
664 self->bzs_avail_in_real = 0;
665 self->input_buffer = NULL;
666 self->input_buffer_size = 0;
667 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200668 if (self->unused_data == NULL)
669 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200670
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200671 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
672 if (catch_bz2_error(bzerror))
673 goto error;
674
675 return 0;
676
677error:
678 Py_CLEAR(self->unused_data);
679#ifdef WITH_THREAD
680 PyThread_free_lock(self->lock);
681 self->lock = NULL;
682#endif
683 return -1;
684}
685
686static void
687BZ2Decompressor_dealloc(BZ2Decompressor *self)
688{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100689 if(self->input_buffer != NULL)
690 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200691 BZ2_bzDecompressEnd(&self->bzs);
692 Py_CLEAR(self->unused_data);
693#ifdef WITH_THREAD
694 if (self->lock != NULL)
695 PyThread_free_lock(self->lock);
696#endif
697 Py_TYPE(self)->tp_free((PyObject *)self);
698}
699
700static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200701 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100702 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200703 {NULL}
704};
705
706PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
707"True if the end-of-stream marker has been reached.");
708
709PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
710"Data found after the end of the compressed stream.");
711
Antoine Pitroue71258a2015-02-26 13:08:07 +0100712PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
713"True if more input is needed before more decompressed data can be produced.");
714
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200715static PyMemberDef BZ2Decompressor_members[] = {
716 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
717 READONLY, BZ2Decompressor_eof__doc__},
718 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
719 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100720 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
721 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200722 {NULL}
723};
724
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200725static PyTypeObject BZ2Decompressor_Type = {
726 PyVarObject_HEAD_INIT(NULL, 0)
727 "_bz2.BZ2Decompressor", /* tp_name */
728 sizeof(BZ2Decompressor), /* tp_basicsize */
729 0, /* tp_itemsize */
730 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
731 0, /* tp_print */
732 0, /* tp_getattr */
733 0, /* tp_setattr */
734 0, /* tp_reserved */
735 0, /* tp_repr */
736 0, /* tp_as_number */
737 0, /* tp_as_sequence */
738 0, /* tp_as_mapping */
739 0, /* tp_hash */
740 0, /* tp_call */
741 0, /* tp_str */
742 0, /* tp_getattro */
743 0, /* tp_setattro */
744 0, /* tp_as_buffer */
745 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200746 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200747 0, /* tp_traverse */
748 0, /* tp_clear */
749 0, /* tp_richcompare */
750 0, /* tp_weaklistoffset */
751 0, /* tp_iter */
752 0, /* tp_iternext */
753 BZ2Decompressor_methods, /* tp_methods */
754 BZ2Decompressor_members, /* tp_members */
755 0, /* tp_getset */
756 0, /* tp_base */
757 0, /* tp_dict */
758 0, /* tp_descr_get */
759 0, /* tp_descr_set */
760 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200761 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200762 0, /* tp_alloc */
763 PyType_GenericNew, /* tp_new */
764};
765
766
767/* Module initialization. */
768
769static struct PyModuleDef _bz2module = {
770 PyModuleDef_HEAD_INIT,
771 "_bz2",
772 NULL,
773 -1,
774 NULL,
775 NULL,
776 NULL,
777 NULL,
778 NULL
779};
780
781PyMODINIT_FUNC
782PyInit__bz2(void)
783{
784 PyObject *m;
785
786 if (PyType_Ready(&BZ2Compressor_Type) < 0)
787 return NULL;
788 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
789 return NULL;
790
791 m = PyModule_Create(&_bz2module);
792 if (m == NULL)
793 return NULL;
794
795 Py_INCREF(&BZ2Compressor_Type);
796 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
797
798 Py_INCREF(&BZ2Decompressor_Type);
799 PyModule_AddObject(m, "BZ2Decompressor",
800 (PyObject *)&BZ2Decompressor_Type);
801
802 return m;
803}