/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When the bzlib header does not define BZ_CONFIG_ERROR, route the
   BZ2_-prefixed names used throughout this file to the unprefixed
   function names (presumably exported by older libbzip2 releases). */
#ifndef BZ_CONFIG_ERROR
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd
#endif /* ! BZ_CONFIG_ERROR */
24
25
#ifdef WITH_THREAD
/* Take obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so that other Python
   threads may run while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: the lock macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
38
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 int flushed;
44#ifdef WITH_THREAD
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
49typedef struct {
50 PyObject_HEAD
51 bz_stream bzs;
52 char eof; /* T_BOOL expects a char */
53 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010054 char needs_input;
55 char *input_buffer;
56 size_t input_buffer_size;
57
58 /* bzs->avail_in is only 32 bit, so we store the true length
59 separately. Conversion and looping is encapsulated in
60 decompress_buf() */
61 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020062#ifdef WITH_THREAD
63 PyThread_type_lock lock;
64#endif
65} BZ2Decompressor;
66
Larry Hastingsf256c222014-01-25 21:30:37 -080067static PyTypeObject BZ2Compressor_Type;
68static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020069
70/* Helper functions. */
71
72static int
73catch_bz2_error(int bzerror)
74{
75 switch(bzerror) {
76 case BZ_OK:
77 case BZ_RUN_OK:
78 case BZ_FLUSH_OK:
79 case BZ_FINISH_OK:
80 case BZ_STREAM_END:
81 return 0;
82
83#ifdef BZ_CONFIG_ERROR
84 case BZ_CONFIG_ERROR:
85 PyErr_SetString(PyExc_SystemError,
86 "libbzip2 was not compiled correctly");
87 return 1;
88#endif
89 case BZ_PARAM_ERROR:
90 PyErr_SetString(PyExc_ValueError,
91 "Internal error - "
92 "invalid parameters passed to libbzip2");
93 return 1;
94 case BZ_MEM_ERROR:
95 PyErr_NoMemory();
96 return 1;
97 case BZ_DATA_ERROR:
98 case BZ_DATA_ERROR_MAGIC:
99 PyErr_SetString(PyExc_IOError, "Invalid data stream");
100 return 1;
101 case BZ_IO_ERROR:
102 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
103 return 1;
104 case BZ_UNEXPECTED_EOF:
105 PyErr_SetString(PyExc_EOFError,
106 "Compressed file ended before the logical "
107 "end-of-stream was detected");
108 return 1;
109 case BZ_SEQUENCE_ERROR:
110 PyErr_SetString(PyExc_RuntimeError,
111 "Internal error - "
112 "Invalid sequence of commands sent to libbzip2");
113 return 1;
114 default:
115 PyErr_Format(PyExc_IOError,
116 "Unrecognized error from libbzip2: %d", bzerror);
117 return 1;
118 }
119}
120
/* Initial size of freshly allocated output buffers: BUFSIZ, but never
   less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
126
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100128grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200129{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200130 /* Expand the buffer by an amount proportional to the current size,
131 giving us amortized linear-time behavior. Use a less-than-double
132 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200134 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100135
136 if (max_length > 0 && new_size > (size_t) max_length)
137 new_size = (size_t) max_length;
138
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200139 if (new_size > size) {
140 return _PyBytes_Resize(buf, new_size);
141 } else { /* overflow */
142 PyErr_SetString(PyExc_OverflowError,
143 "Unable to allocate buffer - output too large");
144 return -1;
145 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146}
147
148
149/* BZ2Compressor class. */
150
151static PyObject *
152compress(BZ2Compressor *c, char *data, size_t len, int action)
153{
154 size_t data_size = 0;
155 PyObject *result;
156
Antoine Pitroue71258a2015-02-26 13:08:07 +0100157 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 if (result == NULL)
159 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100160
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200161 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100162 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200163 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100164 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 for (;;) {
166 char *this_out;
167 int bzerror;
168
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100169 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
170 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200171 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200172 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200173 len -= c->bzs.avail_in;
174 }
175
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100176 /* In regular compression mode, stop when input data is exhausted. */
177 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 break;
179
180 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
182 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100183 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200184 goto error;
185 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
186 buffer_left = PyBytes_GET_SIZE(result) - data_size;
187 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200188 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100190
191 Py_BEGIN_ALLOW_THREADS
192 this_out = c->bzs.next_out;
193 bzerror = BZ2_bzCompress(&c->bzs, action);
194 data_size += c->bzs.next_out - this_out;
195 Py_END_ALLOW_THREADS
196 if (catch_bz2_error(bzerror))
197 goto error;
198
199 /* In flushing mode, stop when all buffered data has been flushed. */
200 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
201 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200202 }
Victor Stinner706768c2014-08-16 01:03:39 +0200203 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 if (_PyBytes_Resize(&result, data_size) < 0)
205 goto error;
206 return result;
207
208error:
209 Py_XDECREF(result);
210 return NULL;
211}
212
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200213/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800215class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
216class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200217[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300218/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200219
Larry Hastingsf256c222014-01-25 21:30:37 -0800220#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200221
222/*[clinic input]
223_bz2.BZ2Compressor.compress
224
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200225 data: Py_buffer
226 /
227
228Provide data to the compressor object.
229
230Returns a chunk of compressed data if possible, or b'' otherwise.
231
232When you have finished providing data to the compressor, call the
233flush() method to finish the compression process.
234[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200235
236static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200237_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800238/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200240 PyObject *result = NULL;
241
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200242 ACQUIRE_LOCK(self);
243 if (self->flushed)
244 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
245 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200246 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200247 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200248 return result;
249}
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251/*[clinic input]
252_bz2.BZ2Compressor.flush
253
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200254Finish the compression process.
255
256Returns the compressed data left in internal buffers.
257
258The compressor object may not be used after this method is called.
259[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200260
261static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200262_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800263/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200264{
265 PyObject *result = NULL;
266
267 ACQUIRE_LOCK(self);
268 if (self->flushed)
269 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
270 else {
271 self->flushed = 1;
272 result = compress(self, NULL, 0, BZ_FINISH);
273 }
274 RELEASE_LOCK(self);
275 return result;
276}
277
Nadeem Vawda37970652013-10-28 21:35:23 +0100278static PyObject *
279BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
280{
281 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
282 Py_TYPE(self)->tp_name);
283 return NULL;
284}
285
Victor Stinner5064a522013-07-07 16:50:27 +0200286static void*
287BZ2_Malloc(void* ctx, int items, int size)
288{
289 if (items < 0 || size < 0)
290 return NULL;
291 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
292 return NULL;
293 /* PyMem_Malloc() cannot be used: compress() and decompress()
294 release the GIL */
295 return PyMem_RawMalloc(items * size);
296}
297
/* Deallocator callback installed as bz_stream.bzfree; releases a block
   obtained from BZ2_Malloc().  ctx is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
303
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200304/*[clinic input]
305_bz2.BZ2Compressor.__init__
306
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200307 compresslevel: int = 9
308 Compression level, as a number between 1 and 9.
309 /
310
311Create a compressor object for compressing data incrementally.
312
313For one-shot compression, use the compress() function instead.
314[clinic start generated code]*/
315
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200317_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800318/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200320 int bzerror;
321
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322 if (!(1 <= compresslevel && compresslevel <= 9)) {
323 PyErr_SetString(PyExc_ValueError,
324 "compresslevel must be between 1 and 9");
325 return -1;
326 }
327
328#ifdef WITH_THREAD
329 self->lock = PyThread_allocate_lock();
330 if (self->lock == NULL) {
331 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
332 return -1;
333 }
334#endif
335
Victor Stinner5064a522013-07-07 16:50:27 +0200336 self->bzs.opaque = NULL;
337 self->bzs.bzalloc = BZ2_Malloc;
338 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200339 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
340 if (catch_bz2_error(bzerror))
341 goto error;
342
343 return 0;
344
345error:
346#ifdef WITH_THREAD
347 PyThread_free_lock(self->lock);
348 self->lock = NULL;
349#endif
350 return -1;
351}
352
353static void
354BZ2Compressor_dealloc(BZ2Compressor *self)
355{
356 BZ2_bzCompressEnd(&self->bzs);
357#ifdef WITH_THREAD
358 if (self->lock != NULL)
359 PyThread_free_lock(self->lock);
360#endif
361 Py_TYPE(self)->tp_free((PyObject *)self);
362}
363
364static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200365 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
366 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100367 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200368 {NULL}
369};
370
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200371
372static PyTypeObject BZ2Compressor_Type = {
373 PyVarObject_HEAD_INIT(NULL, 0)
374 "_bz2.BZ2Compressor", /* tp_name */
375 sizeof(BZ2Compressor), /* tp_basicsize */
376 0, /* tp_itemsize */
377 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
378 0, /* tp_print */
379 0, /* tp_getattr */
380 0, /* tp_setattr */
381 0, /* tp_reserved */
382 0, /* tp_repr */
383 0, /* tp_as_number */
384 0, /* tp_as_sequence */
385 0, /* tp_as_mapping */
386 0, /* tp_hash */
387 0, /* tp_call */
388 0, /* tp_str */
389 0, /* tp_getattro */
390 0, /* tp_setattro */
391 0, /* tp_as_buffer */
392 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200393 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394 0, /* tp_traverse */
395 0, /* tp_clear */
396 0, /* tp_richcompare */
397 0, /* tp_weaklistoffset */
398 0, /* tp_iter */
399 0, /* tp_iternext */
400 BZ2Compressor_methods, /* tp_methods */
401 0, /* tp_members */
402 0, /* tp_getset */
403 0, /* tp_base */
404 0, /* tp_dict */
405 0, /* tp_descr_get */
406 0, /* tp_descr_set */
407 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200408 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200409 0, /* tp_alloc */
410 PyType_GenericNew, /* tp_new */
411};
412
413
414/* BZ2Decompressor class. */
415
Antoine Pitroue71258a2015-02-26 13:08:07 +0100416/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
417 buffer is allocated dynamically and returned. At most max_length bytes are
418 returned, so some of the input may not be consumed. d->bzs.next_in and
419 d->bzs_avail_in_real are updated to reflect the consumed input. */
420static PyObject*
421decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200422{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100423 /* data_size is strictly positive, but because we repeatedly have to
424 compare against max_length and PyBytes_GET_SIZE we declare it as
425 signed */
426 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200427 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100428 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
431 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
432 else
433 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200434 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 return NULL;
436
437 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200438 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100439 int bzret;
440 size_t avail;
441
442 /* On a 64-bit system, buffer length might not fit in avail_out, so we
443 do decompression in chunks of no more than UINT_MAX bytes
444 each. Note that the expression for `avail` is guaranteed to be
445 positive, so the cast is safe. */
446 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
447 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
448 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
449 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200450
451 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100452 bzret = BZ2_bzDecompress(bzs);
453 data_size = bzs->next_out - PyBytes_AS_STRING(result);
454 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200455 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100456 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200459 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100461 } else if (d->bzs_avail_in_real == 0) {
462 break;
463 } else if (bzs->avail_out == 0) {
464 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200465 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (data_size == PyBytes_GET_SIZE(result) &&
467 grow_buffer(&result, max_length) == -1)
468 goto error;
469 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200470 }
471 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100472 if (data_size != PyBytes_GET_SIZE(result))
473 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200474 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100475
476 return result;
477
478error:
479 Py_XDECREF(result);
480 return NULL;
481}
482
483
484static PyObject *
485decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
486{
487 char input_buffer_in_use;
488 PyObject *result;
489 bz_stream *bzs = &d->bzs;
490
491 /* Prepend unconsumed input if necessary */
492 if (bzs->next_in != NULL) {
493 size_t avail_now, avail_total;
494
495 /* Number of bytes we can append to input buffer */
496 avail_now = (d->input_buffer + d->input_buffer_size)
497 - (bzs->next_in + d->bzs_avail_in_real);
498
499 /* Number of bytes we can append if we move existing
500 contents to beginning of buffer (overwriting
501 consumed input) */
502 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
503
504 if (avail_total < len) {
505 size_t offset = bzs->next_in - d->input_buffer;
506 char *tmp;
507 size_t new_size = d->input_buffer_size + len - avail_now;
508
509 /* Assign to temporary variable first, so we don't
510 lose address of allocated buffer if realloc fails */
511 tmp = PyMem_Realloc(d->input_buffer, new_size);
512 if (tmp == NULL) {
513 PyErr_SetNone(PyExc_MemoryError);
514 return NULL;
515 }
516 d->input_buffer = tmp;
517 d->input_buffer_size = new_size;
518
519 bzs->next_in = d->input_buffer + offset;
520 }
521 else if (avail_now < len) {
522 memmove(d->input_buffer, bzs->next_in,
523 d->bzs_avail_in_real);
524 bzs->next_in = d->input_buffer;
525 }
526 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
527 d->bzs_avail_in_real += len;
528 input_buffer_in_use = 1;
529 }
530 else {
531 bzs->next_in = data;
532 d->bzs_avail_in_real = len;
533 input_buffer_in_use = 0;
534 }
535
536 result = decompress_buf(d, max_length);
537 if(result == NULL)
538 return NULL;
539
540 if (d->eof) {
541 d->needs_input = 0;
542 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300543 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200544 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100545 if (d->unused_data == NULL)
546 goto error;
547 }
548 }
549 else if (d->bzs_avail_in_real == 0) {
550 bzs->next_in = NULL;
551 d->needs_input = 1;
552 }
553 else {
554 d->needs_input = 0;
555
556 /* If we did not use the input buffer, we now have
557 to copy the tail from the caller's buffer into the
558 input buffer */
559 if (!input_buffer_in_use) {
560
561 /* Discard buffer if it's too small
562 (resizing it may needlessly copy the current contents) */
563 if (d->input_buffer != NULL &&
564 d->input_buffer_size < d->bzs_avail_in_real) {
565 PyMem_Free(d->input_buffer);
566 d->input_buffer = NULL;
567 }
568
569 /* Allocate if necessary */
570 if (d->input_buffer == NULL) {
571 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
572 if (d->input_buffer == NULL) {
573 PyErr_SetNone(PyExc_MemoryError);
574 goto error;
575 }
576 d->input_buffer_size = d->bzs_avail_in_real;
577 }
578
579 /* Copy tail */
580 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
581 bzs->next_in = d->input_buffer;
582 }
583 }
584
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200585 return result;
586
587error:
588 Py_XDECREF(result);
589 return NULL;
590}
591
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200592/*[clinic input]
593_bz2.BZ2Decompressor.decompress
594
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200595 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100596 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200597
Antoine Pitroue71258a2015-02-26 13:08:07 +0100598Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200599
Antoine Pitroue71258a2015-02-26 13:08:07 +0100600If *max_length* is nonnegative, returns at most *max_length* bytes of
601decompressed data. If this limit is reached and further output can be
602produced, *self.needs_input* will be set to ``False``. In this case, the next
603call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200604
Antoine Pitroue71258a2015-02-26 13:08:07 +0100605If all of the input data was decompressed and returned (either because this
606was less than *max_length* bytes, or because *max_length* was negative),
607*self.needs_input* will be set to True.
608
609Attempting to decompress data after the end of stream is reached raises an
610EOFError. Any data found after the end of the stream is ignored and saved in
611the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200612[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200613
614static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400615_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
616 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300617/*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200618{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200619 PyObject *result = NULL;
620
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200621 ACQUIRE_LOCK(self);
622 if (self->eof)
623 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
624 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100625 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200626 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200627 return result;
628}
629
Nadeem Vawda37970652013-10-28 21:35:23 +0100630static PyObject *
631BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
632{
633 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
634 Py_TYPE(self)->tp_name);
635 return NULL;
636}
637
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200638/*[clinic input]
639_bz2.BZ2Decompressor.__init__
640
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200641Create a decompressor object for decompressing data incrementally.
642
643For one-shot decompression, use the decompress() function instead.
644[clinic start generated code]*/
645
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200646static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200647_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800648/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200649{
650 int bzerror;
651
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200652#ifdef WITH_THREAD
653 self->lock = PyThread_allocate_lock();
654 if (self->lock == NULL) {
655 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
656 return -1;
657 }
658#endif
659
Antoine Pitroue71258a2015-02-26 13:08:07 +0100660 self->needs_input = 1;
661 self->bzs_avail_in_real = 0;
662 self->input_buffer = NULL;
663 self->input_buffer_size = 0;
664 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200665 if (self->unused_data == NULL)
666 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200667
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200668 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
669 if (catch_bz2_error(bzerror))
670 goto error;
671
672 return 0;
673
674error:
675 Py_CLEAR(self->unused_data);
676#ifdef WITH_THREAD
677 PyThread_free_lock(self->lock);
678 self->lock = NULL;
679#endif
680 return -1;
681}
682
683static void
684BZ2Decompressor_dealloc(BZ2Decompressor *self)
685{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100686 if(self->input_buffer != NULL)
687 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200688 BZ2_bzDecompressEnd(&self->bzs);
689 Py_CLEAR(self->unused_data);
690#ifdef WITH_THREAD
691 if (self->lock != NULL)
692 PyThread_free_lock(self->lock);
693#endif
694 Py_TYPE(self)->tp_free((PyObject *)self);
695}
696
697static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200698 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100699 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200700 {NULL}
701};
702
703PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
704"True if the end-of-stream marker has been reached.");
705
706PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
707"Data found after the end of the compressed stream.");
708
Antoine Pitroue71258a2015-02-26 13:08:07 +0100709PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
710"True if more input is needed before more decompressed data can be produced.");
711
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200712static PyMemberDef BZ2Decompressor_members[] = {
713 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
714 READONLY, BZ2Decompressor_eof__doc__},
715 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
716 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100717 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
718 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200719 {NULL}
720};
721
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200722static PyTypeObject BZ2Decompressor_Type = {
723 PyVarObject_HEAD_INIT(NULL, 0)
724 "_bz2.BZ2Decompressor", /* tp_name */
725 sizeof(BZ2Decompressor), /* tp_basicsize */
726 0, /* tp_itemsize */
727 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
728 0, /* tp_print */
729 0, /* tp_getattr */
730 0, /* tp_setattr */
731 0, /* tp_reserved */
732 0, /* tp_repr */
733 0, /* tp_as_number */
734 0, /* tp_as_sequence */
735 0, /* tp_as_mapping */
736 0, /* tp_hash */
737 0, /* tp_call */
738 0, /* tp_str */
739 0, /* tp_getattro */
740 0, /* tp_setattro */
741 0, /* tp_as_buffer */
742 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200743 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200744 0, /* tp_traverse */
745 0, /* tp_clear */
746 0, /* tp_richcompare */
747 0, /* tp_weaklistoffset */
748 0, /* tp_iter */
749 0, /* tp_iternext */
750 BZ2Decompressor_methods, /* tp_methods */
751 BZ2Decompressor_members, /* tp_members */
752 0, /* tp_getset */
753 0, /* tp_base */
754 0, /* tp_dict */
755 0, /* tp_descr_get */
756 0, /* tp_descr_set */
757 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200758 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200759 0, /* tp_alloc */
760 PyType_GenericNew, /* tp_new */
761};
762
763
764/* Module initialization. */
765
766static struct PyModuleDef _bz2module = {
767 PyModuleDef_HEAD_INIT,
768 "_bz2",
769 NULL,
770 -1,
771 NULL,
772 NULL,
773 NULL,
774 NULL,
775 NULL
776};
777
778PyMODINIT_FUNC
779PyInit__bz2(void)
780{
781 PyObject *m;
782
783 if (PyType_Ready(&BZ2Compressor_Type) < 0)
784 return NULL;
785 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
786 return NULL;
787
788 m = PyModule_Create(&_bz2module);
789 if (m == NULL)
790 return NULL;
791
792 Py_INCREF(&BZ2Compressor_Type);
793 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
794
795 Py_INCREF(&BZ2Decompressor_Type);
796 PyModule_AddObject(m, "BZ2Decompressor",
797 (PyObject *)&BZ2Decompressor_Type);
798
799 return m;
800}