blob: 425845fd4a3cc9a725374ec865f7d933c083fc05 [file] [log] [blame]
Antoine Pitrou37dc5f82011-04-03 17:05:46 +02001/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* Compatibility shim.  When <bzlib.h> does not define BZ_CONFIG_ERROR, the
   BZ2_-prefixed entry points used throughout this file are mapped onto the
   unprefixed names (presumably an older libbzip2 API -- confirm against the
   bzip2 release notes). */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif /* !defined(BZ_CONFIG_ERROR) */
24
25
/* Per-object locking helpers.

   ACQUIRE_LOCK(obj) first attempts to take obj->lock with waitflag 0 while
   the GIL is still held.  Only when that attempt reports failure does it
   release the GIL (Py_BEGIN_ALLOW_THREADS) and retry with waitflag 1.
   RELEASE_LOCK(obj) releases obj->lock.

   In builds without WITH_THREAD both macros expand to nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif
38
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 int flushed;
44#ifdef WITH_THREAD
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
49typedef struct {
50 PyObject_HEAD
51 bz_stream bzs;
52 char eof; /* T_BOOL expects a char */
53 PyObject *unused_data;
Antoine Pitroue71258a2015-02-26 13:08:07 +010054 char needs_input;
55 char *input_buffer;
56 size_t input_buffer_size;
57
58 /* bzs->avail_in is only 32 bit, so we store the true length
59 separately. Conversion and looping is encapsulated in
60 decompress_buf() */
61 size_t bzs_avail_in_real;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020062#ifdef WITH_THREAD
63 PyThread_type_lock lock;
64#endif
65} BZ2Decompressor;
66
Larry Hastingsf256c222014-01-25 21:30:37 -080067static PyTypeObject BZ2Compressor_Type;
68static PyTypeObject BZ2Decompressor_Type;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020069
70/* Helper functions. */
71
72static int
73catch_bz2_error(int bzerror)
74{
75 switch(bzerror) {
76 case BZ_OK:
77 case BZ_RUN_OK:
78 case BZ_FLUSH_OK:
79 case BZ_FINISH_OK:
80 case BZ_STREAM_END:
81 return 0;
82
83#ifdef BZ_CONFIG_ERROR
84 case BZ_CONFIG_ERROR:
85 PyErr_SetString(PyExc_SystemError,
86 "libbzip2 was not compiled correctly");
87 return 1;
88#endif
89 case BZ_PARAM_ERROR:
90 PyErr_SetString(PyExc_ValueError,
91 "Internal error - "
92 "invalid parameters passed to libbzip2");
93 return 1;
94 case BZ_MEM_ERROR:
95 PyErr_NoMemory();
96 return 1;
97 case BZ_DATA_ERROR:
98 case BZ_DATA_ERROR_MAGIC:
99 PyErr_SetString(PyExc_IOError, "Invalid data stream");
100 return 1;
101 case BZ_IO_ERROR:
102 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
103 return 1;
104 case BZ_UNEXPECTED_EOF:
105 PyErr_SetString(PyExc_EOFError,
106 "Compressed file ended before the logical "
107 "end-of-stream was detected");
108 return 1;
109 case BZ_SEQUENCE_ERROR:
110 PyErr_SetString(PyExc_RuntimeError,
111 "Internal error - "
112 "Invalid sequence of commands sent to libbzip2");
113 return 1;
114 default:
115 PyErr_Format(PyExc_IOError,
116 "Unrecognized error from libbzip2: %d", bzerror);
117 return 1;
118 }
119}
120
/* Initial size of freshly allocated output buffers: BUFSIZ, but never
   smaller than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
126
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127static int
Antoine Pitroue71258a2015-02-26 13:08:07 +0100128grow_buffer(PyObject **buf, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200129{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200130 /* Expand the buffer by an amount proportional to the current size,
131 giving us amortized linear-time behavior. Use a less-than-double
132 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200133 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200134 size_t new_size = size + (size >> 3) + 6;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100135
136 if (max_length > 0 && new_size > (size_t) max_length)
137 new_size = (size_t) max_length;
138
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200139 if (new_size > size) {
140 return _PyBytes_Resize(buf, new_size);
141 } else { /* overflow */
142 PyErr_SetString(PyExc_OverflowError,
143 "Unable to allocate buffer - output too large");
144 return -1;
145 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146}
147
148
149/* BZ2Compressor class. */
150
151static PyObject *
152compress(BZ2Compressor *c, char *data, size_t len, int action)
153{
154 size_t data_size = 0;
155 PyObject *result;
156
Antoine Pitroue71258a2015-02-26 13:08:07 +0100157 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200158 if (result == NULL)
159 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100160
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200161 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100162 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200163 c->bzs.next_out = PyBytes_AS_STRING(result);
Antoine Pitroue71258a2015-02-26 13:08:07 +0100164 c->bzs.avail_out = INITIAL_BUFFER_SIZE;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 for (;;) {
166 char *this_out;
167 int bzerror;
168
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100169 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
170 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200171 if (c->bzs.avail_in == 0 && len > 0) {
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200172 c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200173 len -= c->bzs.avail_in;
174 }
175
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100176 /* In regular compression mode, stop when input data is exhausted. */
177 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200178 break;
179
180 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200181 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
182 if (buffer_left == 0) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100183 if (grow_buffer(&result, -1) < 0)
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200184 goto error;
185 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
186 buffer_left = PyBytes_GET_SIZE(result) - data_size;
187 }
Victor Stinnerfbf50d42013-06-04 23:18:48 +0200188 c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200189 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100190
191 Py_BEGIN_ALLOW_THREADS
192 this_out = c->bzs.next_out;
193 bzerror = BZ2_bzCompress(&c->bzs, action);
194 data_size += c->bzs.next_out - this_out;
195 Py_END_ALLOW_THREADS
196 if (catch_bz2_error(bzerror))
197 goto error;
198
199 /* In flushing mode, stop when all buffered data has been flushed. */
200 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
201 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200202 }
Victor Stinner706768c2014-08-16 01:03:39 +0200203 if (data_size != (size_t)PyBytes_GET_SIZE(result))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200204 if (_PyBytes_Resize(&result, data_size) < 0)
205 goto error;
206 return result;
207
208error:
209 Py_XDECREF(result);
210 return NULL;
211}
212
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200213/*[clinic input]
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200214module _bz2
Larry Hastingsc2047262014-01-25 20:43:29 -0800215class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
216class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200217[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300218/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200219
Larry Hastingsf256c222014-01-25 21:30:37 -0800220#include "clinic/_bz2module.c.h"
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200221
222/*[clinic input]
223_bz2.BZ2Compressor.compress
224
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200225 data: Py_buffer
226 /
227
228Provide data to the compressor object.
229
230Returns a chunk of compressed data if possible, or b'' otherwise.
231
232When you have finished providing data to the compressor, call the
233flush() method to finish the compression process.
234[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200235
236static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200237_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
Larry Hastings581ee362014-01-28 05:00:08 -0800238/*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200239{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200240 PyObject *result = NULL;
241
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200242 ACQUIRE_LOCK(self);
243 if (self->flushed)
244 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
245 else
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200246 result = compress(self, data->buf, data->len, BZ_RUN);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200247 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200248 return result;
249}
250
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200251/*[clinic input]
252_bz2.BZ2Compressor.flush
253
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200254Finish the compression process.
255
256Returns the compressed data left in internal buffers.
257
258The compressor object may not be used after this method is called.
259[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200260
261static PyObject *
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200262_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800263/*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200264{
265 PyObject *result = NULL;
266
267 ACQUIRE_LOCK(self);
268 if (self->flushed)
269 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
270 else {
271 self->flushed = 1;
272 result = compress(self, NULL, 0, BZ_FINISH);
273 }
274 RELEASE_LOCK(self);
275 return result;
276}
277
Nadeem Vawda37970652013-10-28 21:35:23 +0100278static PyObject *
279BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs)
280{
281 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
282 Py_TYPE(self)->tp_name);
283 return NULL;
284}
285
Victor Stinner5064a522013-07-07 16:50:27 +0200286static void*
287BZ2_Malloc(void* ctx, int items, int size)
288{
289 if (items < 0 || size < 0)
290 return NULL;
291 if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size)
292 return NULL;
293 /* PyMem_Malloc() cannot be used: compress() and decompress()
294 release the GIL */
295 return PyMem_RawMalloc(items * size);
296}
297
/* Deallocation callback installed as bz_stream.bzfree; counterpart of
   BZ2_Malloc().  ctx is not used. */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;
    PyMem_RawFree(ptr);
}
303
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200304/*[clinic input]
305_bz2.BZ2Compressor.__init__
306
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200307 compresslevel: int = 9
308 Compression level, as a number between 1 and 9.
309 /
310
311Create a compressor object for compressing data incrementally.
312
313For one-shot compression, use the compress() function instead.
314[clinic start generated code]*/
315
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200316static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200317_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Larry Hastings581ee362014-01-28 05:00:08 -0800318/*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200319{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200320 int bzerror;
321
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200322 if (!(1 <= compresslevel && compresslevel <= 9)) {
323 PyErr_SetString(PyExc_ValueError,
324 "compresslevel must be between 1 and 9");
325 return -1;
326 }
327
328#ifdef WITH_THREAD
329 self->lock = PyThread_allocate_lock();
330 if (self->lock == NULL) {
331 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
332 return -1;
333 }
334#endif
335
Victor Stinner5064a522013-07-07 16:50:27 +0200336 self->bzs.opaque = NULL;
337 self->bzs.bzalloc = BZ2_Malloc;
338 self->bzs.bzfree = BZ2_Free;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200339 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
340 if (catch_bz2_error(bzerror))
341 goto error;
342
343 return 0;
344
345error:
346#ifdef WITH_THREAD
347 PyThread_free_lock(self->lock);
348 self->lock = NULL;
349#endif
350 return -1;
351}
352
353static void
354BZ2Compressor_dealloc(BZ2Compressor *self)
355{
356 BZ2_bzCompressEnd(&self->bzs);
357#ifdef WITH_THREAD
358 if (self->lock != NULL)
359 PyThread_free_lock(self->lock);
360#endif
361 Py_TYPE(self)->tp_free((PyObject *)self);
362}
363
364static PyMethodDef BZ2Compressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200365 _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
366 _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100367 {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200368 {NULL}
369};
370
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200371
372static PyTypeObject BZ2Compressor_Type = {
373 PyVarObject_HEAD_INIT(NULL, 0)
374 "_bz2.BZ2Compressor", /* tp_name */
375 sizeof(BZ2Compressor), /* tp_basicsize */
376 0, /* tp_itemsize */
377 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
378 0, /* tp_print */
379 0, /* tp_getattr */
380 0, /* tp_setattr */
381 0, /* tp_reserved */
382 0, /* tp_repr */
383 0, /* tp_as_number */
384 0, /* tp_as_sequence */
385 0, /* tp_as_mapping */
386 0, /* tp_hash */
387 0, /* tp_call */
388 0, /* tp_str */
389 0, /* tp_getattro */
390 0, /* tp_setattro */
391 0, /* tp_as_buffer */
392 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200393 _bz2_BZ2Compressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394 0, /* tp_traverse */
395 0, /* tp_clear */
396 0, /* tp_richcompare */
397 0, /* tp_weaklistoffset */
398 0, /* tp_iter */
399 0, /* tp_iternext */
400 BZ2Compressor_methods, /* tp_methods */
401 0, /* tp_members */
402 0, /* tp_getset */
403 0, /* tp_base */
404 0, /* tp_dict */
405 0, /* tp_descr_get */
406 0, /* tp_descr_set */
407 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200408 _bz2_BZ2Compressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200409 0, /* tp_alloc */
410 PyType_GenericNew, /* tp_new */
411};
412
413
414/* BZ2Decompressor class. */
415
Antoine Pitroue71258a2015-02-26 13:08:07 +0100416/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in. The output
417 buffer is allocated dynamically and returned. At most max_length bytes are
418 returned, so some of the input may not be consumed. d->bzs.next_in and
419 d->bzs_avail_in_real are updated to reflect the consumed input. */
420static PyObject*
421decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200422{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100423 /* data_size is strictly positive, but because we repeatedly have to
424 compare against max_length and PyBytes_GET_SIZE we declare it as
425 signed */
426 Py_ssize_t data_size = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200427 PyObject *result;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100428 bz_stream *bzs = &d->bzs;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200429
Antoine Pitroue71258a2015-02-26 13:08:07 +0100430 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
431 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
432 else
433 result = PyBytes_FromStringAndSize(NULL, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200434 if (result == NULL)
Antoine Pitroue71258a2015-02-26 13:08:07 +0100435 return NULL;
436
437 bzs->next_out = PyBytes_AS_STRING(result);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200438 for (;;) {
Antoine Pitroue71258a2015-02-26 13:08:07 +0100439 int bzret;
440 size_t avail;
441
442 /* On a 64-bit system, buffer length might not fit in avail_out, so we
443 do decompression in chunks of no more than UINT_MAX bytes
444 each. Note that the expression for `avail` is guaranteed to be
445 positive, so the cast is safe. */
446 avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
447 bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
448 bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
449 d->bzs_avail_in_real -= bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200450
451 Py_BEGIN_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100452 bzret = BZ2_bzDecompress(bzs);
453 data_size = bzs->next_out - PyBytes_AS_STRING(result);
454 d->bzs_avail_in_real += bzs->avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200455 Py_END_ALLOW_THREADS
Antoine Pitroue71258a2015-02-26 13:08:07 +0100456 if (catch_bz2_error(bzret))
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200457 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100458 if (bzret == BZ_STREAM_END) {
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200459 d->eof = 1;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200460 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100461 } else if (d->bzs_avail_in_real == 0) {
462 break;
463 } else if (bzs->avail_out == 0) {
464 if (data_size == max_length)
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200465 break;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100466 if (data_size == PyBytes_GET_SIZE(result) &&
467 grow_buffer(&result, max_length) == -1)
468 goto error;
469 bzs->next_out = PyBytes_AS_STRING(result) + data_size;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200470 }
471 }
Antoine Pitroue71258a2015-02-26 13:08:07 +0100472 if (data_size != PyBytes_GET_SIZE(result))
473 if (_PyBytes_Resize(&result, data_size) == -1)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200474 goto error;
Antoine Pitroue71258a2015-02-26 13:08:07 +0100475
476 return result;
477
478error:
479 Py_XDECREF(result);
480 return NULL;
481}
482
483
484static PyObject *
485decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
486{
487 char input_buffer_in_use;
488 PyObject *result;
489 bz_stream *bzs = &d->bzs;
490
491 /* Prepend unconsumed input if necessary */
492 if (bzs->next_in != NULL) {
493 size_t avail_now, avail_total;
494
495 /* Number of bytes we can append to input buffer */
496 avail_now = (d->input_buffer + d->input_buffer_size)
497 - (bzs->next_in + d->bzs_avail_in_real);
498
499 /* Number of bytes we can append if we move existing
500 contents to beginning of buffer (overwriting
501 consumed input) */
502 avail_total = d->input_buffer_size - d->bzs_avail_in_real;
503
504 if (avail_total < len) {
505 size_t offset = bzs->next_in - d->input_buffer;
506 char *tmp;
507 size_t new_size = d->input_buffer_size + len - avail_now;
508
509 /* Assign to temporary variable first, so we don't
510 lose address of allocated buffer if realloc fails */
511 tmp = PyMem_Realloc(d->input_buffer, new_size);
512 if (tmp == NULL) {
513 PyErr_SetNone(PyExc_MemoryError);
514 return NULL;
515 }
516 d->input_buffer = tmp;
517 d->input_buffer_size = new_size;
518
519 bzs->next_in = d->input_buffer + offset;
520 }
521 else if (avail_now < len) {
522 memmove(d->input_buffer, bzs->next_in,
523 d->bzs_avail_in_real);
524 bzs->next_in = d->input_buffer;
525 }
526 memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
527 d->bzs_avail_in_real += len;
528 input_buffer_in_use = 1;
529 }
530 else {
531 bzs->next_in = data;
532 d->bzs_avail_in_real = len;
533 input_buffer_in_use = 0;
534 }
535
536 result = decompress_buf(d, max_length);
537 if(result == NULL)
538 return NULL;
539
540 if (d->eof) {
541 d->needs_input = 0;
542 if (d->bzs_avail_in_real > 0) {
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200543 Py_SETREF(d->unused_data,
544 PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
Antoine Pitroue71258a2015-02-26 13:08:07 +0100545 if (d->unused_data == NULL)
546 goto error;
547 }
548 }
549 else if (d->bzs_avail_in_real == 0) {
550 bzs->next_in = NULL;
551 d->needs_input = 1;
552 }
553 else {
554 d->needs_input = 0;
555
556 /* If we did not use the input buffer, we now have
557 to copy the tail from the caller's buffer into the
558 input buffer */
559 if (!input_buffer_in_use) {
560
561 /* Discard buffer if it's too small
562 (resizing it may needlessly copy the current contents) */
563 if (d->input_buffer != NULL &&
564 d->input_buffer_size < d->bzs_avail_in_real) {
565 PyMem_Free(d->input_buffer);
566 d->input_buffer = NULL;
567 }
568
569 /* Allocate if necessary */
570 if (d->input_buffer == NULL) {
571 d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
572 if (d->input_buffer == NULL) {
573 PyErr_SetNone(PyExc_MemoryError);
574 goto error;
575 }
576 d->input_buffer_size = d->bzs_avail_in_real;
577 }
578
579 /* Copy tail */
580 memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
581 bzs->next_in = d->input_buffer;
582 }
583 }
584
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200585 return result;
586
587error:
588 Py_XDECREF(result);
589 return NULL;
590}
591
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200592/*[clinic input]
593_bz2.BZ2Decompressor.decompress
594
Antoine Pitroue71258a2015-02-26 13:08:07 +0100595 self: self(type="BZ2Decompressor *")
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200596 data: Py_buffer
Antoine Pitroue71258a2015-02-26 13:08:07 +0100597 max_length: Py_ssize_t=-1
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200598
Antoine Pitroue71258a2015-02-26 13:08:07 +0100599Decompress *data*, returning uncompressed data as bytes.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200600
Antoine Pitroue71258a2015-02-26 13:08:07 +0100601If *max_length* is nonnegative, returns at most *max_length* bytes of
602decompressed data. If this limit is reached and further output can be
603produced, *self.needs_input* will be set to ``False``. In this case, the next
604call to *decompress()* may provide *data* as b'' to obtain more of the output.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200605
Antoine Pitroue71258a2015-02-26 13:08:07 +0100606If all of the input data was decompressed and returned (either because this
607was less than *max_length* bytes, or because *max_length* was negative),
608*self.needs_input* will be set to True.
609
610Attempting to decompress data after the end of stream is reached raises an
611EOFError. Any data found after the end of the stream is ignored and saved in
612the unused_data attribute.
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200613[clinic start generated code]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200614
615static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400616_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
617 Py_ssize_t max_length)
618/*[clinic end generated code: output=23e41045deb240a3 input=9558b424c8b00516]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200619{
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200620 PyObject *result = NULL;
621
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200622 ACQUIRE_LOCK(self);
623 if (self->eof)
624 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
625 else
Antoine Pitroue71258a2015-02-26 13:08:07 +0100626 result = decompress(self, data->buf, data->len, max_length);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200627 RELEASE_LOCK(self);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200628 return result;
629}
630
Nadeem Vawda37970652013-10-28 21:35:23 +0100631static PyObject *
632BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs)
633{
634 PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object",
635 Py_TYPE(self)->tp_name);
636 return NULL;
637}
638
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200639/*[clinic input]
640_bz2.BZ2Decompressor.__init__
641
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200642Create a decompressor object for decompressing data incrementally.
643
644For one-shot decompression, use the decompress() function instead.
645[clinic start generated code]*/
646
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200647static int
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200648_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
Larry Hastings581ee362014-01-28 05:00:08 -0800649/*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200650{
651 int bzerror;
652
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200653#ifdef WITH_THREAD
654 self->lock = PyThread_allocate_lock();
655 if (self->lock == NULL) {
656 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
657 return -1;
658 }
659#endif
660
Antoine Pitroue71258a2015-02-26 13:08:07 +0100661 self->needs_input = 1;
662 self->bzs_avail_in_real = 0;
663 self->input_buffer = NULL;
664 self->input_buffer_size = 0;
665 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200666 if (self->unused_data == NULL)
667 goto error;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200668
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200669 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
670 if (catch_bz2_error(bzerror))
671 goto error;
672
673 return 0;
674
675error:
676 Py_CLEAR(self->unused_data);
677#ifdef WITH_THREAD
678 PyThread_free_lock(self->lock);
679 self->lock = NULL;
680#endif
681 return -1;
682}
683
684static void
685BZ2Decompressor_dealloc(BZ2Decompressor *self)
686{
Antoine Pitroue71258a2015-02-26 13:08:07 +0100687 if(self->input_buffer != NULL)
688 PyMem_Free(self->input_buffer);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200689 BZ2_bzDecompressEnd(&self->bzs);
690 Py_CLEAR(self->unused_data);
691#ifdef WITH_THREAD
692 if (self->lock != NULL)
693 PyThread_free_lock(self->lock);
694#endif
695 Py_TYPE(self)->tp_free((PyObject *)self);
696}
697
698static PyMethodDef BZ2Decompressor_methods[] = {
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200699 _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
Nadeem Vawda37970652013-10-28 21:35:23 +0100700 {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200701 {NULL}
702};
703
704PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
705"True if the end-of-stream marker has been reached.");
706
707PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
708"Data found after the end of the compressed stream.");
709
Antoine Pitroue71258a2015-02-26 13:08:07 +0100710PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
711"True if more input is needed before more decompressed data can be produced.");
712
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200713static PyMemberDef BZ2Decompressor_members[] = {
714 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
715 READONLY, BZ2Decompressor_eof__doc__},
716 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
717 READONLY, BZ2Decompressor_unused_data__doc__},
Antoine Pitroue71258a2015-02-26 13:08:07 +0100718 {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
719 BZ2Decompressor_needs_input_doc},
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200720 {NULL}
721};
722
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200723static PyTypeObject BZ2Decompressor_Type = {
724 PyVarObject_HEAD_INIT(NULL, 0)
725 "_bz2.BZ2Decompressor", /* tp_name */
726 sizeof(BZ2Decompressor), /* tp_basicsize */
727 0, /* tp_itemsize */
728 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
729 0, /* tp_print */
730 0, /* tp_getattr */
731 0, /* tp_setattr */
732 0, /* tp_reserved */
733 0, /* tp_repr */
734 0, /* tp_as_number */
735 0, /* tp_as_sequence */
736 0, /* tp_as_mapping */
737 0, /* tp_hash */
738 0, /* tp_call */
739 0, /* tp_str */
740 0, /* tp_getattro */
741 0, /* tp_setattro */
742 0, /* tp_as_buffer */
743 Py_TPFLAGS_DEFAULT, /* tp_flags */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200744 _bz2_BZ2Decompressor___init____doc__, /* tp_doc */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200745 0, /* tp_traverse */
746 0, /* tp_clear */
747 0, /* tp_richcompare */
748 0, /* tp_weaklistoffset */
749 0, /* tp_iter */
750 0, /* tp_iternext */
751 BZ2Decompressor_methods, /* tp_methods */
752 BZ2Decompressor_members, /* tp_members */
753 0, /* tp_getset */
754 0, /* tp_base */
755 0, /* tp_dict */
756 0, /* tp_descr_get */
757 0, /* tp_descr_set */
758 0, /* tp_dictoffset */
Serhiy Storchaka1bc4bb22014-01-25 12:07:57 +0200759 _bz2_BZ2Decompressor___init__, /* tp_init */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200760 0, /* tp_alloc */
761 PyType_GenericNew, /* tp_new */
762};
763
764
765/* Module initialization. */
766
767static struct PyModuleDef _bz2module = {
768 PyModuleDef_HEAD_INIT,
769 "_bz2",
770 NULL,
771 -1,
772 NULL,
773 NULL,
774 NULL,
775 NULL,
776 NULL
777};
778
779PyMODINIT_FUNC
780PyInit__bz2(void)
781{
782 PyObject *m;
783
784 if (PyType_Ready(&BZ2Compressor_Type) < 0)
785 return NULL;
786 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
787 return NULL;
788
789 m = PyModule_Create(&_bz2module);
790 if (m == NULL)
791 return NULL;
792
793 Py_INCREF(&BZ2Compressor_Type);
794 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
795
796 Py_INCREF(&BZ2Decompressor_Type);
797 PyModule_AddObject(m, "BZ2Decompressor",
798 (PyObject *)&BZ2Decompressor_Type);
799
800 return m;
801}