/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   names used throughout this file onto the unprefixed entry points
   (presumably the naming used by an older libbzip2 -- confirm). */
#if !defined(BZ_CONFIG_ERROR)
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
24
25
#ifdef WITH_THREAD
/* Take obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking wait performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
/* Give obj->lock back. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Without WITH_THREAD both operations expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif  /* WITH_THREAD */
38
/* Smaller of two values.  Each argument may be evaluated more than once,
   so do not pass expressions with side effects. */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
40
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020041
42typedef struct {
43 PyObject_HEAD
44 bz_stream bzs;
45 int flushed;
46#ifdef WITH_THREAD
47 PyThread_type_lock lock;
48#endif
49} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} BZ2Decompressor;
60
61
62/* Helper functions. */
63
64static int
65catch_bz2_error(int bzerror)
66{
67 switch(bzerror) {
68 case BZ_OK:
69 case BZ_RUN_OK:
70 case BZ_FLUSH_OK:
71 case BZ_FINISH_OK:
72 case BZ_STREAM_END:
73 return 0;
74
75#ifdef BZ_CONFIG_ERROR
76 case BZ_CONFIG_ERROR:
77 PyErr_SetString(PyExc_SystemError,
78 "libbzip2 was not compiled correctly");
79 return 1;
80#endif
81 case BZ_PARAM_ERROR:
82 PyErr_SetString(PyExc_ValueError,
83 "Internal error - "
84 "invalid parameters passed to libbzip2");
85 return 1;
86 case BZ_MEM_ERROR:
87 PyErr_NoMemory();
88 return 1;
89 case BZ_DATA_ERROR:
90 case BZ_DATA_ERROR_MAGIC:
91 PyErr_SetString(PyExc_IOError, "Invalid data stream");
92 return 1;
93 case BZ_IO_ERROR:
94 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
95 return 1;
96 case BZ_UNEXPECTED_EOF:
97 PyErr_SetString(PyExc_EOFError,
98 "Compressed file ended before the logical "
99 "end-of-stream was detected");
100 return 1;
101 case BZ_SEQUENCE_ERROR:
102 PyErr_SetString(PyExc_RuntimeError,
103 "Internal error - "
104 "Invalid sequence of commands sent to libbzip2");
105 return 1;
106 default:
107 PyErr_Format(PyExc_IOError,
108 "Unrecognized error from libbzip2: %d", bzerror);
109 return 1;
110 }
111}
112
/* Initial size of an output buffer: BUFSIZ, but never below 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
118
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200119static int
120grow_buffer(PyObject **buf)
121{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200122 /* Expand the buffer by an amount proportional to the current size,
123 giving us amortized linear-time behavior. Use a less-than-double
124 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200125 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200126 size_t new_size = size + (size >> 3) + 6;
127 if (new_size > size) {
128 return _PyBytes_Resize(buf, new_size);
129 } else { /* overflow */
130 PyErr_SetString(PyExc_OverflowError,
131 "Unable to allocate buffer - output too large");
132 return -1;
133 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200134}
135
136
137/* BZ2Compressor class. */
138
139static PyObject *
140compress(BZ2Compressor *c, char *data, size_t len, int action)
141{
142 size_t data_size = 0;
143 PyObject *result;
144
145 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
146 if (result == NULL)
147 return NULL;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100148
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200149 c->bzs.next_in = data;
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100150 c->bzs.avail_in = 0;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200151 c->bzs.next_out = PyBytes_AS_STRING(result);
152 c->bzs.avail_out = PyBytes_GET_SIZE(result);
153 for (;;) {
154 char *this_out;
155 int bzerror;
156
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100157 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
158 Do compression in chunks of no more than UINT_MAX bytes each. */
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200159 if (c->bzs.avail_in == 0 && len > 0) {
160 c->bzs.avail_in = MIN(len, UINT_MAX);
161 len -= c->bzs.avail_in;
162 }
163
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100164 /* In regular compression mode, stop when input data is exhausted. */
165 if (action == BZ_RUN && c->bzs.avail_in == 0)
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200166 break;
167
168 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200169 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
170 if (buffer_left == 0) {
171 if (grow_buffer(&result) < 0)
172 goto error;
173 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
174 buffer_left = PyBytes_GET_SIZE(result) - data_size;
175 }
176 c->bzs.avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200177 }
Nadeem Vawda57cb81d2013-01-02 23:05:56 +0100178
179 Py_BEGIN_ALLOW_THREADS
180 this_out = c->bzs.next_out;
181 bzerror = BZ2_bzCompress(&c->bzs, action);
182 data_size += c->bzs.next_out - this_out;
183 Py_END_ALLOW_THREADS
184 if (catch_bz2_error(bzerror))
185 goto error;
186
187 /* In flushing mode, stop when all buffered data has been flushed. */
188 if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
189 break;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200190 }
191 if (data_size != PyBytes_GET_SIZE(result))
192 if (_PyBytes_Resize(&result, data_size) < 0)
193 goto error;
194 return result;
195
196error:
197 Py_XDECREF(result);
198 return NULL;
199}
200
201PyDoc_STRVAR(BZ2Compressor_compress__doc__,
202"compress(data) -> bytes\n"
203"\n"
204"Provide data to the compressor object. Returns a chunk of\n"
205"compressed data if possible, or b'' otherwise.\n"
206"\n"
207"When you have finished providing data to the compressor, call the\n"
208"flush() method to finish the compression process.\n");
209
210static PyObject *
211BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
212{
213 Py_buffer buffer;
214 PyObject *result = NULL;
215
216 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
217 return NULL;
218
219 ACQUIRE_LOCK(self);
220 if (self->flushed)
221 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
222 else
223 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
224 RELEASE_LOCK(self);
225 PyBuffer_Release(&buffer);
226 return result;
227}
228
229PyDoc_STRVAR(BZ2Compressor_flush__doc__,
230"flush() -> bytes\n"
231"\n"
232"Finish the compression process. Returns the compressed data left\n"
233"in internal buffers.\n"
234"\n"
235"The compressor object may not be used after this method is called.\n");
236
237static PyObject *
238BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
239{
240 PyObject *result = NULL;
241
242 ACQUIRE_LOCK(self);
243 if (self->flushed)
244 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
245 else {
246 self->flushed = 1;
247 result = compress(self, NULL, 0, BZ_FINISH);
248 }
249 RELEASE_LOCK(self);
250 return result;
251}
252
253static int
254BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
255{
256 int compresslevel = 9;
257 int bzerror;
258
259 if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
260 return -1;
261 if (!(1 <= compresslevel && compresslevel <= 9)) {
262 PyErr_SetString(PyExc_ValueError,
263 "compresslevel must be between 1 and 9");
264 return -1;
265 }
266
267#ifdef WITH_THREAD
268 self->lock = PyThread_allocate_lock();
269 if (self->lock == NULL) {
270 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
271 return -1;
272 }
273#endif
274
275 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
276 if (catch_bz2_error(bzerror))
277 goto error;
278
279 return 0;
280
281error:
282#ifdef WITH_THREAD
283 PyThread_free_lock(self->lock);
284 self->lock = NULL;
285#endif
286 return -1;
287}
288
289static void
290BZ2Compressor_dealloc(BZ2Compressor *self)
291{
292 BZ2_bzCompressEnd(&self->bzs);
293#ifdef WITH_THREAD
294 if (self->lock != NULL)
295 PyThread_free_lock(self->lock);
296#endif
297 Py_TYPE(self)->tp_free((PyObject *)self);
298}
299
300static PyMethodDef BZ2Compressor_methods[] = {
301 {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
302 BZ2Compressor_compress__doc__},
303 {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
304 BZ2Compressor_flush__doc__},
305 {NULL}
306};
307
308PyDoc_STRVAR(BZ2Compressor__doc__,
309"BZ2Compressor(compresslevel=9)\n"
310"\n"
311"Create a compressor object for compressing data incrementally.\n"
312"\n"
313"compresslevel, if given, must be a number between 1 and 9.\n"
314"\n"
315"For one-shot compression, use the compress() function instead.\n");
316
317static PyTypeObject BZ2Compressor_Type = {
318 PyVarObject_HEAD_INIT(NULL, 0)
319 "_bz2.BZ2Compressor", /* tp_name */
320 sizeof(BZ2Compressor), /* tp_basicsize */
321 0, /* tp_itemsize */
322 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
323 0, /* tp_print */
324 0, /* tp_getattr */
325 0, /* tp_setattr */
326 0, /* tp_reserved */
327 0, /* tp_repr */
328 0, /* tp_as_number */
329 0, /* tp_as_sequence */
330 0, /* tp_as_mapping */
331 0, /* tp_hash */
332 0, /* tp_call */
333 0, /* tp_str */
334 0, /* tp_getattro */
335 0, /* tp_setattro */
336 0, /* tp_as_buffer */
337 Py_TPFLAGS_DEFAULT, /* tp_flags */
338 BZ2Compressor__doc__, /* tp_doc */
339 0, /* tp_traverse */
340 0, /* tp_clear */
341 0, /* tp_richcompare */
342 0, /* tp_weaklistoffset */
343 0, /* tp_iter */
344 0, /* tp_iternext */
345 BZ2Compressor_methods, /* tp_methods */
346 0, /* tp_members */
347 0, /* tp_getset */
348 0, /* tp_base */
349 0, /* tp_dict */
350 0, /* tp_descr_get */
351 0, /* tp_descr_set */
352 0, /* tp_dictoffset */
353 (initproc)BZ2Compressor_init, /* tp_init */
354 0, /* tp_alloc */
355 PyType_GenericNew, /* tp_new */
356};
357
358
359/* BZ2Decompressor class. */
360
361static PyObject *
362decompress(BZ2Decompressor *d, char *data, size_t len)
363{
364 size_t data_size = 0;
365 PyObject *result;
366
367 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
368 if (result == NULL)
369 return result;
370 d->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200371 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
372 Do decompression in chunks of no more than UINT_MAX bytes each. */
373 d->bzs.avail_in = MIN(len, UINT_MAX);
374 len -= d->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200375 d->bzs.next_out = PyBytes_AS_STRING(result);
376 d->bzs.avail_out = PyBytes_GET_SIZE(result);
377 for (;;) {
378 char *this_out;
379 int bzerror;
380
381 Py_BEGIN_ALLOW_THREADS
382 this_out = d->bzs.next_out;
383 bzerror = BZ2_bzDecompress(&d->bzs);
384 data_size += d->bzs.next_out - this_out;
385 Py_END_ALLOW_THREADS
386 if (catch_bz2_error(bzerror))
387 goto error;
388 if (bzerror == BZ_STREAM_END) {
389 d->eof = 1;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200390 len += d->bzs.avail_in;
391 if (len > 0) { /* Save leftover input to unused_data */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200392 Py_CLEAR(d->unused_data);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200393 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200394 if (d->unused_data == NULL)
395 goto error;
396 }
397 break;
398 }
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200399 if (d->bzs.avail_in == 0) {
400 if (len == 0)
401 break;
402 d->bzs.avail_in = MIN(len, UINT_MAX);
403 len -= d->bzs.avail_in;
404 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200405 if (d->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200406 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
407 if (buffer_left == 0) {
408 if (grow_buffer(&result) < 0)
409 goto error;
410 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
411 buffer_left = PyBytes_GET_SIZE(result) - data_size;
412 }
413 d->bzs.avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200414 }
415 }
416 if (data_size != PyBytes_GET_SIZE(result))
417 if (_PyBytes_Resize(&result, data_size) < 0)
418 goto error;
419 return result;
420
421error:
422 Py_XDECREF(result);
423 return NULL;
424}
425
426PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
427"decompress(data) -> bytes\n"
428"\n"
429"Provide data to the decompressor object. Returns a chunk of\n"
430"decompressed data if possible, or b'' otherwise.\n"
431"\n"
432"Attempting to decompress data after the end of stream is reached\n"
433"raises an EOFError. Any data found after the end of the stream\n"
434"is ignored and saved in the unused_data attribute.\n");
435
436static PyObject *
437BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
438{
439 Py_buffer buffer;
440 PyObject *result = NULL;
441
442 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
443 return NULL;
444
445 ACQUIRE_LOCK(self);
446 if (self->eof)
447 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
448 else
449 result = decompress(self, buffer.buf, buffer.len);
450 RELEASE_LOCK(self);
451 PyBuffer_Release(&buffer);
452 return result;
453}
454
455static int
456BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
457{
458 int bzerror;
459
460 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
461 return -1;
462
463#ifdef WITH_THREAD
464 self->lock = PyThread_allocate_lock();
465 if (self->lock == NULL) {
466 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
467 return -1;
468 }
469#endif
470
471 self->unused_data = PyBytes_FromStringAndSize("", 0);
472 if (self->unused_data == NULL)
473 goto error;
474
475 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
476 if (catch_bz2_error(bzerror))
477 goto error;
478
479 return 0;
480
481error:
482 Py_CLEAR(self->unused_data);
483#ifdef WITH_THREAD
484 PyThread_free_lock(self->lock);
485 self->lock = NULL;
486#endif
487 return -1;
488}
489
490static void
491BZ2Decompressor_dealloc(BZ2Decompressor *self)
492{
493 BZ2_bzDecompressEnd(&self->bzs);
494 Py_CLEAR(self->unused_data);
495#ifdef WITH_THREAD
496 if (self->lock != NULL)
497 PyThread_free_lock(self->lock);
498#endif
499 Py_TYPE(self)->tp_free((PyObject *)self);
500}
501
502static PyMethodDef BZ2Decompressor_methods[] = {
503 {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
504 BZ2Decompressor_decompress__doc__},
505 {NULL}
506};
507
508PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
509"True if the end-of-stream marker has been reached.");
510
511PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
512"Data found after the end of the compressed stream.");
513
514static PyMemberDef BZ2Decompressor_members[] = {
515 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
516 READONLY, BZ2Decompressor_eof__doc__},
517 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
518 READONLY, BZ2Decompressor_unused_data__doc__},
519 {NULL}
520};
521
522PyDoc_STRVAR(BZ2Decompressor__doc__,
523"BZ2Decompressor()\n"
524"\n"
525"Create a decompressor object for decompressing data incrementally.\n"
526"\n"
527"For one-shot decompression, use the decompress() function instead.\n");
528
529static PyTypeObject BZ2Decompressor_Type = {
530 PyVarObject_HEAD_INIT(NULL, 0)
531 "_bz2.BZ2Decompressor", /* tp_name */
532 sizeof(BZ2Decompressor), /* tp_basicsize */
533 0, /* tp_itemsize */
534 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
535 0, /* tp_print */
536 0, /* tp_getattr */
537 0, /* tp_setattr */
538 0, /* tp_reserved */
539 0, /* tp_repr */
540 0, /* tp_as_number */
541 0, /* tp_as_sequence */
542 0, /* tp_as_mapping */
543 0, /* tp_hash */
544 0, /* tp_call */
545 0, /* tp_str */
546 0, /* tp_getattro */
547 0, /* tp_setattro */
548 0, /* tp_as_buffer */
549 Py_TPFLAGS_DEFAULT, /* tp_flags */
550 BZ2Decompressor__doc__, /* tp_doc */
551 0, /* tp_traverse */
552 0, /* tp_clear */
553 0, /* tp_richcompare */
554 0, /* tp_weaklistoffset */
555 0, /* tp_iter */
556 0, /* tp_iternext */
557 BZ2Decompressor_methods, /* tp_methods */
558 BZ2Decompressor_members, /* tp_members */
559 0, /* tp_getset */
560 0, /* tp_base */
561 0, /* tp_dict */
562 0, /* tp_descr_get */
563 0, /* tp_descr_set */
564 0, /* tp_dictoffset */
565 (initproc)BZ2Decompressor_init, /* tp_init */
566 0, /* tp_alloc */
567 PyType_GenericNew, /* tp_new */
568};
569
570
571/* Module initialization. */
572
573static struct PyModuleDef _bz2module = {
574 PyModuleDef_HEAD_INIT,
575 "_bz2",
576 NULL,
577 -1,
578 NULL,
579 NULL,
580 NULL,
581 NULL,
582 NULL
583};
584
585PyMODINIT_FUNC
586PyInit__bz2(void)
587{
588 PyObject *m;
589
590 if (PyType_Ready(&BZ2Compressor_Type) < 0)
591 return NULL;
592 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
593 return NULL;
594
595 m = PyModule_Create(&_bz2module);
596 if (m == NULL)
597 return NULL;
598
599 Py_INCREF(&BZ2Compressor_Type);
600 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
601
602 Py_INCREF(&BZ2Decompressor_Type);
603 PyModule_AddObject(m, "BZ2Decompressor",
604 (PyObject *)&BZ2Decompressor_Type);
605
606 return m;
607}