blob: 5cac8e6d5183eb44885a7aedfd0f2e90c7b5b8f9 [file] [log] [blame]
/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When the libbzip2 header does not define BZ_CONFIG_ERROR, redirect the
   BZ2_-prefixed entry points used in this file to the unprefixed names
   (presumably what such older headers declare -- confirm against bzlib.h). */
#ifndef BZ_CONFIG_ERROR
#  define BZ2_bzCompress        bzCompress
#  define BZ2_bzCompressInit    bzCompressInit
#  define BZ2_bzCompressEnd     bzCompressEnd
#  define BZ2_bzDecompress      bzDecompress
#  define BZ2_bzDecompressInit  bzDecompressInit
#  define BZ2_bzDecompressEnd   bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
24
25
/* Per-object locking helpers.

   ACQUIRE_LOCK first attempts a non-blocking acquire; only if that fails
   does it fall back to a blocking acquire, and it does so between
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads can
   run while this one waits.  Without WITH_THREAD both macros expand to
   nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
        if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
38
/* Smaller of two values.  Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
40
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020041
42typedef struct {
43 PyObject_HEAD
44 bz_stream bzs;
45 int flushed;
46#ifdef WITH_THREAD
47 PyThread_type_lock lock;
48#endif
49} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} BZ2Decompressor;
60
61
62/* Helper functions. */
63
64static int
65catch_bz2_error(int bzerror)
66{
67 switch(bzerror) {
68 case BZ_OK:
69 case BZ_RUN_OK:
70 case BZ_FLUSH_OK:
71 case BZ_FINISH_OK:
72 case BZ_STREAM_END:
73 return 0;
74
75#ifdef BZ_CONFIG_ERROR
76 case BZ_CONFIG_ERROR:
77 PyErr_SetString(PyExc_SystemError,
78 "libbzip2 was not compiled correctly");
79 return 1;
80#endif
81 case BZ_PARAM_ERROR:
82 PyErr_SetString(PyExc_ValueError,
83 "Internal error - "
84 "invalid parameters passed to libbzip2");
85 return 1;
86 case BZ_MEM_ERROR:
87 PyErr_NoMemory();
88 return 1;
89 case BZ_DATA_ERROR:
90 case BZ_DATA_ERROR_MAGIC:
91 PyErr_SetString(PyExc_IOError, "Invalid data stream");
92 return 1;
93 case BZ_IO_ERROR:
94 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
95 return 1;
96 case BZ_UNEXPECTED_EOF:
97 PyErr_SetString(PyExc_EOFError,
98 "Compressed file ended before the logical "
99 "end-of-stream was detected");
100 return 1;
101 case BZ_SEQUENCE_ERROR:
102 PyErr_SetString(PyExc_RuntimeError,
103 "Internal error - "
104 "Invalid sequence of commands sent to libbzip2");
105 return 1;
106 default:
107 PyErr_Format(PyExc_IOError,
108 "Unrecognized error from libbzip2: %d", bzerror);
109 return 1;
110 }
111}
112
/* Initial size of an output buffer: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
118
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200119static int
120grow_buffer(PyObject **buf)
121{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200122 /* Expand the buffer by an amount proportional to the current size,
123 giving us amortized linear-time behavior. Use a less-than-double
124 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200125 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200126 size_t new_size = size + (size >> 3) + 6;
127 if (new_size > size) {
128 return _PyBytes_Resize(buf, new_size);
129 } else { /* overflow */
130 PyErr_SetString(PyExc_OverflowError,
131 "Unable to allocate buffer - output too large");
132 return -1;
133 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200134}
135
136
137/* BZ2Compressor class. */
138
139static PyObject *
140compress(BZ2Compressor *c, char *data, size_t len, int action)
141{
142 size_t data_size = 0;
143 PyObject *result;
144
145 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
146 if (result == NULL)
147 return NULL;
148 c->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200149 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
150 Do compression in chunks of no more than UINT_MAX bytes each. */
151 c->bzs.avail_in = MIN(len, UINT_MAX);
152 len -= c->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200153 c->bzs.next_out = PyBytes_AS_STRING(result);
154 c->bzs.avail_out = PyBytes_GET_SIZE(result);
155 for (;;) {
156 char *this_out;
157 int bzerror;
158
159 Py_BEGIN_ALLOW_THREADS
160 this_out = c->bzs.next_out;
161 bzerror = BZ2_bzCompress(&c->bzs, action);
162 data_size += c->bzs.next_out - this_out;
163 Py_END_ALLOW_THREADS
164 if (catch_bz2_error(bzerror))
165 goto error;
166
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200167 if (c->bzs.avail_in == 0 && len > 0) {
168 c->bzs.avail_in = MIN(len, UINT_MAX);
169 len -= c->bzs.avail_in;
170 }
171
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200172 /* In regular compression mode, stop when input data is exhausted.
173 In flushing mode, stop when all buffered data has been flushed. */
174 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
175 (action == BZ_FINISH && bzerror == BZ_STREAM_END))
176 break;
177
178 if (c->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200179 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
180 if (buffer_left == 0) {
181 if (grow_buffer(&result) < 0)
182 goto error;
183 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
184 buffer_left = PyBytes_GET_SIZE(result) - data_size;
185 }
186 c->bzs.avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200187 }
188 }
189 if (data_size != PyBytes_GET_SIZE(result))
190 if (_PyBytes_Resize(&result, data_size) < 0)
191 goto error;
192 return result;
193
194error:
195 Py_XDECREF(result);
196 return NULL;
197}
198
199PyDoc_STRVAR(BZ2Compressor_compress__doc__,
200"compress(data) -> bytes\n"
201"\n"
202"Provide data to the compressor object. Returns a chunk of\n"
203"compressed data if possible, or b'' otherwise.\n"
204"\n"
205"When you have finished providing data to the compressor, call the\n"
206"flush() method to finish the compression process.\n");
207
208static PyObject *
209BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
210{
211 Py_buffer buffer;
212 PyObject *result = NULL;
213
214 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
215 return NULL;
216
217 ACQUIRE_LOCK(self);
218 if (self->flushed)
219 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
220 else
221 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
222 RELEASE_LOCK(self);
223 PyBuffer_Release(&buffer);
224 return result;
225}
226
227PyDoc_STRVAR(BZ2Compressor_flush__doc__,
228"flush() -> bytes\n"
229"\n"
230"Finish the compression process. Returns the compressed data left\n"
231"in internal buffers.\n"
232"\n"
233"The compressor object may not be used after this method is called.\n");
234
235static PyObject *
236BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
237{
238 PyObject *result = NULL;
239
240 ACQUIRE_LOCK(self);
241 if (self->flushed)
242 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
243 else {
244 self->flushed = 1;
245 result = compress(self, NULL, 0, BZ_FINISH);
246 }
247 RELEASE_LOCK(self);
248 return result;
249}
250
251static int
252BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
253{
254 int compresslevel = 9;
255 int bzerror;
256
257 if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
258 return -1;
259 if (!(1 <= compresslevel && compresslevel <= 9)) {
260 PyErr_SetString(PyExc_ValueError,
261 "compresslevel must be between 1 and 9");
262 return -1;
263 }
264
265#ifdef WITH_THREAD
266 self->lock = PyThread_allocate_lock();
267 if (self->lock == NULL) {
268 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
269 return -1;
270 }
271#endif
272
273 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
274 if (catch_bz2_error(bzerror))
275 goto error;
276
277 return 0;
278
279error:
280#ifdef WITH_THREAD
281 PyThread_free_lock(self->lock);
282 self->lock = NULL;
283#endif
284 return -1;
285}
286
287static void
288BZ2Compressor_dealloc(BZ2Compressor *self)
289{
290 BZ2_bzCompressEnd(&self->bzs);
291#ifdef WITH_THREAD
292 if (self->lock != NULL)
293 PyThread_free_lock(self->lock);
294#endif
295 Py_TYPE(self)->tp_free((PyObject *)self);
296}
297
298static PyMethodDef BZ2Compressor_methods[] = {
299 {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
300 BZ2Compressor_compress__doc__},
301 {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
302 BZ2Compressor_flush__doc__},
303 {NULL}
304};
305
306PyDoc_STRVAR(BZ2Compressor__doc__,
307"BZ2Compressor(compresslevel=9)\n"
308"\n"
309"Create a compressor object for compressing data incrementally.\n"
310"\n"
311"compresslevel, if given, must be a number between 1 and 9.\n"
312"\n"
313"For one-shot compression, use the compress() function instead.\n");
314
315static PyTypeObject BZ2Compressor_Type = {
316 PyVarObject_HEAD_INIT(NULL, 0)
317 "_bz2.BZ2Compressor", /* tp_name */
318 sizeof(BZ2Compressor), /* tp_basicsize */
319 0, /* tp_itemsize */
320 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
321 0, /* tp_print */
322 0, /* tp_getattr */
323 0, /* tp_setattr */
324 0, /* tp_reserved */
325 0, /* tp_repr */
326 0, /* tp_as_number */
327 0, /* tp_as_sequence */
328 0, /* tp_as_mapping */
329 0, /* tp_hash */
330 0, /* tp_call */
331 0, /* tp_str */
332 0, /* tp_getattro */
333 0, /* tp_setattro */
334 0, /* tp_as_buffer */
335 Py_TPFLAGS_DEFAULT, /* tp_flags */
336 BZ2Compressor__doc__, /* tp_doc */
337 0, /* tp_traverse */
338 0, /* tp_clear */
339 0, /* tp_richcompare */
340 0, /* tp_weaklistoffset */
341 0, /* tp_iter */
342 0, /* tp_iternext */
343 BZ2Compressor_methods, /* tp_methods */
344 0, /* tp_members */
345 0, /* tp_getset */
346 0, /* tp_base */
347 0, /* tp_dict */
348 0, /* tp_descr_get */
349 0, /* tp_descr_set */
350 0, /* tp_dictoffset */
351 (initproc)BZ2Compressor_init, /* tp_init */
352 0, /* tp_alloc */
353 PyType_GenericNew, /* tp_new */
354};
355
356
357/* BZ2Decompressor class. */
358
359static PyObject *
360decompress(BZ2Decompressor *d, char *data, size_t len)
361{
362 size_t data_size = 0;
363 PyObject *result;
364
365 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
366 if (result == NULL)
367 return result;
368 d->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200369 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
370 Do decompression in chunks of no more than UINT_MAX bytes each. */
371 d->bzs.avail_in = MIN(len, UINT_MAX);
372 len -= d->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200373 d->bzs.next_out = PyBytes_AS_STRING(result);
374 d->bzs.avail_out = PyBytes_GET_SIZE(result);
375 for (;;) {
376 char *this_out;
377 int bzerror;
378
379 Py_BEGIN_ALLOW_THREADS
380 this_out = d->bzs.next_out;
381 bzerror = BZ2_bzDecompress(&d->bzs);
382 data_size += d->bzs.next_out - this_out;
383 Py_END_ALLOW_THREADS
384 if (catch_bz2_error(bzerror))
385 goto error;
386 if (bzerror == BZ_STREAM_END) {
387 d->eof = 1;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200388 len += d->bzs.avail_in;
389 if (len > 0) { /* Save leftover input to unused_data */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200390 Py_CLEAR(d->unused_data);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200391 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200392 if (d->unused_data == NULL)
393 goto error;
394 }
395 break;
396 }
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200397 if (d->bzs.avail_in == 0) {
398 if (len == 0)
399 break;
400 d->bzs.avail_in = MIN(len, UINT_MAX);
401 len -= d->bzs.avail_in;
402 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200403 if (d->bzs.avail_out == 0) {
Nadeem Vawda18b7fcc2012-10-21 21:16:58 +0200404 size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
405 if (buffer_left == 0) {
406 if (grow_buffer(&result) < 0)
407 goto error;
408 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
409 buffer_left = PyBytes_GET_SIZE(result) - data_size;
410 }
411 d->bzs.avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200412 }
413 }
414 if (data_size != PyBytes_GET_SIZE(result))
415 if (_PyBytes_Resize(&result, data_size) < 0)
416 goto error;
417 return result;
418
419error:
420 Py_XDECREF(result);
421 return NULL;
422}
423
424PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
425"decompress(data) -> bytes\n"
426"\n"
427"Provide data to the decompressor object. Returns a chunk of\n"
428"decompressed data if possible, or b'' otherwise.\n"
429"\n"
430"Attempting to decompress data after the end of stream is reached\n"
431"raises an EOFError. Any data found after the end of the stream\n"
432"is ignored and saved in the unused_data attribute.\n");
433
434static PyObject *
435BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
436{
437 Py_buffer buffer;
438 PyObject *result = NULL;
439
440 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
441 return NULL;
442
443 ACQUIRE_LOCK(self);
444 if (self->eof)
445 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
446 else
447 result = decompress(self, buffer.buf, buffer.len);
448 RELEASE_LOCK(self);
449 PyBuffer_Release(&buffer);
450 return result;
451}
452
453static int
454BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
455{
456 int bzerror;
457
458 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
459 return -1;
460
461#ifdef WITH_THREAD
462 self->lock = PyThread_allocate_lock();
463 if (self->lock == NULL) {
464 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
465 return -1;
466 }
467#endif
468
469 self->unused_data = PyBytes_FromStringAndSize("", 0);
470 if (self->unused_data == NULL)
471 goto error;
472
473 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
474 if (catch_bz2_error(bzerror))
475 goto error;
476
477 return 0;
478
479error:
480 Py_CLEAR(self->unused_data);
481#ifdef WITH_THREAD
482 PyThread_free_lock(self->lock);
483 self->lock = NULL;
484#endif
485 return -1;
486}
487
488static void
489BZ2Decompressor_dealloc(BZ2Decompressor *self)
490{
491 BZ2_bzDecompressEnd(&self->bzs);
492 Py_CLEAR(self->unused_data);
493#ifdef WITH_THREAD
494 if (self->lock != NULL)
495 PyThread_free_lock(self->lock);
496#endif
497 Py_TYPE(self)->tp_free((PyObject *)self);
498}
499
500static PyMethodDef BZ2Decompressor_methods[] = {
501 {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
502 BZ2Decompressor_decompress__doc__},
503 {NULL}
504};
505
506PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
507"True if the end-of-stream marker has been reached.");
508
509PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
510"Data found after the end of the compressed stream.");
511
512static PyMemberDef BZ2Decompressor_members[] = {
513 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
514 READONLY, BZ2Decompressor_eof__doc__},
515 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
516 READONLY, BZ2Decompressor_unused_data__doc__},
517 {NULL}
518};
519
520PyDoc_STRVAR(BZ2Decompressor__doc__,
521"BZ2Decompressor()\n"
522"\n"
523"Create a decompressor object for decompressing data incrementally.\n"
524"\n"
525"For one-shot decompression, use the decompress() function instead.\n");
526
527static PyTypeObject BZ2Decompressor_Type = {
528 PyVarObject_HEAD_INIT(NULL, 0)
529 "_bz2.BZ2Decompressor", /* tp_name */
530 sizeof(BZ2Decompressor), /* tp_basicsize */
531 0, /* tp_itemsize */
532 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
533 0, /* tp_print */
534 0, /* tp_getattr */
535 0, /* tp_setattr */
536 0, /* tp_reserved */
537 0, /* tp_repr */
538 0, /* tp_as_number */
539 0, /* tp_as_sequence */
540 0, /* tp_as_mapping */
541 0, /* tp_hash */
542 0, /* tp_call */
543 0, /* tp_str */
544 0, /* tp_getattro */
545 0, /* tp_setattro */
546 0, /* tp_as_buffer */
547 Py_TPFLAGS_DEFAULT, /* tp_flags */
548 BZ2Decompressor__doc__, /* tp_doc */
549 0, /* tp_traverse */
550 0, /* tp_clear */
551 0, /* tp_richcompare */
552 0, /* tp_weaklistoffset */
553 0, /* tp_iter */
554 0, /* tp_iternext */
555 BZ2Decompressor_methods, /* tp_methods */
556 BZ2Decompressor_members, /* tp_members */
557 0, /* tp_getset */
558 0, /* tp_base */
559 0, /* tp_dict */
560 0, /* tp_descr_get */
561 0, /* tp_descr_set */
562 0, /* tp_dictoffset */
563 (initproc)BZ2Decompressor_init, /* tp_init */
564 0, /* tp_alloc */
565 PyType_GenericNew, /* tp_new */
566};
567
568
569/* Module initialization. */
570
571static struct PyModuleDef _bz2module = {
572 PyModuleDef_HEAD_INIT,
573 "_bz2",
574 NULL,
575 -1,
576 NULL,
577 NULL,
578 NULL,
579 NULL,
580 NULL
581};
582
583PyMODINIT_FUNC
584PyInit__bz2(void)
585{
586 PyObject *m;
587
588 if (PyType_Ready(&BZ2Compressor_Type) < 0)
589 return NULL;
590 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
591 return NULL;
592
593 m = PyModule_Create(&_bz2module);
594 if (m == NULL)
595 return NULL;
596
597 Py_INCREF(&BZ2Compressor_Type);
598 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
599
600 Py_INCREF(&BZ2Decompressor_Type);
601 PyModule_AddObject(m, "BZ2Decompressor",
602 (PyObject *)&BZ2Decompressor_Type);
603
604 return m;
605}