blob: b407df9d9675aa786db14f70f559c6336b69ae47 [file] [log] [blame]
/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   names used throughout this file onto the unprefixed function names
   (presumably for older libbzip2 releases that export the API without
   the prefix -- TODO confirm against the libbzip2 changelog). */
#if !defined(BZ_CONFIG_ERROR)
#define BZ2_bzCompressInit      bzCompressInit
#define BZ2_bzCompress          bzCompress
#define BZ2_bzCompressEnd       bzCompressEnd
#define BZ2_bzDecompressInit    bzDecompressInit
#define BZ2_bzDecompress        bzDecompress
#define BZ2_bzDecompressEnd     bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
24
25
/* Per-object locking helpers.

   ACQUIRE_LOCK(obj) first attempts a non-blocking acquire of obj->lock.
   Only if that fails does it wrap a blocking acquire in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS.
   RELEASE_LOCK(obj) releases obj->lock.

   Without WITH_THREAD both macros expand to nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif  /* WITH_THREAD */
38
/* Smaller of two values.  Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
40
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020041
42typedef struct {
43 PyObject_HEAD
44 bz_stream bzs;
45 int flushed;
46#ifdef WITH_THREAD
47 PyThread_type_lock lock;
48#endif
49} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} BZ2Decompressor;
60
61
/* Helper functions. */
63
64static int
65catch_bz2_error(int bzerror)
66{
67 switch(bzerror) {
68 case BZ_OK:
69 case BZ_RUN_OK:
70 case BZ_FLUSH_OK:
71 case BZ_FINISH_OK:
72 case BZ_STREAM_END:
73 return 0;
74
75#ifdef BZ_CONFIG_ERROR
76 case BZ_CONFIG_ERROR:
77 PyErr_SetString(PyExc_SystemError,
78 "libbzip2 was not compiled correctly");
79 return 1;
80#endif
81 case BZ_PARAM_ERROR:
82 PyErr_SetString(PyExc_ValueError,
83 "Internal error - "
84 "invalid parameters passed to libbzip2");
85 return 1;
86 case BZ_MEM_ERROR:
87 PyErr_NoMemory();
88 return 1;
89 case BZ_DATA_ERROR:
90 case BZ_DATA_ERROR_MAGIC:
91 PyErr_SetString(PyExc_IOError, "Invalid data stream");
92 return 1;
93 case BZ_IO_ERROR:
94 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
95 return 1;
96 case BZ_UNEXPECTED_EOF:
97 PyErr_SetString(PyExc_EOFError,
98 "Compressed file ended before the logical "
99 "end-of-stream was detected");
100 return 1;
101 case BZ_SEQUENCE_ERROR:
102 PyErr_SetString(PyExc_RuntimeError,
103 "Internal error - "
104 "Invalid sequence of commands sent to libbzip2");
105 return 1;
106 default:
107 PyErr_Format(PyExc_IOError,
108 "Unrecognized error from libbzip2: %d", bzerror);
109 return 1;
110 }
111}
112
/* Initial size of the output buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
118
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200119static int
120grow_buffer(PyObject **buf)
121{
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200122 /* Expand the buffer by an amount proportional to the current size,
123 giving us amortized linear-time behavior. Use a less-than-double
124 growth factor to avoid excessive allocation. */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200125 size_t size = PyBytes_GET_SIZE(*buf);
Nadeem Vawda72d6a132011-10-13 13:38:14 +0200126 return _PyBytes_Resize(buf, size + (size >> 3) + 6);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200127}
128
129
/* BZ2Compressor class. */
131
132static PyObject *
133compress(BZ2Compressor *c, char *data, size_t len, int action)
134{
135 size_t data_size = 0;
136 PyObject *result;
137
138 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
139 if (result == NULL)
140 return NULL;
141 c->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200142 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
143 Do compression in chunks of no more than UINT_MAX bytes each. */
144 c->bzs.avail_in = MIN(len, UINT_MAX);
145 len -= c->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200146 c->bzs.next_out = PyBytes_AS_STRING(result);
147 c->bzs.avail_out = PyBytes_GET_SIZE(result);
148 for (;;) {
149 char *this_out;
150 int bzerror;
151
152 Py_BEGIN_ALLOW_THREADS
153 this_out = c->bzs.next_out;
154 bzerror = BZ2_bzCompress(&c->bzs, action);
155 data_size += c->bzs.next_out - this_out;
156 Py_END_ALLOW_THREADS
157 if (catch_bz2_error(bzerror))
158 goto error;
159
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200160 if (c->bzs.avail_in == 0 && len > 0) {
161 c->bzs.avail_in = MIN(len, UINT_MAX);
162 len -= c->bzs.avail_in;
163 }
164
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200165 /* In regular compression mode, stop when input data is exhausted.
166 In flushing mode, stop when all buffered data has been flushed. */
167 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
168 (action == BZ_FINISH && bzerror == BZ_STREAM_END))
169 break;
170
171 if (c->bzs.avail_out == 0) {
172 if (grow_buffer(&result) < 0)
173 goto error;
174 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
175 c->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
176 }
177 }
178 if (data_size != PyBytes_GET_SIZE(result))
179 if (_PyBytes_Resize(&result, data_size) < 0)
180 goto error;
181 return result;
182
183error:
184 Py_XDECREF(result);
185 return NULL;
186}
187
188PyDoc_STRVAR(BZ2Compressor_compress__doc__,
189"compress(data) -> bytes\n"
190"\n"
191"Provide data to the compressor object. Returns a chunk of\n"
192"compressed data if possible, or b'' otherwise.\n"
193"\n"
194"When you have finished providing data to the compressor, call the\n"
195"flush() method to finish the compression process.\n");
196
197static PyObject *
198BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
199{
200 Py_buffer buffer;
201 PyObject *result = NULL;
202
203 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
204 return NULL;
205
206 ACQUIRE_LOCK(self);
207 if (self->flushed)
208 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
209 else
210 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
211 RELEASE_LOCK(self);
212 PyBuffer_Release(&buffer);
213 return result;
214}
215
216PyDoc_STRVAR(BZ2Compressor_flush__doc__,
217"flush() -> bytes\n"
218"\n"
219"Finish the compression process. Returns the compressed data left\n"
220"in internal buffers.\n"
221"\n"
222"The compressor object may not be used after this method is called.\n");
223
224static PyObject *
225BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
226{
227 PyObject *result = NULL;
228
229 ACQUIRE_LOCK(self);
230 if (self->flushed)
231 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
232 else {
233 self->flushed = 1;
234 result = compress(self, NULL, 0, BZ_FINISH);
235 }
236 RELEASE_LOCK(self);
237 return result;
238}
239
240static int
241BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
242{
243 int compresslevel = 9;
244 int bzerror;
245
246 if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
247 return -1;
248 if (!(1 <= compresslevel && compresslevel <= 9)) {
249 PyErr_SetString(PyExc_ValueError,
250 "compresslevel must be between 1 and 9");
251 return -1;
252 }
253
254#ifdef WITH_THREAD
255 self->lock = PyThread_allocate_lock();
256 if (self->lock == NULL) {
257 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
258 return -1;
259 }
260#endif
261
262 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
263 if (catch_bz2_error(bzerror))
264 goto error;
265
266 return 0;
267
268error:
269#ifdef WITH_THREAD
270 PyThread_free_lock(self->lock);
271 self->lock = NULL;
272#endif
273 return -1;
274}
275
276static void
277BZ2Compressor_dealloc(BZ2Compressor *self)
278{
279 BZ2_bzCompressEnd(&self->bzs);
280#ifdef WITH_THREAD
281 if (self->lock != NULL)
282 PyThread_free_lock(self->lock);
283#endif
284 Py_TYPE(self)->tp_free((PyObject *)self);
285}
286
287static PyMethodDef BZ2Compressor_methods[] = {
288 {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
289 BZ2Compressor_compress__doc__},
290 {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
291 BZ2Compressor_flush__doc__},
292 {NULL}
293};
294
295PyDoc_STRVAR(BZ2Compressor__doc__,
296"BZ2Compressor(compresslevel=9)\n"
297"\n"
298"Create a compressor object for compressing data incrementally.\n"
299"\n"
300"compresslevel, if given, must be a number between 1 and 9.\n"
301"\n"
302"For one-shot compression, use the compress() function instead.\n");
303
304static PyTypeObject BZ2Compressor_Type = {
305 PyVarObject_HEAD_INIT(NULL, 0)
306 "_bz2.BZ2Compressor", /* tp_name */
307 sizeof(BZ2Compressor), /* tp_basicsize */
308 0, /* tp_itemsize */
309 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
310 0, /* tp_print */
311 0, /* tp_getattr */
312 0, /* tp_setattr */
313 0, /* tp_reserved */
314 0, /* tp_repr */
315 0, /* tp_as_number */
316 0, /* tp_as_sequence */
317 0, /* tp_as_mapping */
318 0, /* tp_hash */
319 0, /* tp_call */
320 0, /* tp_str */
321 0, /* tp_getattro */
322 0, /* tp_setattro */
323 0, /* tp_as_buffer */
324 Py_TPFLAGS_DEFAULT, /* tp_flags */
325 BZ2Compressor__doc__, /* tp_doc */
326 0, /* tp_traverse */
327 0, /* tp_clear */
328 0, /* tp_richcompare */
329 0, /* tp_weaklistoffset */
330 0, /* tp_iter */
331 0, /* tp_iternext */
332 BZ2Compressor_methods, /* tp_methods */
333 0, /* tp_members */
334 0, /* tp_getset */
335 0, /* tp_base */
336 0, /* tp_dict */
337 0, /* tp_descr_get */
338 0, /* tp_descr_set */
339 0, /* tp_dictoffset */
340 (initproc)BZ2Compressor_init, /* tp_init */
341 0, /* tp_alloc */
342 PyType_GenericNew, /* tp_new */
343};
344
345
/* BZ2Decompressor class. */
347
348static PyObject *
349decompress(BZ2Decompressor *d, char *data, size_t len)
350{
351 size_t data_size = 0;
352 PyObject *result;
353
354 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
355 if (result == NULL)
356 return result;
357 d->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200358 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
359 Do decompression in chunks of no more than UINT_MAX bytes each. */
360 d->bzs.avail_in = MIN(len, UINT_MAX);
361 len -= d->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200362 d->bzs.next_out = PyBytes_AS_STRING(result);
363 d->bzs.avail_out = PyBytes_GET_SIZE(result);
364 for (;;) {
365 char *this_out;
366 int bzerror;
367
368 Py_BEGIN_ALLOW_THREADS
369 this_out = d->bzs.next_out;
370 bzerror = BZ2_bzDecompress(&d->bzs);
371 data_size += d->bzs.next_out - this_out;
372 Py_END_ALLOW_THREADS
373 if (catch_bz2_error(bzerror))
374 goto error;
375 if (bzerror == BZ_STREAM_END) {
376 d->eof = 1;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200377 len += d->bzs.avail_in;
378 if (len > 0) { /* Save leftover input to unused_data */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200379 Py_CLEAR(d->unused_data);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200380 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200381 if (d->unused_data == NULL)
382 goto error;
383 }
384 break;
385 }
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200386 if (d->bzs.avail_in == 0) {
387 if (len == 0)
388 break;
389 d->bzs.avail_in = MIN(len, UINT_MAX);
390 len -= d->bzs.avail_in;
391 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200392 if (d->bzs.avail_out == 0) {
393 if (grow_buffer(&result) < 0)
394 goto error;
395 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
396 d->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
397 }
398 }
399 if (data_size != PyBytes_GET_SIZE(result))
400 if (_PyBytes_Resize(&result, data_size) < 0)
401 goto error;
402 return result;
403
404error:
405 Py_XDECREF(result);
406 return NULL;
407}
408
409PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
410"decompress(data) -> bytes\n"
411"\n"
412"Provide data to the decompressor object. Returns a chunk of\n"
413"decompressed data if possible, or b'' otherwise.\n"
414"\n"
415"Attempting to decompress data after the end of stream is reached\n"
416"raises an EOFError. Any data found after the end of the stream\n"
417"is ignored and saved in the unused_data attribute.\n");
418
419static PyObject *
420BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
421{
422 Py_buffer buffer;
423 PyObject *result = NULL;
424
425 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
426 return NULL;
427
428 ACQUIRE_LOCK(self);
429 if (self->eof)
430 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
431 else
432 result = decompress(self, buffer.buf, buffer.len);
433 RELEASE_LOCK(self);
434 PyBuffer_Release(&buffer);
435 return result;
436}
437
438static int
439BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
440{
441 int bzerror;
442
443 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
444 return -1;
445
446#ifdef WITH_THREAD
447 self->lock = PyThread_allocate_lock();
448 if (self->lock == NULL) {
449 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
450 return -1;
451 }
452#endif
453
454 self->unused_data = PyBytes_FromStringAndSize("", 0);
455 if (self->unused_data == NULL)
456 goto error;
457
458 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
459 if (catch_bz2_error(bzerror))
460 goto error;
461
462 return 0;
463
464error:
465 Py_CLEAR(self->unused_data);
466#ifdef WITH_THREAD
467 PyThread_free_lock(self->lock);
468 self->lock = NULL;
469#endif
470 return -1;
471}
472
473static void
474BZ2Decompressor_dealloc(BZ2Decompressor *self)
475{
476 BZ2_bzDecompressEnd(&self->bzs);
477 Py_CLEAR(self->unused_data);
478#ifdef WITH_THREAD
479 if (self->lock != NULL)
480 PyThread_free_lock(self->lock);
481#endif
482 Py_TYPE(self)->tp_free((PyObject *)self);
483}
484
485static PyMethodDef BZ2Decompressor_methods[] = {
486 {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
487 BZ2Decompressor_decompress__doc__},
488 {NULL}
489};
490
491PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
492"True if the end-of-stream marker has been reached.");
493
494PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
495"Data found after the end of the compressed stream.");
496
497static PyMemberDef BZ2Decompressor_members[] = {
498 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
499 READONLY, BZ2Decompressor_eof__doc__},
500 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
501 READONLY, BZ2Decompressor_unused_data__doc__},
502 {NULL}
503};
504
505PyDoc_STRVAR(BZ2Decompressor__doc__,
506"BZ2Decompressor()\n"
507"\n"
508"Create a decompressor object for decompressing data incrementally.\n"
509"\n"
510"For one-shot decompression, use the decompress() function instead.\n");
511
512static PyTypeObject BZ2Decompressor_Type = {
513 PyVarObject_HEAD_INIT(NULL, 0)
514 "_bz2.BZ2Decompressor", /* tp_name */
515 sizeof(BZ2Decompressor), /* tp_basicsize */
516 0, /* tp_itemsize */
517 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
518 0, /* tp_print */
519 0, /* tp_getattr */
520 0, /* tp_setattr */
521 0, /* tp_reserved */
522 0, /* tp_repr */
523 0, /* tp_as_number */
524 0, /* tp_as_sequence */
525 0, /* tp_as_mapping */
526 0, /* tp_hash */
527 0, /* tp_call */
528 0, /* tp_str */
529 0, /* tp_getattro */
530 0, /* tp_setattro */
531 0, /* tp_as_buffer */
532 Py_TPFLAGS_DEFAULT, /* tp_flags */
533 BZ2Decompressor__doc__, /* tp_doc */
534 0, /* tp_traverse */
535 0, /* tp_clear */
536 0, /* tp_richcompare */
537 0, /* tp_weaklistoffset */
538 0, /* tp_iter */
539 0, /* tp_iternext */
540 BZ2Decompressor_methods, /* tp_methods */
541 BZ2Decompressor_members, /* tp_members */
542 0, /* tp_getset */
543 0, /* tp_base */
544 0, /* tp_dict */
545 0, /* tp_descr_get */
546 0, /* tp_descr_set */
547 0, /* tp_dictoffset */
548 (initproc)BZ2Decompressor_init, /* tp_init */
549 0, /* tp_alloc */
550 PyType_GenericNew, /* tp_new */
551};
552
553
/* Module initialization. */
555
556static struct PyModuleDef _bz2module = {
557 PyModuleDef_HEAD_INIT,
558 "_bz2",
559 NULL,
560 -1,
561 NULL,
562 NULL,
563 NULL,
564 NULL,
565 NULL
566};
567
568PyMODINIT_FUNC
569PyInit__bz2(void)
570{
571 PyObject *m;
572
573 if (PyType_Ready(&BZ2Compressor_Type) < 0)
574 return NULL;
575 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
576 return NULL;
577
578 m = PyModule_Create(&_bz2module);
579 if (m == NULL)
580 return NULL;
581
582 Py_INCREF(&BZ2Compressor_Type);
583 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
584
585 Py_INCREF(&BZ2Decompressor_Type);
586 PyModule_AddObject(m, "BZ2Decompressor",
587 (PyObject *)&BZ2Decompressor_Type);
588
589 return m;
590}