blob: 522b3e56585397e5fe8f14e51990960ac0b596a3 [file] [log] [blame]
/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   entry points used throughout this file onto the unprefixed names
   (presumably the spelling used by older libbzip2 releases). */
#if !defined(BZ_CONFIG_ERROR)
#define BZ2_bzCompress          bzCompress
#define BZ2_bzCompressInit      bzCompressInit
#define BZ2_bzCompressEnd       bzCompressEnd
#define BZ2_bzDecompress        bzDecompress
#define BZ2_bzDecompressInit    bzDecompressInit
#define BZ2_bzDecompressEnd     bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
24
25
/* Per-object locking helpers.  Both macros expect a pointer to an object
   that has a `lock` member.  Without thread support they expand to nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
/* First try to take the lock without waiting; only if that fails, wait for
   it inside a Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif  /* WITH_THREAD */
38
39
40typedef struct {
41 PyObject_HEAD
42 bz_stream bzs;
43 int flushed;
44#ifdef WITH_THREAD
45 PyThread_type_lock lock;
46#endif
47} BZ2Compressor;
48
49typedef struct {
50 PyObject_HEAD
51 bz_stream bzs;
52 char eof; /* T_BOOL expects a char */
53 PyObject *unused_data;
54#ifdef WITH_THREAD
55 PyThread_type_lock lock;
56#endif
57} BZ2Decompressor;
58
59
60/* Helper functions. */
61
62static int
63catch_bz2_error(int bzerror)
64{
65 switch(bzerror) {
66 case BZ_OK:
67 case BZ_RUN_OK:
68 case BZ_FLUSH_OK:
69 case BZ_FINISH_OK:
70 case BZ_STREAM_END:
71 return 0;
72
73#ifdef BZ_CONFIG_ERROR
74 case BZ_CONFIG_ERROR:
75 PyErr_SetString(PyExc_SystemError,
76 "libbzip2 was not compiled correctly");
77 return 1;
78#endif
79 case BZ_PARAM_ERROR:
80 PyErr_SetString(PyExc_ValueError,
81 "Internal error - "
82 "invalid parameters passed to libbzip2");
83 return 1;
84 case BZ_MEM_ERROR:
85 PyErr_NoMemory();
86 return 1;
87 case BZ_DATA_ERROR:
88 case BZ_DATA_ERROR_MAGIC:
89 PyErr_SetString(PyExc_IOError, "Invalid data stream");
90 return 1;
91 case BZ_IO_ERROR:
92 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
93 return 1;
94 case BZ_UNEXPECTED_EOF:
95 PyErr_SetString(PyExc_EOFError,
96 "Compressed file ended before the logical "
97 "end-of-stream was detected");
98 return 1;
99 case BZ_SEQUENCE_ERROR:
100 PyErr_SetString(PyExc_RuntimeError,
101 "Internal error - "
102 "Invalid sequence of commands sent to libbzip2");
103 return 1;
104 default:
105 PyErr_Format(PyExc_IOError,
106 "Unrecognized error from libbzip2: %d", bzerror);
107 return 1;
108 }
109}
110
/* Output buffer sizing used by grow_buffer().
   SMALLCHUNK is the initial buffer size: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is the largest single growth step; it is kept small when
   int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
122
123static int
124grow_buffer(PyObject **buf)
125{
126 size_t size = PyBytes_GET_SIZE(*buf);
127 if (size <= SMALLCHUNK)
128 return _PyBytes_Resize(buf, size + SMALLCHUNK);
129 else if (size <= BIGCHUNK)
130 return _PyBytes_Resize(buf, size * 2);
131 else
132 return _PyBytes_Resize(buf, size + BIGCHUNK);
133}
134
135
136/* BZ2Compressor class. */
137
138static PyObject *
139compress(BZ2Compressor *c, char *data, size_t len, int action)
140{
141 size_t data_size = 0;
142 PyObject *result;
143
144 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
145 if (result == NULL)
146 return NULL;
147 c->bzs.next_in = data;
148 /* FIXME This is not 64-bit clean - avail_in is an int. */
149 c->bzs.avail_in = len;
150 c->bzs.next_out = PyBytes_AS_STRING(result);
151 c->bzs.avail_out = PyBytes_GET_SIZE(result);
152 for (;;) {
153 char *this_out;
154 int bzerror;
155
156 Py_BEGIN_ALLOW_THREADS
157 this_out = c->bzs.next_out;
158 bzerror = BZ2_bzCompress(&c->bzs, action);
159 data_size += c->bzs.next_out - this_out;
160 Py_END_ALLOW_THREADS
161 if (catch_bz2_error(bzerror))
162 goto error;
163
164 /* In regular compression mode, stop when input data is exhausted.
165 In flushing mode, stop when all buffered data has been flushed. */
166 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
167 (action == BZ_FINISH && bzerror == BZ_STREAM_END))
168 break;
169
170 if (c->bzs.avail_out == 0) {
171 if (grow_buffer(&result) < 0)
172 goto error;
173 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
174 c->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
175 }
176 }
177 if (data_size != PyBytes_GET_SIZE(result))
178 if (_PyBytes_Resize(&result, data_size) < 0)
179 goto error;
180 return result;
181
182error:
183 Py_XDECREF(result);
184 return NULL;
185}
186
187PyDoc_STRVAR(BZ2Compressor_compress__doc__,
188"compress(data) -> bytes\n"
189"\n"
190"Provide data to the compressor object. Returns a chunk of\n"
191"compressed data if possible, or b'' otherwise.\n"
192"\n"
193"When you have finished providing data to the compressor, call the\n"
194"flush() method to finish the compression process.\n");
195
196static PyObject *
197BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
198{
199 Py_buffer buffer;
200 PyObject *result = NULL;
201
202 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
203 return NULL;
204
205 ACQUIRE_LOCK(self);
206 if (self->flushed)
207 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
208 else
209 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
210 RELEASE_LOCK(self);
211 PyBuffer_Release(&buffer);
212 return result;
213}
214
215PyDoc_STRVAR(BZ2Compressor_flush__doc__,
216"flush() -> bytes\n"
217"\n"
218"Finish the compression process. Returns the compressed data left\n"
219"in internal buffers.\n"
220"\n"
221"The compressor object may not be used after this method is called.\n");
222
223static PyObject *
224BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
225{
226 PyObject *result = NULL;
227
228 ACQUIRE_LOCK(self);
229 if (self->flushed)
230 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
231 else {
232 self->flushed = 1;
233 result = compress(self, NULL, 0, BZ_FINISH);
234 }
235 RELEASE_LOCK(self);
236 return result;
237}
238
239static int
240BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
241{
242 int compresslevel = 9;
243 int bzerror;
244
245 if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
246 return -1;
247 if (!(1 <= compresslevel && compresslevel <= 9)) {
248 PyErr_SetString(PyExc_ValueError,
249 "compresslevel must be between 1 and 9");
250 return -1;
251 }
252
253#ifdef WITH_THREAD
254 self->lock = PyThread_allocate_lock();
255 if (self->lock == NULL) {
256 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
257 return -1;
258 }
259#endif
260
261 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
262 if (catch_bz2_error(bzerror))
263 goto error;
264
265 return 0;
266
267error:
268#ifdef WITH_THREAD
269 PyThread_free_lock(self->lock);
270 self->lock = NULL;
271#endif
272 return -1;
273}
274
275static void
276BZ2Compressor_dealloc(BZ2Compressor *self)
277{
278 BZ2_bzCompressEnd(&self->bzs);
279#ifdef WITH_THREAD
280 if (self->lock != NULL)
281 PyThread_free_lock(self->lock);
282#endif
283 Py_TYPE(self)->tp_free((PyObject *)self);
284}
285
286static PyMethodDef BZ2Compressor_methods[] = {
287 {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
288 BZ2Compressor_compress__doc__},
289 {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
290 BZ2Compressor_flush__doc__},
291 {NULL}
292};
293
294PyDoc_STRVAR(BZ2Compressor__doc__,
295"BZ2Compressor(compresslevel=9)\n"
296"\n"
297"Create a compressor object for compressing data incrementally.\n"
298"\n"
299"compresslevel, if given, must be a number between 1 and 9.\n"
300"\n"
301"For one-shot compression, use the compress() function instead.\n");
302
303static PyTypeObject BZ2Compressor_Type = {
304 PyVarObject_HEAD_INIT(NULL, 0)
305 "_bz2.BZ2Compressor", /* tp_name */
306 sizeof(BZ2Compressor), /* tp_basicsize */
307 0, /* tp_itemsize */
308 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
309 0, /* tp_print */
310 0, /* tp_getattr */
311 0, /* tp_setattr */
312 0, /* tp_reserved */
313 0, /* tp_repr */
314 0, /* tp_as_number */
315 0, /* tp_as_sequence */
316 0, /* tp_as_mapping */
317 0, /* tp_hash */
318 0, /* tp_call */
319 0, /* tp_str */
320 0, /* tp_getattro */
321 0, /* tp_setattro */
322 0, /* tp_as_buffer */
323 Py_TPFLAGS_DEFAULT, /* tp_flags */
324 BZ2Compressor__doc__, /* tp_doc */
325 0, /* tp_traverse */
326 0, /* tp_clear */
327 0, /* tp_richcompare */
328 0, /* tp_weaklistoffset */
329 0, /* tp_iter */
330 0, /* tp_iternext */
331 BZ2Compressor_methods, /* tp_methods */
332 0, /* tp_members */
333 0, /* tp_getset */
334 0, /* tp_base */
335 0, /* tp_dict */
336 0, /* tp_descr_get */
337 0, /* tp_descr_set */
338 0, /* tp_dictoffset */
339 (initproc)BZ2Compressor_init, /* tp_init */
340 0, /* tp_alloc */
341 PyType_GenericNew, /* tp_new */
342};
343
344
345/* BZ2Decompressor class. */
346
347static PyObject *
348decompress(BZ2Decompressor *d, char *data, size_t len)
349{
350 size_t data_size = 0;
351 PyObject *result;
352
353 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
354 if (result == NULL)
355 return result;
356 d->bzs.next_in = data;
357 /* FIXME This is not 64-bit clean - avail_in is an int. */
358 d->bzs.avail_in = len;
359 d->bzs.next_out = PyBytes_AS_STRING(result);
360 d->bzs.avail_out = PyBytes_GET_SIZE(result);
361 for (;;) {
362 char *this_out;
363 int bzerror;
364
365 Py_BEGIN_ALLOW_THREADS
366 this_out = d->bzs.next_out;
367 bzerror = BZ2_bzDecompress(&d->bzs);
368 data_size += d->bzs.next_out - this_out;
369 Py_END_ALLOW_THREADS
370 if (catch_bz2_error(bzerror))
371 goto error;
372 if (bzerror == BZ_STREAM_END) {
373 d->eof = 1;
374 if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */
375 Py_CLEAR(d->unused_data);
376 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in,
377 d->bzs.avail_in);
378 if (d->unused_data == NULL)
379 goto error;
380 }
381 break;
382 }
383 if (d->bzs.avail_in == 0)
384 break;
385 if (d->bzs.avail_out == 0) {
386 if (grow_buffer(&result) < 0)
387 goto error;
388 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
389 d->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
390 }
391 }
392 if (data_size != PyBytes_GET_SIZE(result))
393 if (_PyBytes_Resize(&result, data_size) < 0)
394 goto error;
395 return result;
396
397error:
398 Py_XDECREF(result);
399 return NULL;
400}
401
402PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
403"decompress(data) -> bytes\n"
404"\n"
405"Provide data to the decompressor object. Returns a chunk of\n"
406"decompressed data if possible, or b'' otherwise.\n"
407"\n"
408"Attempting to decompress data after the end of stream is reached\n"
409"raises an EOFError. Any data found after the end of the stream\n"
410"is ignored and saved in the unused_data attribute.\n");
411
412static PyObject *
413BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
414{
415 Py_buffer buffer;
416 PyObject *result = NULL;
417
418 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
419 return NULL;
420
421 ACQUIRE_LOCK(self);
422 if (self->eof)
423 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
424 else
425 result = decompress(self, buffer.buf, buffer.len);
426 RELEASE_LOCK(self);
427 PyBuffer_Release(&buffer);
428 return result;
429}
430
431static int
432BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
433{
434 int bzerror;
435
436 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
437 return -1;
438
439#ifdef WITH_THREAD
440 self->lock = PyThread_allocate_lock();
441 if (self->lock == NULL) {
442 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
443 return -1;
444 }
445#endif
446
447 self->unused_data = PyBytes_FromStringAndSize("", 0);
448 if (self->unused_data == NULL)
449 goto error;
450
451 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
452 if (catch_bz2_error(bzerror))
453 goto error;
454
455 return 0;
456
457error:
458 Py_CLEAR(self->unused_data);
459#ifdef WITH_THREAD
460 PyThread_free_lock(self->lock);
461 self->lock = NULL;
462#endif
463 return -1;
464}
465
466static void
467BZ2Decompressor_dealloc(BZ2Decompressor *self)
468{
469 BZ2_bzDecompressEnd(&self->bzs);
470 Py_CLEAR(self->unused_data);
471#ifdef WITH_THREAD
472 if (self->lock != NULL)
473 PyThread_free_lock(self->lock);
474#endif
475 Py_TYPE(self)->tp_free((PyObject *)self);
476}
477
478static PyMethodDef BZ2Decompressor_methods[] = {
479 {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
480 BZ2Decompressor_decompress__doc__},
481 {NULL}
482};
483
484PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
485"True if the end-of-stream marker has been reached.");
486
487PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
488"Data found after the end of the compressed stream.");
489
490static PyMemberDef BZ2Decompressor_members[] = {
491 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
492 READONLY, BZ2Decompressor_eof__doc__},
493 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
494 READONLY, BZ2Decompressor_unused_data__doc__},
495 {NULL}
496};
497
498PyDoc_STRVAR(BZ2Decompressor__doc__,
499"BZ2Decompressor()\n"
500"\n"
501"Create a decompressor object for decompressing data incrementally.\n"
502"\n"
503"For one-shot decompression, use the decompress() function instead.\n");
504
505static PyTypeObject BZ2Decompressor_Type = {
506 PyVarObject_HEAD_INIT(NULL, 0)
507 "_bz2.BZ2Decompressor", /* tp_name */
508 sizeof(BZ2Decompressor), /* tp_basicsize */
509 0, /* tp_itemsize */
510 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
511 0, /* tp_print */
512 0, /* tp_getattr */
513 0, /* tp_setattr */
514 0, /* tp_reserved */
515 0, /* tp_repr */
516 0, /* tp_as_number */
517 0, /* tp_as_sequence */
518 0, /* tp_as_mapping */
519 0, /* tp_hash */
520 0, /* tp_call */
521 0, /* tp_str */
522 0, /* tp_getattro */
523 0, /* tp_setattro */
524 0, /* tp_as_buffer */
525 Py_TPFLAGS_DEFAULT, /* tp_flags */
526 BZ2Decompressor__doc__, /* tp_doc */
527 0, /* tp_traverse */
528 0, /* tp_clear */
529 0, /* tp_richcompare */
530 0, /* tp_weaklistoffset */
531 0, /* tp_iter */
532 0, /* tp_iternext */
533 BZ2Decompressor_methods, /* tp_methods */
534 BZ2Decompressor_members, /* tp_members */
535 0, /* tp_getset */
536 0, /* tp_base */
537 0, /* tp_dict */
538 0, /* tp_descr_get */
539 0, /* tp_descr_set */
540 0, /* tp_dictoffset */
541 (initproc)BZ2Decompressor_init, /* tp_init */
542 0, /* tp_alloc */
543 PyType_GenericNew, /* tp_new */
544};
545
546
547/* Module initialization. */
548
549static struct PyModuleDef _bz2module = {
550 PyModuleDef_HEAD_INIT,
551 "_bz2",
552 NULL,
553 -1,
554 NULL,
555 NULL,
556 NULL,
557 NULL,
558 NULL
559};
560
561PyMODINIT_FUNC
562PyInit__bz2(void)
563{
564 PyObject *m;
565
566 if (PyType_Ready(&BZ2Compressor_Type) < 0)
567 return NULL;
568 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
569 return NULL;
570
571 m = PyModule_Create(&_bz2module);
572 if (m == NULL)
573 return NULL;
574
575 Py_INCREF(&BZ2Compressor_Type);
576 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
577
578 Py_INCREF(&BZ2Decompressor_Type);
579 PyModule_AddObject(m, "BZ2Decompressor",
580 (PyObject *)&BZ2Decompressor_Type);
581
582 return m;
583}