/* _bz2 - Low-level Python interface to libbzip2. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7
8#ifdef WITH_THREAD
9#include "pythread.h"
10#endif
11
12#include <bzlib.h>
13#include <stdio.h>
14
15
/* When the libbzip2 header does not define BZ_CONFIG_ERROR, map the BZ2_
   prefixed names used throughout this file onto the unprefixed entry points.
   NOTE(review): presumably this targets old libbzip2 releases that predate
   the BZ2_ prefix -- confirm against the bzip2 release notes. */
#if !defined(BZ_CONFIG_ERROR)
#define BZ2_bzCompress          bzCompress
#define BZ2_bzCompressInit      bzCompressInit
#define BZ2_bzCompressEnd       bzCompressEnd
#define BZ2_bzDecompress        bzDecompress
#define BZ2_bzDecompressInit    bzDecompressInit
#define BZ2_bzDecompressEnd     bzDecompressEnd
#endif  /* !defined(BZ_CONFIG_ERROR) */
24
25
/* Per-object locking helpers.

   ACQUIRE_LOCK first tries to take obj->lock without blocking.  Only when
   that attempt fails does it wrap a blocking acquire in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, so the uncontended path
   never leaves the interpreter.  RELEASE_LOCK releases the same lock.
   Without WITH_THREAD both macros expand to nothing. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK) == 0) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif
38
/* Smaller of X and Y.  Each argument may be evaluated twice, so do not pass
   expressions with side effects.  The guard avoids redefining MIN when a
   system header has already provided one. */
#ifndef MIN
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#endif
40
Antoine Pitrou37dc5f82011-04-03 17:05:46 +020041
42typedef struct {
43 PyObject_HEAD
44 bz_stream bzs;
45 int flushed;
46#ifdef WITH_THREAD
47 PyThread_type_lock lock;
48#endif
49} BZ2Compressor;
50
51typedef struct {
52 PyObject_HEAD
53 bz_stream bzs;
54 char eof; /* T_BOOL expects a char */
55 PyObject *unused_data;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} BZ2Decompressor;
60
61
62/* Helper functions. */
63
64static int
65catch_bz2_error(int bzerror)
66{
67 switch(bzerror) {
68 case BZ_OK:
69 case BZ_RUN_OK:
70 case BZ_FLUSH_OK:
71 case BZ_FINISH_OK:
72 case BZ_STREAM_END:
73 return 0;
74
75#ifdef BZ_CONFIG_ERROR
76 case BZ_CONFIG_ERROR:
77 PyErr_SetString(PyExc_SystemError,
78 "libbzip2 was not compiled correctly");
79 return 1;
80#endif
81 case BZ_PARAM_ERROR:
82 PyErr_SetString(PyExc_ValueError,
83 "Internal error - "
84 "invalid parameters passed to libbzip2");
85 return 1;
86 case BZ_MEM_ERROR:
87 PyErr_NoMemory();
88 return 1;
89 case BZ_DATA_ERROR:
90 case BZ_DATA_ERROR_MAGIC:
91 PyErr_SetString(PyExc_IOError, "Invalid data stream");
92 return 1;
93 case BZ_IO_ERROR:
94 PyErr_SetString(PyExc_IOError, "Unknown I/O error");
95 return 1;
96 case BZ_UNEXPECTED_EOF:
97 PyErr_SetString(PyExc_EOFError,
98 "Compressed file ended before the logical "
99 "end-of-stream was detected");
100 return 1;
101 case BZ_SEQUENCE_ERROR:
102 PyErr_SetString(PyExc_RuntimeError,
103 "Internal error - "
104 "Invalid sequence of commands sent to libbzip2");
105 return 1;
106 default:
107 PyErr_Format(PyExc_IOError,
108 "Unrecognized error from libbzip2: %d", bzerror);
109 return 1;
110 }
111}
112
/* Output buffer sizing.  SMALLCHUNK is the larger of BUFSIZ and 8192 and is
   the initial size of every output buffer; BIGCHUNK is 512 KiB, or 16 KiB
   when SIZEOF_INT is below 4. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
124
125static int
126grow_buffer(PyObject **buf)
127{
128 size_t size = PyBytes_GET_SIZE(*buf);
129 if (size <= SMALLCHUNK)
130 return _PyBytes_Resize(buf, size + SMALLCHUNK);
131 else if (size <= BIGCHUNK)
132 return _PyBytes_Resize(buf, size * 2);
133 else
134 return _PyBytes_Resize(buf, size + BIGCHUNK);
135}
136
137
138/* BZ2Compressor class. */
139
140static PyObject *
141compress(BZ2Compressor *c, char *data, size_t len, int action)
142{
143 size_t data_size = 0;
144 PyObject *result;
145
146 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
147 if (result == NULL)
148 return NULL;
149 c->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200150 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
151 Do compression in chunks of no more than UINT_MAX bytes each. */
152 c->bzs.avail_in = MIN(len, UINT_MAX);
153 len -= c->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200154 c->bzs.next_out = PyBytes_AS_STRING(result);
155 c->bzs.avail_out = PyBytes_GET_SIZE(result);
156 for (;;) {
157 char *this_out;
158 int bzerror;
159
160 Py_BEGIN_ALLOW_THREADS
161 this_out = c->bzs.next_out;
162 bzerror = BZ2_bzCompress(&c->bzs, action);
163 data_size += c->bzs.next_out - this_out;
164 Py_END_ALLOW_THREADS
165 if (catch_bz2_error(bzerror))
166 goto error;
167
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200168 if (c->bzs.avail_in == 0 && len > 0) {
169 c->bzs.avail_in = MIN(len, UINT_MAX);
170 len -= c->bzs.avail_in;
171 }
172
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200173 /* In regular compression mode, stop when input data is exhausted.
174 In flushing mode, stop when all buffered data has been flushed. */
175 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
176 (action == BZ_FINISH && bzerror == BZ_STREAM_END))
177 break;
178
179 if (c->bzs.avail_out == 0) {
180 if (grow_buffer(&result) < 0)
181 goto error;
182 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
183 c->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
184 }
185 }
186 if (data_size != PyBytes_GET_SIZE(result))
187 if (_PyBytes_Resize(&result, data_size) < 0)
188 goto error;
189 return result;
190
191error:
192 Py_XDECREF(result);
193 return NULL;
194}
195
196PyDoc_STRVAR(BZ2Compressor_compress__doc__,
197"compress(data) -> bytes\n"
198"\n"
199"Provide data to the compressor object. Returns a chunk of\n"
200"compressed data if possible, or b'' otherwise.\n"
201"\n"
202"When you have finished providing data to the compressor, call the\n"
203"flush() method to finish the compression process.\n");
204
205static PyObject *
206BZ2Compressor_compress(BZ2Compressor *self, PyObject *args)
207{
208 Py_buffer buffer;
209 PyObject *result = NULL;
210
211 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
212 return NULL;
213
214 ACQUIRE_LOCK(self);
215 if (self->flushed)
216 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
217 else
218 result = compress(self, buffer.buf, buffer.len, BZ_RUN);
219 RELEASE_LOCK(self);
220 PyBuffer_Release(&buffer);
221 return result;
222}
223
224PyDoc_STRVAR(BZ2Compressor_flush__doc__,
225"flush() -> bytes\n"
226"\n"
227"Finish the compression process. Returns the compressed data left\n"
228"in internal buffers.\n"
229"\n"
230"The compressor object may not be used after this method is called.\n");
231
232static PyObject *
233BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs)
234{
235 PyObject *result = NULL;
236
237 ACQUIRE_LOCK(self);
238 if (self->flushed)
239 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
240 else {
241 self->flushed = 1;
242 result = compress(self, NULL, 0, BZ_FINISH);
243 }
244 RELEASE_LOCK(self);
245 return result;
246}
247
248static int
249BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs)
250{
251 int compresslevel = 9;
252 int bzerror;
253
254 if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel))
255 return -1;
256 if (!(1 <= compresslevel && compresslevel <= 9)) {
257 PyErr_SetString(PyExc_ValueError,
258 "compresslevel must be between 1 and 9");
259 return -1;
260 }
261
262#ifdef WITH_THREAD
263 self->lock = PyThread_allocate_lock();
264 if (self->lock == NULL) {
265 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
266 return -1;
267 }
268#endif
269
270 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
271 if (catch_bz2_error(bzerror))
272 goto error;
273
274 return 0;
275
276error:
277#ifdef WITH_THREAD
278 PyThread_free_lock(self->lock);
279 self->lock = NULL;
280#endif
281 return -1;
282}
283
284static void
285BZ2Compressor_dealloc(BZ2Compressor *self)
286{
287 BZ2_bzCompressEnd(&self->bzs);
288#ifdef WITH_THREAD
289 if (self->lock != NULL)
290 PyThread_free_lock(self->lock);
291#endif
292 Py_TYPE(self)->tp_free((PyObject *)self);
293}
294
295static PyMethodDef BZ2Compressor_methods[] = {
296 {"compress", (PyCFunction)BZ2Compressor_compress, METH_VARARGS,
297 BZ2Compressor_compress__doc__},
298 {"flush", (PyCFunction)BZ2Compressor_flush, METH_NOARGS,
299 BZ2Compressor_flush__doc__},
300 {NULL}
301};
302
303PyDoc_STRVAR(BZ2Compressor__doc__,
304"BZ2Compressor(compresslevel=9)\n"
305"\n"
306"Create a compressor object for compressing data incrementally.\n"
307"\n"
308"compresslevel, if given, must be a number between 1 and 9.\n"
309"\n"
310"For one-shot compression, use the compress() function instead.\n");
311
312static PyTypeObject BZ2Compressor_Type = {
313 PyVarObject_HEAD_INIT(NULL, 0)
314 "_bz2.BZ2Compressor", /* tp_name */
315 sizeof(BZ2Compressor), /* tp_basicsize */
316 0, /* tp_itemsize */
317 (destructor)BZ2Compressor_dealloc, /* tp_dealloc */
318 0, /* tp_print */
319 0, /* tp_getattr */
320 0, /* tp_setattr */
321 0, /* tp_reserved */
322 0, /* tp_repr */
323 0, /* tp_as_number */
324 0, /* tp_as_sequence */
325 0, /* tp_as_mapping */
326 0, /* tp_hash */
327 0, /* tp_call */
328 0, /* tp_str */
329 0, /* tp_getattro */
330 0, /* tp_setattro */
331 0, /* tp_as_buffer */
332 Py_TPFLAGS_DEFAULT, /* tp_flags */
333 BZ2Compressor__doc__, /* tp_doc */
334 0, /* tp_traverse */
335 0, /* tp_clear */
336 0, /* tp_richcompare */
337 0, /* tp_weaklistoffset */
338 0, /* tp_iter */
339 0, /* tp_iternext */
340 BZ2Compressor_methods, /* tp_methods */
341 0, /* tp_members */
342 0, /* tp_getset */
343 0, /* tp_base */
344 0, /* tp_dict */
345 0, /* tp_descr_get */
346 0, /* tp_descr_set */
347 0, /* tp_dictoffset */
348 (initproc)BZ2Compressor_init, /* tp_init */
349 0, /* tp_alloc */
350 PyType_GenericNew, /* tp_new */
351};
352
353
354/* BZ2Decompressor class. */
355
356static PyObject *
357decompress(BZ2Decompressor *d, char *data, size_t len)
358{
359 size_t data_size = 0;
360 PyObject *result;
361
362 result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
363 if (result == NULL)
364 return result;
365 d->bzs.next_in = data;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200366 /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
367 Do decompression in chunks of no more than UINT_MAX bytes each. */
368 d->bzs.avail_in = MIN(len, UINT_MAX);
369 len -= d->bzs.avail_in;
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200370 d->bzs.next_out = PyBytes_AS_STRING(result);
371 d->bzs.avail_out = PyBytes_GET_SIZE(result);
372 for (;;) {
373 char *this_out;
374 int bzerror;
375
376 Py_BEGIN_ALLOW_THREADS
377 this_out = d->bzs.next_out;
378 bzerror = BZ2_bzDecompress(&d->bzs);
379 data_size += d->bzs.next_out - this_out;
380 Py_END_ALLOW_THREADS
381 if (catch_bz2_error(bzerror))
382 goto error;
383 if (bzerror == BZ_STREAM_END) {
384 d->eof = 1;
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200385 len += d->bzs.avail_in;
386 if (len > 0) { /* Save leftover input to unused_data */
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200387 Py_CLEAR(d->unused_data);
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200388 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200389 if (d->unused_data == NULL)
390 goto error;
391 }
392 break;
393 }
Nadeem Vawdaea4b46f2011-04-12 23:02:42 +0200394 if (d->bzs.avail_in == 0) {
395 if (len == 0)
396 break;
397 d->bzs.avail_in = MIN(len, UINT_MAX);
398 len -= d->bzs.avail_in;
399 }
Antoine Pitrou37dc5f82011-04-03 17:05:46 +0200400 if (d->bzs.avail_out == 0) {
401 if (grow_buffer(&result) < 0)
402 goto error;
403 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
404 d->bzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
405 }
406 }
407 if (data_size != PyBytes_GET_SIZE(result))
408 if (_PyBytes_Resize(&result, data_size) < 0)
409 goto error;
410 return result;
411
412error:
413 Py_XDECREF(result);
414 return NULL;
415}
416
417PyDoc_STRVAR(BZ2Decompressor_decompress__doc__,
418"decompress(data) -> bytes\n"
419"\n"
420"Provide data to the decompressor object. Returns a chunk of\n"
421"decompressed data if possible, or b'' otherwise.\n"
422"\n"
423"Attempting to decompress data after the end of stream is reached\n"
424"raises an EOFError. Any data found after the end of the stream\n"
425"is ignored and saved in the unused_data attribute.\n");
426
427static PyObject *
428BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args)
429{
430 Py_buffer buffer;
431 PyObject *result = NULL;
432
433 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
434 return NULL;
435
436 ACQUIRE_LOCK(self);
437 if (self->eof)
438 PyErr_SetString(PyExc_EOFError, "End of stream already reached");
439 else
440 result = decompress(self, buffer.buf, buffer.len);
441 RELEASE_LOCK(self);
442 PyBuffer_Release(&buffer);
443 return result;
444}
445
446static int
447BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs)
448{
449 int bzerror;
450
451 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
452 return -1;
453
454#ifdef WITH_THREAD
455 self->lock = PyThread_allocate_lock();
456 if (self->lock == NULL) {
457 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
458 return -1;
459 }
460#endif
461
462 self->unused_data = PyBytes_FromStringAndSize("", 0);
463 if (self->unused_data == NULL)
464 goto error;
465
466 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
467 if (catch_bz2_error(bzerror))
468 goto error;
469
470 return 0;
471
472error:
473 Py_CLEAR(self->unused_data);
474#ifdef WITH_THREAD
475 PyThread_free_lock(self->lock);
476 self->lock = NULL;
477#endif
478 return -1;
479}
480
481static void
482BZ2Decompressor_dealloc(BZ2Decompressor *self)
483{
484 BZ2_bzDecompressEnd(&self->bzs);
485 Py_CLEAR(self->unused_data);
486#ifdef WITH_THREAD
487 if (self->lock != NULL)
488 PyThread_free_lock(self->lock);
489#endif
490 Py_TYPE(self)->tp_free((PyObject *)self);
491}
492
493static PyMethodDef BZ2Decompressor_methods[] = {
494 {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS,
495 BZ2Decompressor_decompress__doc__},
496 {NULL}
497};
498
499PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
500"True if the end-of-stream marker has been reached.");
501
502PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
503"Data found after the end of the compressed stream.");
504
505static PyMemberDef BZ2Decompressor_members[] = {
506 {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
507 READONLY, BZ2Decompressor_eof__doc__},
508 {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
509 READONLY, BZ2Decompressor_unused_data__doc__},
510 {NULL}
511};
512
513PyDoc_STRVAR(BZ2Decompressor__doc__,
514"BZ2Decompressor()\n"
515"\n"
516"Create a decompressor object for decompressing data incrementally.\n"
517"\n"
518"For one-shot decompression, use the decompress() function instead.\n");
519
520static PyTypeObject BZ2Decompressor_Type = {
521 PyVarObject_HEAD_INIT(NULL, 0)
522 "_bz2.BZ2Decompressor", /* tp_name */
523 sizeof(BZ2Decompressor), /* tp_basicsize */
524 0, /* tp_itemsize */
525 (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
526 0, /* tp_print */
527 0, /* tp_getattr */
528 0, /* tp_setattr */
529 0, /* tp_reserved */
530 0, /* tp_repr */
531 0, /* tp_as_number */
532 0, /* tp_as_sequence */
533 0, /* tp_as_mapping */
534 0, /* tp_hash */
535 0, /* tp_call */
536 0, /* tp_str */
537 0, /* tp_getattro */
538 0, /* tp_setattro */
539 0, /* tp_as_buffer */
540 Py_TPFLAGS_DEFAULT, /* tp_flags */
541 BZ2Decompressor__doc__, /* tp_doc */
542 0, /* tp_traverse */
543 0, /* tp_clear */
544 0, /* tp_richcompare */
545 0, /* tp_weaklistoffset */
546 0, /* tp_iter */
547 0, /* tp_iternext */
548 BZ2Decompressor_methods, /* tp_methods */
549 BZ2Decompressor_members, /* tp_members */
550 0, /* tp_getset */
551 0, /* tp_base */
552 0, /* tp_dict */
553 0, /* tp_descr_get */
554 0, /* tp_descr_set */
555 0, /* tp_dictoffset */
556 (initproc)BZ2Decompressor_init, /* tp_init */
557 0, /* tp_alloc */
558 PyType_GenericNew, /* tp_new */
559};
560
561
562/* Module initialization. */
563
564static struct PyModuleDef _bz2module = {
565 PyModuleDef_HEAD_INIT,
566 "_bz2",
567 NULL,
568 -1,
569 NULL,
570 NULL,
571 NULL,
572 NULL,
573 NULL
574};
575
576PyMODINIT_FUNC
577PyInit__bz2(void)
578{
579 PyObject *m;
580
581 if (PyType_Ready(&BZ2Compressor_Type) < 0)
582 return NULL;
583 if (PyType_Ready(&BZ2Decompressor_Type) < 0)
584 return NULL;
585
586 m = PyModule_Create(&_bz2module);
587 if (m == NULL)
588 return NULL;
589
590 Py_INCREF(&BZ2Compressor_Type);
591 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type);
592
593 Py_INCREF(&BZ2Decompressor_Type);
594 PyModule_AddObject(m, "BZ2Decompressor",
595 (PyObject *)&BZ2Decompressor_Type);
596
597 return m;
598}