/* _lzma - Low-level Python interface to liblzma. */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include "structmember.h"
7#ifdef WITH_THREAD
8#include "pythread.h"
9#endif
10
11#include <stdarg.h>
12#include <string.h>
13
14#include <lzma.h>
15
16
17#ifndef PY_LONG_LONG
18#error "This module requires PY_LONG_LONG to be defined"
19#endif
20
21
#ifdef WITH_THREAD
/* Acquire obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, and it is wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so that other Python
   threads are not held up while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without WITH_THREAD: both macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
34
35
/* Container formats understood by the compressor and decompressor
   constructors. */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3
};

/* Value used for a decompressor's check ID until the real one is known:
   one past the largest check ID liblzma defines. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
45
46
/* Instance layout of an LZMACompressor object. */
typedef struct {
    PyObject_HEAD
    lzma_stream lzs;            /* liblzma stream driven by compress() */
    int flushed;                /* set to 1 by Compressor_flush(); once set,
                                   compress() and flush() raise ValueError */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* taken via ACQUIRE_LOCK in the compress()
                                   and flush() methods */
#endif
} Compressor;
55
/* Instance layout of an LZMADecompressor object. */
typedef struct {
    PyObject_HEAD
    lzma_stream lzs;            /* liblzma stream driven by decompress() */
    int check;                  /* integrity check ID; LZMA_CHECK_UNKNOWN
                                   until liblzma reports it */
    char eof;                   /* set to 1 when LZMA_STREAM_END is seen */
    PyObject *unused_data;      /* bytes object holding input found after
                                   the end of the stream */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* taken via ACQUIRE_LOCK in the
                                   decompress() method */
#endif
} Decompressor;
66
/* LZMAError class object.  Created in PyInit__lzma() and raised by
   catch_lzma_error() for failures reported by liblzma. */
static PyObject *Error;

/* An empty tuple, used by the filter specifier parsing code as the
   positional-arguments tuple for PyArg_ParseTupleAndKeywords(). */
static PyObject *empty_tuple;
72
73
74/* Helper functions. */
75
76static int
77catch_lzma_error(lzma_ret lzret)
78{
79 switch (lzret) {
80 case LZMA_OK:
81 case LZMA_GET_CHECK:
82 case LZMA_NO_CHECK:
83 case LZMA_STREAM_END:
84 return 0;
85 case LZMA_UNSUPPORTED_CHECK:
86 PyErr_SetString(Error, "Unsupported integrity check");
87 return 1;
88 case LZMA_MEM_ERROR:
89 PyErr_NoMemory();
90 return 1;
91 case LZMA_MEMLIMIT_ERROR:
92 PyErr_SetString(Error, "Memory usage limit exceeded");
93 return 1;
94 case LZMA_FORMAT_ERROR:
95 PyErr_SetString(Error, "Input format not supported by decoder");
96 return 1;
97 case LZMA_OPTIONS_ERROR:
98 PyErr_SetString(Error, "Invalid or unsupported options");
99 return 1;
100 case LZMA_DATA_ERROR:
101 PyErr_SetString(Error, "Corrupt input data");
102 return 1;
103 case LZMA_BUF_ERROR:
104 PyErr_SetString(Error, "Insufficient buffer space");
105 return 1;
106 case LZMA_PROG_ERROR:
107 PyErr_SetString(Error, "Internal error");
108 return 1;
109 default:
110 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
111 return 1;
112 }
113}
114
/* Initial size of an output buffer: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
120
121static int
122grow_buffer(PyObject **buf)
123{
124 size_t size = PyBytes_GET_SIZE(*buf);
125 return _PyBytes_Resize(buf, size + (size >> 3) + 6);
126}
127
128
129/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
130 since the predefined conversion specifiers do not suit our needs:
131
132 uint32_t - the "I" (unsigned int) specifier is the right size, but
133 silently ignores overflows on conversion.
134
135 lzma_mode and lzma_match_finder - these are enumeration types, and
136 so the size of each is implementation-defined. Worse, different
137 enum types can be of different sizes within the same program, so
138 to be strictly correct, we need to define two separate converters.
139 */
140
141#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
142 static int \
143 FUNCNAME(PyObject *obj, void *ptr) \
144 { \
145 unsigned long val; \
146 \
147 val = PyLong_AsUnsignedLong(obj); \
148 if (PyErr_Occurred()) \
149 return 0; \
150 if ((unsigned long)(TYPE)val != val) { \
151 PyErr_SetString(PyExc_OverflowError, \
152 "Value too large for " #TYPE " type"); \
153 return 0; \
154 } \
155 *(TYPE *)ptr = val; \
156 return 1; \
157 }
158
159INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
160INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
161INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
162
163#undef INT_TYPE_CONVERTER_FUNC
164
165
166/* Filter specifier parsing functions. */
167
168static void *
169parse_filter_spec_lzma(PyObject *spec)
170{
171 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
172 "pb", "mode", "nice_len", "mf", "depth", NULL};
173 PyObject *id;
174 PyObject *preset_obj;
175 uint32_t preset = LZMA_PRESET_DEFAULT;
176 lzma_options_lzma *options;
177
178 /* First, fill in default values for all the options using a preset.
179 Then, override the defaults with any values given by the caller. */
180
181 preset_obj = PyMapping_GetItemString(spec, "preset");
182 if (preset_obj == NULL) {
183 if (PyErr_ExceptionMatches(PyExc_KeyError))
184 PyErr_Clear();
185 else
186 return NULL;
187 } else {
188 int ok = uint32_converter(preset_obj, &preset);
189 Py_DECREF(preset_obj);
190 if (!ok)
191 return NULL;
192 }
193
194 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
195 if (options == NULL)
196 return PyErr_NoMemory();
197 memset(options, 0, sizeof *options);
198
199 if (lzma_lzma_preset(options, preset)) {
200 PyMem_Free(options);
201 PyErr_Format(Error, "lzma_lzma_preset() failed for preset %#x", preset);
202 return NULL;
203 }
204
205 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
206 "|OOO&O&O&O&O&O&O&O&", optnames,
207 &id, &preset_obj,
208 uint32_converter, &options->dict_size,
209 uint32_converter, &options->lc,
210 uint32_converter, &options->lp,
211 uint32_converter, &options->pb,
212 lzma_mode_converter, &options->mode,
213 uint32_converter, &options->nice_len,
214 lzma_mf_converter, &options->mf,
215 uint32_converter, &options->depth)) {
216 PyErr_SetString(PyExc_ValueError,
217 "Invalid filter specifier for LZMA filter");
218 PyMem_Free(options);
219 options = NULL;
220 }
221 return options;
222}
223
224static void *
225parse_filter_spec_delta(PyObject *spec)
226{
227 static char *optnames[] = {"id", "dist", NULL};
228 PyObject *id;
229 uint32_t dist = 1;
230 lzma_options_delta *options;
231
232 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
233 &id, uint32_converter, &dist)) {
234 PyErr_SetString(PyExc_ValueError,
235 "Invalid filter specifier for delta filter");
236 return NULL;
237 }
238
239 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
240 if (options == NULL)
241 return PyErr_NoMemory();
242 memset(options, 0, sizeof *options);
243 options->type = LZMA_DELTA_TYPE_BYTE;
244 options->dist = dist;
245 return options;
246}
247
248static void *
249parse_filter_spec_bcj(PyObject *spec)
250{
251 static char *optnames[] = {"id", "start_offset", NULL};
252 PyObject *id;
253 uint32_t start_offset = 0;
254 lzma_options_bcj *options;
255
256 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
257 &id, uint32_converter, &start_offset)) {
258 PyErr_SetString(PyExc_ValueError,
259 "Invalid filter specifier for BCJ filter");
260 return NULL;
261 }
262
263 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
264 if (options == NULL)
265 return PyErr_NoMemory();
266 memset(options, 0, sizeof *options);
267 options->start_offset = start_offset;
268 return options;
269}
270
271static void *
272parse_filter_spec(lzma_filter *f, PyObject *spec)
273{
274 PyObject *id_obj;
275
276 if (!PyMapping_Check(spec)) {
277 PyErr_SetString(PyExc_TypeError,
278 "Filter specifier must be a dict or dict-like object");
279 return NULL;
280 }
281 id_obj = PyMapping_GetItemString(spec, "id");
282 if (id_obj == NULL) {
283 if (PyErr_ExceptionMatches(PyExc_KeyError))
284 PyErr_SetString(PyExc_ValueError,
285 "Filter specifier must have an \"id\" entry");
286 return NULL;
287 }
288 f->id = PyLong_AsUnsignedLongLong(id_obj);
289 Py_DECREF(id_obj);
290 if (PyErr_Occurred())
291 return NULL;
292
293 switch (f->id) {
294 case LZMA_FILTER_LZMA1:
295 case LZMA_FILTER_LZMA2:
296 f->options = parse_filter_spec_lzma(spec);
297 return f->options;
298 case LZMA_FILTER_DELTA:
299 f->options = parse_filter_spec_delta(spec);
300 return f->options;
301 case LZMA_FILTER_X86:
302 case LZMA_FILTER_POWERPC:
303 case LZMA_FILTER_IA64:
304 case LZMA_FILTER_ARM:
305 case LZMA_FILTER_ARMTHUMB:
306 case LZMA_FILTER_SPARC:
307 f->options = parse_filter_spec_bcj(spec);
308 return f->options;
309 default:
310 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
311 return NULL;
312 }
313}
314
315static void
316free_filter_chain(lzma_filter filters[])
317{
318 int i;
319
320 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
321 PyMem_Free(filters[i].options);
322}
323
324static int
325parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
326{
327 Py_ssize_t i, num_filters;
328
329 num_filters = PySequence_Length(filterspecs);
330 if (num_filters == -1)
331 return -1;
332 if (num_filters > LZMA_FILTERS_MAX) {
333 PyErr_Format(PyExc_ValueError,
334 "Too many filters - liblzma supports a maximum of %d",
335 LZMA_FILTERS_MAX);
336 return -1;
337 }
338
339 for (i = 0; i < num_filters; i++) {
340 int ok = 1;
341 PyObject *spec = PySequence_GetItem(filterspecs, i);
342 if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
343 ok = 0;
344 Py_XDECREF(spec);
345 if (!ok) {
346 filters[i].id = LZMA_VLI_UNKNOWN;
347 free_filter_chain(filters);
348 return -1;
349 }
350 }
351 filters[num_filters].id = LZMA_VLI_UNKNOWN;
352 return 0;
353}
354
355
356/* LZMACompressor class. */
357
358static PyObject *
359compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
360{
361 size_t data_size = 0;
362 PyObject *result;
363
364 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
365 if (result == NULL)
366 return NULL;
367 c->lzs.next_in = data;
368 c->lzs.avail_in = len;
369 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
370 c->lzs.avail_out = PyBytes_GET_SIZE(result);
371 for (;;) {
372 lzma_ret lzret;
373
374 Py_BEGIN_ALLOW_THREADS
375 lzret = lzma_code(&c->lzs, action);
376 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
377 Py_END_ALLOW_THREADS
378 if (catch_lzma_error(lzret))
379 goto error;
380 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
381 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
382 break;
383 } else if (c->lzs.avail_out == 0) {
384 if (grow_buffer(&result) == -1)
385 goto error;
386 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
387 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
388 }
389 }
390 if (data_size != PyBytes_GET_SIZE(result))
391 if (_PyBytes_Resize(&result, data_size) == -1)
392 goto error;
393 return result;
394
395error:
396 Py_XDECREF(result);
397 return NULL;
398}
399
400PyDoc_STRVAR(Compressor_compress_doc,
401"compress(data) -> bytes\n"
402"\n"
403"Provide data to the compressor object. Returns a chunk of\n"
404"compressed data if possible, or b\"\" otherwise.\n"
405"\n"
406"When you have finished providing data to the compressor, call the\n"
407"flush() method to finish the conversion process.\n");
408
409static PyObject *
410Compressor_compress(Compressor *self, PyObject *args)
411{
412 Py_buffer buffer;
413 PyObject *result = NULL;
414
415 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
416 return NULL;
417
418 ACQUIRE_LOCK(self);
419 if (self->flushed)
420 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
421 else
422 result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
423 RELEASE_LOCK(self);
424 PyBuffer_Release(&buffer);
425 return result;
426}
427
428PyDoc_STRVAR(Compressor_flush_doc,
429"flush() -> bytes\n"
430"\n"
431"Finish the compression process. Returns the compressed data left\n"
432"in internal buffers.\n"
433"\n"
434"The compressor object cannot be used after this method is called.\n");
435
436static PyObject *
437Compressor_flush(Compressor *self, PyObject *noargs)
438{
439 PyObject *result = NULL;
440
441 ACQUIRE_LOCK(self);
442 if (self->flushed) {
443 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
444 } else {
445 self->flushed = 1;
446 result = compress(self, NULL, 0, LZMA_FINISH);
447 }
448 RELEASE_LOCK(self);
449 return result;
450}
451
452static int
453Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
454 PyObject *filterspecs)
455{
456 lzma_ret lzret;
457
458 if (filterspecs == Py_None) {
459 lzret = lzma_easy_encoder(lzs, preset, check);
460 } else {
461 lzma_filter filters[LZMA_FILTERS_MAX + 1];
462
463 if (parse_filter_chain_spec(filters, filterspecs) == -1)
464 return -1;
465 lzret = lzma_stream_encoder(lzs, filters, check);
466 free_filter_chain(filters);
467 }
468 if (catch_lzma_error(lzret))
469 return -1;
470 else
471 return 0;
472}
473
474static int
475Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
476{
477 lzma_ret lzret;
478
479 if (filterspecs == Py_None) {
480 lzma_options_lzma options;
481
482 if (lzma_lzma_preset(&options, preset)) {
483 PyErr_Format(Error, "Invalid compression preset: %#x", preset);
484 return -1;
485 }
486 lzret = lzma_alone_encoder(lzs, &options);
487 } else {
488 lzma_filter filters[LZMA_FILTERS_MAX + 1];
489
490 if (parse_filter_chain_spec(filters, filterspecs) == -1)
491 return -1;
492 if (filters[0].id == LZMA_FILTER_LZMA1 &&
493 filters[1].id == LZMA_VLI_UNKNOWN) {
494 lzret = lzma_alone_encoder(lzs, filters[0].options);
495 } else {
496 PyErr_SetString(PyExc_ValueError,
497 "Invalid filter chain for FORMAT_ALONE - "
498 "must be a single LZMA1 filter");
499 lzret = LZMA_PROG_ERROR;
500 }
501 free_filter_chain(filters);
502 }
503 if (PyErr_Occurred() || catch_lzma_error(lzret))
504 return -1;
505 else
506 return 0;
507}
508
509static int
510Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
511{
512 lzma_filter filters[LZMA_FILTERS_MAX + 1];
513 lzma_ret lzret;
514
515 if (filterspecs == Py_None) {
516 PyErr_SetString(PyExc_ValueError,
517 "Must specify filters for FORMAT_RAW");
518 return -1;
519 }
520 if (parse_filter_chain_spec(filters, filterspecs) == -1)
521 return -1;
522 lzret = lzma_raw_encoder(lzs, filters);
523 free_filter_chain(filters);
524 if (catch_lzma_error(lzret))
525 return -1;
526 else
527 return 0;
528}
529
530static int
531Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
532{
533 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
534 int format = FORMAT_XZ;
535 int check = -1;
536 uint32_t preset = LZMA_PRESET_DEFAULT;
537 PyObject *preset_obj = Py_None;
538 PyObject *filterspecs = Py_None;
539
540 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
541 "|iiOO:LZMACompressor", arg_names,
542 &format, &check, &preset_obj,
543 &filterspecs))
544 return -1;
545
546 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
547 PyErr_SetString(PyExc_ValueError,
548 "Integrity checks are only supported by FORMAT_XZ");
549 return -1;
550 }
551
552 if (preset_obj != Py_None && filterspecs != Py_None) {
553 PyErr_SetString(PyExc_ValueError,
554 "Cannot specify both preset and filter chain");
555 return -1;
556 }
557
558 if (preset_obj != Py_None)
559 if (!uint32_converter(preset_obj, &preset))
560 return -1;
561
562#ifdef WITH_THREAD
563 self->lock = PyThread_allocate_lock();
564 if (self->lock == NULL) {
565 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
566 return -1;
567 }
568#endif
569
570 self->flushed = 0;
571 switch (format) {
572 case FORMAT_XZ:
573 if (check == -1)
574 check = LZMA_CHECK_CRC64;
575 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
576 break;
577 return 0;
578
579 case FORMAT_ALONE:
580 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
581 break;
582 return 0;
583
584 case FORMAT_RAW:
585 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
586 break;
587 return 0;
588
589 default:
590 PyErr_Format(PyExc_ValueError,
591 "Invalid container format: %d", format);
592 break;
593 }
594
595#ifdef WITH_THREAD
596 PyThread_free_lock(self->lock);
597 self->lock = NULL;
598#endif
599 return -1;
600}
601
602static void
603Compressor_dealloc(Compressor *self)
604{
605 lzma_end(&self->lzs);
606#ifdef WITH_THREAD
607 if (self->lock != NULL)
608 PyThread_free_lock(self->lock);
609#endif
610 Py_TYPE(self)->tp_free((PyObject *)self);
611}
612
613static PyMethodDef Compressor_methods[] = {
614 {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
615 Compressor_compress_doc},
616 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
617 Compressor_flush_doc},
618 {NULL}
619};
620
621PyDoc_STRVAR(Compressor_doc,
622"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
623"\n"
624"Create a compressor object for compressing data incrementally.\n"
625"\n"
626"format specifies the container format to use for the output. This can\n"
627"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
628"\n"
629"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
630"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
631"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
632"\n"
633"The settings used by the compressor can be specified either as a\n"
634"preset compression level (with the 'preset' argument), or in detail\n"
635"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
636"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
637"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
638"the raw compressor does not support preset compression levels.\n"
639"\n"
640"preset (if provided) should be an integer in the range 0-9, optionally\n"
641"OR-ed with the constant PRESET_EXTREME.\n"
642"\n"
643"filters (if provided) should be a sequence of dicts. Each dict should\n"
644"have an entry for \"id\" indicating the ID of the filter, plus\n"
645"additional entries for options to the filter.\n"
646"\n"
647"For one-shot compression, use the compress() function instead.\n");
648
/* Type object for _lzma.LZMACompressor.  Only the deallocator, flags,
   docstring, method table, initialiser and allocator slots are filled in;
   every other slot is left at 0.  The slots are listed positionally, so
   their order must not be changed. */
static PyTypeObject Compressor_type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_lzma.LZMACompressor",             /* tp_name */
    sizeof(Compressor),                 /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)Compressor_dealloc,     /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    Compressor_doc,                     /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    Compressor_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)Compressor_init,          /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};
689
690
691/* LZMADecompressor class. */
692
693static PyObject *
694decompress(Decompressor *d, uint8_t *data, size_t len)
695{
696 size_t data_size = 0;
697 PyObject *result;
698
699 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
700 if (result == NULL)
701 return NULL;
702 d->lzs.next_in = data;
703 d->lzs.avail_in = len;
704 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
705 d->lzs.avail_out = PyBytes_GET_SIZE(result);
706 for (;;) {
707 lzma_ret lzret;
708
709 Py_BEGIN_ALLOW_THREADS
710 lzret = lzma_code(&d->lzs, LZMA_RUN);
711 data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
712 Py_END_ALLOW_THREADS
713 if (catch_lzma_error(lzret))
714 goto error;
715 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
716 d->check = lzma_get_check(&d->lzs);
717 if (lzret == LZMA_STREAM_END) {
718 d->eof = 1;
719 if (d->lzs.avail_in > 0) {
720 Py_CLEAR(d->unused_data);
721 d->unused_data = PyBytes_FromStringAndSize(
722 (char *)d->lzs.next_in, d->lzs.avail_in);
723 if (d->unused_data == NULL)
724 goto error;
725 }
726 break;
727 } else if (d->lzs.avail_in == 0) {
728 break;
729 } else if (d->lzs.avail_out == 0) {
730 if (grow_buffer(&result) == -1)
731 goto error;
732 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
733 d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
734 }
735 }
736 if (data_size != PyBytes_GET_SIZE(result))
737 if (_PyBytes_Resize(&result, data_size) == -1)
738 goto error;
739 return result;
740
741error:
742 Py_XDECREF(result);
743 return NULL;
744}
745
746PyDoc_STRVAR(Decompressor_decompress_doc,
747"decompress(data) -> bytes\n"
748"\n"
749"Provide data to the decompressor object. Returns a chunk of\n"
750"decompressed data if possible, or b\"\" otherwise.\n"
751"\n"
752"Attempting to decompress data after the end of the stream is\n"
753"reached raises an EOFError. Any data found after the end of the\n"
754"stream is ignored, and saved in the unused_data attribute.\n");
755
756static PyObject *
757Decompressor_decompress(Decompressor *self, PyObject *args)
758{
759 Py_buffer buffer;
760 PyObject *result = NULL;
761
762 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
763 return NULL;
764
765 ACQUIRE_LOCK(self);
766 if (self->eof)
767 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
768 else
769 result = decompress(self, buffer.buf, buffer.len);
770 RELEASE_LOCK(self);
771 PyBuffer_Release(&buffer);
772 return result;
773}
774
775static int
776Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
777{
778 lzma_filter filters[LZMA_FILTERS_MAX + 1];
779 lzma_ret lzret;
780
781 if (parse_filter_chain_spec(filters, filterspecs) == -1)
782 return -1;
783 lzret = lzma_raw_decoder(lzs, filters);
784 free_filter_chain(filters);
785 if (catch_lzma_error(lzret))
786 return -1;
787 else
788 return 0;
789}
790
791static int
792Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
793{
794 static char *arg_names[] = {"format", "memlimit", "filters", NULL};
795 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
796 int format = FORMAT_AUTO;
797 uint64_t memlimit = UINT64_MAX;
798 PyObject *memlimit_obj = Py_None;
799 PyObject *filterspecs = Py_None;
800 lzma_ret lzret;
801
802 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
803 "|iOO:LZMADecompressor", arg_names,
804 &format, &memlimit_obj, &filterspecs))
805 return -1;
806
807 if (memlimit_obj != Py_None) {
808 if (format == FORMAT_RAW) {
809 PyErr_SetString(PyExc_ValueError,
810 "Cannot specify memory limit with FORMAT_RAW");
811 return -1;
812 }
813 memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
814 if (PyErr_Occurred())
815 return -1;
816 }
817
818 if (format == FORMAT_RAW && filterspecs == Py_None) {
819 PyErr_SetString(PyExc_ValueError,
820 "Must specify filters for FORMAT_RAW");
821 return -1;
822 } else if (format != FORMAT_RAW && filterspecs != Py_None) {
823 PyErr_SetString(PyExc_ValueError,
824 "Cannot specify filters except with FORMAT_RAW");
825 return -1;
826 }
827
828#ifdef WITH_THREAD
829 self->lock = PyThread_allocate_lock();
830 if (self->lock == NULL) {
831 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
832 return -1;
833 }
834#endif
835
836 self->check = LZMA_CHECK_UNKNOWN;
837 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
838 if (self->unused_data == NULL)
839 goto error;
840
841 switch (format) {
842 case FORMAT_AUTO:
843 lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
844 if (catch_lzma_error(lzret))
845 break;
846 return 0;
847
848 case FORMAT_XZ:
849 lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
850 if (catch_lzma_error(lzret))
851 break;
852 return 0;
853
854 case FORMAT_ALONE:
855 self->check = LZMA_CHECK_NONE;
856 lzret = lzma_alone_decoder(&self->lzs, memlimit);
857 if (catch_lzma_error(lzret))
858 break;
859 return 0;
860
861 case FORMAT_RAW:
862 self->check = LZMA_CHECK_NONE;
863 if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
864 break;
865 return 0;
866
867 default:
868 PyErr_Format(PyExc_ValueError,
869 "Invalid container format: %d", format);
870 break;
871 }
872
873error:
874 Py_CLEAR(self->unused_data);
875#ifdef WITH_THREAD
876 PyThread_free_lock(self->lock);
877 self->lock = NULL;
878#endif
879 return -1;
880}
881
882static void
883Decompressor_dealloc(Decompressor *self)
884{
885 lzma_end(&self->lzs);
886 Py_CLEAR(self->unused_data);
887#ifdef WITH_THREAD
888 if (self->lock != NULL)
889 PyThread_free_lock(self->lock);
890#endif
891 Py_TYPE(self)->tp_free((PyObject *)self);
892}
893
894static PyMethodDef Decompressor_methods[] = {
895 {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
896 Decompressor_decompress_doc},
897 {NULL}
898};
899
900PyDoc_STRVAR(Decompressor_check_doc,
901"ID of the integrity check used by the input stream.");
902
903PyDoc_STRVAR(Decompressor_eof_doc,
904"True if the end-of-stream marker has been reached.");
905
906PyDoc_STRVAR(Decompressor_unused_data_doc,
907"Data found after the end of the compressed stream.");
908
909static PyMemberDef Decompressor_members[] = {
910 {"check", T_INT, offsetof(Decompressor, check), READONLY,
911 Decompressor_check_doc},
912 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
913 Decompressor_eof_doc},
914 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
915 Decompressor_unused_data_doc},
916 {NULL}
917};
918
/* Class docstring of LZMADecompressor. */
PyDoc_STRVAR(Decompressor_doc,
"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
"\n"
"Create a decompressor object for decompressing data incrementally.\n"
"\n"
"format specifies the container format of the input stream. If this is\n"
"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
"FORMAT_RAW cannot be autodetected.\n"
"\n"
"memlimit can be specified to limit the amount of memory used by the\n"
"decompressor. This will cause decompression to fail if the input\n"
"cannot be decompressed within the given limit.\n"
"\n"
"filters specifies a custom filter chain. This argument is required for\n"
"FORMAT_RAW, and not accepted with any other format. When provided,\n"
"this should be a sequence of dicts, each indicating the ID and options\n"
"for a single filter.\n"
"\n"
"For one-shot decompression, use the decompress() function instead.\n");

/* Type object for _lzma.LZMADecompressor.  Only the deallocator, flags,
   docstring, method and member tables, initialiser and allocator slots
   are filled in; every other slot is left at 0.  The slots are listed
   positionally, so their order must not be changed. */
static PyTypeObject Decompressor_type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_lzma.LZMADecompressor",           /* tp_name */
    sizeof(Decompressor),               /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)Decompressor_dealloc,   /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    Decompressor_doc,                   /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    Decompressor_methods,               /* tp_methods */
    Decompressor_members,               /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)Decompressor_init,        /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};
980
981
982/* Module-level functions. */
983
984PyDoc_STRVAR(check_is_supported_doc,
985"check_is_supported(check_id) -> bool\n"
986"\n"
987"Test whether the given integrity check is supported.\n"
988"\n"
989"Always returns True for CHECK_NONE and CHECK_CRC32.\n");
990
991static PyObject *
992check_is_supported(PyObject *self, PyObject *args)
993{
994 int check_id;
995
996 if (!PyArg_ParseTuple(args, "i:check_is_supported", &check_id))
997 return NULL;
998
999 return PyBool_FromLong(lzma_check_is_supported(check_id));
1000}
1001
1002
1003/* Module initialization. */
1004
1005static PyMethodDef module_methods[] = {
1006 {"check_is_supported", (PyCFunction)check_is_supported,
1007 METH_VARARGS, check_is_supported_doc},
1008 {NULL}
1009};
1010
/* Module definition handed to PyModule_Create() in PyInit__lzma().  The
   fields are listed positionally, so their order must not be changed. */
static PyModuleDef _lzmamodule = {
    PyModuleDef_HEAD_INIT,
    "_lzma",            /* m_name */
    NULL,               /* m_doc: no module docstring */
    -1,                 /* m_size */
    module_methods,     /* m_methods */
    NULL,               /* remaining slots are unused */
    NULL,
    NULL,
    NULL,
};
1022
1023/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1024 would not work correctly on platforms with 32-bit longs. */
1025static int
1026module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
1027{
1028 PyObject *o = PyLong_FromLongLong(value);
1029 if (o == NULL)
1030 return -1;
1031 if (PyModule_AddObject(m, name, o) == 0)
1032 return 0;
1033 Py_DECREF(o);
1034 return -1;
1035}
1036
1037#define ADD_INT_PREFIX_MACRO(m, macro) \
1038 module_add_int_constant(m, #macro, LZMA_ ## macro)
1039
1040PyMODINIT_FUNC
1041PyInit__lzma(void)
1042{
1043 PyObject *m;
1044
1045 empty_tuple = PyTuple_New(0);
1046 if (empty_tuple == NULL)
1047 return NULL;
1048
1049 m = PyModule_Create(&_lzmamodule);
1050 if (m == NULL)
1051 return NULL;
1052
1053 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1054 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1055 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1056 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1057 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1058 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1059 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1060 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1061 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1062 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1063 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1064 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1065 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1066 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1067 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1068 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1069 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1070 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1071 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1072 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1073 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1074 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1075 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1076 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1077 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1078 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1079 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1080 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1081 return NULL;
1082
1083 Error = PyErr_NewExceptionWithDoc(
1084 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1085 if (Error == NULL)
1086 return NULL;
1087 Py_INCREF(Error);
1088 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1089 return NULL;
1090
1091 if (PyType_Ready(&Compressor_type) == -1)
1092 return NULL;
1093 Py_INCREF(&Compressor_type);
1094 if (PyModule_AddObject(m, "LZMACompressor",
1095 (PyObject *)&Compressor_type) == -1)
1096 return NULL;
1097
1098 if (PyType_Ready(&Decompressor_type) == -1)
1099 return NULL;
1100 Py_INCREF(&Decompressor_type);
1101 if (PyModule_AddObject(m, "LZMADecompressor",
1102 (PyObject *)&Decompressor_type) == -1)
1103 return NULL;
1104
1105 return m;
1106}