blob: a69377ce25a69b1c3bd8fa486c0fb3ede38e7450 [file] [log] [blame]
/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02007
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
11#include "structmember.h"
12#ifdef WITH_THREAD
13#include "pythread.h"
14#endif
15
16#include <stdarg.h>
17#include <string.h>
18
19#include <lzma.h>
20
21
22#ifndef PY_LONG_LONG
23#error "This module requires PY_LONG_LONG to be defined"
24#endif
25
26
/* Per-object locking helpers.  ACQUIRE_LOCK first tries a non-blocking
   acquire; only when that fails does it release the GIL and block on the
   lock.  Both macros expand to nothing when built without thread support. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif
39
40
/* Container formats selectable through the "format" argument. */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3
};

/* Check ID used before the real integrity check of a stream is known;
   it lies one past liblzma's largest check ID. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
50
51
52typedef struct {
53 PyObject_HEAD
54 lzma_stream lzs;
55 int flushed;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} Compressor;
60
61typedef struct {
62 PyObject_HEAD
63 lzma_stream lzs;
64 int check;
65 char eof;
66 PyObject *unused_data;
67#ifdef WITH_THREAD
68 PyThread_type_lock lock;
69#endif
70} Decompressor;
71
72/* LZMAError class object. */
73static PyObject *Error;
74
75/* An empty tuple, used by the filter specifier parsing code. */
76static PyObject *empty_tuple;
77
78
79/* Helper functions. */
80
81static int
82catch_lzma_error(lzma_ret lzret)
83{
84 switch (lzret) {
85 case LZMA_OK:
86 case LZMA_GET_CHECK:
87 case LZMA_NO_CHECK:
88 case LZMA_STREAM_END:
89 return 0;
90 case LZMA_UNSUPPORTED_CHECK:
91 PyErr_SetString(Error, "Unsupported integrity check");
92 return 1;
93 case LZMA_MEM_ERROR:
94 PyErr_NoMemory();
95 return 1;
96 case LZMA_MEMLIMIT_ERROR:
97 PyErr_SetString(Error, "Memory usage limit exceeded");
98 return 1;
99 case LZMA_FORMAT_ERROR:
100 PyErr_SetString(Error, "Input format not supported by decoder");
101 return 1;
102 case LZMA_OPTIONS_ERROR:
103 PyErr_SetString(Error, "Invalid or unsupported options");
104 return 1;
105 case LZMA_DATA_ERROR:
106 PyErr_SetString(Error, "Corrupt input data");
107 return 1;
108 case LZMA_BUF_ERROR:
109 PyErr_SetString(Error, "Insufficient buffer space");
110 return 1;
111 case LZMA_PROG_ERROR:
112 PyErr_SetString(Error, "Internal error");
113 return 1;
114 default:
115 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
116 return 1;
117 }
118}
119
/* Size of the first output buffer: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
125
126static int
127grow_buffer(PyObject **buf)
128{
129 size_t size = PyBytes_GET_SIZE(*buf);
130 return _PyBytes_Resize(buf, size + (size >> 3) + 6);
131}
132
133
134/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
135 since the predefined conversion specifiers do not suit our needs:
136
137 uint32_t - the "I" (unsigned int) specifier is the right size, but
138 silently ignores overflows on conversion.
139
140 lzma_mode and lzma_match_finder - these are enumeration types, and
141 so the size of each is implementation-defined. Worse, different
142 enum types can be of different sizes within the same program, so
143 to be strictly correct, we need to define two separate converters.
144 */
145
146#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
147 static int \
148 FUNCNAME(PyObject *obj, void *ptr) \
149 { \
150 unsigned long val; \
151 \
152 val = PyLong_AsUnsignedLong(obj); \
153 if (PyErr_Occurred()) \
154 return 0; \
155 if ((unsigned long)(TYPE)val != val) { \
156 PyErr_SetString(PyExc_OverflowError, \
157 "Value too large for " #TYPE " type"); \
158 return 0; \
159 } \
160 *(TYPE *)ptr = val; \
161 return 1; \
162 }
163
164INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
165INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
166INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
167
168#undef INT_TYPE_CONVERTER_FUNC
169
170
171/* Filter specifier parsing functions. */
172
173static void *
174parse_filter_spec_lzma(PyObject *spec)
175{
176 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
177 "pb", "mode", "nice_len", "mf", "depth", NULL};
178 PyObject *id;
179 PyObject *preset_obj;
180 uint32_t preset = LZMA_PRESET_DEFAULT;
181 lzma_options_lzma *options;
182
183 /* First, fill in default values for all the options using a preset.
184 Then, override the defaults with any values given by the caller. */
185
186 preset_obj = PyMapping_GetItemString(spec, "preset");
187 if (preset_obj == NULL) {
188 if (PyErr_ExceptionMatches(PyExc_KeyError))
189 PyErr_Clear();
190 else
191 return NULL;
192 } else {
193 int ok = uint32_converter(preset_obj, &preset);
194 Py_DECREF(preset_obj);
195 if (!ok)
196 return NULL;
197 }
198
199 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
200 if (options == NULL)
201 return PyErr_NoMemory();
202 memset(options, 0, sizeof *options);
203
204 if (lzma_lzma_preset(options, preset)) {
205 PyMem_Free(options);
206 PyErr_Format(Error, "lzma_lzma_preset() failed for preset %#x", preset);
207 return NULL;
208 }
209
210 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
211 "|OOO&O&O&O&O&O&O&O&", optnames,
212 &id, &preset_obj,
213 uint32_converter, &options->dict_size,
214 uint32_converter, &options->lc,
215 uint32_converter, &options->lp,
216 uint32_converter, &options->pb,
217 lzma_mode_converter, &options->mode,
218 uint32_converter, &options->nice_len,
219 lzma_mf_converter, &options->mf,
220 uint32_converter, &options->depth)) {
221 PyErr_SetString(PyExc_ValueError,
222 "Invalid filter specifier for LZMA filter");
223 PyMem_Free(options);
224 options = NULL;
225 }
226 return options;
227}
228
229static void *
230parse_filter_spec_delta(PyObject *spec)
231{
232 static char *optnames[] = {"id", "dist", NULL};
233 PyObject *id;
234 uint32_t dist = 1;
235 lzma_options_delta *options;
236
237 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
238 &id, uint32_converter, &dist)) {
239 PyErr_SetString(PyExc_ValueError,
240 "Invalid filter specifier for delta filter");
241 return NULL;
242 }
243
244 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
245 if (options == NULL)
246 return PyErr_NoMemory();
247 memset(options, 0, sizeof *options);
248 options->type = LZMA_DELTA_TYPE_BYTE;
249 options->dist = dist;
250 return options;
251}
252
253static void *
254parse_filter_spec_bcj(PyObject *spec)
255{
256 static char *optnames[] = {"id", "start_offset", NULL};
257 PyObject *id;
258 uint32_t start_offset = 0;
259 lzma_options_bcj *options;
260
261 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
262 &id, uint32_converter, &start_offset)) {
263 PyErr_SetString(PyExc_ValueError,
264 "Invalid filter specifier for BCJ filter");
265 return NULL;
266 }
267
268 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
269 if (options == NULL)
270 return PyErr_NoMemory();
271 memset(options, 0, sizeof *options);
272 options->start_offset = start_offset;
273 return options;
274}
275
276static void *
277parse_filter_spec(lzma_filter *f, PyObject *spec)
278{
279 PyObject *id_obj;
280
281 if (!PyMapping_Check(spec)) {
282 PyErr_SetString(PyExc_TypeError,
283 "Filter specifier must be a dict or dict-like object");
284 return NULL;
285 }
286 id_obj = PyMapping_GetItemString(spec, "id");
287 if (id_obj == NULL) {
288 if (PyErr_ExceptionMatches(PyExc_KeyError))
289 PyErr_SetString(PyExc_ValueError,
290 "Filter specifier must have an \"id\" entry");
291 return NULL;
292 }
293 f->id = PyLong_AsUnsignedLongLong(id_obj);
294 Py_DECREF(id_obj);
295 if (PyErr_Occurred())
296 return NULL;
297
298 switch (f->id) {
299 case LZMA_FILTER_LZMA1:
300 case LZMA_FILTER_LZMA2:
301 f->options = parse_filter_spec_lzma(spec);
302 return f->options;
303 case LZMA_FILTER_DELTA:
304 f->options = parse_filter_spec_delta(spec);
305 return f->options;
306 case LZMA_FILTER_X86:
307 case LZMA_FILTER_POWERPC:
308 case LZMA_FILTER_IA64:
309 case LZMA_FILTER_ARM:
310 case LZMA_FILTER_ARMTHUMB:
311 case LZMA_FILTER_SPARC:
312 f->options = parse_filter_spec_bcj(spec);
313 return f->options;
314 default:
315 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
316 return NULL;
317 }
318}
319
320static void
321free_filter_chain(lzma_filter filters[])
322{
323 int i;
324
325 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
326 PyMem_Free(filters[i].options);
327}
328
329static int
330parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
331{
332 Py_ssize_t i, num_filters;
333
334 num_filters = PySequence_Length(filterspecs);
335 if (num_filters == -1)
336 return -1;
337 if (num_filters > LZMA_FILTERS_MAX) {
338 PyErr_Format(PyExc_ValueError,
339 "Too many filters - liblzma supports a maximum of %d",
340 LZMA_FILTERS_MAX);
341 return -1;
342 }
343
344 for (i = 0; i < num_filters; i++) {
345 int ok = 1;
346 PyObject *spec = PySequence_GetItem(filterspecs, i);
347 if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
348 ok = 0;
349 Py_XDECREF(spec);
350 if (!ok) {
351 filters[i].id = LZMA_VLI_UNKNOWN;
352 free_filter_chain(filters);
353 return -1;
354 }
355 }
356 filters[num_filters].id = LZMA_VLI_UNKNOWN;
357 return 0;
358}
359
360
361/* LZMACompressor class. */
362
363static PyObject *
364compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
365{
366 size_t data_size = 0;
367 PyObject *result;
368
369 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
370 if (result == NULL)
371 return NULL;
372 c->lzs.next_in = data;
373 c->lzs.avail_in = len;
374 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
375 c->lzs.avail_out = PyBytes_GET_SIZE(result);
376 for (;;) {
377 lzma_ret lzret;
378
379 Py_BEGIN_ALLOW_THREADS
380 lzret = lzma_code(&c->lzs, action);
381 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
382 Py_END_ALLOW_THREADS
383 if (catch_lzma_error(lzret))
384 goto error;
385 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
386 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
387 break;
388 } else if (c->lzs.avail_out == 0) {
389 if (grow_buffer(&result) == -1)
390 goto error;
391 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
392 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
393 }
394 }
395 if (data_size != PyBytes_GET_SIZE(result))
396 if (_PyBytes_Resize(&result, data_size) == -1)
397 goto error;
398 return result;
399
400error:
401 Py_XDECREF(result);
402 return NULL;
403}
404
405PyDoc_STRVAR(Compressor_compress_doc,
406"compress(data) -> bytes\n"
407"\n"
408"Provide data to the compressor object. Returns a chunk of\n"
409"compressed data if possible, or b\"\" otherwise.\n"
410"\n"
411"When you have finished providing data to the compressor, call the\n"
412"flush() method to finish the conversion process.\n");
413
414static PyObject *
415Compressor_compress(Compressor *self, PyObject *args)
416{
417 Py_buffer buffer;
418 PyObject *result = NULL;
419
420 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
421 return NULL;
422
423 ACQUIRE_LOCK(self);
424 if (self->flushed)
425 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
426 else
427 result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
428 RELEASE_LOCK(self);
429 PyBuffer_Release(&buffer);
430 return result;
431}
432
433PyDoc_STRVAR(Compressor_flush_doc,
434"flush() -> bytes\n"
435"\n"
436"Finish the compression process. Returns the compressed data left\n"
437"in internal buffers.\n"
438"\n"
439"The compressor object cannot be used after this method is called.\n");
440
441static PyObject *
442Compressor_flush(Compressor *self, PyObject *noargs)
443{
444 PyObject *result = NULL;
445
446 ACQUIRE_LOCK(self);
447 if (self->flushed) {
448 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
449 } else {
450 self->flushed = 1;
451 result = compress(self, NULL, 0, LZMA_FINISH);
452 }
453 RELEASE_LOCK(self);
454 return result;
455}
456
457static int
458Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
459 PyObject *filterspecs)
460{
461 lzma_ret lzret;
462
463 if (filterspecs == Py_None) {
464 lzret = lzma_easy_encoder(lzs, preset, check);
465 } else {
466 lzma_filter filters[LZMA_FILTERS_MAX + 1];
467
468 if (parse_filter_chain_spec(filters, filterspecs) == -1)
469 return -1;
470 lzret = lzma_stream_encoder(lzs, filters, check);
471 free_filter_chain(filters);
472 }
473 if (catch_lzma_error(lzret))
474 return -1;
475 else
476 return 0;
477}
478
479static int
480Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
481{
482 lzma_ret lzret;
483
484 if (filterspecs == Py_None) {
485 lzma_options_lzma options;
486
487 if (lzma_lzma_preset(&options, preset)) {
488 PyErr_Format(Error, "Invalid compression preset: %#x", preset);
489 return -1;
490 }
491 lzret = lzma_alone_encoder(lzs, &options);
492 } else {
493 lzma_filter filters[LZMA_FILTERS_MAX + 1];
494
495 if (parse_filter_chain_spec(filters, filterspecs) == -1)
496 return -1;
497 if (filters[0].id == LZMA_FILTER_LZMA1 &&
498 filters[1].id == LZMA_VLI_UNKNOWN) {
499 lzret = lzma_alone_encoder(lzs, filters[0].options);
500 } else {
501 PyErr_SetString(PyExc_ValueError,
502 "Invalid filter chain for FORMAT_ALONE - "
503 "must be a single LZMA1 filter");
504 lzret = LZMA_PROG_ERROR;
505 }
506 free_filter_chain(filters);
507 }
508 if (PyErr_Occurred() || catch_lzma_error(lzret))
509 return -1;
510 else
511 return 0;
512}
513
514static int
515Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
516{
517 lzma_filter filters[LZMA_FILTERS_MAX + 1];
518 lzma_ret lzret;
519
520 if (filterspecs == Py_None) {
521 PyErr_SetString(PyExc_ValueError,
522 "Must specify filters for FORMAT_RAW");
523 return -1;
524 }
525 if (parse_filter_chain_spec(filters, filterspecs) == -1)
526 return -1;
527 lzret = lzma_raw_encoder(lzs, filters);
528 free_filter_chain(filters);
529 if (catch_lzma_error(lzret))
530 return -1;
531 else
532 return 0;
533}
534
535static int
536Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
537{
538 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
539 int format = FORMAT_XZ;
540 int check = -1;
541 uint32_t preset = LZMA_PRESET_DEFAULT;
542 PyObject *preset_obj = Py_None;
543 PyObject *filterspecs = Py_None;
544
545 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
546 "|iiOO:LZMACompressor", arg_names,
547 &format, &check, &preset_obj,
548 &filterspecs))
549 return -1;
550
551 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
552 PyErr_SetString(PyExc_ValueError,
553 "Integrity checks are only supported by FORMAT_XZ");
554 return -1;
555 }
556
557 if (preset_obj != Py_None && filterspecs != Py_None) {
558 PyErr_SetString(PyExc_ValueError,
559 "Cannot specify both preset and filter chain");
560 return -1;
561 }
562
563 if (preset_obj != Py_None)
564 if (!uint32_converter(preset_obj, &preset))
565 return -1;
566
567#ifdef WITH_THREAD
568 self->lock = PyThread_allocate_lock();
569 if (self->lock == NULL) {
570 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
571 return -1;
572 }
573#endif
574
575 self->flushed = 0;
576 switch (format) {
577 case FORMAT_XZ:
578 if (check == -1)
579 check = LZMA_CHECK_CRC64;
580 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
581 break;
582 return 0;
583
584 case FORMAT_ALONE:
585 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
586 break;
587 return 0;
588
589 case FORMAT_RAW:
590 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
591 break;
592 return 0;
593
594 default:
595 PyErr_Format(PyExc_ValueError,
596 "Invalid container format: %d", format);
597 break;
598 }
599
600#ifdef WITH_THREAD
601 PyThread_free_lock(self->lock);
602 self->lock = NULL;
603#endif
604 return -1;
605}
606
607static void
608Compressor_dealloc(Compressor *self)
609{
610 lzma_end(&self->lzs);
611#ifdef WITH_THREAD
612 if (self->lock != NULL)
613 PyThread_free_lock(self->lock);
614#endif
615 Py_TYPE(self)->tp_free((PyObject *)self);
616}
617
618static PyMethodDef Compressor_methods[] = {
619 {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
620 Compressor_compress_doc},
621 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
622 Compressor_flush_doc},
623 {NULL}
624};
625
626PyDoc_STRVAR(Compressor_doc,
627"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
628"\n"
629"Create a compressor object for compressing data incrementally.\n"
630"\n"
631"format specifies the container format to use for the output. This can\n"
632"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
633"\n"
634"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
635"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
636"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
637"\n"
638"The settings used by the compressor can be specified either as a\n"
639"preset compression level (with the 'preset' argument), or in detail\n"
640"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
641"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
642"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
643"the raw compressor does not support preset compression levels.\n"
644"\n"
645"preset (if provided) should be an integer in the range 0-9, optionally\n"
646"OR-ed with the constant PRESET_EXTREME.\n"
647"\n"
648"filters (if provided) should be a sequence of dicts. Each dict should\n"
649"have an entry for \"id\" indicating the ID of the filter, plus\n"
650"additional entries for options to the filter.\n"
651"\n"
652"For one-shot compression, use the compress() function instead.\n");
653
654static PyTypeObject Compressor_type = {
655 PyVarObject_HEAD_INIT(NULL, 0)
656 "_lzma.LZMACompressor", /* tp_name */
657 sizeof(Compressor), /* tp_basicsize */
658 0, /* tp_itemsize */
659 (destructor)Compressor_dealloc, /* tp_dealloc */
660 0, /* tp_print */
661 0, /* tp_getattr */
662 0, /* tp_setattr */
663 0, /* tp_reserved */
664 0, /* tp_repr */
665 0, /* tp_as_number */
666 0, /* tp_as_sequence */
667 0, /* tp_as_mapping */
668 0, /* tp_hash */
669 0, /* tp_call */
670 0, /* tp_str */
671 0, /* tp_getattro */
672 0, /* tp_setattro */
673 0, /* tp_as_buffer */
674 Py_TPFLAGS_DEFAULT, /* tp_flags */
675 Compressor_doc, /* tp_doc */
676 0, /* tp_traverse */
677 0, /* tp_clear */
678 0, /* tp_richcompare */
679 0, /* tp_weaklistoffset */
680 0, /* tp_iter */
681 0, /* tp_iternext */
682 Compressor_methods, /* tp_methods */
683 0, /* tp_members */
684 0, /* tp_getset */
685 0, /* tp_base */
686 0, /* tp_dict */
687 0, /* tp_descr_get */
688 0, /* tp_descr_set */
689 0, /* tp_dictoffset */
690 (initproc)Compressor_init, /* tp_init */
691 0, /* tp_alloc */
692 PyType_GenericNew, /* tp_new */
693};
694
695
696/* LZMADecompressor class. */
697
698static PyObject *
699decompress(Decompressor *d, uint8_t *data, size_t len)
700{
701 size_t data_size = 0;
702 PyObject *result;
703
704 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
705 if (result == NULL)
706 return NULL;
707 d->lzs.next_in = data;
708 d->lzs.avail_in = len;
709 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
710 d->lzs.avail_out = PyBytes_GET_SIZE(result);
711 for (;;) {
712 lzma_ret lzret;
713
714 Py_BEGIN_ALLOW_THREADS
715 lzret = lzma_code(&d->lzs, LZMA_RUN);
716 data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
717 Py_END_ALLOW_THREADS
718 if (catch_lzma_error(lzret))
719 goto error;
720 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
721 d->check = lzma_get_check(&d->lzs);
722 if (lzret == LZMA_STREAM_END) {
723 d->eof = 1;
724 if (d->lzs.avail_in > 0) {
725 Py_CLEAR(d->unused_data);
726 d->unused_data = PyBytes_FromStringAndSize(
727 (char *)d->lzs.next_in, d->lzs.avail_in);
728 if (d->unused_data == NULL)
729 goto error;
730 }
731 break;
732 } else if (d->lzs.avail_in == 0) {
733 break;
734 } else if (d->lzs.avail_out == 0) {
735 if (grow_buffer(&result) == -1)
736 goto error;
737 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
738 d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
739 }
740 }
741 if (data_size != PyBytes_GET_SIZE(result))
742 if (_PyBytes_Resize(&result, data_size) == -1)
743 goto error;
744 return result;
745
746error:
747 Py_XDECREF(result);
748 return NULL;
749}
750
751PyDoc_STRVAR(Decompressor_decompress_doc,
752"decompress(data) -> bytes\n"
753"\n"
754"Provide data to the decompressor object. Returns a chunk of\n"
755"decompressed data if possible, or b\"\" otherwise.\n"
756"\n"
757"Attempting to decompress data after the end of the stream is\n"
758"reached raises an EOFError. Any data found after the end of the\n"
759"stream is ignored, and saved in the unused_data attribute.\n");
760
761static PyObject *
762Decompressor_decompress(Decompressor *self, PyObject *args)
763{
764 Py_buffer buffer;
765 PyObject *result = NULL;
766
767 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
768 return NULL;
769
770 ACQUIRE_LOCK(self);
771 if (self->eof)
772 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
773 else
774 result = decompress(self, buffer.buf, buffer.len);
775 RELEASE_LOCK(self);
776 PyBuffer_Release(&buffer);
777 return result;
778}
779
780static int
781Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
782{
783 lzma_filter filters[LZMA_FILTERS_MAX + 1];
784 lzma_ret lzret;
785
786 if (parse_filter_chain_spec(filters, filterspecs) == -1)
787 return -1;
788 lzret = lzma_raw_decoder(lzs, filters);
789 free_filter_chain(filters);
790 if (catch_lzma_error(lzret))
791 return -1;
792 else
793 return 0;
794}
795
796static int
797Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
798{
799 static char *arg_names[] = {"format", "memlimit", "filters", NULL};
800 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
801 int format = FORMAT_AUTO;
802 uint64_t memlimit = UINT64_MAX;
803 PyObject *memlimit_obj = Py_None;
804 PyObject *filterspecs = Py_None;
805 lzma_ret lzret;
806
807 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
808 "|iOO:LZMADecompressor", arg_names,
809 &format, &memlimit_obj, &filterspecs))
810 return -1;
811
812 if (memlimit_obj != Py_None) {
813 if (format == FORMAT_RAW) {
814 PyErr_SetString(PyExc_ValueError,
815 "Cannot specify memory limit with FORMAT_RAW");
816 return -1;
817 }
818 memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
819 if (PyErr_Occurred())
820 return -1;
821 }
822
823 if (format == FORMAT_RAW && filterspecs == Py_None) {
824 PyErr_SetString(PyExc_ValueError,
825 "Must specify filters for FORMAT_RAW");
826 return -1;
827 } else if (format != FORMAT_RAW && filterspecs != Py_None) {
828 PyErr_SetString(PyExc_ValueError,
829 "Cannot specify filters except with FORMAT_RAW");
830 return -1;
831 }
832
833#ifdef WITH_THREAD
834 self->lock = PyThread_allocate_lock();
835 if (self->lock == NULL) {
836 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
837 return -1;
838 }
839#endif
840
841 self->check = LZMA_CHECK_UNKNOWN;
842 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
843 if (self->unused_data == NULL)
844 goto error;
845
846 switch (format) {
847 case FORMAT_AUTO:
848 lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
849 if (catch_lzma_error(lzret))
850 break;
851 return 0;
852
853 case FORMAT_XZ:
854 lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
855 if (catch_lzma_error(lzret))
856 break;
857 return 0;
858
859 case FORMAT_ALONE:
860 self->check = LZMA_CHECK_NONE;
861 lzret = lzma_alone_decoder(&self->lzs, memlimit);
862 if (catch_lzma_error(lzret))
863 break;
864 return 0;
865
866 case FORMAT_RAW:
867 self->check = LZMA_CHECK_NONE;
868 if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
869 break;
870 return 0;
871
872 default:
873 PyErr_Format(PyExc_ValueError,
874 "Invalid container format: %d", format);
875 break;
876 }
877
878error:
879 Py_CLEAR(self->unused_data);
880#ifdef WITH_THREAD
881 PyThread_free_lock(self->lock);
882 self->lock = NULL;
883#endif
884 return -1;
885}
886
887static void
888Decompressor_dealloc(Decompressor *self)
889{
890 lzma_end(&self->lzs);
891 Py_CLEAR(self->unused_data);
892#ifdef WITH_THREAD
893 if (self->lock != NULL)
894 PyThread_free_lock(self->lock);
895#endif
896 Py_TYPE(self)->tp_free((PyObject *)self);
897}
898
899static PyMethodDef Decompressor_methods[] = {
900 {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
901 Decompressor_decompress_doc},
902 {NULL}
903};
904
905PyDoc_STRVAR(Decompressor_check_doc,
906"ID of the integrity check used by the input stream.");
907
908PyDoc_STRVAR(Decompressor_eof_doc,
909"True if the end-of-stream marker has been reached.");
910
911PyDoc_STRVAR(Decompressor_unused_data_doc,
912"Data found after the end of the compressed stream.");
913
914static PyMemberDef Decompressor_members[] = {
915 {"check", T_INT, offsetof(Decompressor, check), READONLY,
916 Decompressor_check_doc},
917 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
918 Decompressor_eof_doc},
919 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
920 Decompressor_unused_data_doc},
921 {NULL}
922};
923
924PyDoc_STRVAR(Decompressor_doc,
925"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
926"\n"
927"Create a decompressor object for decompressing data incrementally.\n"
928"\n"
929"format specifies the container format of the input stream. If this is\n"
930"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
931"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
932"FORMAT_RAW cannot be autodetected.\n"
933"\n"
934"memlimit can be specified to limit the amount of memory used by the\n"
935"decompressor. This will cause decompression to fail if the input\n"
936"cannot be decompressed within the given limit.\n"
937"\n"
938"filters specifies a custom filter chain. This argument is required for\n"
939"FORMAT_RAW, and not accepted with any other format. When provided,\n"
940"this should be a sequence of dicts, each indicating the ID and options\n"
941"for a single filter.\n"
942"\n"
943"For one-shot decompression, use the decompress() function instead.\n");
944
945static PyTypeObject Decompressor_type = {
946 PyVarObject_HEAD_INIT(NULL, 0)
947 "_lzma.LZMADecompressor", /* tp_name */
948 sizeof(Decompressor), /* tp_basicsize */
949 0, /* tp_itemsize */
950 (destructor)Decompressor_dealloc, /* tp_dealloc */
951 0, /* tp_print */
952 0, /* tp_getattr */
953 0, /* tp_setattr */
954 0, /* tp_reserved */
955 0, /* tp_repr */
956 0, /* tp_as_number */
957 0, /* tp_as_sequence */
958 0, /* tp_as_mapping */
959 0, /* tp_hash */
960 0, /* tp_call */
961 0, /* tp_str */
962 0, /* tp_getattro */
963 0, /* tp_setattro */
964 0, /* tp_as_buffer */
965 Py_TPFLAGS_DEFAULT, /* tp_flags */
966 Decompressor_doc, /* tp_doc */
967 0, /* tp_traverse */
968 0, /* tp_clear */
969 0, /* tp_richcompare */
970 0, /* tp_weaklistoffset */
971 0, /* tp_iter */
972 0, /* tp_iternext */
973 Decompressor_methods, /* tp_methods */
974 Decompressor_members, /* tp_members */
975 0, /* tp_getset */
976 0, /* tp_base */
977 0, /* tp_dict */
978 0, /* tp_descr_get */
979 0, /* tp_descr_set */
980 0, /* tp_dictoffset */
981 (initproc)Decompressor_init, /* tp_init */
982 0, /* tp_alloc */
983 PyType_GenericNew, /* tp_new */
984};
985
986
987/* Module-level functions. */
988
989PyDoc_STRVAR(check_is_supported_doc,
990"check_is_supported(check_id) -> bool\n"
991"\n"
992"Test whether the given integrity check is supported.\n"
993"\n"
994"Always returns True for CHECK_NONE and CHECK_CRC32.\n");
995
996static PyObject *
997check_is_supported(PyObject *self, PyObject *args)
998{
999 int check_id;
1000
1001 if (!PyArg_ParseTuple(args, "i:check_is_supported", &check_id))
1002 return NULL;
1003
1004 return PyBool_FromLong(lzma_check_is_supported(check_id));
1005}
1006
1007
1008/* Module initialization. */
1009
1010static PyMethodDef module_methods[] = {
1011 {"check_is_supported", (PyCFunction)check_is_supported,
1012 METH_VARARGS, check_is_supported_doc},
1013 {NULL}
1014};
1015
1016static PyModuleDef _lzmamodule = {
1017 PyModuleDef_HEAD_INIT,
1018 "_lzma",
1019 NULL,
1020 -1,
1021 module_methods,
1022 NULL,
1023 NULL,
1024 NULL,
1025 NULL,
1026};
1027
1028/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1029 would not work correctly on platforms with 32-bit longs. */
1030static int
1031module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
1032{
1033 PyObject *o = PyLong_FromLongLong(value);
1034 if (o == NULL)
1035 return -1;
1036 if (PyModule_AddObject(m, name, o) == 0)
1037 return 0;
1038 Py_DECREF(o);
1039 return -1;
1040}
1041
/* Export the C constant LZMA_<macro> to module m under the name <macro>. */
#define ADD_INT_PREFIX_MACRO(m, macro) \
    module_add_int_constant((m), #macro, LZMA_ ## macro)
1044
1045PyMODINIT_FUNC
1046PyInit__lzma(void)
1047{
1048 PyObject *m;
1049
1050 empty_tuple = PyTuple_New(0);
1051 if (empty_tuple == NULL)
1052 return NULL;
1053
1054 m = PyModule_Create(&_lzmamodule);
1055 if (m == NULL)
1056 return NULL;
1057
1058 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1059 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1060 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1061 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1062 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1063 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1064 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1065 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1066 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1067 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1068 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1069 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1070 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1071 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1072 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1073 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1074 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1075 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1076 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1077 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1078 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1079 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1080 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1081 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1082 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1083 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1084 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1085 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1086 return NULL;
1087
1088 Error = PyErr_NewExceptionWithDoc(
1089 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1090 if (Error == NULL)
1091 return NULL;
1092 Py_INCREF(Error);
1093 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1094 return NULL;
1095
1096 if (PyType_Ready(&Compressor_type) == -1)
1097 return NULL;
1098 Py_INCREF(&Compressor_type);
1099 if (PyModule_AddObject(m, "LZMACompressor",
1100 (PyObject *)&Compressor_type) == -1)
1101 return NULL;
1102
1103 if (PyType_Ready(&Decompressor_type) == -1)
1104 return NULL;
1105 Py_INCREF(&Decompressor_type);
1106 if (PyModule_AddObject(m, "LZMADecompressor",
1107 (PyObject *)&Decompressor_type) == -1)
1108 return NULL;
1109
1110 return m;
1111}