blob: 8ea3d01b6998666e77982bb8f0e33602db3754d1 [file] [log] [blame]
/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02007
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
11#include "structmember.h"
12#ifdef WITH_THREAD
13#include "pythread.h"
14#endif
15
16#include <stdarg.h>
17#include <string.h>
18
19#include <lzma.h>
20
21
22#ifndef PY_LONG_LONG
23#error "This module requires PY_LONG_LONG to be defined"
24#endif
25
26
/* Per-object locking helpers.

   ACQUIRE_LOCK first tries to take obj->lock without blocking; only if
   that fails does it release the GIL and block until the lock is free.
   Without thread support both macros expand to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
39
40
/* Container formats accepted by the `format` arguments below. */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3,
};

/* Value stored in Decompressor.check until the real check ID is known;
   chosen to be one past the largest ID liblzma defines. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
50
51
52typedef struct {
53 PyObject_HEAD
54 lzma_stream lzs;
55 int flushed;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} Compressor;
60
61typedef struct {
62 PyObject_HEAD
63 lzma_stream lzs;
64 int check;
65 char eof;
66 PyObject *unused_data;
67#ifdef WITH_THREAD
68 PyThread_type_lock lock;
69#endif
70} Decompressor;
71
72/* LZMAError class object. */
73static PyObject *Error;
74
75/* An empty tuple, used by the filter specifier parsing code. */
76static PyObject *empty_tuple;
77
78
79/* Helper functions. */
80
81static int
82catch_lzma_error(lzma_ret lzret)
83{
84 switch (lzret) {
85 case LZMA_OK:
86 case LZMA_GET_CHECK:
87 case LZMA_NO_CHECK:
88 case LZMA_STREAM_END:
89 return 0;
90 case LZMA_UNSUPPORTED_CHECK:
91 PyErr_SetString(Error, "Unsupported integrity check");
92 return 1;
93 case LZMA_MEM_ERROR:
94 PyErr_NoMemory();
95 return 1;
96 case LZMA_MEMLIMIT_ERROR:
97 PyErr_SetString(Error, "Memory usage limit exceeded");
98 return 1;
99 case LZMA_FORMAT_ERROR:
100 PyErr_SetString(Error, "Input format not supported by decoder");
101 return 1;
102 case LZMA_OPTIONS_ERROR:
103 PyErr_SetString(Error, "Invalid or unsupported options");
104 return 1;
105 case LZMA_DATA_ERROR:
106 PyErr_SetString(Error, "Corrupt input data");
107 return 1;
108 case LZMA_BUF_ERROR:
109 PyErr_SetString(Error, "Insufficient buffer space");
110 return 1;
111 case LZMA_PROG_ERROR:
112 PyErr_SetString(Error, "Internal error");
113 return 1;
114 default:
115 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
116 return 1;
117 }
118}
119
/* Size of the output buffer allocated at the start of each (de)compression
   call: BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
125
126static int
127grow_buffer(PyObject **buf)
128{
129 size_t size = PyBytes_GET_SIZE(*buf);
130 return _PyBytes_Resize(buf, size + (size >> 3) + 6);
131}
132
133
134/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
135 since the predefined conversion specifiers do not suit our needs:
136
137 uint32_t - the "I" (unsigned int) specifier is the right size, but
138 silently ignores overflows on conversion.
139
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200140 lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
141 size, but like "I" it silently ignores overflows on conversion.
142
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200143 lzma_mode and lzma_match_finder - these are enumeration types, and
144 so the size of each is implementation-defined. Worse, different
145 enum types can be of different sizes within the same program, so
146 to be strictly correct, we need to define two separate converters.
147 */
148
149#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
150 static int \
151 FUNCNAME(PyObject *obj, void *ptr) \
152 { \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200153 unsigned PY_LONG_LONG val; \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200154 \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200155 val = PyLong_AsUnsignedLongLong(obj); \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200156 if (PyErr_Occurred()) \
157 return 0; \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200158 if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200159 PyErr_SetString(PyExc_OverflowError, \
160 "Value too large for " #TYPE " type"); \
161 return 0; \
162 } \
163 *(TYPE *)ptr = val; \
164 return 1; \
165 }
166
167INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200168INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200169INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
170INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
171
172#undef INT_TYPE_CONVERTER_FUNC
173
174
/* Filter specifier parsing.

   This code handles converting filter specifiers (Python dicts) into
   the C lzma_filter structs expected by liblzma. */
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200179
180static void *
181parse_filter_spec_lzma(PyObject *spec)
182{
183 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
184 "pb", "mode", "nice_len", "mf", "depth", NULL};
185 PyObject *id;
186 PyObject *preset_obj;
187 uint32_t preset = LZMA_PRESET_DEFAULT;
188 lzma_options_lzma *options;
189
190 /* First, fill in default values for all the options using a preset.
191 Then, override the defaults with any values given by the caller. */
192
193 preset_obj = PyMapping_GetItemString(spec, "preset");
194 if (preset_obj == NULL) {
195 if (PyErr_ExceptionMatches(PyExc_KeyError))
196 PyErr_Clear();
197 else
198 return NULL;
199 } else {
200 int ok = uint32_converter(preset_obj, &preset);
201 Py_DECREF(preset_obj);
202 if (!ok)
203 return NULL;
204 }
205
206 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
207 if (options == NULL)
208 return PyErr_NoMemory();
209 memset(options, 0, sizeof *options);
210
211 if (lzma_lzma_preset(options, preset)) {
212 PyMem_Free(options);
Nadeem Vawda54c74ec2012-05-06 13:35:47 +0200213 PyErr_Format(Error, "Invalid compression preset: %d", preset);
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200214 return NULL;
215 }
216
217 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
218 "|OOO&O&O&O&O&O&O&O&", optnames,
219 &id, &preset_obj,
220 uint32_converter, &options->dict_size,
221 uint32_converter, &options->lc,
222 uint32_converter, &options->lp,
223 uint32_converter, &options->pb,
224 lzma_mode_converter, &options->mode,
225 uint32_converter, &options->nice_len,
226 lzma_mf_converter, &options->mf,
227 uint32_converter, &options->depth)) {
228 PyErr_SetString(PyExc_ValueError,
229 "Invalid filter specifier for LZMA filter");
230 PyMem_Free(options);
231 options = NULL;
232 }
233 return options;
234}
235
236static void *
237parse_filter_spec_delta(PyObject *spec)
238{
239 static char *optnames[] = {"id", "dist", NULL};
240 PyObject *id;
241 uint32_t dist = 1;
242 lzma_options_delta *options;
243
244 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
245 &id, uint32_converter, &dist)) {
246 PyErr_SetString(PyExc_ValueError,
247 "Invalid filter specifier for delta filter");
248 return NULL;
249 }
250
251 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
252 if (options == NULL)
253 return PyErr_NoMemory();
254 memset(options, 0, sizeof *options);
255 options->type = LZMA_DELTA_TYPE_BYTE;
256 options->dist = dist;
257 return options;
258}
259
260static void *
261parse_filter_spec_bcj(PyObject *spec)
262{
263 static char *optnames[] = {"id", "start_offset", NULL};
264 PyObject *id;
265 uint32_t start_offset = 0;
266 lzma_options_bcj *options;
267
268 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
269 &id, uint32_converter, &start_offset)) {
270 PyErr_SetString(PyExc_ValueError,
271 "Invalid filter specifier for BCJ filter");
272 return NULL;
273 }
274
275 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
276 if (options == NULL)
277 return PyErr_NoMemory();
278 memset(options, 0, sizeof *options);
279 options->start_offset = start_offset;
280 return options;
281}
282
283static void *
284parse_filter_spec(lzma_filter *f, PyObject *spec)
285{
286 PyObject *id_obj;
287
288 if (!PyMapping_Check(spec)) {
289 PyErr_SetString(PyExc_TypeError,
290 "Filter specifier must be a dict or dict-like object");
291 return NULL;
292 }
293 id_obj = PyMapping_GetItemString(spec, "id");
294 if (id_obj == NULL) {
295 if (PyErr_ExceptionMatches(PyExc_KeyError))
296 PyErr_SetString(PyExc_ValueError,
297 "Filter specifier must have an \"id\" entry");
298 return NULL;
299 }
300 f->id = PyLong_AsUnsignedLongLong(id_obj);
301 Py_DECREF(id_obj);
302 if (PyErr_Occurred())
303 return NULL;
304
305 switch (f->id) {
306 case LZMA_FILTER_LZMA1:
307 case LZMA_FILTER_LZMA2:
308 f->options = parse_filter_spec_lzma(spec);
309 return f->options;
310 case LZMA_FILTER_DELTA:
311 f->options = parse_filter_spec_delta(spec);
312 return f->options;
313 case LZMA_FILTER_X86:
314 case LZMA_FILTER_POWERPC:
315 case LZMA_FILTER_IA64:
316 case LZMA_FILTER_ARM:
317 case LZMA_FILTER_ARMTHUMB:
318 case LZMA_FILTER_SPARC:
319 f->options = parse_filter_spec_bcj(spec);
320 return f->options;
321 default:
322 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
323 return NULL;
324 }
325}
326
327static void
328free_filter_chain(lzma_filter filters[])
329{
330 int i;
331
332 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
333 PyMem_Free(filters[i].options);
334}
335
336static int
337parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
338{
339 Py_ssize_t i, num_filters;
340
341 num_filters = PySequence_Length(filterspecs);
342 if (num_filters == -1)
343 return -1;
344 if (num_filters > LZMA_FILTERS_MAX) {
345 PyErr_Format(PyExc_ValueError,
346 "Too many filters - liblzma supports a maximum of %d",
347 LZMA_FILTERS_MAX);
348 return -1;
349 }
350
351 for (i = 0; i < num_filters; i++) {
352 int ok = 1;
353 PyObject *spec = PySequence_GetItem(filterspecs, i);
354 if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
355 ok = 0;
356 Py_XDECREF(spec);
357 if (!ok) {
358 filters[i].id = LZMA_VLI_UNKNOWN;
359 free_filter_chain(filters);
360 return -1;
361 }
362 }
363 filters[num_filters].id = LZMA_VLI_UNKNOWN;
364 return 0;
365}
366
367
/* Filter specifier construction.

   This code handles converting C lzma_filter structs into
   Python-level filter specifiers (represented as dicts). */
372
373static int
374spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
375{
376 int status;
377 PyObject *value_object;
378
379 value_object = PyLong_FromUnsignedLongLong(value);
380 if (value_object == NULL)
381 return -1;
382
383 status = _PyDict_SetItemId(spec, key, value_object);
384 Py_DECREF(value_object);
385 return status;
386}
387
388static PyObject *
389build_filter_spec(const lzma_filter *f)
390{
391 PyObject *spec;
392
393 spec = PyDict_New();
394 if (spec == NULL)
395 return NULL;
396
397#define ADD_FIELD(SOURCE, FIELD) \
398 do { \
399 _Py_IDENTIFIER(FIELD); \
400 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
401 goto error;\
402 } while (0)
403
404 ADD_FIELD(f, id);
405
406 switch (f->id) {
Nadeem Vawda486a0452012-05-07 00:40:57 +0200407 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
408 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
409 dict_size field is used. */
410 case LZMA_FILTER_LZMA1: {
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200411 lzma_options_lzma *options = f->options;
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200412 ADD_FIELD(options, lc);
413 ADD_FIELD(options, lp);
414 ADD_FIELD(options, pb);
Nadeem Vawda486a0452012-05-07 00:40:57 +0200415 case LZMA_FILTER_LZMA2:
416 ADD_FIELD(options, dict_size);
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200417 break;
418 }
419 case LZMA_FILTER_DELTA: {
420 lzma_options_delta *options = f->options;
421 ADD_FIELD(options, dist);
422 break;
423 }
424 case LZMA_FILTER_X86:
425 case LZMA_FILTER_POWERPC:
426 case LZMA_FILTER_IA64:
427 case LZMA_FILTER_ARM:
428 case LZMA_FILTER_ARMTHUMB:
429 case LZMA_FILTER_SPARC: {
430 lzma_options_bcj *options = f->options;
431 ADD_FIELD(options, start_offset);
432 break;
433 }
434 default:
435 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
436 goto error;
437 }
438
439#undef ADD_FIELD
440
441 return spec;
442
443error:
444 Py_DECREF(spec);
445 return NULL;
446}
447
448
/* LZMACompressor class. */
450
451static PyObject *
452compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
453{
454 size_t data_size = 0;
455 PyObject *result;
456
457 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
458 if (result == NULL)
459 return NULL;
460 c->lzs.next_in = data;
461 c->lzs.avail_in = len;
462 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
463 c->lzs.avail_out = PyBytes_GET_SIZE(result);
464 for (;;) {
465 lzma_ret lzret;
466
467 Py_BEGIN_ALLOW_THREADS
468 lzret = lzma_code(&c->lzs, action);
469 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
470 Py_END_ALLOW_THREADS
471 if (catch_lzma_error(lzret))
472 goto error;
473 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
474 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
475 break;
476 } else if (c->lzs.avail_out == 0) {
477 if (grow_buffer(&result) == -1)
478 goto error;
479 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
480 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
481 }
482 }
483 if (data_size != PyBytes_GET_SIZE(result))
484 if (_PyBytes_Resize(&result, data_size) == -1)
485 goto error;
486 return result;
487
488error:
489 Py_XDECREF(result);
490 return NULL;
491}
492
493PyDoc_STRVAR(Compressor_compress_doc,
494"compress(data) -> bytes\n"
495"\n"
496"Provide data to the compressor object. Returns a chunk of\n"
497"compressed data if possible, or b\"\" otherwise.\n"
498"\n"
499"When you have finished providing data to the compressor, call the\n"
500"flush() method to finish the conversion process.\n");
501
502static PyObject *
503Compressor_compress(Compressor *self, PyObject *args)
504{
505 Py_buffer buffer;
506 PyObject *result = NULL;
507
508 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
509 return NULL;
510
511 ACQUIRE_LOCK(self);
512 if (self->flushed)
513 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
514 else
515 result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
516 RELEASE_LOCK(self);
517 PyBuffer_Release(&buffer);
518 return result;
519}
520
521PyDoc_STRVAR(Compressor_flush_doc,
522"flush() -> bytes\n"
523"\n"
524"Finish the compression process. Returns the compressed data left\n"
525"in internal buffers.\n"
526"\n"
527"The compressor object cannot be used after this method is called.\n");
528
529static PyObject *
530Compressor_flush(Compressor *self, PyObject *noargs)
531{
532 PyObject *result = NULL;
533
534 ACQUIRE_LOCK(self);
535 if (self->flushed) {
536 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
537 } else {
538 self->flushed = 1;
539 result = compress(self, NULL, 0, LZMA_FINISH);
540 }
541 RELEASE_LOCK(self);
542 return result;
543}
544
545static int
546Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
547 PyObject *filterspecs)
548{
549 lzma_ret lzret;
550
551 if (filterspecs == Py_None) {
552 lzret = lzma_easy_encoder(lzs, preset, check);
553 } else {
554 lzma_filter filters[LZMA_FILTERS_MAX + 1];
555
556 if (parse_filter_chain_spec(filters, filterspecs) == -1)
557 return -1;
558 lzret = lzma_stream_encoder(lzs, filters, check);
559 free_filter_chain(filters);
560 }
561 if (catch_lzma_error(lzret))
562 return -1;
563 else
564 return 0;
565}
566
567static int
568Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
569{
570 lzma_ret lzret;
571
572 if (filterspecs == Py_None) {
573 lzma_options_lzma options;
574
575 if (lzma_lzma_preset(&options, preset)) {
Nadeem Vawda54c74ec2012-05-06 13:35:47 +0200576 PyErr_Format(Error, "Invalid compression preset: %d", preset);
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200577 return -1;
578 }
579 lzret = lzma_alone_encoder(lzs, &options);
580 } else {
581 lzma_filter filters[LZMA_FILTERS_MAX + 1];
582
583 if (parse_filter_chain_spec(filters, filterspecs) == -1)
584 return -1;
585 if (filters[0].id == LZMA_FILTER_LZMA1 &&
586 filters[1].id == LZMA_VLI_UNKNOWN) {
587 lzret = lzma_alone_encoder(lzs, filters[0].options);
588 } else {
589 PyErr_SetString(PyExc_ValueError,
590 "Invalid filter chain for FORMAT_ALONE - "
591 "must be a single LZMA1 filter");
592 lzret = LZMA_PROG_ERROR;
593 }
594 free_filter_chain(filters);
595 }
596 if (PyErr_Occurred() || catch_lzma_error(lzret))
597 return -1;
598 else
599 return 0;
600}
601
602static int
603Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
604{
605 lzma_filter filters[LZMA_FILTERS_MAX + 1];
606 lzma_ret lzret;
607
608 if (filterspecs == Py_None) {
609 PyErr_SetString(PyExc_ValueError,
610 "Must specify filters for FORMAT_RAW");
611 return -1;
612 }
613 if (parse_filter_chain_spec(filters, filterspecs) == -1)
614 return -1;
615 lzret = lzma_raw_encoder(lzs, filters);
616 free_filter_chain(filters);
617 if (catch_lzma_error(lzret))
618 return -1;
619 else
620 return 0;
621}
622
623static int
624Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
625{
626 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
627 int format = FORMAT_XZ;
628 int check = -1;
629 uint32_t preset = LZMA_PRESET_DEFAULT;
630 PyObject *preset_obj = Py_None;
631 PyObject *filterspecs = Py_None;
632
633 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
634 "|iiOO:LZMACompressor", arg_names,
635 &format, &check, &preset_obj,
636 &filterspecs))
637 return -1;
638
639 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
640 PyErr_SetString(PyExc_ValueError,
641 "Integrity checks are only supported by FORMAT_XZ");
642 return -1;
643 }
644
645 if (preset_obj != Py_None && filterspecs != Py_None) {
646 PyErr_SetString(PyExc_ValueError,
647 "Cannot specify both preset and filter chain");
648 return -1;
649 }
650
651 if (preset_obj != Py_None)
652 if (!uint32_converter(preset_obj, &preset))
653 return -1;
654
655#ifdef WITH_THREAD
656 self->lock = PyThread_allocate_lock();
657 if (self->lock == NULL) {
658 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
659 return -1;
660 }
661#endif
662
663 self->flushed = 0;
664 switch (format) {
665 case FORMAT_XZ:
666 if (check == -1)
667 check = LZMA_CHECK_CRC64;
668 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
669 break;
670 return 0;
671
672 case FORMAT_ALONE:
673 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
674 break;
675 return 0;
676
677 case FORMAT_RAW:
678 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
679 break;
680 return 0;
681
682 default:
683 PyErr_Format(PyExc_ValueError,
684 "Invalid container format: %d", format);
685 break;
686 }
687
688#ifdef WITH_THREAD
689 PyThread_free_lock(self->lock);
690 self->lock = NULL;
691#endif
692 return -1;
693}
694
695static void
696Compressor_dealloc(Compressor *self)
697{
698 lzma_end(&self->lzs);
699#ifdef WITH_THREAD
700 if (self->lock != NULL)
701 PyThread_free_lock(self->lock);
702#endif
703 Py_TYPE(self)->tp_free((PyObject *)self);
704}
705
706static PyMethodDef Compressor_methods[] = {
707 {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
708 Compressor_compress_doc},
709 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
710 Compressor_flush_doc},
711 {NULL}
712};
713
714PyDoc_STRVAR(Compressor_doc,
715"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
716"\n"
717"Create a compressor object for compressing data incrementally.\n"
718"\n"
719"format specifies the container format to use for the output. This can\n"
720"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
721"\n"
722"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
723"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
724"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
725"\n"
726"The settings used by the compressor can be specified either as a\n"
727"preset compression level (with the 'preset' argument), or in detail\n"
728"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
729"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
730"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
731"the raw compressor does not support preset compression levels.\n"
732"\n"
733"preset (if provided) should be an integer in the range 0-9, optionally\n"
734"OR-ed with the constant PRESET_EXTREME.\n"
735"\n"
736"filters (if provided) should be a sequence of dicts. Each dict should\n"
737"have an entry for \"id\" indicating the ID of the filter, plus\n"
738"additional entries for options to the filter.\n"
739"\n"
740"For one-shot compression, use the compress() function instead.\n");
741
742static PyTypeObject Compressor_type = {
743 PyVarObject_HEAD_INIT(NULL, 0)
744 "_lzma.LZMACompressor", /* tp_name */
745 sizeof(Compressor), /* tp_basicsize */
746 0, /* tp_itemsize */
747 (destructor)Compressor_dealloc, /* tp_dealloc */
748 0, /* tp_print */
749 0, /* tp_getattr */
750 0, /* tp_setattr */
751 0, /* tp_reserved */
752 0, /* tp_repr */
753 0, /* tp_as_number */
754 0, /* tp_as_sequence */
755 0, /* tp_as_mapping */
756 0, /* tp_hash */
757 0, /* tp_call */
758 0, /* tp_str */
759 0, /* tp_getattro */
760 0, /* tp_setattro */
761 0, /* tp_as_buffer */
762 Py_TPFLAGS_DEFAULT, /* tp_flags */
763 Compressor_doc, /* tp_doc */
764 0, /* tp_traverse */
765 0, /* tp_clear */
766 0, /* tp_richcompare */
767 0, /* tp_weaklistoffset */
768 0, /* tp_iter */
769 0, /* tp_iternext */
770 Compressor_methods, /* tp_methods */
771 0, /* tp_members */
772 0, /* tp_getset */
773 0, /* tp_base */
774 0, /* tp_dict */
775 0, /* tp_descr_get */
776 0, /* tp_descr_set */
777 0, /* tp_dictoffset */
778 (initproc)Compressor_init, /* tp_init */
779 0, /* tp_alloc */
780 PyType_GenericNew, /* tp_new */
781};
782
783
784/* LZMADecompressor class. */
785
786static PyObject *
787decompress(Decompressor *d, uint8_t *data, size_t len)
788{
789 size_t data_size = 0;
790 PyObject *result;
791
792 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
793 if (result == NULL)
794 return NULL;
795 d->lzs.next_in = data;
796 d->lzs.avail_in = len;
797 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
798 d->lzs.avail_out = PyBytes_GET_SIZE(result);
799 for (;;) {
800 lzma_ret lzret;
801
802 Py_BEGIN_ALLOW_THREADS
803 lzret = lzma_code(&d->lzs, LZMA_RUN);
804 data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
805 Py_END_ALLOW_THREADS
806 if (catch_lzma_error(lzret))
807 goto error;
808 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
809 d->check = lzma_get_check(&d->lzs);
810 if (lzret == LZMA_STREAM_END) {
811 d->eof = 1;
812 if (d->lzs.avail_in > 0) {
813 Py_CLEAR(d->unused_data);
814 d->unused_data = PyBytes_FromStringAndSize(
815 (char *)d->lzs.next_in, d->lzs.avail_in);
816 if (d->unused_data == NULL)
817 goto error;
818 }
819 break;
820 } else if (d->lzs.avail_in == 0) {
821 break;
822 } else if (d->lzs.avail_out == 0) {
823 if (grow_buffer(&result) == -1)
824 goto error;
825 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
826 d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
827 }
828 }
829 if (data_size != PyBytes_GET_SIZE(result))
830 if (_PyBytes_Resize(&result, data_size) == -1)
831 goto error;
832 return result;
833
834error:
835 Py_XDECREF(result);
836 return NULL;
837}
838
839PyDoc_STRVAR(Decompressor_decompress_doc,
840"decompress(data) -> bytes\n"
841"\n"
842"Provide data to the decompressor object. Returns a chunk of\n"
843"decompressed data if possible, or b\"\" otherwise.\n"
844"\n"
845"Attempting to decompress data after the end of the stream is\n"
846"reached raises an EOFError. Any data found after the end of the\n"
847"stream is ignored, and saved in the unused_data attribute.\n");
848
849static PyObject *
850Decompressor_decompress(Decompressor *self, PyObject *args)
851{
852 Py_buffer buffer;
853 PyObject *result = NULL;
854
855 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
856 return NULL;
857
858 ACQUIRE_LOCK(self);
859 if (self->eof)
860 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
861 else
862 result = decompress(self, buffer.buf, buffer.len);
863 RELEASE_LOCK(self);
864 PyBuffer_Release(&buffer);
865 return result;
866}
867
868static int
869Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
870{
871 lzma_filter filters[LZMA_FILTERS_MAX + 1];
872 lzma_ret lzret;
873
874 if (parse_filter_chain_spec(filters, filterspecs) == -1)
875 return -1;
876 lzret = lzma_raw_decoder(lzs, filters);
877 free_filter_chain(filters);
878 if (catch_lzma_error(lzret))
879 return -1;
880 else
881 return 0;
882}
883
884static int
885Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
886{
887 static char *arg_names[] = {"format", "memlimit", "filters", NULL};
888 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
889 int format = FORMAT_AUTO;
890 uint64_t memlimit = UINT64_MAX;
891 PyObject *memlimit_obj = Py_None;
892 PyObject *filterspecs = Py_None;
893 lzma_ret lzret;
894
895 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
896 "|iOO:LZMADecompressor", arg_names,
897 &format, &memlimit_obj, &filterspecs))
898 return -1;
899
900 if (memlimit_obj != Py_None) {
901 if (format == FORMAT_RAW) {
902 PyErr_SetString(PyExc_ValueError,
903 "Cannot specify memory limit with FORMAT_RAW");
904 return -1;
905 }
906 memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
907 if (PyErr_Occurred())
908 return -1;
909 }
910
911 if (format == FORMAT_RAW && filterspecs == Py_None) {
912 PyErr_SetString(PyExc_ValueError,
913 "Must specify filters for FORMAT_RAW");
914 return -1;
915 } else if (format != FORMAT_RAW && filterspecs != Py_None) {
916 PyErr_SetString(PyExc_ValueError,
917 "Cannot specify filters except with FORMAT_RAW");
918 return -1;
919 }
920
921#ifdef WITH_THREAD
922 self->lock = PyThread_allocate_lock();
923 if (self->lock == NULL) {
924 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
925 return -1;
926 }
927#endif
928
929 self->check = LZMA_CHECK_UNKNOWN;
930 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
931 if (self->unused_data == NULL)
932 goto error;
933
934 switch (format) {
935 case FORMAT_AUTO:
936 lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
937 if (catch_lzma_error(lzret))
938 break;
939 return 0;
940
941 case FORMAT_XZ:
942 lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
943 if (catch_lzma_error(lzret))
944 break;
945 return 0;
946
947 case FORMAT_ALONE:
948 self->check = LZMA_CHECK_NONE;
949 lzret = lzma_alone_decoder(&self->lzs, memlimit);
950 if (catch_lzma_error(lzret))
951 break;
952 return 0;
953
954 case FORMAT_RAW:
955 self->check = LZMA_CHECK_NONE;
956 if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
957 break;
958 return 0;
959
960 default:
961 PyErr_Format(PyExc_ValueError,
962 "Invalid container format: %d", format);
963 break;
964 }
965
966error:
967 Py_CLEAR(self->unused_data);
968#ifdef WITH_THREAD
969 PyThread_free_lock(self->lock);
970 self->lock = NULL;
971#endif
972 return -1;
973}
974
975static void
976Decompressor_dealloc(Decompressor *self)
977{
978 lzma_end(&self->lzs);
979 Py_CLEAR(self->unused_data);
980#ifdef WITH_THREAD
981 if (self->lock != NULL)
982 PyThread_free_lock(self->lock);
983#endif
984 Py_TYPE(self)->tp_free((PyObject *)self);
985}
986
987static PyMethodDef Decompressor_methods[] = {
988 {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
989 Decompressor_decompress_doc},
990 {NULL}
991};
992
993PyDoc_STRVAR(Decompressor_check_doc,
994"ID of the integrity check used by the input stream.");
995
996PyDoc_STRVAR(Decompressor_eof_doc,
997"True if the end-of-stream marker has been reached.");
998
999PyDoc_STRVAR(Decompressor_unused_data_doc,
1000"Data found after the end of the compressed stream.");
1001
1002static PyMemberDef Decompressor_members[] = {
1003 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1004 Decompressor_check_doc},
1005 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1006 Decompressor_eof_doc},
1007 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1008 Decompressor_unused_data_doc},
1009 {NULL}
1010};
1011
1012PyDoc_STRVAR(Decompressor_doc,
1013"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
1014"\n"
1015"Create a decompressor object for decompressing data incrementally.\n"
1016"\n"
1017"format specifies the container format of the input stream. If this is\n"
1018"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
1019"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
1020"FORMAT_RAW cannot be autodetected.\n"
1021"\n"
1022"memlimit can be specified to limit the amount of memory used by the\n"
1023"decompressor. This will cause decompression to fail if the input\n"
1024"cannot be decompressed within the given limit.\n"
1025"\n"
1026"filters specifies a custom filter chain. This argument is required for\n"
1027"FORMAT_RAW, and not accepted with any other format. When provided,\n"
1028"this should be a sequence of dicts, each indicating the ID and options\n"
1029"for a single filter.\n"
1030"\n"
1031"For one-shot decompression, use the decompress() function instead.\n");
1032
1033static PyTypeObject Decompressor_type = {
1034 PyVarObject_HEAD_INIT(NULL, 0)
1035 "_lzma.LZMADecompressor", /* tp_name */
1036 sizeof(Decompressor), /* tp_basicsize */
1037 0, /* tp_itemsize */
1038 (destructor)Decompressor_dealloc, /* tp_dealloc */
1039 0, /* tp_print */
1040 0, /* tp_getattr */
1041 0, /* tp_setattr */
1042 0, /* tp_reserved */
1043 0, /* tp_repr */
1044 0, /* tp_as_number */
1045 0, /* tp_as_sequence */
1046 0, /* tp_as_mapping */
1047 0, /* tp_hash */
1048 0, /* tp_call */
1049 0, /* tp_str */
1050 0, /* tp_getattro */
1051 0, /* tp_setattro */
1052 0, /* tp_as_buffer */
1053 Py_TPFLAGS_DEFAULT, /* tp_flags */
1054 Decompressor_doc, /* tp_doc */
1055 0, /* tp_traverse */
1056 0, /* tp_clear */
1057 0, /* tp_richcompare */
1058 0, /* tp_weaklistoffset */
1059 0, /* tp_iter */
1060 0, /* tp_iternext */
1061 Decompressor_methods, /* tp_methods */
1062 Decompressor_members, /* tp_members */
1063 0, /* tp_getset */
1064 0, /* tp_base */
1065 0, /* tp_dict */
1066 0, /* tp_descr_get */
1067 0, /* tp_descr_set */
1068 0, /* tp_dictoffset */
1069 (initproc)Decompressor_init, /* tp_init */
1070 0, /* tp_alloc */
1071 PyType_GenericNew, /* tp_new */
1072};
1073
1074
1075/* Module-level functions. */
1076
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001077PyDoc_STRVAR(is_check_supported_doc,
1078"is_check_supported(check_id) -> bool\n"
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001079"\n"
1080"Test whether the given integrity check is supported.\n"
1081"\n"
1082"Always returns True for CHECK_NONE and CHECK_CRC32.\n");
1083
1084static PyObject *
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001085is_check_supported(PyObject *self, PyObject *args)
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001086{
1087 int check_id;
1088
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001089 if (!PyArg_ParseTuple(args, "i:is_check_supported", &check_id))
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001090 return NULL;
1091
1092 return PyBool_FromLong(lzma_check_is_supported(check_id));
1093}
1094
1095
Nadeem Vawdaf55b3292012-05-06 23:01:27 +02001096PyDoc_STRVAR(encode_filter_properties_doc,
1097"encode_filter_properties(filter) -> bytes\n"
1098"\n"
1099"Return a bytes object encoding the options (properties) of the filter\n"
1100"specified by *filter* (a dict).\n"
1101"\n"
1102"The result does not include the filter ID itself, only the options.\n"
1103"\n"
1104"This function is primarily of interest to users implementing custom\n"
1105"file formats.\n");
1106
1107static PyObject *
1108encode_filter_properties(PyObject *self, PyObject *args)
1109{
1110 PyObject *filterspec;
1111 lzma_filter filter;
1112 lzma_ret lzret;
1113 uint32_t encoded_size;
1114 PyObject *result = NULL;
1115
1116 if (!PyArg_ParseTuple(args, "O:encode_filter_properties", &filterspec))
1117 return NULL;
1118
1119 if (parse_filter_spec(&filter, filterspec) == NULL)
1120 return NULL;
1121
1122 lzret = lzma_properties_size(&encoded_size, &filter);
1123 if (catch_lzma_error(lzret))
1124 goto error;
1125
1126 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1127 if (result == NULL)
1128 goto error;
1129
1130 lzret = lzma_properties_encode(
1131 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1132 if (catch_lzma_error(lzret))
1133 goto error;
1134
1135 PyMem_Free(filter.options);
1136 return result;
1137
1138error:
1139 Py_XDECREF(result);
1140 PyMem_Free(filter.options);
1141 return NULL;
1142}
1143
1144
1145PyDoc_STRVAR(decode_filter_properties_doc,
1146"decode_filter_properties(filter_id, encoded_props) -> dict\n"
1147"\n"
1148"Return a dict describing a filter with ID *filter_id*, and options\n"
1149"(properties) decoded from the bytes object *encoded_props*.\n"
1150"\n"
1151"This function is primarily of interest to users implementing custom\n"
1152"file formats.\n");
1153
1154static PyObject *
1155decode_filter_properties(PyObject *self, PyObject *args)
1156{
1157 Py_buffer encoded_props;
1158 lzma_filter filter;
1159 lzma_ret lzret;
1160 PyObject *result = NULL;
1161
1162 if (!PyArg_ParseTuple(args, "O&y*:decode_filter_properties",
1163 lzma_vli_converter, &filter.id, &encoded_props))
1164 return NULL;
1165
1166 lzret = lzma_properties_decode(
1167 &filter, NULL, encoded_props.buf, encoded_props.len);
1168 PyBuffer_Release(&encoded_props);
1169 if (catch_lzma_error(lzret))
1170 return NULL;
1171
1172 result = build_filter_spec(&filter);
1173
1174 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1175 allocated by lzma_properties_decode() using the default allocator. */
1176 free(filter.options);
1177 return result;
1178}
1179
1180
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001181/* Module initialization. */
1182
1183static PyMethodDef module_methods[] = {
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001184 {"is_check_supported", (PyCFunction)is_check_supported,
1185 METH_VARARGS, is_check_supported_doc},
Nadeem Vawdaf55b3292012-05-06 23:01:27 +02001186 {"encode_filter_properties", (PyCFunction)encode_filter_properties,
1187 METH_VARARGS, encode_filter_properties_doc},
1188 {"decode_filter_properties", (PyCFunction)decode_filter_properties,
1189 METH_VARARGS, decode_filter_properties_doc},
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001190 {NULL}
1191};
1192
1193static PyModuleDef _lzmamodule = {
1194 PyModuleDef_HEAD_INIT,
1195 "_lzma",
1196 NULL,
1197 -1,
1198 module_methods,
1199 NULL,
1200 NULL,
1201 NULL,
1202 NULL,
1203};
1204
1205/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1206 would not work correctly on platforms with 32-bit longs. */
1207static int
1208module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
1209{
1210 PyObject *o = PyLong_FromLongLong(value);
1211 if (o == NULL)
1212 return -1;
1213 if (PyModule_AddObject(m, name, o) == 0)
1214 return 0;
1215 Py_DECREF(o);
1216 return -1;
1217}
1218
1219#define ADD_INT_PREFIX_MACRO(m, macro) \
1220 module_add_int_constant(m, #macro, LZMA_ ## macro)
1221
1222PyMODINIT_FUNC
1223PyInit__lzma(void)
1224{
1225 PyObject *m;
1226
1227 empty_tuple = PyTuple_New(0);
1228 if (empty_tuple == NULL)
1229 return NULL;
1230
1231 m = PyModule_Create(&_lzmamodule);
1232 if (m == NULL)
1233 return NULL;
1234
1235 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1236 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1237 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1238 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1239 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1240 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1241 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1242 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1243 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1244 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1245 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1246 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1247 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1248 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1249 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1250 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1251 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1252 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1253 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1254 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1255 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1256 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1257 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1258 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1259 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1260 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1261 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1262 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1263 return NULL;
1264
1265 Error = PyErr_NewExceptionWithDoc(
1266 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1267 if (Error == NULL)
1268 return NULL;
1269 Py_INCREF(Error);
1270 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1271 return NULL;
1272
1273 if (PyType_Ready(&Compressor_type) == -1)
1274 return NULL;
1275 Py_INCREF(&Compressor_type);
1276 if (PyModule_AddObject(m, "LZMACompressor",
1277 (PyObject *)&Compressor_type) == -1)
1278 return NULL;
1279
1280 if (PyType_Ready(&Decompressor_type) == -1)
1281 return NULL;
1282 Py_INCREF(&Decompressor_type);
1283 if (PyModule_AddObject(m, "LZMADecompressor",
1284 (PyObject *)&Decompressor_type) == -1)
1285 return NULL;
1286
1287 return m;
1288}