blob: 92dd06d5c089d6227dc16e1dbd9e16bcde4c237f [file] [log] [blame]
/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02007
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
11#include "structmember.h"
12#ifdef WITH_THREAD
13#include "pythread.h"
14#endif
15
16#include <stdarg.h>
17#include <string.h>
18
19#include <lzma.h>
20
21
22#ifndef PY_LONG_LONG
23#error "This module requires PY_LONG_LONG to be defined"
24#endif
25
26
/* Helpers for taking and dropping the per-object lock stored in the
   `lock` member. Both expand to nothing when WITH_THREAD is not defined. */
#ifdef WITH_THREAD
/* Attempt a non-blocking acquire first. Only when that fails is the GIL
   released (Py_BEGIN_ALLOW_THREADS) for a blocking acquire. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (PyThread_acquire_lock((obj)->lock, 0) == 0) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
39
40
/* Container formats accepted by the compressor/decompressor constructors. */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3
};

/* Value one past the largest check ID known to liblzma; stored in
   Decompressor.check until the check type has been determined. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
50
51
52typedef struct {
53 PyObject_HEAD
54 lzma_stream lzs;
55 int flushed;
56#ifdef WITH_THREAD
57 PyThread_type_lock lock;
58#endif
59} Compressor;
60
61typedef struct {
62 PyObject_HEAD
63 lzma_stream lzs;
64 int check;
65 char eof;
66 PyObject *unused_data;
67#ifdef WITH_THREAD
68 PyThread_type_lock lock;
69#endif
70} Decompressor;
71
72/* LZMAError class object. */
73static PyObject *Error;
74
75/* An empty tuple, used by the filter specifier parsing code. */
76static PyObject *empty_tuple;
77
78
79/* Helper functions. */
80
81static int
82catch_lzma_error(lzma_ret lzret)
83{
84 switch (lzret) {
85 case LZMA_OK:
86 case LZMA_GET_CHECK:
87 case LZMA_NO_CHECK:
88 case LZMA_STREAM_END:
89 return 0;
90 case LZMA_UNSUPPORTED_CHECK:
91 PyErr_SetString(Error, "Unsupported integrity check");
92 return 1;
93 case LZMA_MEM_ERROR:
94 PyErr_NoMemory();
95 return 1;
96 case LZMA_MEMLIMIT_ERROR:
97 PyErr_SetString(Error, "Memory usage limit exceeded");
98 return 1;
99 case LZMA_FORMAT_ERROR:
100 PyErr_SetString(Error, "Input format not supported by decoder");
101 return 1;
102 case LZMA_OPTIONS_ERROR:
103 PyErr_SetString(Error, "Invalid or unsupported options");
104 return 1;
105 case LZMA_DATA_ERROR:
106 PyErr_SetString(Error, "Corrupt input data");
107 return 1;
108 case LZMA_BUF_ERROR:
109 PyErr_SetString(Error, "Insufficient buffer space");
110 return 1;
111 case LZMA_PROG_ERROR:
112 PyErr_SetString(Error, "Internal error");
113 return 1;
114 default:
115 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
116 return 1;
117 }
118}
119
/* Initial size of the output buffers allocated by compress() and
   decompress(): BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
125
126static int
127grow_buffer(PyObject **buf)
128{
129 size_t size = PyBytes_GET_SIZE(*buf);
130 return _PyBytes_Resize(buf, size + (size >> 3) + 6);
131}
132
133
/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
   since the predefined conversion specifiers do not suit our needs:

      uint32_t - the "I" (unsigned int) specifier is the right size, but
      silently ignores overflows on conversion.

      lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
      size, but like "I" it silently ignores overflows on conversion.

      lzma_mode and lzma_match_finder - these are enumeration types, and
      so the size of each is implementation-defined. Worse, different
      enum types can be of different sizes within the same program, so
      to be strictly correct, we need to define two separate converters.
 */
148
149#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
150 static int \
151 FUNCNAME(PyObject *obj, void *ptr) \
152 { \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200153 unsigned PY_LONG_LONG val; \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200154 \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200155 val = PyLong_AsUnsignedLongLong(obj); \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200156 if (PyErr_Occurred()) \
157 return 0; \
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200158 if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200159 PyErr_SetString(PyExc_OverflowError, \
160 "Value too large for " #TYPE " type"); \
161 return 0; \
162 } \
163 *(TYPE *)ptr = val; \
164 return 1; \
165 }
166
167INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
Nadeem Vawdaf55b3292012-05-06 23:01:27 +0200168INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200169INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
170INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
171
172#undef INT_TYPE_CONVERTER_FUNC
173
174
/* Filter specifier parsing.

   This code handles converting filter specifiers (Python dicts) into
   the C lzma_filter structs expected by liblzma. */
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200179
180static void *
181parse_filter_spec_lzma(PyObject *spec)
182{
183 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
184 "pb", "mode", "nice_len", "mf", "depth", NULL};
185 PyObject *id;
186 PyObject *preset_obj;
187 uint32_t preset = LZMA_PRESET_DEFAULT;
188 lzma_options_lzma *options;
189
190 /* First, fill in default values for all the options using a preset.
191 Then, override the defaults with any values given by the caller. */
192
193 preset_obj = PyMapping_GetItemString(spec, "preset");
194 if (preset_obj == NULL) {
195 if (PyErr_ExceptionMatches(PyExc_KeyError))
196 PyErr_Clear();
197 else
198 return NULL;
199 } else {
200 int ok = uint32_converter(preset_obj, &preset);
201 Py_DECREF(preset_obj);
202 if (!ok)
203 return NULL;
204 }
205
206 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
207 if (options == NULL)
208 return PyErr_NoMemory();
209 memset(options, 0, sizeof *options);
210
211 if (lzma_lzma_preset(options, preset)) {
212 PyMem_Free(options);
Nadeem Vawda54c74ec2012-05-06 13:35:47 +0200213 PyErr_Format(Error, "Invalid compression preset: %d", preset);
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200214 return NULL;
215 }
216
217 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
218 "|OOO&O&O&O&O&O&O&O&", optnames,
219 &id, &preset_obj,
220 uint32_converter, &options->dict_size,
221 uint32_converter, &options->lc,
222 uint32_converter, &options->lp,
223 uint32_converter, &options->pb,
224 lzma_mode_converter, &options->mode,
225 uint32_converter, &options->nice_len,
226 lzma_mf_converter, &options->mf,
227 uint32_converter, &options->depth)) {
228 PyErr_SetString(PyExc_ValueError,
229 "Invalid filter specifier for LZMA filter");
230 PyMem_Free(options);
231 options = NULL;
232 }
233 return options;
234}
235
236static void *
237parse_filter_spec_delta(PyObject *spec)
238{
239 static char *optnames[] = {"id", "dist", NULL};
240 PyObject *id;
241 uint32_t dist = 1;
242 lzma_options_delta *options;
243
244 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
245 &id, uint32_converter, &dist)) {
246 PyErr_SetString(PyExc_ValueError,
247 "Invalid filter specifier for delta filter");
248 return NULL;
249 }
250
251 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
252 if (options == NULL)
253 return PyErr_NoMemory();
254 memset(options, 0, sizeof *options);
255 options->type = LZMA_DELTA_TYPE_BYTE;
256 options->dist = dist;
257 return options;
258}
259
260static void *
261parse_filter_spec_bcj(PyObject *spec)
262{
263 static char *optnames[] = {"id", "start_offset", NULL};
264 PyObject *id;
265 uint32_t start_offset = 0;
266 lzma_options_bcj *options;
267
268 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
269 &id, uint32_converter, &start_offset)) {
270 PyErr_SetString(PyExc_ValueError,
271 "Invalid filter specifier for BCJ filter");
272 return NULL;
273 }
274
275 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
276 if (options == NULL)
277 return PyErr_NoMemory();
278 memset(options, 0, sizeof *options);
279 options->start_offset = start_offset;
280 return options;
281}
282
283static void *
284parse_filter_spec(lzma_filter *f, PyObject *spec)
285{
286 PyObject *id_obj;
287
288 if (!PyMapping_Check(spec)) {
289 PyErr_SetString(PyExc_TypeError,
290 "Filter specifier must be a dict or dict-like object");
291 return NULL;
292 }
293 id_obj = PyMapping_GetItemString(spec, "id");
294 if (id_obj == NULL) {
295 if (PyErr_ExceptionMatches(PyExc_KeyError))
296 PyErr_SetString(PyExc_ValueError,
297 "Filter specifier must have an \"id\" entry");
298 return NULL;
299 }
300 f->id = PyLong_AsUnsignedLongLong(id_obj);
301 Py_DECREF(id_obj);
302 if (PyErr_Occurred())
303 return NULL;
304
305 switch (f->id) {
306 case LZMA_FILTER_LZMA1:
307 case LZMA_FILTER_LZMA2:
308 f->options = parse_filter_spec_lzma(spec);
309 return f->options;
310 case LZMA_FILTER_DELTA:
311 f->options = parse_filter_spec_delta(spec);
312 return f->options;
313 case LZMA_FILTER_X86:
314 case LZMA_FILTER_POWERPC:
315 case LZMA_FILTER_IA64:
316 case LZMA_FILTER_ARM:
317 case LZMA_FILTER_ARMTHUMB:
318 case LZMA_FILTER_SPARC:
319 f->options = parse_filter_spec_bcj(spec);
320 return f->options;
321 default:
322 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
323 return NULL;
324 }
325}
326
327static void
328free_filter_chain(lzma_filter filters[])
329{
330 int i;
331
332 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
333 PyMem_Free(filters[i].options);
334}
335
336static int
337parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
338{
339 Py_ssize_t i, num_filters;
340
341 num_filters = PySequence_Length(filterspecs);
342 if (num_filters == -1)
343 return -1;
344 if (num_filters > LZMA_FILTERS_MAX) {
345 PyErr_Format(PyExc_ValueError,
346 "Too many filters - liblzma supports a maximum of %d",
347 LZMA_FILTERS_MAX);
348 return -1;
349 }
350
351 for (i = 0; i < num_filters; i++) {
352 int ok = 1;
353 PyObject *spec = PySequence_GetItem(filterspecs, i);
354 if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
355 ok = 0;
356 Py_XDECREF(spec);
357 if (!ok) {
358 filters[i].id = LZMA_VLI_UNKNOWN;
359 free_filter_chain(filters);
360 return -1;
361 }
362 }
363 filters[num_filters].id = LZMA_VLI_UNKNOWN;
364 return 0;
365}
366
367
/* Filter specifier construction.

   This code handles converting C lzma_filter structs into
   Python-level filter specifiers (represented as dicts). */
372
373static int
374spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
375{
376 int status;
377 PyObject *value_object;
378
379 value_object = PyLong_FromUnsignedLongLong(value);
380 if (value_object == NULL)
381 return -1;
382
383 status = _PyDict_SetItemId(spec, key, value_object);
384 Py_DECREF(value_object);
385 return status;
386}
387
388static PyObject *
389build_filter_spec(const lzma_filter *f)
390{
391 PyObject *spec;
392
393 spec = PyDict_New();
394 if (spec == NULL)
395 return NULL;
396
397#define ADD_FIELD(SOURCE, FIELD) \
398 do { \
399 _Py_IDENTIFIER(FIELD); \
400 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
401 goto error;\
402 } while (0)
403
404 ADD_FIELD(f, id);
405
406 switch (f->id) {
407 case LZMA_FILTER_LZMA1:
408 case LZMA_FILTER_LZMA2: {
409 lzma_options_lzma *options = f->options;
410 ADD_FIELD(options, dict_size);
411 ADD_FIELD(options, lc);
412 ADD_FIELD(options, lp);
413 ADD_FIELD(options, pb);
414 ADD_FIELD(options, mode);
415 ADD_FIELD(options, nice_len);
416 ADD_FIELD(options, mf);
417 ADD_FIELD(options, depth);
418 break;
419 }
420 case LZMA_FILTER_DELTA: {
421 lzma_options_delta *options = f->options;
422 ADD_FIELD(options, dist);
423 break;
424 }
425 case LZMA_FILTER_X86:
426 case LZMA_FILTER_POWERPC:
427 case LZMA_FILTER_IA64:
428 case LZMA_FILTER_ARM:
429 case LZMA_FILTER_ARMTHUMB:
430 case LZMA_FILTER_SPARC: {
431 lzma_options_bcj *options = f->options;
432 ADD_FIELD(options, start_offset);
433 break;
434 }
435 default:
436 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
437 goto error;
438 }
439
440#undef ADD_FIELD
441
442 return spec;
443
444error:
445 Py_DECREF(spec);
446 return NULL;
447}
448
449
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200450/* LZMACompressor class. */
451
452static PyObject *
453compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
454{
455 size_t data_size = 0;
456 PyObject *result;
457
458 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
459 if (result == NULL)
460 return NULL;
461 c->lzs.next_in = data;
462 c->lzs.avail_in = len;
463 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
464 c->lzs.avail_out = PyBytes_GET_SIZE(result);
465 for (;;) {
466 lzma_ret lzret;
467
468 Py_BEGIN_ALLOW_THREADS
469 lzret = lzma_code(&c->lzs, action);
470 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
471 Py_END_ALLOW_THREADS
472 if (catch_lzma_error(lzret))
473 goto error;
474 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
475 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
476 break;
477 } else if (c->lzs.avail_out == 0) {
478 if (grow_buffer(&result) == -1)
479 goto error;
480 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
481 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
482 }
483 }
484 if (data_size != PyBytes_GET_SIZE(result))
485 if (_PyBytes_Resize(&result, data_size) == -1)
486 goto error;
487 return result;
488
489error:
490 Py_XDECREF(result);
491 return NULL;
492}
493
494PyDoc_STRVAR(Compressor_compress_doc,
495"compress(data) -> bytes\n"
496"\n"
497"Provide data to the compressor object. Returns a chunk of\n"
498"compressed data if possible, or b\"\" otherwise.\n"
499"\n"
500"When you have finished providing data to the compressor, call the\n"
501"flush() method to finish the conversion process.\n");
502
503static PyObject *
504Compressor_compress(Compressor *self, PyObject *args)
505{
506 Py_buffer buffer;
507 PyObject *result = NULL;
508
509 if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
510 return NULL;
511
512 ACQUIRE_LOCK(self);
513 if (self->flushed)
514 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
515 else
516 result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
517 RELEASE_LOCK(self);
518 PyBuffer_Release(&buffer);
519 return result;
520}
521
522PyDoc_STRVAR(Compressor_flush_doc,
523"flush() -> bytes\n"
524"\n"
525"Finish the compression process. Returns the compressed data left\n"
526"in internal buffers.\n"
527"\n"
528"The compressor object cannot be used after this method is called.\n");
529
530static PyObject *
531Compressor_flush(Compressor *self, PyObject *noargs)
532{
533 PyObject *result = NULL;
534
535 ACQUIRE_LOCK(self);
536 if (self->flushed) {
537 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
538 } else {
539 self->flushed = 1;
540 result = compress(self, NULL, 0, LZMA_FINISH);
541 }
542 RELEASE_LOCK(self);
543 return result;
544}
545
546static int
547Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
548 PyObject *filterspecs)
549{
550 lzma_ret lzret;
551
552 if (filterspecs == Py_None) {
553 lzret = lzma_easy_encoder(lzs, preset, check);
554 } else {
555 lzma_filter filters[LZMA_FILTERS_MAX + 1];
556
557 if (parse_filter_chain_spec(filters, filterspecs) == -1)
558 return -1;
559 lzret = lzma_stream_encoder(lzs, filters, check);
560 free_filter_chain(filters);
561 }
562 if (catch_lzma_error(lzret))
563 return -1;
564 else
565 return 0;
566}
567
568static int
569Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
570{
571 lzma_ret lzret;
572
573 if (filterspecs == Py_None) {
574 lzma_options_lzma options;
575
576 if (lzma_lzma_preset(&options, preset)) {
Nadeem Vawda54c74ec2012-05-06 13:35:47 +0200577 PyErr_Format(Error, "Invalid compression preset: %d", preset);
Nadeem Vawda3ff069e2011-11-30 00:25:06 +0200578 return -1;
579 }
580 lzret = lzma_alone_encoder(lzs, &options);
581 } else {
582 lzma_filter filters[LZMA_FILTERS_MAX + 1];
583
584 if (parse_filter_chain_spec(filters, filterspecs) == -1)
585 return -1;
586 if (filters[0].id == LZMA_FILTER_LZMA1 &&
587 filters[1].id == LZMA_VLI_UNKNOWN) {
588 lzret = lzma_alone_encoder(lzs, filters[0].options);
589 } else {
590 PyErr_SetString(PyExc_ValueError,
591 "Invalid filter chain for FORMAT_ALONE - "
592 "must be a single LZMA1 filter");
593 lzret = LZMA_PROG_ERROR;
594 }
595 free_filter_chain(filters);
596 }
597 if (PyErr_Occurred() || catch_lzma_error(lzret))
598 return -1;
599 else
600 return 0;
601}
602
603static int
604Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
605{
606 lzma_filter filters[LZMA_FILTERS_MAX + 1];
607 lzma_ret lzret;
608
609 if (filterspecs == Py_None) {
610 PyErr_SetString(PyExc_ValueError,
611 "Must specify filters for FORMAT_RAW");
612 return -1;
613 }
614 if (parse_filter_chain_spec(filters, filterspecs) == -1)
615 return -1;
616 lzret = lzma_raw_encoder(lzs, filters);
617 free_filter_chain(filters);
618 if (catch_lzma_error(lzret))
619 return -1;
620 else
621 return 0;
622}
623
624static int
625Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
626{
627 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
628 int format = FORMAT_XZ;
629 int check = -1;
630 uint32_t preset = LZMA_PRESET_DEFAULT;
631 PyObject *preset_obj = Py_None;
632 PyObject *filterspecs = Py_None;
633
634 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
635 "|iiOO:LZMACompressor", arg_names,
636 &format, &check, &preset_obj,
637 &filterspecs))
638 return -1;
639
640 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
641 PyErr_SetString(PyExc_ValueError,
642 "Integrity checks are only supported by FORMAT_XZ");
643 return -1;
644 }
645
646 if (preset_obj != Py_None && filterspecs != Py_None) {
647 PyErr_SetString(PyExc_ValueError,
648 "Cannot specify both preset and filter chain");
649 return -1;
650 }
651
652 if (preset_obj != Py_None)
653 if (!uint32_converter(preset_obj, &preset))
654 return -1;
655
656#ifdef WITH_THREAD
657 self->lock = PyThread_allocate_lock();
658 if (self->lock == NULL) {
659 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
660 return -1;
661 }
662#endif
663
664 self->flushed = 0;
665 switch (format) {
666 case FORMAT_XZ:
667 if (check == -1)
668 check = LZMA_CHECK_CRC64;
669 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
670 break;
671 return 0;
672
673 case FORMAT_ALONE:
674 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
675 break;
676 return 0;
677
678 case FORMAT_RAW:
679 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
680 break;
681 return 0;
682
683 default:
684 PyErr_Format(PyExc_ValueError,
685 "Invalid container format: %d", format);
686 break;
687 }
688
689#ifdef WITH_THREAD
690 PyThread_free_lock(self->lock);
691 self->lock = NULL;
692#endif
693 return -1;
694}
695
696static void
697Compressor_dealloc(Compressor *self)
698{
699 lzma_end(&self->lzs);
700#ifdef WITH_THREAD
701 if (self->lock != NULL)
702 PyThread_free_lock(self->lock);
703#endif
704 Py_TYPE(self)->tp_free((PyObject *)self);
705}
706
707static PyMethodDef Compressor_methods[] = {
708 {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
709 Compressor_compress_doc},
710 {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
711 Compressor_flush_doc},
712 {NULL}
713};
714
715PyDoc_STRVAR(Compressor_doc,
716"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
717"\n"
718"Create a compressor object for compressing data incrementally.\n"
719"\n"
720"format specifies the container format to use for the output. This can\n"
721"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
722"\n"
723"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
724"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
725"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
726"\n"
727"The settings used by the compressor can be specified either as a\n"
728"preset compression level (with the 'preset' argument), or in detail\n"
729"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
730"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
731"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
732"the raw compressor does not support preset compression levels.\n"
733"\n"
734"preset (if provided) should be an integer in the range 0-9, optionally\n"
735"OR-ed with the constant PRESET_EXTREME.\n"
736"\n"
737"filters (if provided) should be a sequence of dicts. Each dict should\n"
738"have an entry for \"id\" indicating the ID of the filter, plus\n"
739"additional entries for options to the filter.\n"
740"\n"
741"For one-shot compression, use the compress() function instead.\n");
742
743static PyTypeObject Compressor_type = {
744 PyVarObject_HEAD_INIT(NULL, 0)
745 "_lzma.LZMACompressor", /* tp_name */
746 sizeof(Compressor), /* tp_basicsize */
747 0, /* tp_itemsize */
748 (destructor)Compressor_dealloc, /* tp_dealloc */
749 0, /* tp_print */
750 0, /* tp_getattr */
751 0, /* tp_setattr */
752 0, /* tp_reserved */
753 0, /* tp_repr */
754 0, /* tp_as_number */
755 0, /* tp_as_sequence */
756 0, /* tp_as_mapping */
757 0, /* tp_hash */
758 0, /* tp_call */
759 0, /* tp_str */
760 0, /* tp_getattro */
761 0, /* tp_setattro */
762 0, /* tp_as_buffer */
763 Py_TPFLAGS_DEFAULT, /* tp_flags */
764 Compressor_doc, /* tp_doc */
765 0, /* tp_traverse */
766 0, /* tp_clear */
767 0, /* tp_richcompare */
768 0, /* tp_weaklistoffset */
769 0, /* tp_iter */
770 0, /* tp_iternext */
771 Compressor_methods, /* tp_methods */
772 0, /* tp_members */
773 0, /* tp_getset */
774 0, /* tp_base */
775 0, /* tp_dict */
776 0, /* tp_descr_get */
777 0, /* tp_descr_set */
778 0, /* tp_dictoffset */
779 (initproc)Compressor_init, /* tp_init */
780 0, /* tp_alloc */
781 PyType_GenericNew, /* tp_new */
782};
783
784
785/* LZMADecompressor class. */
786
787static PyObject *
788decompress(Decompressor *d, uint8_t *data, size_t len)
789{
790 size_t data_size = 0;
791 PyObject *result;
792
793 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
794 if (result == NULL)
795 return NULL;
796 d->lzs.next_in = data;
797 d->lzs.avail_in = len;
798 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
799 d->lzs.avail_out = PyBytes_GET_SIZE(result);
800 for (;;) {
801 lzma_ret lzret;
802
803 Py_BEGIN_ALLOW_THREADS
804 lzret = lzma_code(&d->lzs, LZMA_RUN);
805 data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
806 Py_END_ALLOW_THREADS
807 if (catch_lzma_error(lzret))
808 goto error;
809 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
810 d->check = lzma_get_check(&d->lzs);
811 if (lzret == LZMA_STREAM_END) {
812 d->eof = 1;
813 if (d->lzs.avail_in > 0) {
814 Py_CLEAR(d->unused_data);
815 d->unused_data = PyBytes_FromStringAndSize(
816 (char *)d->lzs.next_in, d->lzs.avail_in);
817 if (d->unused_data == NULL)
818 goto error;
819 }
820 break;
821 } else if (d->lzs.avail_in == 0) {
822 break;
823 } else if (d->lzs.avail_out == 0) {
824 if (grow_buffer(&result) == -1)
825 goto error;
826 d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
827 d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
828 }
829 }
830 if (data_size != PyBytes_GET_SIZE(result))
831 if (_PyBytes_Resize(&result, data_size) == -1)
832 goto error;
833 return result;
834
835error:
836 Py_XDECREF(result);
837 return NULL;
838}
839
840PyDoc_STRVAR(Decompressor_decompress_doc,
841"decompress(data) -> bytes\n"
842"\n"
843"Provide data to the decompressor object. Returns a chunk of\n"
844"decompressed data if possible, or b\"\" otherwise.\n"
845"\n"
846"Attempting to decompress data after the end of the stream is\n"
847"reached raises an EOFError. Any data found after the end of the\n"
848"stream is ignored, and saved in the unused_data attribute.\n");
849
850static PyObject *
851Decompressor_decompress(Decompressor *self, PyObject *args)
852{
853 Py_buffer buffer;
854 PyObject *result = NULL;
855
856 if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
857 return NULL;
858
859 ACQUIRE_LOCK(self);
860 if (self->eof)
861 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
862 else
863 result = decompress(self, buffer.buf, buffer.len);
864 RELEASE_LOCK(self);
865 PyBuffer_Release(&buffer);
866 return result;
867}
868
869static int
870Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
871{
872 lzma_filter filters[LZMA_FILTERS_MAX + 1];
873 lzma_ret lzret;
874
875 if (parse_filter_chain_spec(filters, filterspecs) == -1)
876 return -1;
877 lzret = lzma_raw_decoder(lzs, filters);
878 free_filter_chain(filters);
879 if (catch_lzma_error(lzret))
880 return -1;
881 else
882 return 0;
883}
884
885static int
886Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
887{
888 static char *arg_names[] = {"format", "memlimit", "filters", NULL};
889 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
890 int format = FORMAT_AUTO;
891 uint64_t memlimit = UINT64_MAX;
892 PyObject *memlimit_obj = Py_None;
893 PyObject *filterspecs = Py_None;
894 lzma_ret lzret;
895
896 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
897 "|iOO:LZMADecompressor", arg_names,
898 &format, &memlimit_obj, &filterspecs))
899 return -1;
900
901 if (memlimit_obj != Py_None) {
902 if (format == FORMAT_RAW) {
903 PyErr_SetString(PyExc_ValueError,
904 "Cannot specify memory limit with FORMAT_RAW");
905 return -1;
906 }
907 memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
908 if (PyErr_Occurred())
909 return -1;
910 }
911
912 if (format == FORMAT_RAW && filterspecs == Py_None) {
913 PyErr_SetString(PyExc_ValueError,
914 "Must specify filters for FORMAT_RAW");
915 return -1;
916 } else if (format != FORMAT_RAW && filterspecs != Py_None) {
917 PyErr_SetString(PyExc_ValueError,
918 "Cannot specify filters except with FORMAT_RAW");
919 return -1;
920 }
921
922#ifdef WITH_THREAD
923 self->lock = PyThread_allocate_lock();
924 if (self->lock == NULL) {
925 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
926 return -1;
927 }
928#endif
929
930 self->check = LZMA_CHECK_UNKNOWN;
931 self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
932 if (self->unused_data == NULL)
933 goto error;
934
935 switch (format) {
936 case FORMAT_AUTO:
937 lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
938 if (catch_lzma_error(lzret))
939 break;
940 return 0;
941
942 case FORMAT_XZ:
943 lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
944 if (catch_lzma_error(lzret))
945 break;
946 return 0;
947
948 case FORMAT_ALONE:
949 self->check = LZMA_CHECK_NONE;
950 lzret = lzma_alone_decoder(&self->lzs, memlimit);
951 if (catch_lzma_error(lzret))
952 break;
953 return 0;
954
955 case FORMAT_RAW:
956 self->check = LZMA_CHECK_NONE;
957 if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
958 break;
959 return 0;
960
961 default:
962 PyErr_Format(PyExc_ValueError,
963 "Invalid container format: %d", format);
964 break;
965 }
966
967error:
968 Py_CLEAR(self->unused_data);
969#ifdef WITH_THREAD
970 PyThread_free_lock(self->lock);
971 self->lock = NULL;
972#endif
973 return -1;
974}
975
976static void
977Decompressor_dealloc(Decompressor *self)
978{
979 lzma_end(&self->lzs);
980 Py_CLEAR(self->unused_data);
981#ifdef WITH_THREAD
982 if (self->lock != NULL)
983 PyThread_free_lock(self->lock);
984#endif
985 Py_TYPE(self)->tp_free((PyObject *)self);
986}
987
988static PyMethodDef Decompressor_methods[] = {
989 {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
990 Decompressor_decompress_doc},
991 {NULL}
992};
993
994PyDoc_STRVAR(Decompressor_check_doc,
995"ID of the integrity check used by the input stream.");
996
997PyDoc_STRVAR(Decompressor_eof_doc,
998"True if the end-of-stream marker has been reached.");
999
1000PyDoc_STRVAR(Decompressor_unused_data_doc,
1001"Data found after the end of the compressed stream.");
1002
1003static PyMemberDef Decompressor_members[] = {
1004 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1005 Decompressor_check_doc},
1006 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1007 Decompressor_eof_doc},
1008 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1009 Decompressor_unused_data_doc},
1010 {NULL}
1011};
1012
1013PyDoc_STRVAR(Decompressor_doc,
1014"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
1015"\n"
1016"Create a decompressor object for decompressing data incrementally.\n"
1017"\n"
1018"format specifies the container format of the input stream. If this is\n"
1019"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
1020"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
1021"FORMAT_RAW cannot be autodetected.\n"
1022"\n"
1023"memlimit can be specified to limit the amount of memory used by the\n"
1024"decompressor. This will cause decompression to fail if the input\n"
1025"cannot be decompressed within the given limit.\n"
1026"\n"
1027"filters specifies a custom filter chain. This argument is required for\n"
1028"FORMAT_RAW, and not accepted with any other format. When provided,\n"
1029"this should be a sequence of dicts, each indicating the ID and options\n"
1030"for a single filter.\n"
1031"\n"
1032"For one-shot decompression, use the decompress() function instead.\n");
1033
1034static PyTypeObject Decompressor_type = {
1035 PyVarObject_HEAD_INIT(NULL, 0)
1036 "_lzma.LZMADecompressor", /* tp_name */
1037 sizeof(Decompressor), /* tp_basicsize */
1038 0, /* tp_itemsize */
1039 (destructor)Decompressor_dealloc, /* tp_dealloc */
1040 0, /* tp_print */
1041 0, /* tp_getattr */
1042 0, /* tp_setattr */
1043 0, /* tp_reserved */
1044 0, /* tp_repr */
1045 0, /* tp_as_number */
1046 0, /* tp_as_sequence */
1047 0, /* tp_as_mapping */
1048 0, /* tp_hash */
1049 0, /* tp_call */
1050 0, /* tp_str */
1051 0, /* tp_getattro */
1052 0, /* tp_setattro */
1053 0, /* tp_as_buffer */
1054 Py_TPFLAGS_DEFAULT, /* tp_flags */
1055 Decompressor_doc, /* tp_doc */
1056 0, /* tp_traverse */
1057 0, /* tp_clear */
1058 0, /* tp_richcompare */
1059 0, /* tp_weaklistoffset */
1060 0, /* tp_iter */
1061 0, /* tp_iternext */
1062 Decompressor_methods, /* tp_methods */
1063 Decompressor_members, /* tp_members */
1064 0, /* tp_getset */
1065 0, /* tp_base */
1066 0, /* tp_dict */
1067 0, /* tp_descr_get */
1068 0, /* tp_descr_set */
1069 0, /* tp_dictoffset */
1070 (initproc)Decompressor_init, /* tp_init */
1071 0, /* tp_alloc */
1072 PyType_GenericNew, /* tp_new */
1073};
1074
1075
1076/* Module-level functions. */
1077
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001078PyDoc_STRVAR(is_check_supported_doc,
1079"is_check_supported(check_id) -> bool\n"
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001080"\n"
1081"Test whether the given integrity check is supported.\n"
1082"\n"
1083"Always returns True for CHECK_NONE and CHECK_CRC32.\n");
1084
1085static PyObject *
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001086is_check_supported(PyObject *self, PyObject *args)
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001087{
1088 int check_id;
1089
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001090 if (!PyArg_ParseTuple(args, "i:is_check_supported", &check_id))
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001091 return NULL;
1092
1093 return PyBool_FromLong(lzma_check_is_supported(check_id));
1094}
1095
1096
Nadeem Vawdaf55b3292012-05-06 23:01:27 +02001097PyDoc_STRVAR(encode_filter_properties_doc,
1098"encode_filter_properties(filter) -> bytes\n"
1099"\n"
1100"Return a bytes object encoding the options (properties) of the filter\n"
1101"specified by *filter* (a dict).\n"
1102"\n"
1103"The result does not include the filter ID itself, only the options.\n"
1104"\n"
1105"This function is primarily of interest to users implementing custom\n"
1106"file formats.\n");
1107
1108static PyObject *
1109encode_filter_properties(PyObject *self, PyObject *args)
1110{
1111 PyObject *filterspec;
1112 lzma_filter filter;
1113 lzma_ret lzret;
1114 uint32_t encoded_size;
1115 PyObject *result = NULL;
1116
1117 if (!PyArg_ParseTuple(args, "O:encode_filter_properties", &filterspec))
1118 return NULL;
1119
1120 if (parse_filter_spec(&filter, filterspec) == NULL)
1121 return NULL;
1122
1123 lzret = lzma_properties_size(&encoded_size, &filter);
1124 if (catch_lzma_error(lzret))
1125 goto error;
1126
1127 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1128 if (result == NULL)
1129 goto error;
1130
1131 lzret = lzma_properties_encode(
1132 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1133 if (catch_lzma_error(lzret))
1134 goto error;
1135
1136 PyMem_Free(filter.options);
1137 return result;
1138
1139error:
1140 Py_XDECREF(result);
1141 PyMem_Free(filter.options);
1142 return NULL;
1143}
1144
1145
1146PyDoc_STRVAR(decode_filter_properties_doc,
1147"decode_filter_properties(filter_id, encoded_props) -> dict\n"
1148"\n"
1149"Return a dict describing a filter with ID *filter_id*, and options\n"
1150"(properties) decoded from the bytes object *encoded_props*.\n"
1151"\n"
1152"This function is primarily of interest to users implementing custom\n"
1153"file formats.\n");
1154
1155static PyObject *
1156decode_filter_properties(PyObject *self, PyObject *args)
1157{
1158 Py_buffer encoded_props;
1159 lzma_filter filter;
1160 lzma_ret lzret;
1161 PyObject *result = NULL;
1162
1163 if (!PyArg_ParseTuple(args, "O&y*:decode_filter_properties",
1164 lzma_vli_converter, &filter.id, &encoded_props))
1165 return NULL;
1166
1167 lzret = lzma_properties_decode(
1168 &filter, NULL, encoded_props.buf, encoded_props.len);
1169 PyBuffer_Release(&encoded_props);
1170 if (catch_lzma_error(lzret))
1171 return NULL;
1172
1173 result = build_filter_spec(&filter);
1174
1175 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1176 allocated by lzma_properties_decode() using the default allocator. */
1177 free(filter.options);
1178 return result;
1179}
1180
1181
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001182/* Module initialization. */
1183
1184static PyMethodDef module_methods[] = {
Nadeem Vawdabc459bb2012-05-06 23:01:51 +02001185 {"is_check_supported", (PyCFunction)is_check_supported,
1186 METH_VARARGS, is_check_supported_doc},
Nadeem Vawdaf55b3292012-05-06 23:01:27 +02001187 {"encode_filter_properties", (PyCFunction)encode_filter_properties,
1188 METH_VARARGS, encode_filter_properties_doc},
1189 {"decode_filter_properties", (PyCFunction)decode_filter_properties,
1190 METH_VARARGS, decode_filter_properties_doc},
Nadeem Vawda3ff069e2011-11-30 00:25:06 +02001191 {NULL}
1192};
1193
1194static PyModuleDef _lzmamodule = {
1195 PyModuleDef_HEAD_INIT,
1196 "_lzma",
1197 NULL,
1198 -1,
1199 module_methods,
1200 NULL,
1201 NULL,
1202 NULL,
1203 NULL,
1204};
1205
1206/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1207 would not work correctly on platforms with 32-bit longs. */
1208static int
1209module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
1210{
1211 PyObject *o = PyLong_FromLongLong(value);
1212 if (o == NULL)
1213 return -1;
1214 if (PyModule_AddObject(m, name, o) == 0)
1215 return 0;
1216 Py_DECREF(o);
1217 return -1;
1218}
1219
1220#define ADD_INT_PREFIX_MACRO(m, macro) \
1221 module_add_int_constant(m, #macro, LZMA_ ## macro)
1222
1223PyMODINIT_FUNC
1224PyInit__lzma(void)
1225{
1226 PyObject *m;
1227
1228 empty_tuple = PyTuple_New(0);
1229 if (empty_tuple == NULL)
1230 return NULL;
1231
1232 m = PyModule_Create(&_lzmamodule);
1233 if (m == NULL)
1234 return NULL;
1235
1236 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1237 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1238 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1239 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1240 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1241 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1242 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1243 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1244 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1245 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1246 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1247 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1248 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1249 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1250 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1251 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1252 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1253 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1254 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1255 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1256 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1257 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1258 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1259 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1260 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1261 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1262 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1263 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1264 return NULL;
1265
1266 Error = PyErr_NewExceptionWithDoc(
1267 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1268 if (Error == NULL)
1269 return NULL;
1270 Py_INCREF(Error);
1271 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1272 return NULL;
1273
1274 if (PyType_Ready(&Compressor_type) == -1)
1275 return NULL;
1276 Py_INCREF(&Compressor_type);
1277 if (PyModule_AddObject(m, "LZMACompressor",
1278 (PyObject *)&Compressor_type) == -1)
1279 return NULL;
1280
1281 if (PyType_Ready(&Decompressor_type) == -1)
1282 return NULL;
1283 Py_INCREF(&Decompressor_type);
1284 if (PyModule_AddObject(m, "LZMADecompressor",
1285 (PyObject *)&Decompressor_type) == -1)
1286 return NULL;
1287
1288 return m;
1289}