blob: 18bc3dc296e00b5f3f2d16fc118574dda7454754 [file] [log] [blame]
Larry Hastingsf256c222014-01-25 21:30:37 -08001/* _lzma - Low-level Python interface to liblzma.
2
3 Initial implementation by Per Øyvind Karlsen.
4 Rewritten by Nadeem Vawda.
5
6*/
7
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
11#include "structmember.h"
Larry Hastingsf256c222014-01-25 21:30:37 -080012#include "pythread.h"
Larry Hastingsf256c222014-01-25 21:30:37 -080013
14#include <stdarg.h>
15#include <string.h>
16
17#include <lzma.h>
18
/* Take obj->lock.  A non-blocking attempt is made first; only if that
   fails is the blocking acquire performed, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads
   can run while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)

/* Give obj->lock back. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Larry Hastingsf256c222014-01-25 21:30:37 -080026
27
28/* Container formats: */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3
};

/* One past the largest check ID liblzma defines.
   NOTE(review): presumably a sentinel for "integrity check not known yet";
   confirm against the use sites, which are outside this part of the file. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
37
38
/* Per-object state of an LZMACompressor. */
typedef struct {
    PyObject_HEAD
    /* Allocator callbacks; Compressor_init() fills these in and points
       lzs.allocator at this member. */
    lzma_allocator alloc;
    /* liblzma stream that compress() drives through lzma_code(). */
    lzma_stream lzs;
    /* Set to 1 by flush(); compress() and flush() raise ValueError once
       it is set. */
    int flushed;
    /* Taken by the compress() and flush() methods around their work. */
    PyThread_type_lock lock;
} Compressor;
46
/* Per-object state of an LZMADecompressor. */
typedef struct {
    PyObject_HEAD
    /* NOTE(review): presumably wired into lzs.allocator the same way as
       Compressor.alloc; the initialiser is not visible here - confirm. */
    lzma_allocator alloc;
    /* liblzma stream that decompress_buf() drives through lzma_code(). */
    lzma_stream lzs;
    /* Refreshed from lzma_get_check() whenever lzma_code() reports
       LZMA_GET_CHECK or LZMA_NO_CHECK. */
    int check;
    /* Set to 1 once lzma_code() returns LZMA_STREAM_END. */
    char eof;
    /* Bytes that were still unread in the input when end of stream was
       reached. */
    PyObject *unused_data;
    /* 1 when the last decompress() consumed all of its input without
       reaching end of stream; 0 otherwise. */
    char needs_input;
    /* PyMem-allocated buffer carrying not-yet-consumed input from one
       decompress() call to the next, and its allocated size in bytes. */
    uint8_t *input_buffer;
    size_t input_buffer_size;
    /* NOTE(review): presumably serialises method calls like
       Compressor.lock; the users are not visible here - confirm. */
    PyThread_type_lock lock;
} Decompressor;
59
60/* LZMAError class object. */
61static PyObject *Error;
62
63/* An empty tuple, used by the filter specifier parsing code. */
64static PyObject *empty_tuple;
65
66
67/* Helper functions. */
68
69static int
70catch_lzma_error(lzma_ret lzret)
71{
72 switch (lzret) {
73 case LZMA_OK:
74 case LZMA_GET_CHECK:
75 case LZMA_NO_CHECK:
76 case LZMA_STREAM_END:
77 return 0;
78 case LZMA_UNSUPPORTED_CHECK:
79 PyErr_SetString(Error, "Unsupported integrity check");
80 return 1;
81 case LZMA_MEM_ERROR:
82 PyErr_NoMemory();
83 return 1;
84 case LZMA_MEMLIMIT_ERROR:
85 PyErr_SetString(Error, "Memory usage limit exceeded");
86 return 1;
87 case LZMA_FORMAT_ERROR:
88 PyErr_SetString(Error, "Input format not supported by decoder");
89 return 1;
90 case LZMA_OPTIONS_ERROR:
91 PyErr_SetString(Error, "Invalid or unsupported options");
92 return 1;
93 case LZMA_DATA_ERROR:
94 PyErr_SetString(Error, "Corrupt input data");
95 return 1;
96 case LZMA_BUF_ERROR:
97 PyErr_SetString(Error, "Insufficient buffer space");
98 return 1;
99 case LZMA_PROG_ERROR:
100 PyErr_SetString(Error, "Internal error");
101 return 1;
102 default:
103 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
104 return 1;
105 }
106}
107
108static void*
109PyLzma_Malloc(void *opaque, size_t items, size_t size)
110{
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300111 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size)
Larry Hastingsf256c222014-01-25 21:30:37 -0800112 return NULL;
113 /* PyMem_Malloc() cannot be used:
114 the GIL is not held when lzma_code() is called */
115 return PyMem_RawMalloc(items * size);
116}
117
/* Deallocation callback matching PyLzma_Malloc(): releases ptr through
   PyMem_RawFree().  opaque is not used. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
123
/* Starting size of output buffers: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
129
130static int
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100131grow_buffer(PyObject **buf, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800132{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100133 Py_ssize_t size = PyBytes_GET_SIZE(*buf);
134 Py_ssize_t newsize = size + (size >> 3) + 6;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200135
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100136 if (max_length > 0 && newsize > max_length)
137 newsize = max_length;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200138
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100139 return _PyBytes_Resize(buf, newsize);
Larry Hastingsf256c222014-01-25 21:30:37 -0800140}
141
142
143/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
144 since the predefined conversion specifiers do not suit our needs:
145
146 uint32_t - the "I" (unsigned int) specifier is the right size, but
147 silently ignores overflows on conversion.
148
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700149 lzma_vli - the "K" (unsigned long long) specifier is the right
Larry Hastingsf256c222014-01-25 21:30:37 -0800150 size, but like "I" it silently ignores overflows on conversion.
151
152 lzma_mode and lzma_match_finder - these are enumeration types, and
153 so the size of each is implementation-defined. Worse, different
154 enum types can be of different sizes within the same program, so
155 to be strictly correct, we need to define two separate converters.
156 */
157
158#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
159 static int \
160 FUNCNAME(PyObject *obj, void *ptr) \
161 { \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700162 unsigned long long val; \
Larry Hastingsf256c222014-01-25 21:30:37 -0800163 \
164 val = PyLong_AsUnsignedLongLong(obj); \
165 if (PyErr_Occurred()) \
166 return 0; \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700167 if ((unsigned long long)(TYPE)val != val) { \
Larry Hastingsf256c222014-01-25 21:30:37 -0800168 PyErr_SetString(PyExc_OverflowError, \
169 "Value too large for " #TYPE " type"); \
170 return 0; \
171 } \
172 *(TYPE *)ptr = (TYPE)val; \
173 return 1; \
174 }
175
176INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
177INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
178INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
179INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
180
181#undef INT_TYPE_CONVERTER_FUNC
182
183
184/* Filter specifier parsing.
185
186 This code handles converting filter specifiers (Python dicts) into
187 the C lzma_filter structs expected by liblzma. */
188
189static void *
190parse_filter_spec_lzma(PyObject *spec)
191{
192 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
193 "pb", "mode", "nice_len", "mf", "depth", NULL};
194 PyObject *id;
195 PyObject *preset_obj;
196 uint32_t preset = LZMA_PRESET_DEFAULT;
197 lzma_options_lzma *options;
198
199 /* First, fill in default values for all the options using a preset.
200 Then, override the defaults with any values given by the caller. */
201
202 preset_obj = PyMapping_GetItemString(spec, "preset");
203 if (preset_obj == NULL) {
204 if (PyErr_ExceptionMatches(PyExc_KeyError))
205 PyErr_Clear();
206 else
207 return NULL;
208 } else {
209 int ok = uint32_converter(preset_obj, &preset);
210 Py_DECREF(preset_obj);
211 if (!ok)
212 return NULL;
213 }
214
215 options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
216 if (options == NULL)
217 return PyErr_NoMemory();
218 memset(options, 0, sizeof *options);
219
220 if (lzma_lzma_preset(options, preset)) {
221 PyMem_Free(options);
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200222 PyErr_Format(Error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800223 return NULL;
224 }
225
226 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
227 "|OOO&O&O&O&O&O&O&O&", optnames,
228 &id, &preset_obj,
229 uint32_converter, &options->dict_size,
230 uint32_converter, &options->lc,
231 uint32_converter, &options->lp,
232 uint32_converter, &options->pb,
233 lzma_mode_converter, &options->mode,
234 uint32_converter, &options->nice_len,
235 lzma_mf_converter, &options->mf,
236 uint32_converter, &options->depth)) {
237 PyErr_SetString(PyExc_ValueError,
238 "Invalid filter specifier for LZMA filter");
239 PyMem_Free(options);
240 options = NULL;
241 }
242 return options;
243}
244
245static void *
246parse_filter_spec_delta(PyObject *spec)
247{
248 static char *optnames[] = {"id", "dist", NULL};
249 PyObject *id;
250 uint32_t dist = 1;
251 lzma_options_delta *options;
252
253 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
254 &id, uint32_converter, &dist)) {
255 PyErr_SetString(PyExc_ValueError,
256 "Invalid filter specifier for delta filter");
257 return NULL;
258 }
259
260 options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
261 if (options == NULL)
262 return PyErr_NoMemory();
263 memset(options, 0, sizeof *options);
264 options->type = LZMA_DELTA_TYPE_BYTE;
265 options->dist = dist;
266 return options;
267}
268
269static void *
270parse_filter_spec_bcj(PyObject *spec)
271{
272 static char *optnames[] = {"id", "start_offset", NULL};
273 PyObject *id;
274 uint32_t start_offset = 0;
275 lzma_options_bcj *options;
276
277 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
278 &id, uint32_converter, &start_offset)) {
279 PyErr_SetString(PyExc_ValueError,
280 "Invalid filter specifier for BCJ filter");
281 return NULL;
282 }
283
284 options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
285 if (options == NULL)
286 return PyErr_NoMemory();
287 memset(options, 0, sizeof *options);
288 options->start_offset = start_offset;
289 return options;
290}
291
292static int
293lzma_filter_converter(PyObject *spec, void *ptr)
294{
295 lzma_filter *f = (lzma_filter *)ptr;
296 PyObject *id_obj;
297
298 if (!PyMapping_Check(spec)) {
299 PyErr_SetString(PyExc_TypeError,
300 "Filter specifier must be a dict or dict-like object");
301 return 0;
302 }
303 id_obj = PyMapping_GetItemString(spec, "id");
304 if (id_obj == NULL) {
305 if (PyErr_ExceptionMatches(PyExc_KeyError))
306 PyErr_SetString(PyExc_ValueError,
307 "Filter specifier must have an \"id\" entry");
308 return 0;
309 }
310 f->id = PyLong_AsUnsignedLongLong(id_obj);
311 Py_DECREF(id_obj);
312 if (PyErr_Occurred())
313 return 0;
314
315 switch (f->id) {
316 case LZMA_FILTER_LZMA1:
317 case LZMA_FILTER_LZMA2:
318 f->options = parse_filter_spec_lzma(spec);
319 return f->options != NULL;
320 case LZMA_FILTER_DELTA:
321 f->options = parse_filter_spec_delta(spec);
322 return f->options != NULL;
323 case LZMA_FILTER_X86:
324 case LZMA_FILTER_POWERPC:
325 case LZMA_FILTER_IA64:
326 case LZMA_FILTER_ARM:
327 case LZMA_FILTER_ARMTHUMB:
328 case LZMA_FILTER_SPARC:
329 f->options = parse_filter_spec_bcj(spec);
330 return f->options != NULL;
331 default:
332 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
333 return 0;
334 }
335}
336
337static void
338free_filter_chain(lzma_filter filters[])
339{
340 int i;
341
342 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
343 PyMem_Free(filters[i].options);
344}
345
346static int
347parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
348{
349 Py_ssize_t i, num_filters;
350
351 num_filters = PySequence_Length(filterspecs);
352 if (num_filters == -1)
353 return -1;
354 if (num_filters > LZMA_FILTERS_MAX) {
355 PyErr_Format(PyExc_ValueError,
356 "Too many filters - liblzma supports a maximum of %d",
357 LZMA_FILTERS_MAX);
358 return -1;
359 }
360
361 for (i = 0; i < num_filters; i++) {
362 int ok = 1;
363 PyObject *spec = PySequence_GetItem(filterspecs, i);
364 if (spec == NULL || !lzma_filter_converter(spec, &filters[i]))
365 ok = 0;
366 Py_XDECREF(spec);
367 if (!ok) {
368 filters[i].id = LZMA_VLI_UNKNOWN;
369 free_filter_chain(filters);
370 return -1;
371 }
372 }
373 filters[num_filters].id = LZMA_VLI_UNKNOWN;
374 return 0;
375}
376
377
378/* Filter specifier construction.
379
380 This code handles converting C lzma_filter structs into
381 Python-level filter specifiers (represented as dicts). */
382
383static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700384spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -0800385{
386 int status;
387 PyObject *value_object;
388
389 value_object = PyLong_FromUnsignedLongLong(value);
390 if (value_object == NULL)
391 return -1;
392
393 status = _PyDict_SetItemId(spec, key, value_object);
394 Py_DECREF(value_object);
395 return status;
396}
397
398static PyObject *
399build_filter_spec(const lzma_filter *f)
400{
401 PyObject *spec;
402
403 spec = PyDict_New();
404 if (spec == NULL)
405 return NULL;
406
407#define ADD_FIELD(SOURCE, FIELD) \
408 do { \
409 _Py_IDENTIFIER(FIELD); \
410 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
411 goto error;\
412 } while (0)
413
414 ADD_FIELD(f, id);
415
416 switch (f->id) {
417 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
418 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
419 dict_size field is used. */
420 case LZMA_FILTER_LZMA1: {
421 lzma_options_lzma *options = f->options;
422 ADD_FIELD(options, lc);
423 ADD_FIELD(options, lp);
424 ADD_FIELD(options, pb);
425 ADD_FIELD(options, dict_size);
426 break;
427 }
428 case LZMA_FILTER_LZMA2: {
429 lzma_options_lzma *options = f->options;
430 ADD_FIELD(options, dict_size);
431 break;
432 }
433 case LZMA_FILTER_DELTA: {
434 lzma_options_delta *options = f->options;
435 ADD_FIELD(options, dist);
436 break;
437 }
438 case LZMA_FILTER_X86:
439 case LZMA_FILTER_POWERPC:
440 case LZMA_FILTER_IA64:
441 case LZMA_FILTER_ARM:
442 case LZMA_FILTER_ARMTHUMB:
443 case LZMA_FILTER_SPARC: {
444 lzma_options_bcj *options = f->options;
445 ADD_FIELD(options, start_offset);
446 break;
447 }
448 default:
449 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
450 goto error;
451 }
452
453#undef ADD_FIELD
454
455 return spec;
456
457error:
458 Py_DECREF(spec);
459 return NULL;
460}
461
462
463/*[clinic input]
Larry Hastingsf256c222014-01-25 21:30:37 -0800464module _lzma
465class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
466class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
467[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300468/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800469
470#include "clinic/_lzmamodule.c.h"
471
472/*[python input]
473
474class lzma_vli_converter(CConverter):
475 type = 'lzma_vli'
476 converter = 'lzma_vli_converter'
477
478class lzma_filter_converter(CConverter):
479 type = 'lzma_filter'
480 converter = 'lzma_filter_converter'
481 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
482
483 def cleanup(self):
484 name = ensure_legal_c_identifier(self.name)
485 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
486 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
487
488[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800489/*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800490
491
492/* LZMACompressor class. */
493
494static PyObject *
495compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
496{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100497 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800498 PyObject *result;
499
500 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
501 if (result == NULL)
502 return NULL;
503 c->lzs.next_in = data;
504 c->lzs.avail_in = len;
505 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
506 c->lzs.avail_out = PyBytes_GET_SIZE(result);
507 for (;;) {
508 lzma_ret lzret;
509
510 Py_BEGIN_ALLOW_THREADS
511 lzret = lzma_code(&c->lzs, action);
512 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
Serhiy Storchaka04f17f12016-10-31 08:30:09 +0200513 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0)
514 lzret = LZMA_OK; /* That wasn't a real error */
Larry Hastingsf256c222014-01-25 21:30:37 -0800515 Py_END_ALLOW_THREADS
516 if (catch_lzma_error(lzret))
517 goto error;
518 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
519 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
520 break;
521 } else if (c->lzs.avail_out == 0) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100522 if (grow_buffer(&result, -1) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800523 goto error;
524 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
525 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
526 }
527 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100528 if (data_size != PyBytes_GET_SIZE(result))
Larry Hastingsf256c222014-01-25 21:30:37 -0800529 if (_PyBytes_Resize(&result, data_size) == -1)
530 goto error;
531 return result;
532
533error:
534 Py_XDECREF(result);
535 return NULL;
536}
537
538/*[clinic input]
539_lzma.LZMACompressor.compress
540
Larry Hastingsf256c222014-01-25 21:30:37 -0800541 data: Py_buffer
542 /
543
544Provide data to the compressor object.
545
546Returns a chunk of compressed data if possible, or b'' otherwise.
547
548When you have finished providing data to the compressor, call the
549flush() method to finish the compression process.
550[clinic start generated code]*/
551
552static PyObject *
553_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300554/*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800555{
556 PyObject *result = NULL;
557
558 ACQUIRE_LOCK(self);
559 if (self->flushed)
560 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
561 else
562 result = compress(self, data->buf, data->len, LZMA_RUN);
563 RELEASE_LOCK(self);
564 return result;
565}
566
567/*[clinic input]
568_lzma.LZMACompressor.flush
569
Larry Hastingsf256c222014-01-25 21:30:37 -0800570Finish the compression process.
571
572Returns the compressed data left in internal buffers.
573
574The compressor object may not be used after this method is called.
575[clinic start generated code]*/
576
577static PyObject *
578_lzma_LZMACompressor_flush_impl(Compressor *self)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300579/*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800580{
581 PyObject *result = NULL;
582
583 ACQUIRE_LOCK(self);
584 if (self->flushed) {
585 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
586 } else {
587 self->flushed = 1;
588 result = compress(self, NULL, 0, LZMA_FINISH);
589 }
590 RELEASE_LOCK(self);
591 return result;
592}
593
Larry Hastingsf256c222014-01-25 21:30:37 -0800594static int
595Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
596 PyObject *filterspecs)
597{
598 lzma_ret lzret;
599
600 if (filterspecs == Py_None) {
601 lzret = lzma_easy_encoder(lzs, preset, check);
602 } else {
603 lzma_filter filters[LZMA_FILTERS_MAX + 1];
604
605 if (parse_filter_chain_spec(filters, filterspecs) == -1)
606 return -1;
607 lzret = lzma_stream_encoder(lzs, filters, check);
608 free_filter_chain(filters);
609 }
610 if (catch_lzma_error(lzret))
611 return -1;
612 else
613 return 0;
614}
615
616static int
617Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
618{
619 lzma_ret lzret;
620
621 if (filterspecs == Py_None) {
622 lzma_options_lzma options;
623
624 if (lzma_lzma_preset(&options, preset)) {
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200625 PyErr_Format(Error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800626 return -1;
627 }
628 lzret = lzma_alone_encoder(lzs, &options);
629 } else {
630 lzma_filter filters[LZMA_FILTERS_MAX + 1];
631
632 if (parse_filter_chain_spec(filters, filterspecs) == -1)
633 return -1;
634 if (filters[0].id == LZMA_FILTER_LZMA1 &&
635 filters[1].id == LZMA_VLI_UNKNOWN) {
636 lzret = lzma_alone_encoder(lzs, filters[0].options);
637 } else {
638 PyErr_SetString(PyExc_ValueError,
639 "Invalid filter chain for FORMAT_ALONE - "
640 "must be a single LZMA1 filter");
641 lzret = LZMA_PROG_ERROR;
642 }
643 free_filter_chain(filters);
644 }
645 if (PyErr_Occurred() || catch_lzma_error(lzret))
646 return -1;
647 else
648 return 0;
649}
650
651static int
652Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
653{
654 lzma_filter filters[LZMA_FILTERS_MAX + 1];
655 lzma_ret lzret;
656
657 if (filterspecs == Py_None) {
658 PyErr_SetString(PyExc_ValueError,
659 "Must specify filters for FORMAT_RAW");
660 return -1;
661 }
662 if (parse_filter_chain_spec(filters, filterspecs) == -1)
663 return -1;
664 lzret = lzma_raw_encoder(lzs, filters);
665 free_filter_chain(filters);
666 if (catch_lzma_error(lzret))
667 return -1;
668 else
669 return 0;
670}
671
672/*[-clinic input]
673_lzma.LZMACompressor.__init__
674
Larry Hastingsf256c222014-01-25 21:30:37 -0800675 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
676 The container format to use for the output. This can
677 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
678
679 check: int(c_default="-1") = unspecified
680 The integrity check to use. For FORMAT_XZ, the default
Martin Pantere26da7c2016-06-02 10:07:09 +0000681 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
Larry Hastingsf256c222014-01-25 21:30:37 -0800682 checks; for these formats, check must be omitted, or be CHECK_NONE.
683
684 preset: object = None
685 If provided should be an integer in the range 0-9, optionally
686 OR-ed with the constant PRESET_EXTREME.
687
688 filters: object = None
689 If provided should be a sequence of dicts. Each dict should
690 have an entry for "id" indicating the ID of the filter, plus
691 additional entries for options to the filter.
692
693Create a compressor object for compressing data incrementally.
694
695The settings used by the compressor can be specified either as a
696preset compression level (with the 'preset' argument), or in detail
697as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
698and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
699level. For FORMAT_RAW, the caller must always specify a filter chain;
700the raw compressor does not support preset compression levels.
701
702For one-shot compression, use the compress() function instead.
703[-clinic start generated code]*/
704static int
705Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
706{
707 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
708 int format = FORMAT_XZ;
709 int check = -1;
710 uint32_t preset = LZMA_PRESET_DEFAULT;
711 PyObject *preset_obj = Py_None;
712 PyObject *filterspecs = Py_None;
713
714 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
715 "|iiOO:LZMACompressor", arg_names,
716 &format, &check, &preset_obj,
717 &filterspecs))
718 return -1;
719
720 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
721 PyErr_SetString(PyExc_ValueError,
722 "Integrity checks are only supported by FORMAT_XZ");
723 return -1;
724 }
725
726 if (preset_obj != Py_None && filterspecs != Py_None) {
727 PyErr_SetString(PyExc_ValueError,
728 "Cannot specify both preset and filter chain");
729 return -1;
730 }
731
732 if (preset_obj != Py_None)
733 if (!uint32_converter(preset_obj, &preset))
734 return -1;
735
736 self->alloc.opaque = NULL;
737 self->alloc.alloc = PyLzma_Malloc;
738 self->alloc.free = PyLzma_Free;
739 self->lzs.allocator = &self->alloc;
740
Larry Hastingsf256c222014-01-25 21:30:37 -0800741 self->lock = PyThread_allocate_lock();
742 if (self->lock == NULL) {
743 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
744 return -1;
745 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800746
747 self->flushed = 0;
748 switch (format) {
749 case FORMAT_XZ:
750 if (check == -1)
751 check = LZMA_CHECK_CRC64;
752 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
753 break;
754 return 0;
755
756 case FORMAT_ALONE:
757 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
758 break;
759 return 0;
760
761 case FORMAT_RAW:
762 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
763 break;
764 return 0;
765
766 default:
767 PyErr_Format(PyExc_ValueError,
768 "Invalid container format: %d", format);
769 break;
770 }
771
Larry Hastingsf256c222014-01-25 21:30:37 -0800772 PyThread_free_lock(self->lock);
773 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800774 return -1;
775}
776
777static void
778Compressor_dealloc(Compressor *self)
779{
780 lzma_end(&self->lzs);
Larry Hastingsf256c222014-01-25 21:30:37 -0800781 if (self->lock != NULL)
782 PyThread_free_lock(self->lock);
Larry Hastingsf256c222014-01-25 21:30:37 -0800783 Py_TYPE(self)->tp_free((PyObject *)self);
784}
785
786static PyMethodDef Compressor_methods[] = {
787 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
788 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -0800789 {NULL}
790};
791
/* Docstring of the LZMACompressor type (installed as tp_doc of
   Compressor_type).  It is written by hand because the Argument Clinic
   block describing __init__ above is disabled ("[-clinic input]"). */
PyDoc_STRVAR(Compressor_doc,
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
"format specifies the container format to use for the output. This can\n"
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
"\n"
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
"\n"
"The settings used by the compressor can be specified either as a\n"
"preset compression level (with the 'preset' argument), or in detail\n"
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
"the raw compressor does not support preset compression levels.\n"
"\n"
"preset (if provided) should be an integer in the range 0-9, optionally\n"
"OR-ed with the constant PRESET_EXTREME.\n"
"\n"
"filters (if provided) should be a sequence of dicts. Each dict should\n"
"have an entry for \"id\" indicating the ID of the filter, plus\n"
"additional entries for options to the filter.\n"
"\n"
"For one-shot compression, use the compress() function instead.\n");
819
820static PyTypeObject Compressor_type = {
821 PyVarObject_HEAD_INIT(NULL, 0)
822 "_lzma.LZMACompressor", /* tp_name */
823 sizeof(Compressor), /* tp_basicsize */
824 0, /* tp_itemsize */
825 (destructor)Compressor_dealloc, /* tp_dealloc */
826 0, /* tp_print */
827 0, /* tp_getattr */
828 0, /* tp_setattr */
829 0, /* tp_reserved */
830 0, /* tp_repr */
831 0, /* tp_as_number */
832 0, /* tp_as_sequence */
833 0, /* tp_as_mapping */
834 0, /* tp_hash */
835 0, /* tp_call */
836 0, /* tp_str */
837 0, /* tp_getattro */
838 0, /* tp_setattro */
839 0, /* tp_as_buffer */
840 Py_TPFLAGS_DEFAULT, /* tp_flags */
841 Compressor_doc, /* tp_doc */
842 0, /* tp_traverse */
843 0, /* tp_clear */
844 0, /* tp_richcompare */
845 0, /* tp_weaklistoffset */
846 0, /* tp_iter */
847 0, /* tp_iternext */
848 Compressor_methods, /* tp_methods */
849 0, /* tp_members */
850 0, /* tp_getset */
851 0, /* tp_base */
852 0, /* tp_dict */
853 0, /* tp_descr_get */
854 0, /* tp_descr_set */
855 0, /* tp_dictoffset */
856 (initproc)Compressor_init, /* tp_init */
857 0, /* tp_alloc */
858 PyType_GenericNew, /* tp_new */
859};
860
861
862/* LZMADecompressor class. */
863
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100864/* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
865 buffer is allocated dynamically and returned. At most max_length bytes are
866 returned, so some of the input may not be consumed. d->lzs.next_in and
867 d->lzs.avail_in are updated to reflect the consumed input. */
868static PyObject*
869decompress_buf(Decompressor *d, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800870{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100871 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800872 PyObject *result;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100873 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200874
Serhiy Storchaka04f17f12016-10-31 08:30:09 +0200875 if (lzs->avail_in == 0)
876 return PyBytes_FromStringAndSize(NULL, 0);
877
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100878 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
879 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
880 else
881 result = PyBytes_FromStringAndSize(NULL, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -0800882 if (result == NULL)
883 return NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100884
885 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result);
886 lzs->avail_out = PyBytes_GET_SIZE(result);
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200887
Larry Hastingsf256c222014-01-25 21:30:37 -0800888 for (;;) {
889 lzma_ret lzret;
890
891 Py_BEGIN_ALLOW_THREADS
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100892 lzret = lzma_code(lzs, LZMA_RUN);
893 data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
Larry Hastingsf256c222014-01-25 21:30:37 -0800894 Py_END_ALLOW_THREADS
895 if (catch_lzma_error(lzret))
896 goto error;
897 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
898 d->check = lzma_get_check(&d->lzs);
899 if (lzret == LZMA_STREAM_END) {
900 d->eof = 1;
Larry Hastingsf256c222014-01-25 21:30:37 -0800901 break;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100902 } else if (lzs->avail_in == 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800903 break;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100904 } else if (lzs->avail_out == 0) {
905 if (data_size == max_length)
906 break;
907 if (grow_buffer(&result, max_length) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800908 goto error;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100909 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
910 lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
Larry Hastingsf256c222014-01-25 21:30:37 -0800911 }
912 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100913 if (data_size != PyBytes_GET_SIZE(result))
Larry Hastingsf256c222014-01-25 21:30:37 -0800914 if (_PyBytes_Resize(&result, data_size) == -1)
915 goto error;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100916
917 return result;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200918
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100919error:
920 Py_XDECREF(result);
921 return NULL;
922}
923
924static PyObject *
925decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
926{
927 char input_buffer_in_use;
928 PyObject *result;
929 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200930
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100931 /* Prepend unconsumed input if necessary */
932 if (lzs->next_in != NULL) {
933 size_t avail_now, avail_total;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200934
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100935 /* Number of bytes we can append to input buffer */
936 avail_now = (d->input_buffer + d->input_buffer_size)
937 - (lzs->next_in + lzs->avail_in);
938
939 /* Number of bytes we can append if we move existing
940 contents to beginning of buffer (overwriting
941 consumed input) */
942 avail_total = d->input_buffer_size - lzs->avail_in;
943
944 if (avail_total < len) {
945 size_t offset = lzs->next_in - d->input_buffer;
946 uint8_t *tmp;
947 size_t new_size = d->input_buffer_size + len - avail_now;
948
949 /* Assign to temporary variable first, so we don't
950 lose address of allocated buffer if realloc fails */
951 tmp = PyMem_Realloc(d->input_buffer, new_size);
952 if (tmp == NULL) {
953 PyErr_SetNone(PyExc_MemoryError);
954 return NULL;
955 }
956 d->input_buffer = tmp;
957 d->input_buffer_size = new_size;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200958
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100959 lzs->next_in = d->input_buffer + offset;
960 }
961 else if (avail_now < len) {
962 memmove(d->input_buffer, lzs->next_in,
963 lzs->avail_in);
964 lzs->next_in = d->input_buffer;
965 }
966 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
967 lzs->avail_in += len;
968 input_buffer_in_use = 1;
969 }
970 else {
971 lzs->next_in = data;
972 lzs->avail_in = len;
973 input_buffer_in_use = 0;
974 }
975
976 result = decompress_buf(d, max_length);
Serhiy Storchakac0b70372016-09-27 20:14:26 +0300977 if (result == NULL) {
978 lzs->next_in = NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100979 return NULL;
Serhiy Storchakac0b70372016-09-27 20:14:26 +0300980 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100981
982 if (d->eof) {
983 d->needs_input = 0;
984 if (lzs->avail_in > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300985 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200986 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100987 if (d->unused_data == NULL)
988 goto error;
989 }
990 }
991 else if (lzs->avail_in == 0) {
992 lzs->next_in = NULL;
993 d->needs_input = 1;
994 }
995 else {
996 d->needs_input = 0;
997
998 /* If we did not use the input buffer, we now have
999 to copy the tail from the caller's buffer into the
1000 input buffer */
1001 if (!input_buffer_in_use) {
1002
1003 /* Discard buffer if it's too small
1004 (resizing it may needlessly copy the current contents) */
1005 if (d->input_buffer != NULL &&
1006 d->input_buffer_size < lzs->avail_in) {
1007 PyMem_Free(d->input_buffer);
1008 d->input_buffer = NULL;
1009 }
1010
1011 /* Allocate if necessary */
1012 if (d->input_buffer == NULL) {
1013 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1014 if (d->input_buffer == NULL) {
1015 PyErr_SetNone(PyExc_MemoryError);
1016 goto error;
1017 }
1018 d->input_buffer_size = lzs->avail_in;
1019 }
1020
1021 /* Copy tail */
1022 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1023 lzs->next_in = d->input_buffer;
1024 }
1025 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001026
Larry Hastingsf256c222014-01-25 21:30:37 -08001027 return result;
1028
1029error:
1030 Py_XDECREF(result);
1031 return NULL;
1032}
1033
1034/*[clinic input]
1035_lzma.LZMADecompressor.decompress
1036
Larry Hastingsf256c222014-01-25 21:30:37 -08001037 data: Py_buffer
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001038 max_length: Py_ssize_t=-1
Larry Hastingsf256c222014-01-25 21:30:37 -08001039
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001040Decompress *data*, returning uncompressed data as bytes.
Larry Hastingsf256c222014-01-25 21:30:37 -08001041
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001042If *max_length* is nonnegative, returns at most *max_length* bytes of
1043decompressed data. If this limit is reached and further output can be
1044produced, *self.needs_input* will be set to ``False``. In this case, the next
1045call to *decompress()* may provide *data* as b'' to obtain more of the output.
Larry Hastingsf256c222014-01-25 21:30:37 -08001046
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001047If all of the input data was decompressed and returned (either because this
1048was less than *max_length* bytes, or because *max_length* was negative),
1049*self.needs_input* will be set to True.
1050
1051Attempting to decompress data after the end of stream is reached raises an
1052EOFError. Any data found after the end of the stream is ignored and saved in
1053the unused_data attribute.
Larry Hastingsf256c222014-01-25 21:30:37 -08001054[clinic start generated code]*/
1055
1056static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001057_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1058 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001059/*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001060{
1061 PyObject *result = NULL;
1062
1063 ACQUIRE_LOCK(self);
1064 if (self->eof)
1065 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1066 else
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001067 result = decompress(self, data->buf, data->len, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -08001068 RELEASE_LOCK(self);
1069 return result;
1070}
1071
Larry Hastingsf256c222014-01-25 21:30:37 -08001072static int
1073Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
1074{
1075 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1076 lzma_ret lzret;
1077
1078 if (parse_filter_chain_spec(filters, filterspecs) == -1)
1079 return -1;
1080 lzret = lzma_raw_decoder(lzs, filters);
1081 free_filter_chain(filters);
1082 if (catch_lzma_error(lzret))
1083 return -1;
1084 else
1085 return 0;
1086}
1087
1088/*[clinic input]
1089_lzma.LZMADecompressor.__init__
1090
Larry Hastingsf256c222014-01-25 21:30:37 -08001091 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1092 Specifies the container format of the input stream. If this is
1093 FORMAT_AUTO (the default), the decompressor will automatically detect
1094 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1095 FORMAT_RAW cannot be autodetected.
1096
1097 memlimit: object = None
1098 Limit the amount of memory used by the decompressor. This will cause
1099 decompression to fail if the input cannot be decompressed within the
1100 given limit.
1101
1102 filters: object = None
1103 A custom filter chain. This argument is required for FORMAT_RAW, and
1104 not accepted with any other format. When provided, this should be a
1105 sequence of dicts, each indicating the ID and options for a single
1106 filter.
1107
1108Create a decompressor object for decompressing data incrementally.
1109
1110For one-shot decompression, use the decompress() function instead.
1111[clinic start generated code]*/
1112
1113static int
Larry Hastings89964c42015-04-14 18:07:59 -04001114_lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1115 PyObject *memlimit, PyObject *filters)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001116/*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001117{
1118 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1119 uint64_t memlimit_ = UINT64_MAX;
1120 lzma_ret lzret;
1121
1122 if (memlimit != Py_None) {
1123 if (format == FORMAT_RAW) {
1124 PyErr_SetString(PyExc_ValueError,
1125 "Cannot specify memory limit with FORMAT_RAW");
1126 return -1;
1127 }
1128 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1129 if (PyErr_Occurred())
1130 return -1;
1131 }
1132
1133 if (format == FORMAT_RAW && filters == Py_None) {
1134 PyErr_SetString(PyExc_ValueError,
1135 "Must specify filters for FORMAT_RAW");
1136 return -1;
1137 } else if (format != FORMAT_RAW && filters != Py_None) {
1138 PyErr_SetString(PyExc_ValueError,
1139 "Cannot specify filters except with FORMAT_RAW");
1140 return -1;
1141 }
1142
1143 self->alloc.opaque = NULL;
1144 self->alloc.alloc = PyLzma_Malloc;
1145 self->alloc.free = PyLzma_Free;
1146 self->lzs.allocator = &self->alloc;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001147 self->lzs.next_in = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001148
Victor Stinner9b7cf752018-06-23 10:35:23 +02001149 PyThread_type_lock lock = PyThread_allocate_lock();
1150 if (lock == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001151 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1152 return -1;
1153 }
Victor Stinner9b7cf752018-06-23 10:35:23 +02001154 if (self->lock != NULL) {
1155 PyThread_free_lock(self->lock);
1156 }
1157 self->lock = lock;
Larry Hastingsf256c222014-01-25 21:30:37 -08001158
1159 self->check = LZMA_CHECK_UNKNOWN;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001160 self->needs_input = 1;
1161 self->input_buffer = NULL;
1162 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +02001163 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Larry Hastingsf256c222014-01-25 21:30:37 -08001164 if (self->unused_data == NULL)
1165 goto error;
1166
1167 switch (format) {
1168 case FORMAT_AUTO:
1169 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1170 if (catch_lzma_error(lzret))
1171 break;
1172 return 0;
1173
1174 case FORMAT_XZ:
1175 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1176 if (catch_lzma_error(lzret))
1177 break;
1178 return 0;
1179
1180 case FORMAT_ALONE:
1181 self->check = LZMA_CHECK_NONE;
1182 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1183 if (catch_lzma_error(lzret))
1184 break;
1185 return 0;
1186
1187 case FORMAT_RAW:
1188 self->check = LZMA_CHECK_NONE;
1189 if (Decompressor_init_raw(&self->lzs, filters) == -1)
1190 break;
1191 return 0;
1192
1193 default:
1194 PyErr_Format(PyExc_ValueError,
1195 "Invalid container format: %d", format);
1196 break;
1197 }
1198
1199error:
1200 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001201 PyThread_free_lock(self->lock);
1202 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001203 return -1;
1204}
1205
1206static void
1207Decompressor_dealloc(Decompressor *self)
1208{
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001209 if(self->input_buffer != NULL)
1210 PyMem_Free(self->input_buffer);
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001211
Larry Hastingsf256c222014-01-25 21:30:37 -08001212 lzma_end(&self->lzs);
1213 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001214 if (self->lock != NULL)
1215 PyThread_free_lock(self->lock);
Larry Hastingsf256c222014-01-25 21:30:37 -08001216 Py_TYPE(self)->tp_free((PyObject *)self);
1217}
1218
1219static PyMethodDef Decompressor_methods[] = {
1220 _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -08001221 {NULL}
1222};
1223
1224PyDoc_STRVAR(Decompressor_check_doc,
1225"ID of the integrity check used by the input stream.");
1226
1227PyDoc_STRVAR(Decompressor_eof_doc,
1228"True if the end-of-stream marker has been reached.");
1229
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001230PyDoc_STRVAR(Decompressor_needs_input_doc,
1231"True if more input is needed before more decompressed data can be produced.");
1232
Larry Hastingsf256c222014-01-25 21:30:37 -08001233PyDoc_STRVAR(Decompressor_unused_data_doc,
1234"Data found after the end of the compressed stream.");
1235
1236static PyMemberDef Decompressor_members[] = {
1237 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1238 Decompressor_check_doc},
1239 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1240 Decompressor_eof_doc},
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001241 {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1242 Decompressor_needs_input_doc},
Larry Hastingsf256c222014-01-25 21:30:37 -08001243 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1244 Decompressor_unused_data_doc},
1245 {NULL}
1246};
1247
1248static PyTypeObject Decompressor_type = {
1249 PyVarObject_HEAD_INIT(NULL, 0)
1250 "_lzma.LZMADecompressor", /* tp_name */
1251 sizeof(Decompressor), /* tp_basicsize */
1252 0, /* tp_itemsize */
1253 (destructor)Decompressor_dealloc, /* tp_dealloc */
1254 0, /* tp_print */
1255 0, /* tp_getattr */
1256 0, /* tp_setattr */
1257 0, /* tp_reserved */
1258 0, /* tp_repr */
1259 0, /* tp_as_number */
1260 0, /* tp_as_sequence */
1261 0, /* tp_as_mapping */
1262 0, /* tp_hash */
1263 0, /* tp_call */
1264 0, /* tp_str */
1265 0, /* tp_getattro */
1266 0, /* tp_setattro */
1267 0, /* tp_as_buffer */
1268 Py_TPFLAGS_DEFAULT, /* tp_flags */
1269 _lzma_LZMADecompressor___init____doc__, /* tp_doc */
1270 0, /* tp_traverse */
1271 0, /* tp_clear */
1272 0, /* tp_richcompare */
1273 0, /* tp_weaklistoffset */
1274 0, /* tp_iter */
1275 0, /* tp_iternext */
1276 Decompressor_methods, /* tp_methods */
1277 Decompressor_members, /* tp_members */
1278 0, /* tp_getset */
1279 0, /* tp_base */
1280 0, /* tp_dict */
1281 0, /* tp_descr_get */
1282 0, /* tp_descr_set */
1283 0, /* tp_dictoffset */
1284 _lzma_LZMADecompressor___init__, /* tp_init */
1285 0, /* tp_alloc */
1286 PyType_GenericNew, /* tp_new */
1287};
1288
1289
1290/* Module-level functions. */
1291
1292/*[clinic input]
1293_lzma.is_check_supported
1294 check_id: int
1295 /
1296
1297Test whether the given integrity check is supported.
1298
1299Always returns True for CHECK_NONE and CHECK_CRC32.
1300[clinic start generated code]*/
1301
1302static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001303_lzma_is_check_supported_impl(PyObject *module, int check_id)
1304/*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001305{
1306 return PyBool_FromLong(lzma_check_is_supported(check_id));
1307}
1308
1309
1310/*[clinic input]
1311_lzma._encode_filter_properties
1312 filter: lzma_filter(c_default="{LZMA_VLI_UNKNOWN, NULL}")
1313 /
1314
1315Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1316
1317The result does not include the filter ID itself, only the options.
1318[clinic start generated code]*/
1319
1320static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001321_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1322/*[clinic end generated code: output=5c93c8e14e7be5a8 input=d4c64f1b557c77d4]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001323{
1324 lzma_ret lzret;
1325 uint32_t encoded_size;
1326 PyObject *result = NULL;
1327
1328 lzret = lzma_properties_size(&encoded_size, &filter);
1329 if (catch_lzma_error(lzret))
1330 goto error;
1331
1332 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1333 if (result == NULL)
1334 goto error;
1335
1336 lzret = lzma_properties_encode(
1337 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1338 if (catch_lzma_error(lzret))
1339 goto error;
1340
1341 return result;
1342
1343error:
1344 Py_XDECREF(result);
1345 return NULL;
1346}
1347
1348
1349/*[clinic input]
1350_lzma._decode_filter_properties
1351 filter_id: lzma_vli
1352 encoded_props: Py_buffer
1353 /
1354
Return a dict describing a filter with ID *filter_id*, and options (properties) decoded from the bytes object *encoded_props*.
1358[clinic start generated code]*/
1359
1360static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001361_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
Larry Hastings89964c42015-04-14 18:07:59 -04001362 Py_buffer *encoded_props)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001363/*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001364{
1365 lzma_filter filter;
1366 lzma_ret lzret;
1367 PyObject *result = NULL;
1368 filter.id = filter_id;
1369
1370 lzret = lzma_properties_decode(
1371 &filter, NULL, encoded_props->buf, encoded_props->len);
1372 if (catch_lzma_error(lzret))
1373 return NULL;
1374
1375 result = build_filter_spec(&filter);
1376
1377 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1378 allocated by lzma_properties_decode() using the default allocator. */
1379 free(filter.options);
1380 return result;
1381}
1382
1383
1384/* Module initialization. */
1385
1386static PyMethodDef module_methods[] = {
1387 _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1388 _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1389 _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1390 {NULL}
1391};
1392
1393static PyModuleDef _lzmamodule = {
1394 PyModuleDef_HEAD_INIT,
1395 "_lzma",
1396 NULL,
1397 -1,
1398 module_methods,
1399 NULL,
1400 NULL,
1401 NULL,
1402 NULL,
1403};
1404
1405/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1406 would not work correctly on platforms with 32-bit longs. */
1407static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -07001408module_add_int_constant(PyObject *m, const char *name, long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -08001409{
1410 PyObject *o = PyLong_FromLongLong(value);
1411 if (o == NULL)
1412 return -1;
1413 if (PyModule_AddObject(m, name, o) == 0)
1414 return 0;
1415 Py_DECREF(o);
1416 return -1;
1417}
1418
1419#define ADD_INT_PREFIX_MACRO(m, macro) \
1420 module_add_int_constant(m, #macro, LZMA_ ## macro)
1421
1422PyMODINIT_FUNC
1423PyInit__lzma(void)
1424{
1425 PyObject *m;
1426
1427 empty_tuple = PyTuple_New(0);
1428 if (empty_tuple == NULL)
1429 return NULL;
1430
1431 m = PyModule_Create(&_lzmamodule);
1432 if (m == NULL)
1433 return NULL;
1434
1435 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1436 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1437 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1438 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1439 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1440 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1441 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1442 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1443 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1444 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1445 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1446 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1447 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1448 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1449 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1450 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1451 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1452 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1453 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1454 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1455 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1456 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1457 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1458 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1459 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1460 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1461 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1462 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1463 return NULL;
1464
1465 Error = PyErr_NewExceptionWithDoc(
1466 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1467 if (Error == NULL)
1468 return NULL;
1469 Py_INCREF(Error);
1470 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1471 return NULL;
1472
1473 if (PyType_Ready(&Compressor_type) == -1)
1474 return NULL;
1475 Py_INCREF(&Compressor_type);
1476 if (PyModule_AddObject(m, "LZMACompressor",
1477 (PyObject *)&Compressor_type) == -1)
1478 return NULL;
1479
1480 if (PyType_Ready(&Decompressor_type) == -1)
1481 return NULL;
1482 Py_INCREF(&Decompressor_type);
1483 if (PyModule_AddObject(m, "LZMADecompressor",
1484 (PyObject *)&Decompressor_type) == -1)
1485 return NULL;
1486
1487 return m;
1488}