blob: 2a62a6835685059a6cba78c4e655c886bccd0838 [file] [log] [blame]
Larry Hastingsf256c222014-01-25 21:30:37 -08001/* _lzma - Low-level Python interface to liblzma.
2
3 Initial implementation by Per Øyvind Karlsen.
4 Rewritten by Nadeem Vawda.
5
6*/
7
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "structmember.h" // PyMemberDef
Larry Hastingsf256c222014-01-25 21:30:37 -080012
13#include <stdarg.h>
14#include <string.h>
15
16#include <lzma.h>
17
/* Take obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock. */
#define RELEASE_LOCK(obj) \
    PyThread_release_lock((obj)->lock)
Larry Hastingsf256c222014-01-25 21:30:37 -080025
26
27/* Container formats: */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3
};

/* One past the largest check ID that liblzma defines. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
36
37
38typedef struct {
39 PyObject_HEAD
40 lzma_allocator alloc;
41 lzma_stream lzs;
42 int flushed;
Larry Hastingsf256c222014-01-25 21:30:37 -080043 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -080044} Compressor;
45
46typedef struct {
47 PyObject_HEAD
48 lzma_allocator alloc;
49 lzma_stream lzs;
50 int check;
51 char eof;
52 PyObject *unused_data;
Antoine Pitrou26795ba2015-01-17 16:22:18 +010053 char needs_input;
54 uint8_t *input_buffer;
55 size_t input_buffer_size;
Larry Hastingsf256c222014-01-25 21:30:37 -080056 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -080057} Decompressor;
58
59/* LZMAError class object. */
60static PyObject *Error;
61
62/* An empty tuple, used by the filter specifier parsing code. */
63static PyObject *empty_tuple;
64
65
66/* Helper functions. */
67
68static int
69catch_lzma_error(lzma_ret lzret)
70{
71 switch (lzret) {
72 case LZMA_OK:
73 case LZMA_GET_CHECK:
74 case LZMA_NO_CHECK:
75 case LZMA_STREAM_END:
76 return 0;
77 case LZMA_UNSUPPORTED_CHECK:
78 PyErr_SetString(Error, "Unsupported integrity check");
79 return 1;
80 case LZMA_MEM_ERROR:
81 PyErr_NoMemory();
82 return 1;
83 case LZMA_MEMLIMIT_ERROR:
84 PyErr_SetString(Error, "Memory usage limit exceeded");
85 return 1;
86 case LZMA_FORMAT_ERROR:
87 PyErr_SetString(Error, "Input format not supported by decoder");
88 return 1;
89 case LZMA_OPTIONS_ERROR:
90 PyErr_SetString(Error, "Invalid or unsupported options");
91 return 1;
92 case LZMA_DATA_ERROR:
93 PyErr_SetString(Error, "Corrupt input data");
94 return 1;
95 case LZMA_BUF_ERROR:
96 PyErr_SetString(Error, "Insufficient buffer space");
97 return 1;
98 case LZMA_PROG_ERROR:
99 PyErr_SetString(Error, "Internal error");
100 return 1;
101 default:
102 PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
103 return 1;
104 }
105}
106
107static void*
108PyLzma_Malloc(void *opaque, size_t items, size_t size)
109{
Alexey Izbyshev3d4fabb2018-10-28 19:45:50 +0300110 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size)
Larry Hastingsf256c222014-01-25 21:30:37 -0800111 return NULL;
112 /* PyMem_Malloc() cannot be used:
113 the GIL is not held when lzma_code() is called */
114 return PyMem_RawMalloc(items * size);
115}
116
/* Deallocation callback installed in lzma_allocator.free; the opaque
   pointer is not used. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
122
/* Initial size of output buffers: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
128
129static int
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100130grow_buffer(PyObject **buf, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800131{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100132 Py_ssize_t size = PyBytes_GET_SIZE(*buf);
133 Py_ssize_t newsize = size + (size >> 3) + 6;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200134
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100135 if (max_length > 0 && newsize > max_length)
136 newsize = max_length;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200137
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100138 return _PyBytes_Resize(buf, newsize);
Larry Hastingsf256c222014-01-25 21:30:37 -0800139}
140
141
142/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
143 since the predefined conversion specifiers do not suit our needs:
144
145 uint32_t - the "I" (unsigned int) specifier is the right size, but
146 silently ignores overflows on conversion.
147
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700148 lzma_vli - the "K" (unsigned long long) specifier is the right
Larry Hastingsf256c222014-01-25 21:30:37 -0800149 size, but like "I" it silently ignores overflows on conversion.
150
151 lzma_mode and lzma_match_finder - these are enumeration types, and
152 so the size of each is implementation-defined. Worse, different
153 enum types can be of different sizes within the same program, so
154 to be strictly correct, we need to define two separate converters.
155 */
156
157#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
158 static int \
159 FUNCNAME(PyObject *obj, void *ptr) \
160 { \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700161 unsigned long long val; \
Larry Hastingsf256c222014-01-25 21:30:37 -0800162 \
163 val = PyLong_AsUnsignedLongLong(obj); \
164 if (PyErr_Occurred()) \
165 return 0; \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700166 if ((unsigned long long)(TYPE)val != val) { \
Larry Hastingsf256c222014-01-25 21:30:37 -0800167 PyErr_SetString(PyExc_OverflowError, \
168 "Value too large for " #TYPE " type"); \
169 return 0; \
170 } \
171 *(TYPE *)ptr = (TYPE)val; \
172 return 1; \
173 }
174
175INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
176INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
177INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
178INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
179
180#undef INT_TYPE_CONVERTER_FUNC
181
182
183/* Filter specifier parsing.
184
185 This code handles converting filter specifiers (Python dicts) into
186 the C lzma_filter structs expected by liblzma. */
187
188static void *
189parse_filter_spec_lzma(PyObject *spec)
190{
191 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
192 "pb", "mode", "nice_len", "mf", "depth", NULL};
193 PyObject *id;
194 PyObject *preset_obj;
195 uint32_t preset = LZMA_PRESET_DEFAULT;
196 lzma_options_lzma *options;
197
198 /* First, fill in default values for all the options using a preset.
199 Then, override the defaults with any values given by the caller. */
200
201 preset_obj = PyMapping_GetItemString(spec, "preset");
202 if (preset_obj == NULL) {
203 if (PyErr_ExceptionMatches(PyExc_KeyError))
204 PyErr_Clear();
205 else
206 return NULL;
207 } else {
208 int ok = uint32_converter(preset_obj, &preset);
209 Py_DECREF(preset_obj);
210 if (!ok)
211 return NULL;
212 }
213
Andy Lester7668a8b2020-03-24 23:26:44 -0500214 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
Larry Hastingsf256c222014-01-25 21:30:37 -0800215 if (options == NULL)
216 return PyErr_NoMemory();
Larry Hastingsf256c222014-01-25 21:30:37 -0800217
218 if (lzma_lzma_preset(options, preset)) {
219 PyMem_Free(options);
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200220 PyErr_Format(Error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800221 return NULL;
222 }
223
224 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
225 "|OOO&O&O&O&O&O&O&O&", optnames,
226 &id, &preset_obj,
227 uint32_converter, &options->dict_size,
228 uint32_converter, &options->lc,
229 uint32_converter, &options->lp,
230 uint32_converter, &options->pb,
231 lzma_mode_converter, &options->mode,
232 uint32_converter, &options->nice_len,
233 lzma_mf_converter, &options->mf,
234 uint32_converter, &options->depth)) {
235 PyErr_SetString(PyExc_ValueError,
236 "Invalid filter specifier for LZMA filter");
237 PyMem_Free(options);
238 options = NULL;
239 }
240 return options;
241}
242
243static void *
244parse_filter_spec_delta(PyObject *spec)
245{
246 static char *optnames[] = {"id", "dist", NULL};
247 PyObject *id;
248 uint32_t dist = 1;
249 lzma_options_delta *options;
250
251 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
252 &id, uint32_converter, &dist)) {
253 PyErr_SetString(PyExc_ValueError,
254 "Invalid filter specifier for delta filter");
255 return NULL;
256 }
257
Andy Lester7668a8b2020-03-24 23:26:44 -0500258 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
Larry Hastingsf256c222014-01-25 21:30:37 -0800259 if (options == NULL)
260 return PyErr_NoMemory();
Larry Hastingsf256c222014-01-25 21:30:37 -0800261 options->type = LZMA_DELTA_TYPE_BYTE;
262 options->dist = dist;
263 return options;
264}
265
266static void *
267parse_filter_spec_bcj(PyObject *spec)
268{
269 static char *optnames[] = {"id", "start_offset", NULL};
270 PyObject *id;
271 uint32_t start_offset = 0;
272 lzma_options_bcj *options;
273
274 if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
275 &id, uint32_converter, &start_offset)) {
276 PyErr_SetString(PyExc_ValueError,
277 "Invalid filter specifier for BCJ filter");
278 return NULL;
279 }
280
Andy Lester7668a8b2020-03-24 23:26:44 -0500281 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
Larry Hastingsf256c222014-01-25 21:30:37 -0800282 if (options == NULL)
283 return PyErr_NoMemory();
Larry Hastingsf256c222014-01-25 21:30:37 -0800284 options->start_offset = start_offset;
285 return options;
286}
287
288static int
289lzma_filter_converter(PyObject *spec, void *ptr)
290{
291 lzma_filter *f = (lzma_filter *)ptr;
292 PyObject *id_obj;
293
294 if (!PyMapping_Check(spec)) {
295 PyErr_SetString(PyExc_TypeError,
296 "Filter specifier must be a dict or dict-like object");
297 return 0;
298 }
299 id_obj = PyMapping_GetItemString(spec, "id");
300 if (id_obj == NULL) {
301 if (PyErr_ExceptionMatches(PyExc_KeyError))
302 PyErr_SetString(PyExc_ValueError,
303 "Filter specifier must have an \"id\" entry");
304 return 0;
305 }
306 f->id = PyLong_AsUnsignedLongLong(id_obj);
307 Py_DECREF(id_obj);
308 if (PyErr_Occurred())
309 return 0;
310
311 switch (f->id) {
312 case LZMA_FILTER_LZMA1:
313 case LZMA_FILTER_LZMA2:
314 f->options = parse_filter_spec_lzma(spec);
315 return f->options != NULL;
316 case LZMA_FILTER_DELTA:
317 f->options = parse_filter_spec_delta(spec);
318 return f->options != NULL;
319 case LZMA_FILTER_X86:
320 case LZMA_FILTER_POWERPC:
321 case LZMA_FILTER_IA64:
322 case LZMA_FILTER_ARM:
323 case LZMA_FILTER_ARMTHUMB:
324 case LZMA_FILTER_SPARC:
325 f->options = parse_filter_spec_bcj(spec);
326 return f->options != NULL;
327 default:
328 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
329 return 0;
330 }
331}
332
333static void
334free_filter_chain(lzma_filter filters[])
335{
336 int i;
337
338 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
339 PyMem_Free(filters[i].options);
340}
341
342static int
343parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
344{
345 Py_ssize_t i, num_filters;
346
347 num_filters = PySequence_Length(filterspecs);
348 if (num_filters == -1)
349 return -1;
350 if (num_filters > LZMA_FILTERS_MAX) {
351 PyErr_Format(PyExc_ValueError,
352 "Too many filters - liblzma supports a maximum of %d",
353 LZMA_FILTERS_MAX);
354 return -1;
355 }
356
357 for (i = 0; i < num_filters; i++) {
358 int ok = 1;
359 PyObject *spec = PySequence_GetItem(filterspecs, i);
360 if (spec == NULL || !lzma_filter_converter(spec, &filters[i]))
361 ok = 0;
362 Py_XDECREF(spec);
363 if (!ok) {
364 filters[i].id = LZMA_VLI_UNKNOWN;
365 free_filter_chain(filters);
366 return -1;
367 }
368 }
369 filters[num_filters].id = LZMA_VLI_UNKNOWN;
370 return 0;
371}
372
373
374/* Filter specifier construction.
375
376 This code handles converting C lzma_filter structs into
377 Python-level filter specifiers (represented as dicts). */
378
379static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700380spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -0800381{
382 int status;
383 PyObject *value_object;
384
385 value_object = PyLong_FromUnsignedLongLong(value);
386 if (value_object == NULL)
387 return -1;
388
389 status = _PyDict_SetItemId(spec, key, value_object);
390 Py_DECREF(value_object);
391 return status;
392}
393
394static PyObject *
395build_filter_spec(const lzma_filter *f)
396{
397 PyObject *spec;
398
399 spec = PyDict_New();
400 if (spec == NULL)
401 return NULL;
402
403#define ADD_FIELD(SOURCE, FIELD) \
404 do { \
405 _Py_IDENTIFIER(FIELD); \
406 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
407 goto error;\
408 } while (0)
409
410 ADD_FIELD(f, id);
411
412 switch (f->id) {
413 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
414 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
415 dict_size field is used. */
416 case LZMA_FILTER_LZMA1: {
417 lzma_options_lzma *options = f->options;
418 ADD_FIELD(options, lc);
419 ADD_FIELD(options, lp);
420 ADD_FIELD(options, pb);
421 ADD_FIELD(options, dict_size);
422 break;
423 }
424 case LZMA_FILTER_LZMA2: {
425 lzma_options_lzma *options = f->options;
426 ADD_FIELD(options, dict_size);
427 break;
428 }
429 case LZMA_FILTER_DELTA: {
430 lzma_options_delta *options = f->options;
431 ADD_FIELD(options, dist);
432 break;
433 }
434 case LZMA_FILTER_X86:
435 case LZMA_FILTER_POWERPC:
436 case LZMA_FILTER_IA64:
437 case LZMA_FILTER_ARM:
438 case LZMA_FILTER_ARMTHUMB:
439 case LZMA_FILTER_SPARC: {
440 lzma_options_bcj *options = f->options;
441 ADD_FIELD(options, start_offset);
442 break;
443 }
444 default:
445 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
446 goto error;
447 }
448
449#undef ADD_FIELD
450
451 return spec;
452
453error:
454 Py_DECREF(spec);
455 return NULL;
456}
457
458
459/*[clinic input]
Larry Hastingsf256c222014-01-25 21:30:37 -0800460module _lzma
461class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
462class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
463[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300464/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800465
466#include "clinic/_lzmamodule.c.h"
467
468/*[python input]
469
470class lzma_vli_converter(CConverter):
471 type = 'lzma_vli'
472 converter = 'lzma_vli_converter'
473
474class lzma_filter_converter(CConverter):
475 type = 'lzma_filter'
476 converter = 'lzma_filter_converter'
477 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
478
479 def cleanup(self):
480 name = ensure_legal_c_identifier(self.name)
481 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
482 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
483
484[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800485/*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800486
487
488/* LZMACompressor class. */
489
490static PyObject *
491compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
492{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100493 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800494 PyObject *result;
495
496 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
497 if (result == NULL)
498 return NULL;
499 c->lzs.next_in = data;
500 c->lzs.avail_in = len;
501 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
502 c->lzs.avail_out = PyBytes_GET_SIZE(result);
503 for (;;) {
504 lzma_ret lzret;
505
506 Py_BEGIN_ALLOW_THREADS
507 lzret = lzma_code(&c->lzs, action);
508 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
Serhiy Storchaka04f17f12016-10-31 08:30:09 +0200509 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0)
510 lzret = LZMA_OK; /* That wasn't a real error */
Larry Hastingsf256c222014-01-25 21:30:37 -0800511 Py_END_ALLOW_THREADS
512 if (catch_lzma_error(lzret))
513 goto error;
514 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
515 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
516 break;
517 } else if (c->lzs.avail_out == 0) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100518 if (grow_buffer(&result, -1) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800519 goto error;
520 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
521 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
522 }
523 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100524 if (data_size != PyBytes_GET_SIZE(result))
Larry Hastingsf256c222014-01-25 21:30:37 -0800525 if (_PyBytes_Resize(&result, data_size) == -1)
526 goto error;
527 return result;
528
529error:
530 Py_XDECREF(result);
531 return NULL;
532}
533
534/*[clinic input]
535_lzma.LZMACompressor.compress
536
Larry Hastingsf256c222014-01-25 21:30:37 -0800537 data: Py_buffer
538 /
539
540Provide data to the compressor object.
541
542Returns a chunk of compressed data if possible, or b'' otherwise.
543
544When you have finished providing data to the compressor, call the
545flush() method to finish the compression process.
546[clinic start generated code]*/
547
548static PyObject *
549_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300550/*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800551{
552 PyObject *result = NULL;
553
554 ACQUIRE_LOCK(self);
555 if (self->flushed)
556 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
557 else
558 result = compress(self, data->buf, data->len, LZMA_RUN);
559 RELEASE_LOCK(self);
560 return result;
561}
562
563/*[clinic input]
564_lzma.LZMACompressor.flush
565
Larry Hastingsf256c222014-01-25 21:30:37 -0800566Finish the compression process.
567
568Returns the compressed data left in internal buffers.
569
570The compressor object may not be used after this method is called.
571[clinic start generated code]*/
572
573static PyObject *
574_lzma_LZMACompressor_flush_impl(Compressor *self)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300575/*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800576{
577 PyObject *result = NULL;
578
579 ACQUIRE_LOCK(self);
580 if (self->flushed) {
581 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
582 } else {
583 self->flushed = 1;
584 result = compress(self, NULL, 0, LZMA_FINISH);
585 }
586 RELEASE_LOCK(self);
587 return result;
588}
589
Larry Hastingsf256c222014-01-25 21:30:37 -0800590static int
591Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
592 PyObject *filterspecs)
593{
594 lzma_ret lzret;
595
596 if (filterspecs == Py_None) {
597 lzret = lzma_easy_encoder(lzs, preset, check);
598 } else {
599 lzma_filter filters[LZMA_FILTERS_MAX + 1];
600
601 if (parse_filter_chain_spec(filters, filterspecs) == -1)
602 return -1;
603 lzret = lzma_stream_encoder(lzs, filters, check);
604 free_filter_chain(filters);
605 }
606 if (catch_lzma_error(lzret))
607 return -1;
608 else
609 return 0;
610}
611
612static int
613Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
614{
615 lzma_ret lzret;
616
617 if (filterspecs == Py_None) {
618 lzma_options_lzma options;
619
620 if (lzma_lzma_preset(&options, preset)) {
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200621 PyErr_Format(Error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800622 return -1;
623 }
624 lzret = lzma_alone_encoder(lzs, &options);
625 } else {
626 lzma_filter filters[LZMA_FILTERS_MAX + 1];
627
628 if (parse_filter_chain_spec(filters, filterspecs) == -1)
629 return -1;
630 if (filters[0].id == LZMA_FILTER_LZMA1 &&
631 filters[1].id == LZMA_VLI_UNKNOWN) {
632 lzret = lzma_alone_encoder(lzs, filters[0].options);
633 } else {
634 PyErr_SetString(PyExc_ValueError,
635 "Invalid filter chain for FORMAT_ALONE - "
636 "must be a single LZMA1 filter");
637 lzret = LZMA_PROG_ERROR;
638 }
639 free_filter_chain(filters);
640 }
641 if (PyErr_Occurred() || catch_lzma_error(lzret))
642 return -1;
643 else
644 return 0;
645}
646
647static int
648Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
649{
650 lzma_filter filters[LZMA_FILTERS_MAX + 1];
651 lzma_ret lzret;
652
653 if (filterspecs == Py_None) {
654 PyErr_SetString(PyExc_ValueError,
655 "Must specify filters for FORMAT_RAW");
656 return -1;
657 }
658 if (parse_filter_chain_spec(filters, filterspecs) == -1)
659 return -1;
660 lzret = lzma_raw_encoder(lzs, filters);
661 free_filter_chain(filters);
662 if (catch_lzma_error(lzret))
663 return -1;
664 else
665 return 0;
666}
667
668/*[-clinic input]
669_lzma.LZMACompressor.__init__
670
Larry Hastingsf256c222014-01-25 21:30:37 -0800671 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
672 The container format to use for the output. This can
673 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
674
675 check: int(c_default="-1") = unspecified
676 The integrity check to use. For FORMAT_XZ, the default
Martin Pantere26da7c2016-06-02 10:07:09 +0000677 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
Larry Hastingsf256c222014-01-25 21:30:37 -0800678 checks; for these formats, check must be omitted, or be CHECK_NONE.
679
680 preset: object = None
681 If provided should be an integer in the range 0-9, optionally
682 OR-ed with the constant PRESET_EXTREME.
683
684 filters: object = None
685 If provided should be a sequence of dicts. Each dict should
686 have an entry for "id" indicating the ID of the filter, plus
687 additional entries for options to the filter.
688
689Create a compressor object for compressing data incrementally.
690
691The settings used by the compressor can be specified either as a
692preset compression level (with the 'preset' argument), or in detail
693as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
694and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
695level. For FORMAT_RAW, the caller must always specify a filter chain;
696the raw compressor does not support preset compression levels.
697
698For one-shot compression, use the compress() function instead.
699[-clinic start generated code]*/
700static int
701Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
702{
703 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
704 int format = FORMAT_XZ;
705 int check = -1;
706 uint32_t preset = LZMA_PRESET_DEFAULT;
707 PyObject *preset_obj = Py_None;
708 PyObject *filterspecs = Py_None;
709
710 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
711 "|iiOO:LZMACompressor", arg_names,
712 &format, &check, &preset_obj,
713 &filterspecs))
714 return -1;
715
716 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
717 PyErr_SetString(PyExc_ValueError,
718 "Integrity checks are only supported by FORMAT_XZ");
719 return -1;
720 }
721
722 if (preset_obj != Py_None && filterspecs != Py_None) {
723 PyErr_SetString(PyExc_ValueError,
724 "Cannot specify both preset and filter chain");
725 return -1;
726 }
727
728 if (preset_obj != Py_None)
729 if (!uint32_converter(preset_obj, &preset))
730 return -1;
731
732 self->alloc.opaque = NULL;
733 self->alloc.alloc = PyLzma_Malloc;
734 self->alloc.free = PyLzma_Free;
735 self->lzs.allocator = &self->alloc;
736
Larry Hastingsf256c222014-01-25 21:30:37 -0800737 self->lock = PyThread_allocate_lock();
738 if (self->lock == NULL) {
739 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
740 return -1;
741 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800742
743 self->flushed = 0;
744 switch (format) {
745 case FORMAT_XZ:
746 if (check == -1)
747 check = LZMA_CHECK_CRC64;
748 if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
749 break;
750 return 0;
751
752 case FORMAT_ALONE:
753 if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
754 break;
755 return 0;
756
757 case FORMAT_RAW:
758 if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
759 break;
760 return 0;
761
762 default:
763 PyErr_Format(PyExc_ValueError,
764 "Invalid container format: %d", format);
765 break;
766 }
767
Larry Hastingsf256c222014-01-25 21:30:37 -0800768 PyThread_free_lock(self->lock);
769 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800770 return -1;
771}
772
773static void
774Compressor_dealloc(Compressor *self)
775{
776 lzma_end(&self->lzs);
Larry Hastingsf256c222014-01-25 21:30:37 -0800777 if (self->lock != NULL)
778 PyThread_free_lock(self->lock);
Larry Hastingsf256c222014-01-25 21:30:37 -0800779 Py_TYPE(self)->tp_free((PyObject *)self);
780}
781
782static PyMethodDef Compressor_methods[] = {
783 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
784 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -0800785 {NULL}
786};
787
788PyDoc_STRVAR(Compressor_doc,
789"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
790"\n"
791"Create a compressor object for compressing data incrementally.\n"
792"\n"
793"format specifies the container format to use for the output. This can\n"
794"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
795"\n"
796"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
Ville Skyttä49b27342017-08-03 09:00:59 +0300797"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
Larry Hastingsf256c222014-01-25 21:30:37 -0800798"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
799"\n"
800"The settings used by the compressor can be specified either as a\n"
801"preset compression level (with the 'preset' argument), or in detail\n"
802"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
803"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
804"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
805"the raw compressor does not support preset compression levels.\n"
806"\n"
807"preset (if provided) should be an integer in the range 0-9, optionally\n"
808"OR-ed with the constant PRESET_EXTREME.\n"
809"\n"
810"filters (if provided) should be a sequence of dicts. Each dict should\n"
811"have an entry for \"id\" indicating the ID of the filter, plus\n"
812"additional entries for options to the filter.\n"
813"\n"
814"For one-shot compression, use the compress() function instead.\n");
815
816static PyTypeObject Compressor_type = {
817 PyVarObject_HEAD_INIT(NULL, 0)
818 "_lzma.LZMACompressor", /* tp_name */
819 sizeof(Compressor), /* tp_basicsize */
820 0, /* tp_itemsize */
821 (destructor)Compressor_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200822 0, /* tp_vectorcall_offset */
Larry Hastingsf256c222014-01-25 21:30:37 -0800823 0, /* tp_getattr */
824 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200825 0, /* tp_as_async */
Larry Hastingsf256c222014-01-25 21:30:37 -0800826 0, /* tp_repr */
827 0, /* tp_as_number */
828 0, /* tp_as_sequence */
829 0, /* tp_as_mapping */
830 0, /* tp_hash */
831 0, /* tp_call */
832 0, /* tp_str */
833 0, /* tp_getattro */
834 0, /* tp_setattro */
835 0, /* tp_as_buffer */
836 Py_TPFLAGS_DEFAULT, /* tp_flags */
837 Compressor_doc, /* tp_doc */
838 0, /* tp_traverse */
839 0, /* tp_clear */
840 0, /* tp_richcompare */
841 0, /* tp_weaklistoffset */
842 0, /* tp_iter */
843 0, /* tp_iternext */
844 Compressor_methods, /* tp_methods */
845 0, /* tp_members */
846 0, /* tp_getset */
847 0, /* tp_base */
848 0, /* tp_dict */
849 0, /* tp_descr_get */
850 0, /* tp_descr_set */
851 0, /* tp_dictoffset */
852 (initproc)Compressor_init, /* tp_init */
853 0, /* tp_alloc */
854 PyType_GenericNew, /* tp_new */
855};
856
857
858/* LZMADecompressor class. */
859
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100860/* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
861 buffer is allocated dynamically and returned. At most max_length bytes are
862 returned, so some of the input may not be consumed. d->lzs.next_in and
863 d->lzs.avail_in are updated to reflect the consumed input. */
864static PyObject*
865decompress_buf(Decompressor *d, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800866{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100867 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800868 PyObject *result;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100869 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200870
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100871 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
872 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
873 else
874 result = PyBytes_FromStringAndSize(NULL, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -0800875 if (result == NULL)
876 return NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100877
878 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result);
879 lzs->avail_out = PyBytes_GET_SIZE(result);
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200880
Larry Hastingsf256c222014-01-25 21:30:37 -0800881 for (;;) {
882 lzma_ret lzret;
883
884 Py_BEGIN_ALLOW_THREADS
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100885 lzret = lzma_code(lzs, LZMA_RUN);
886 data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
animalize4ffd05d2019-09-12 22:20:37 +0800887 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0)
888 lzret = LZMA_OK; /* That wasn't a real error */
Larry Hastingsf256c222014-01-25 21:30:37 -0800889 Py_END_ALLOW_THREADS
animalize4ffd05d2019-09-12 22:20:37 +0800890
Larry Hastingsf256c222014-01-25 21:30:37 -0800891 if (catch_lzma_error(lzret))
892 goto error;
893 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
894 d->check = lzma_get_check(&d->lzs);
895 if (lzret == LZMA_STREAM_END) {
896 d->eof = 1;
Larry Hastingsf256c222014-01-25 21:30:37 -0800897 break;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100898 } else if (lzs->avail_out == 0) {
animalize4ffd05d2019-09-12 22:20:37 +0800899 /* Need to check lzs->avail_out before lzs->avail_in.
900 Maybe lzs's internal state still have a few bytes
901 can be output, grow the output buffer and continue
902 if max_lengh < 0. */
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100903 if (data_size == max_length)
904 break;
905 if (grow_buffer(&result, max_length) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800906 goto error;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100907 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
908 lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
animalize4ffd05d2019-09-12 22:20:37 +0800909 } else if (lzs->avail_in == 0) {
910 break;
Larry Hastingsf256c222014-01-25 21:30:37 -0800911 }
912 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100913 if (data_size != PyBytes_GET_SIZE(result))
Larry Hastingsf256c222014-01-25 21:30:37 -0800914 if (_PyBytes_Resize(&result, data_size) == -1)
915 goto error;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100916
917 return result;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200918
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100919error:
920 Py_XDECREF(result);
921 return NULL;
922}
923
924static PyObject *
925decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
926{
927 char input_buffer_in_use;
928 PyObject *result;
929 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200930
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100931 /* Prepend unconsumed input if necessary */
932 if (lzs->next_in != NULL) {
933 size_t avail_now, avail_total;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200934
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100935 /* Number of bytes we can append to input buffer */
936 avail_now = (d->input_buffer + d->input_buffer_size)
937 - (lzs->next_in + lzs->avail_in);
938
939 /* Number of bytes we can append if we move existing
940 contents to beginning of buffer (overwriting
941 consumed input) */
942 avail_total = d->input_buffer_size - lzs->avail_in;
943
944 if (avail_total < len) {
945 size_t offset = lzs->next_in - d->input_buffer;
946 uint8_t *tmp;
947 size_t new_size = d->input_buffer_size + len - avail_now;
948
949 /* Assign to temporary variable first, so we don't
950 lose address of allocated buffer if realloc fails */
951 tmp = PyMem_Realloc(d->input_buffer, new_size);
952 if (tmp == NULL) {
953 PyErr_SetNone(PyExc_MemoryError);
954 return NULL;
955 }
956 d->input_buffer = tmp;
957 d->input_buffer_size = new_size;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200958
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100959 lzs->next_in = d->input_buffer + offset;
960 }
961 else if (avail_now < len) {
962 memmove(d->input_buffer, lzs->next_in,
963 lzs->avail_in);
964 lzs->next_in = d->input_buffer;
965 }
966 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
967 lzs->avail_in += len;
968 input_buffer_in_use = 1;
969 }
970 else {
971 lzs->next_in = data;
972 lzs->avail_in = len;
973 input_buffer_in_use = 0;
974 }
975
976 result = decompress_buf(d, max_length);
Serhiy Storchakac0b70372016-09-27 20:14:26 +0300977 if (result == NULL) {
978 lzs->next_in = NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100979 return NULL;
Serhiy Storchakac0b70372016-09-27 20:14:26 +0300980 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100981
982 if (d->eof) {
983 d->needs_input = 0;
984 if (lzs->avail_in > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +0300985 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200986 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100987 if (d->unused_data == NULL)
988 goto error;
989 }
990 }
991 else if (lzs->avail_in == 0) {
992 lzs->next_in = NULL;
animalize4ffd05d2019-09-12 22:20:37 +0800993
994 if (lzs->avail_out == 0) {
995 /* (avail_in==0 && avail_out==0)
996 Maybe lzs's internal state still have a few bytes can
997 be output, try to output them next time. */
998 d->needs_input = 0;
999
1000 /* if max_length < 0, lzs->avail_out always > 0 */
1001 assert(max_length >= 0);
1002 } else {
1003 /* Input buffer exhausted, output buffer has space. */
1004 d->needs_input = 1;
1005 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001006 }
1007 else {
1008 d->needs_input = 0;
1009
1010 /* If we did not use the input buffer, we now have
1011 to copy the tail from the caller's buffer into the
1012 input buffer */
1013 if (!input_buffer_in_use) {
1014
1015 /* Discard buffer if it's too small
1016 (resizing it may needlessly copy the current contents) */
1017 if (d->input_buffer != NULL &&
1018 d->input_buffer_size < lzs->avail_in) {
1019 PyMem_Free(d->input_buffer);
1020 d->input_buffer = NULL;
1021 }
1022
1023 /* Allocate if necessary */
1024 if (d->input_buffer == NULL) {
1025 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1026 if (d->input_buffer == NULL) {
1027 PyErr_SetNone(PyExc_MemoryError);
1028 goto error;
1029 }
1030 d->input_buffer_size = lzs->avail_in;
1031 }
1032
1033 /* Copy tail */
1034 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1035 lzs->next_in = d->input_buffer;
1036 }
1037 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001038
Larry Hastingsf256c222014-01-25 21:30:37 -08001039 return result;
1040
1041error:
1042 Py_XDECREF(result);
1043 return NULL;
1044}
1045
1046/*[clinic input]
1047_lzma.LZMADecompressor.decompress
1048
Larry Hastingsf256c222014-01-25 21:30:37 -08001049 data: Py_buffer
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001050 max_length: Py_ssize_t=-1
Larry Hastingsf256c222014-01-25 21:30:37 -08001051
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001052Decompress *data*, returning uncompressed data as bytes.
Larry Hastingsf256c222014-01-25 21:30:37 -08001053
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001054If *max_length* is nonnegative, returns at most *max_length* bytes of
1055decompressed data. If this limit is reached and further output can be
1056produced, *self.needs_input* will be set to ``False``. In this case, the next
1057call to *decompress()* may provide *data* as b'' to obtain more of the output.
Larry Hastingsf256c222014-01-25 21:30:37 -08001058
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001059If all of the input data was decompressed and returned (either because this
1060was less than *max_length* bytes, or because *max_length* was negative),
1061*self.needs_input* will be set to True.
1062
1063Attempting to decompress data after the end of stream is reached raises an
1064EOFError. Any data found after the end of the stream is ignored and saved in
1065the unused_data attribute.
Larry Hastingsf256c222014-01-25 21:30:37 -08001066[clinic start generated code]*/
1067
1068static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001069_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1070 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001071/*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001072{
1073 PyObject *result = NULL;
1074
1075 ACQUIRE_LOCK(self);
1076 if (self->eof)
1077 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1078 else
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001079 result = decompress(self, data->buf, data->len, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -08001080 RELEASE_LOCK(self);
1081 return result;
1082}
1083
Larry Hastingsf256c222014-01-25 21:30:37 -08001084static int
1085Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
1086{
1087 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1088 lzma_ret lzret;
1089
1090 if (parse_filter_chain_spec(filters, filterspecs) == -1)
1091 return -1;
1092 lzret = lzma_raw_decoder(lzs, filters);
1093 free_filter_chain(filters);
1094 if (catch_lzma_error(lzret))
1095 return -1;
1096 else
1097 return 0;
1098}
1099
1100/*[clinic input]
1101_lzma.LZMADecompressor.__init__
1102
Larry Hastingsf256c222014-01-25 21:30:37 -08001103 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1104 Specifies the container format of the input stream. If this is
1105 FORMAT_AUTO (the default), the decompressor will automatically detect
1106 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1107 FORMAT_RAW cannot be autodetected.
1108
1109 memlimit: object = None
1110 Limit the amount of memory used by the decompressor. This will cause
1111 decompression to fail if the input cannot be decompressed within the
1112 given limit.
1113
1114 filters: object = None
1115 A custom filter chain. This argument is required for FORMAT_RAW, and
1116 not accepted with any other format. When provided, this should be a
1117 sequence of dicts, each indicating the ID and options for a single
1118 filter.
1119
1120Create a decompressor object for decompressing data incrementally.
1121
1122For one-shot decompression, use the decompress() function instead.
1123[clinic start generated code]*/
1124
1125static int
Larry Hastings89964c42015-04-14 18:07:59 -04001126_lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1127 PyObject *memlimit, PyObject *filters)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001128/*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001129{
1130 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1131 uint64_t memlimit_ = UINT64_MAX;
1132 lzma_ret lzret;
1133
1134 if (memlimit != Py_None) {
1135 if (format == FORMAT_RAW) {
1136 PyErr_SetString(PyExc_ValueError,
1137 "Cannot specify memory limit with FORMAT_RAW");
1138 return -1;
1139 }
1140 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1141 if (PyErr_Occurred())
1142 return -1;
1143 }
1144
1145 if (format == FORMAT_RAW && filters == Py_None) {
1146 PyErr_SetString(PyExc_ValueError,
1147 "Must specify filters for FORMAT_RAW");
1148 return -1;
1149 } else if (format != FORMAT_RAW && filters != Py_None) {
1150 PyErr_SetString(PyExc_ValueError,
1151 "Cannot specify filters except with FORMAT_RAW");
1152 return -1;
1153 }
1154
1155 self->alloc.opaque = NULL;
1156 self->alloc.alloc = PyLzma_Malloc;
1157 self->alloc.free = PyLzma_Free;
1158 self->lzs.allocator = &self->alloc;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001159 self->lzs.next_in = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001160
Victor Stinner9b7cf752018-06-23 10:35:23 +02001161 PyThread_type_lock lock = PyThread_allocate_lock();
1162 if (lock == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001163 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1164 return -1;
1165 }
Victor Stinner9b7cf752018-06-23 10:35:23 +02001166 if (self->lock != NULL) {
1167 PyThread_free_lock(self->lock);
1168 }
1169 self->lock = lock;
Larry Hastingsf256c222014-01-25 21:30:37 -08001170
1171 self->check = LZMA_CHECK_UNKNOWN;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001172 self->needs_input = 1;
1173 self->input_buffer = NULL;
1174 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +02001175 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Larry Hastingsf256c222014-01-25 21:30:37 -08001176 if (self->unused_data == NULL)
1177 goto error;
1178
1179 switch (format) {
1180 case FORMAT_AUTO:
1181 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1182 if (catch_lzma_error(lzret))
1183 break;
1184 return 0;
1185
1186 case FORMAT_XZ:
1187 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1188 if (catch_lzma_error(lzret))
1189 break;
1190 return 0;
1191
1192 case FORMAT_ALONE:
1193 self->check = LZMA_CHECK_NONE;
1194 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1195 if (catch_lzma_error(lzret))
1196 break;
1197 return 0;
1198
1199 case FORMAT_RAW:
1200 self->check = LZMA_CHECK_NONE;
1201 if (Decompressor_init_raw(&self->lzs, filters) == -1)
1202 break;
1203 return 0;
1204
1205 default:
1206 PyErr_Format(PyExc_ValueError,
1207 "Invalid container format: %d", format);
1208 break;
1209 }
1210
1211error:
1212 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001213 PyThread_free_lock(self->lock);
1214 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001215 return -1;
1216}
1217
1218static void
1219Decompressor_dealloc(Decompressor *self)
1220{
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001221 if(self->input_buffer != NULL)
1222 PyMem_Free(self->input_buffer);
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001223
Larry Hastingsf256c222014-01-25 21:30:37 -08001224 lzma_end(&self->lzs);
1225 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001226 if (self->lock != NULL)
1227 PyThread_free_lock(self->lock);
Larry Hastingsf256c222014-01-25 21:30:37 -08001228 Py_TYPE(self)->tp_free((PyObject *)self);
1229}
1230
1231static PyMethodDef Decompressor_methods[] = {
1232 _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -08001233 {NULL}
1234};
1235
1236PyDoc_STRVAR(Decompressor_check_doc,
1237"ID of the integrity check used by the input stream.");
1238
1239PyDoc_STRVAR(Decompressor_eof_doc,
1240"True if the end-of-stream marker has been reached.");
1241
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001242PyDoc_STRVAR(Decompressor_needs_input_doc,
1243"True if more input is needed before more decompressed data can be produced.");
1244
Larry Hastingsf256c222014-01-25 21:30:37 -08001245PyDoc_STRVAR(Decompressor_unused_data_doc,
1246"Data found after the end of the compressed stream.");
1247
1248static PyMemberDef Decompressor_members[] = {
1249 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1250 Decompressor_check_doc},
1251 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1252 Decompressor_eof_doc},
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001253 {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1254 Decompressor_needs_input_doc},
Larry Hastingsf256c222014-01-25 21:30:37 -08001255 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1256 Decompressor_unused_data_doc},
1257 {NULL}
1258};
1259
1260static PyTypeObject Decompressor_type = {
1261 PyVarObject_HEAD_INIT(NULL, 0)
1262 "_lzma.LZMADecompressor", /* tp_name */
1263 sizeof(Decompressor), /* tp_basicsize */
1264 0, /* tp_itemsize */
1265 (destructor)Decompressor_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001266 0, /* tp_vectorcall_offset */
Larry Hastingsf256c222014-01-25 21:30:37 -08001267 0, /* tp_getattr */
1268 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001269 0, /* tp_as_async */
Larry Hastingsf256c222014-01-25 21:30:37 -08001270 0, /* tp_repr */
1271 0, /* tp_as_number */
1272 0, /* tp_as_sequence */
1273 0, /* tp_as_mapping */
1274 0, /* tp_hash */
1275 0, /* tp_call */
1276 0, /* tp_str */
1277 0, /* tp_getattro */
1278 0, /* tp_setattro */
1279 0, /* tp_as_buffer */
1280 Py_TPFLAGS_DEFAULT, /* tp_flags */
1281 _lzma_LZMADecompressor___init____doc__, /* tp_doc */
1282 0, /* tp_traverse */
1283 0, /* tp_clear */
1284 0, /* tp_richcompare */
1285 0, /* tp_weaklistoffset */
1286 0, /* tp_iter */
1287 0, /* tp_iternext */
1288 Decompressor_methods, /* tp_methods */
1289 Decompressor_members, /* tp_members */
1290 0, /* tp_getset */
1291 0, /* tp_base */
1292 0, /* tp_dict */
1293 0, /* tp_descr_get */
1294 0, /* tp_descr_set */
1295 0, /* tp_dictoffset */
1296 _lzma_LZMADecompressor___init__, /* tp_init */
1297 0, /* tp_alloc */
1298 PyType_GenericNew, /* tp_new */
1299};
1300
1301
1302/* Module-level functions. */
1303
1304/*[clinic input]
1305_lzma.is_check_supported
1306 check_id: int
1307 /
1308
1309Test whether the given integrity check is supported.
1310
1311Always returns True for CHECK_NONE and CHECK_CRC32.
1312[clinic start generated code]*/
1313
1314static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001315_lzma_is_check_supported_impl(PyObject *module, int check_id)
1316/*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001317{
1318 return PyBool_FromLong(lzma_check_is_supported(check_id));
1319}
1320
1321
1322/*[clinic input]
1323_lzma._encode_filter_properties
1324 filter: lzma_filter(c_default="{LZMA_VLI_UNKNOWN, NULL}")
1325 /
1326
1327Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1328
1329The result does not include the filter ID itself, only the options.
1330[clinic start generated code]*/
1331
1332static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001333_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1334/*[clinic end generated code: output=5c93c8e14e7be5a8 input=d4c64f1b557c77d4]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001335{
1336 lzma_ret lzret;
1337 uint32_t encoded_size;
1338 PyObject *result = NULL;
1339
1340 lzret = lzma_properties_size(&encoded_size, &filter);
1341 if (catch_lzma_error(lzret))
1342 goto error;
1343
1344 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1345 if (result == NULL)
1346 goto error;
1347
1348 lzret = lzma_properties_encode(
1349 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1350 if (catch_lzma_error(lzret))
1351 goto error;
1352
1353 return result;
1354
1355error:
1356 Py_XDECREF(result);
1357 return NULL;
1358}
1359
1360
1361/*[clinic input]
1362_lzma._decode_filter_properties
1363 filter_id: lzma_vli
1364 encoded_props: Py_buffer
1365 /
1366
1367Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1368
1369The result does not include the filter ID itself, only the options.
1370[clinic start generated code]*/
1371
1372static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001373_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
Larry Hastings89964c42015-04-14 18:07:59 -04001374 Py_buffer *encoded_props)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001375/*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001376{
1377 lzma_filter filter;
1378 lzma_ret lzret;
1379 PyObject *result = NULL;
1380 filter.id = filter_id;
1381
1382 lzret = lzma_properties_decode(
1383 &filter, NULL, encoded_props->buf, encoded_props->len);
1384 if (catch_lzma_error(lzret))
1385 return NULL;
1386
1387 result = build_filter_spec(&filter);
1388
1389 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1390 allocated by lzma_properties_decode() using the default allocator. */
1391 free(filter.options);
1392 return result;
1393}
1394
1395
1396/* Module initialization. */
1397
1398static PyMethodDef module_methods[] = {
1399 _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1400 _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1401 _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1402 {NULL}
1403};
1404
1405static PyModuleDef _lzmamodule = {
1406 PyModuleDef_HEAD_INIT,
1407 "_lzma",
1408 NULL,
1409 -1,
1410 module_methods,
1411 NULL,
1412 NULL,
1413 NULL,
1414 NULL,
1415};
1416
1417/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1418 would not work correctly on platforms with 32-bit longs. */
1419static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -07001420module_add_int_constant(PyObject *m, const char *name, long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -08001421{
1422 PyObject *o = PyLong_FromLongLong(value);
1423 if (o == NULL)
1424 return -1;
1425 if (PyModule_AddObject(m, name, o) == 0)
1426 return 0;
1427 Py_DECREF(o);
1428 return -1;
1429}
1430
/* Add the constant LZMA_<macro> to module `m` under the name "<macro>". */
#define ADD_INT_PREFIX_MACRO(m, macro) \
    (module_add_int_constant((m), #macro, LZMA_ ## macro))
1433
1434PyMODINIT_FUNC
1435PyInit__lzma(void)
1436{
1437 PyObject *m;
1438
1439 empty_tuple = PyTuple_New(0);
1440 if (empty_tuple == NULL)
1441 return NULL;
1442
1443 m = PyModule_Create(&_lzmamodule);
1444 if (m == NULL)
1445 return NULL;
1446
1447 if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
1448 PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
1449 PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
1450 PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
1451 ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
1452 ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
1453 ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
1454 ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
1455 ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
1456 ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
1457 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
1458 ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
1459 ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
1460 ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
1461 ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
1462 ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
1463 ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
1464 ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
1465 ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
1466 ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
1467 ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
1468 ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
1469 ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
1470 ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
1471 ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
1472 ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
1473 ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
1474 ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
1475 return NULL;
1476
1477 Error = PyErr_NewExceptionWithDoc(
1478 "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1479 if (Error == NULL)
1480 return NULL;
1481 Py_INCREF(Error);
1482 if (PyModule_AddObject(m, "LZMAError", Error) == -1)
1483 return NULL;
1484
Dong-hee Na05e4a292020-03-23 01:17:34 +09001485 if (PyModule_AddType(m, &Compressor_type) < 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001486 return NULL;
Dong-hee Na05e4a292020-03-23 01:17:34 +09001487 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001488
Dong-hee Na05e4a292020-03-23 01:17:34 +09001489 if (PyModule_AddType(m, &Decompressor_type) < 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001490 return NULL;
Dong-hee Na05e4a292020-03-23 01:17:34 +09001491 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001492
1493 return m;
1494}